mod cli;
mod commands;
mod community_detection;
mod conflict_matrix;
mod context_pack;
mod output;
mod rewrite;
mod search_budget;
mod semantic_edit;
mod session_review_budget;
mod token_savings;
mod workflow;
pub use rewrite::rewrite_command;
pub(crate) use rewrite::{apply_rewrite_output_format, execute_rewritten_command, no_rewrite_message};
pub(crate) use community_detection::{
CommunityDetectionReport, annotate_community_members_with_context,
community_tagpath_cache_part, community_tagpath_cache_part_for_loaded,
detect_communities_cached, file_communities_from_callers,
graph_effectiveness_blocked, graph_effectiveness_ready,
resolve_tagpath_handle_for_callee_edge, update_community_annotation_diagnostics,
};
#[allow(unused_imports)]
pub(crate) use conflict_matrix::{
ConflictMatrixCandidate, ConflictMatrixGraphPreparedInputs,
ConflictMatrixPreparedInputs, ConflictMatrixReport,
ConflictMatrixSemanticRef, ConflictMatrixSharedPreparationSummary,
ConflictMatrixWorkerFeedback, ConflictMatrixWorkerPromptPacket,
build_conflict_matrix_report, build_conflict_matrix_report_from_prepared_graph,
cmd_conflict_matrix, collect_conflict_matrix_evidence_packets,
conflict_matrix_candidate_from_evidence, conflict_matrix_graph_index,
conflict_matrix_semantic_ref, conflict_matrix_shared_preparation_summary,
conflict_matrix_source_handle, conflict_matrix_target_scoped_graph_snapshot,
conflict_matrix_worker_feedback,
conflict_risk_label, extract_conflict_target_refs, hash_bytes_hex,
is_planner_config_path, normalize_conflict_target,
prepare_conflict_matrix_graph_orchestration,
prepare_conflict_matrix_inputs, resolve_conflict_matrix_targets,
sorted_intersection, sorted_set,
};
#[allow(unused_imports)]
pub(crate) use context_pack::{
ContextPackReport, ContextPackSummaryRefPreview,
build_context_pack_diff_preview, build_context_pack_log_preview,
build_context_pack_report, build_context_pack_report_with_profile,
build_context_pack_test_preview, context_pack_status_reminders,
exploration_ref_id, materialize_context_pack_exploration_packet,
print_context_pack_human,
};
pub(crate) use search_budget::{
SearchBudgetReportInput,
apply_search_facet_filters, build_search_budget_follow_up, build_search_budget_report,
print_search_budget_human,
};
#[allow(unused_imports)]
pub(crate) use session_review_budget::{
SessionReviewBudgetFailurePreview, SessionReviewBudgetReport,
SessionReviewNextContextBudgetReport, SessionReviewNextTokenAction,
build_session_review_budget_report, build_session_review_next_context_budget_report,
print_session_review_budget_human, print_session_review_next_context_budget_human,
};
#[cfg(test)]
use search_budget::{SearchBudgetReport, search_facet_filters_summary};
pub(crate) use semantic_edit::{
AstSpanPreview, EditBatch, EditResult, EditStatus,
MarkdownEmbeddedSymbol, MarkdownSpanMetadata, MetricDigestOptions,
SemanticEditVerifyOptions, apply_edit_plan_atomically, build_edit_plan, cmd_edit_intents,
};
#[cfg(test)]
use rewrite::{apply_output_cap, effective_rewrite_run_command, resolve_digest_context_path, rewrite_output_cap, OutputCap};
#[cfg(test)]
use std::io::{BufRead as _, BufReader};
#[cfg(test)]
use token_savings::{
TokenSavingsFamily, TokenSavingsFixture, TokenSavingsFixtureCase,
TokenSavingsMarkdownProjectionInput, TokenSavingsMarkdownProjectionInputs,
TokenSavingsRawSymbol, TokenSavingsSourceReadInput, TokenSavingsSourceReadInputs,
build_token_savings_report,
};
use anyhow::{Context, Result, bail};
use clap::Parser;
use cli::{Cli, Commands, DispatchTraceFormat, GraphDbQuery, SemanticRelatedKind};
#[cfg(test)]
use cli::{GraphDbBackend, TraverseFormat};
use commands::digests::{
cmd_context_pack, cmd_diff_digest, cmd_log_digest, cmd_metric_digest, cmd_session_cost,
cmd_session_digest, cmd_session_review_with_budget, cmd_test_digest,
};
#[cfg(test)]
use commands::graph::cmd_explain;
use commands::graph::{
cmd_analyze, cmd_communities, cmd_explain_with_budget, cmd_graph, cmd_path, cmd_traverse,
};
#[cfg(test)]
use commands::index_search::cmd_search;
use commands::index_search::{cmd_index, cmd_search_with_budget, cmd_search_worker};
use commands::infra::{
StatusCommandOptions, cmd_convex_sync, cmd_edit, cmd_graph_db, cmd_init, cmd_locks,
cmd_rewrite, cmd_route, cmd_sql, cmd_status,
};
use commands::memory::cmd_memory;
use commands::quality::{cmd_audit, cmd_audit_tagpath, cmd_lint};
use commands::summarize::cmd_summarize;
use flate2::{Compression, read::GzDecoder, write::GzEncoder};
use output::tagpath::{
TagpathAnnotationDiagnostic, TagpathSearchOpts,
annotate_communities_with_tagpath, annotate_hits_with_tagpath,
annotate_path_nodes_with_tagpath, annotate_stored_edges_with_tagpath,
annotate_stored_symbols_with_tagpath,
};
#[cfg(test)]
use output::ResponseBudgetPreset;
use output::{
OutputFormat, ResponseBudget, ToolEnvelope, ToolEnvelopeMetric,
ToolEnvelopeSummary, TranscriptArtifactRef,
};
use rusqlite::{Connection, OptionalExtension};
use serde::{Deserialize, Serialize};
use sift::{SearchInput, SearchOptions, Sift};
#[cfg(test)]
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
use std::env;
use std::fs;
use std::io::{Read as _, Write as _};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::{Mutex, OnceLock};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use substrate::{
ConvexEdgeRow, ConvexNodeRow, ConvexProjectionRows, GraphEdge as SubstrateGraphEdge,
GraphFreshness, GraphNode as SubstrateGraphNode, GraphProjection, GraphPropertyFilter,
GraphProvenance, GraphQueryOptions, GraphQueryPage, GraphStore, SQLITE_GRAPH_SCHEMA_VERSION,
SqliteGraphStore, SqliteProjectionRefresh,
TerseGraphNode as SubstrateTerseGraphNode, TerseGraphEdge as SubstrateTerseGraphEdge,
};
use tsift_core::{NeighborhoodScoring, RankedNeighborhoodOptions};
use tagpath::{family as tagpath_family, ontology as tagpath_ontology};
#[cfg(test)]
use tsift_agent_doc::session_cost;
#[cfg(test)]
use tsift_agent_doc::session_review;
use tsift_digest::{diff_digest, log_digest, metric_digest, test_digest};
use tsift_graph as graph;
use tsift_index::{config, index, init, multiplicity, walk};
use tsift_memory::{MemoryEvent, default_memory_db_path, read_memory_events};
use tsift_quality::{cycle_packet_cache, dci_benchmark, lint, perf_gate, token_gate};
use tsift_resolution as resolution;
use tsift_search::{impact, sift};
use tsift_sqlite as substrate;
use tsift_status::status;
use tsift_summarize::summarize;
#[cfg(feature = "backend-surrealdb")]
use tsift_surrealdb::SurrealdbGraphStore;
use tsift_tokensave::TokensaveDb;
/// Candidate graph-database backends that can be named for backend evaluation.
///
/// Every variant currently reports a "read-only prototype" adapter label and a
/// `hold_native_adapter_required` promotion gate (see the `impl` block).
///
/// Note: `Ord`/`PartialOrd` are derived, so the declaration order of the
/// variants below defines their sort order.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize)]
pub(crate) enum GraphDbExperimentalBackend {
/// DuckDB with the DuckPGQ extension (`"duckdb-duckpgq"`).
DuckdbDuckpgq,
/// FalkorDB (`"falkordb"`).
Falkordb,
/// Ladybug (`"ladybug"`).
Ladybug,
/// Kuzu, labelled as the Vela-Engineering/kuzu fork (`"kuzu"`).
Kuzu,
/// SurrealDB (`"surrealdb"`).
Surrealdb,
}
/// Facet filters attached to a search request.
///
/// Each field is a list of accepted values for one facet; `run` fills them
/// from the corresponding arguments of the `Search` subcommand. Empty lists
/// are omitted from the serialized output (`skip_serializing_if`).
///
/// NOTE(review): the `default` serde attribute only matters for
/// deserialization, and this struct derives only `Serialize` here — confirm
/// whether it is still needed.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
pub(crate) struct SearchFacetFilters {
/// Accepted languages (from the `lang` argument).
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) languages: Vec<String>,
/// Accepted kinds (from the `kind` argument).
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) kinds: Vec<String>,
/// Accepted node kinds (from the `node_kind` argument).
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) node_kinds: Vec<String>,
/// Accepted sections (from the `section` argument); requires AST context.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) sections: Vec<String>,
/// Accepted parents (from the `parent` argument); requires AST context.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) parents: Vec<String>,
/// Accepted children (from the `child` argument); requires AST context.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) children: Vec<String>,
/// Accepted fence languages (from the `fence_language` argument); requires AST context.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) fence_languages: Vec<String>,
/// Accepted list depths (from the `list_depth` argument); requires AST context.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) list_depths: Vec<usize>,
/// Accepted heading levels (from the `heading_level` argument); requires AST context.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub(crate) heading_levels: Vec<usize>,
}
impl SearchFacetFilters {
    /// Returns `true` when every facet list is empty, i.e. no filter was requested.
    pub(crate) fn is_empty(&self) -> bool {
        // The three facets below are the ones `needs_ast_context` does not look at.
        let plain_facets_empty = self.languages.is_empty()
            && self.kinds.is_empty()
            && self.node_kinds.is_empty();
        plain_facets_empty && !self.needs_ast_context()
    }

    /// Returns `true` when at least one of the section, parent, child,
    /// fence-language, list-depth, or heading-level facets is populated.
    fn needs_ast_context(&self) -> bool {
        let ast_facets_empty = self.sections.is_empty()
            && self.parents.is_empty()
            && self.children.is_empty()
            && self.fence_languages.is_empty()
            && self.list_depths.is_empty()
            && self.heading_levels.is_empty();
        !ast_facets_empty
    }
}
/// Serializable description of what an experimental backend must satisfy
/// before it can be promoted; built by `GraphDbExperimentalBackend::promotion_gate`.
#[derive(Serialize)]
struct GraphDbBackendPromotionGate {
/// Gate status label (currently always `"hold_native_adapter_required"`).
status: String,
/// Whether a native adapter is required before promotion.
native_adapter_required: bool,
/// Machine-readable identifiers of the checks that must pass.
required_checks: Vec<String>,
}
impl GraphDbExperimentalBackend {
    /// Canonical kebab-case identifier of the backend; each value is also
    /// accepted by [`Self::parse`].
    fn name(self) -> &'static str {
        match self {
            Self::DuckdbDuckpgq => "duckdb-duckpgq",
            Self::Falkordb => "falkordb",
            Self::Ladybug => "ladybug",
            Self::Kuzu => "kuzu",
            Self::Surrealdb => "surrealdb",
        }
    }

    /// Human-readable label of the prototype adapter for this backend.
    fn adapter_label(self) -> &'static str {
        match self {
            Self::DuckdbDuckpgq => "DuckDB/DuckPGQ read-only prototype",
            Self::Falkordb => "FalkorDB read-only prototype",
            Self::Ladybug => "Ladybug read-only prototype",
            Self::Kuzu => "Kuzu (Vela-Engineering/kuzu) read-only prototype",
            Self::Surrealdb => "SurrealDB read-only prototype",
        }
    }

    /// Describes how projection rows are loaded for this backend.
    ///
    /// Variants are listed explicitly (no `_` arm) so that adding a backend
    /// forces a deliberate choice of wording instead of silently inheriting
    /// the generic text.
    fn projection_load(self) -> &'static str {
        match self {
            Self::Falkordb => {
                "provider-neutral rows loaded into a FalkorDB-shaped read snapshot for parity and timing only; production FalkorDB storage remains behind backend-eval until a real adapter passes the full-projection gate"
            }
            Self::Kuzu => {
                "provider-neutral rows loaded into a Kuzu-compatible in-process read snapshot for parity and performance gates; production Vela-Engineering/kuzu storage remains behind a future optional adapter"
            }
            Self::Surrealdb => {
                "provider-neutral rows loaded into a SurrealDB-compatible read snapshot for parity and timing only; production SurrealDB storage remains behind backend-eval until a real optional adapter passes the full-projection gate"
            }
            Self::DuckdbDuckpgq | Self::Ladybug => {
                "provider-neutral rows loaded into a dependency-free in-process read snapshot for parity and performance gates"
            }
        }
    }

    /// Describes the locking behavior of the prototype for this backend.
    fn lock_behavior(self) -> &'static str {
        match self {
            Self::Falkordb => {
                "read-only FalkorDB prototype snapshot; production promotion must prove multi-process writer behavior and local fallback semantics before replacing SQLite"
            }
            Self::Kuzu => {
                "read-only Kuzu prototype snapshot; no SQLite writer lock is taken during benchmarks, and production Vela-Engineering/kuzu promotion must prove concurrent writer semantics before replacing SQLite"
            }
            Self::Surrealdb => {
                "read-only SurrealDB prototype snapshot; production promotion must prove embedded/file-backed writer and read-only lock behavior before replacing SQLite"
            }
            Self::DuckdbDuckpgq | Self::Ladybug => {
                "read-only snapshot/row adapter; no writer lock is taken during query benchmarks"
            }
        }
    }

    /// Describes the install/portability constraints for this backend.
    fn install_portability(self) -> &'static str {
        match self {
            Self::Falkordb => {
                "prototype is dependency-free in this binary; production FalkorDB promotion must keep install optional and preserve cargo build/install without a service"
            }
            Self::Kuzu => {
                "prototype is dependency-free in this binary; production Vela-Engineering/kuzu integration must stay optional so cargo build/install works without a native Kuzu toolchain"
            }
            Self::Surrealdb => {
                "prototype is dependency-free in this binary; production SurrealDB integration must stay optional so cargo build/install works without pulling SurrealDB into the default build"
            }
            Self::DuckdbDuckpgq | Self::Ladybug => {
                "prototype is dependency-free in this binary; a production engine adapter must remain optional before promotion"
            }
        }
    }

    /// Explains why the backend is still held behind backend-eval.
    ///
    /// Every current variant has a reason, so this always returns `Some`; the
    /// `Option` return type is kept for callers.
    fn prototype_hold_reason(self) -> Option<&'static str> {
        let reason = match self {
            Self::DuckdbDuckpgq => {
                "DuckDB/DuckPGQ remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior"
            }
            Self::Falkordb => {
                "FalkorDB remains behind backend-eval until a production adapter beats SQLite on full_projection conflict-matrix, evidence, dispatch-trace, path tiers, install portability, and lock behavior"
            }
            Self::Ladybug => {
                "Ladybug remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior"
            }
            Self::Kuzu => {
                "Kuzu remains behind backend-eval until a native optional adapter proves projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior"
            }
            Self::Surrealdb => {
                "SurrealDB remains behind backend-eval until a feature-gated optional adapter proves provider-neutral projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior"
            }
        };
        Some(reason)
    }

    /// Builds the promotion gate for this backend.
    ///
    /// All backends share the same status, the same `native_adapter_required`
    /// flag, and the same freshness/parity check; only the projection-load,
    /// lock-behavior, and install-cost check identifiers differ, so only
    /// those are matched per variant. The order of `required_checks` is
    /// projection, freshness/parity, lock, install.
    fn promotion_gate(self) -> GraphDbBackendPromotionGate {
        let (projection_check, lock_check, install_check) = match self {
            Self::DuckdbDuckpgq => (
                "native_duckdb_duckpgq_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "embedded_or_service_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_duckdb_extension_free_by_default",
            ),
            Self::Falkordb => (
                "native_falkordb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "multi_process_writer_and_read_only_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_service_free_by_default",
            ),
            Self::Ladybug => (
                "native_ladybug_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_ladybug_free_by_default",
            ),
            Self::Kuzu => (
                "native_kuzu_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_native_kuzu_free_by_default",
            ),
            Self::Surrealdb => (
                "native_surrealdb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "embedded_file_backed_writer_and_read_only_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_surrealdb_free_by_default",
            ),
        };
        GraphDbBackendPromotionGate {
            status: "hold_native_adapter_required".to_string(),
            native_adapter_required: true,
            required_checks: vec![
                projection_check.to_string(),
                "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
                    .to_string(),
                lock_check.to_string(),
                install_check.to_string(),
            ],
        }
    }

    /// Parses a backend-eval candidate name (or one of its aliases).
    ///
    /// Matching is exact and case-sensitive.
    ///
    /// # Errors
    ///
    /// Returns an error naming the expected candidates when `raw` is not a
    /// recognized name or alias.
    fn parse(raw: &str) -> Result<Self> {
        match raw {
            "duckdb-duckpgq" | "duckdb" | "duckpgq" => Ok(Self::DuckdbDuckpgq),
            "falkordb" | "falkor" => Ok(Self::Falkordb),
            "ladybug" => Ok(Self::Ladybug),
            "kuzu" | "vela-kuzu" => Ok(Self::Kuzu),
            "surrealdb" | "surreal" | "surreal-db" => Ok(Self::Surrealdb),
            _ => bail!(
                "unknown backend-eval candidate {raw:?}; expected duckdb-duckpgq, falkordb, ladybug, kuzu, or surrealdb"
            ),
        }
    }
}
/// Entry point of the CLI: parses the process arguments with `Cli::parse()`
/// and dispatches to the handler of the selected subcommand.
///
/// The global formatting flags (`compact`, `pretty`, `terse`, `ultra_terse`,
/// `absolute`, `tabular`, `schema`, `envelope`) are read once up front and
/// forwarded to each handler, either as positional arguments or bundled in an
/// `OutputFormat`. For most subcommands JSON output is enabled when the
/// subcommand's own `json` flag is set or when any of `terse`, `schema`, or
/// `envelope` is set.
///
/// Returns whatever the selected handler returns. When no subcommand is given
/// a short version banner is printed and `Ok(())` is returned.
pub fn run() -> Result<()> {
let cli = Cli::parse();
let compact = cli.compact;
let pretty = cli.pretty;
// `ultra_terse` implies `terse`.
let terse = cli.terse || cli.ultra_terse;
let ultra_terse = cli.ultra_terse;
let absolute = cli.absolute;
let tabular = cli.tabular;
let schema = cli.schema;
let envelope = cli.envelope;
match cli.command {
Some(Commands::Search {
query,
path,
limit,
strategy,
exact,
scope,
federated,
lang,
kind,
node_kind,
section,
parent,
child,
fence_language,
list_depth,
heading_level,
json,
autoindex,
no_autoindex,
timeout,
max_items,
max_bytes,
budget,
no_tagpath,
tagpath_strict,
}) => cmd_search_with_budget(
query,
path,
limit,
// The `exact` flag takes precedence over any explicitly chosen strategy.
if exact {
Some("exact".to_string())
} else {
strategy
},
scope,
federated,
// terse / schema / envelope each force JSON output.
json || terse || schema || envelope,
// Auto-indexing is on unless `no_autoindex` is set without `autoindex`.
autoindex || !no_autoindex,
timeout,
compact,
pretty,
terse,
ultra_terse,
absolute,
tabular,
schema,
envelope,
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
SearchFacetFilters {
languages: lang,
kinds: kind,
node_kinds: node_kind,
sections: section,
parents: parent,
children: child,
fence_languages: fence_language,
list_depths: list_depth,
heading_levels: heading_level,
},
),
Some(Commands::SearchWorker {
path,
cache_dir,
query,
limit,
strategy,
output,
}) => cmd_search_worker(&path, &cache_dir, &query, limit, &strategy, &output),
Some(Commands::DigestRunner {
kind,
path,
runner,
shell_command,
json,
}) => cmd_digest_runner(
&kind,
&path,
runner.as_deref(),
&shell_command,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::Edit { dry_run, file }) => {
cmd_edit(dry_run, file, compact, pretty, terse, schema)
}
Some(Commands::EditIntents {
path,
scope,
file,
json,
apply,
verify,
verify_command,
max_items,
max_bytes,
budget,
}) => cmd_edit_intents(
&path,
scope.as_deref(),
file,
apply,
SemanticEditVerifyOptions {
enabled: verify,
command: verify_command.as_deref(),
},
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
),
Some(Commands::Index {
path,
rebuild,
check,
exit_code,
prune,
quiet,
workspace,
submodule,
json,
}) => cmd_index(
&path,
rebuild,
check,
exit_code,
prune,
quiet,
workspace,
submodule.as_deref(),
json || terse || schema || envelope,
compact,
pretty,
terse,
absolute,
schema,
),
Some(Commands::Rewrite { command, run }) => cmd_rewrite(
&command,
run,
OutputFormat {
// No per-command `json` field is destructured for `Rewrite`, so JSON
// output here depends only on the global flags.
json_output: terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::Route { task, id }) => cmd_route(&task, id),
Some(Commands::Memory { command }) => {
// The JSON flag lives on the nested memory subcommand.
let json = command.json_output();
cmd_memory(
command,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
)
}
Some(Commands::Finding { command }) => match command {
cli::FindingCommand::Add {
path,
kind,
title,
body,
about,
confidence,
status,
relates,
scope,
json,
} => commands::finding::cmd_finding_add(
&path,
&kind,
&title,
&body,
&about,
confidence,
&status,
relates.as_deref(),
scope.as_deref(),
json || terse || schema || envelope,
pretty,
),
cli::FindingCommand::List {
path,
about,
kind,
status,
include_stale,
scope,
json,
} => commands::finding::cmd_finding_list(
&path,
about.as_deref(),
kind.as_deref(),
status.as_deref(),
include_stale,
scope.as_deref(),
json || terse || schema || envelope,
pretty,
),
cli::FindingCommand::Harvest { path, scope, json } => {
commands::finding::cmd_finding_harvest(
&path,
scope.as_deref(),
json || terse || schema || envelope,
pretty,
)
}
cli::FindingCommand::Promote { id, path, json } => {
commands::finding::cmd_finding_promote(
&path,
&id,
json || terse || schema || envelope,
pretty,
)
}
},
Some(Commands::Graph {
symbol,
path,
callers,
callees,
scope,
limit,
json,
no_tagpath,
tagpath_strict,
}) => cmd_graph(
&symbol,
&path,
callers,
callees,
scope.as_deref(),
limit,
json || terse || schema || envelope,
compact,
pretty,
terse,
absolute,
tabular,
schema,
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::Sql {
db,
query,
table,
json,
}) => cmd_sql(
&db,
query,
table,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::Communities {
path,
scope,
min_size,
limit,
json,
no_tagpath,
tagpath_strict,
}) => cmd_communities(
&path,
scope.as_deref(),
min_size,
limit,
json || terse || schema || envelope,
compact,
pretty,
terse,
tabular,
schema,
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::Analyze {
path,
scope,
entry_points,
limit,
json,
}) => cmd_analyze(
&path,
scope.as_deref(),
&entry_points,
limit,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::Path {
from,
to,
path,
scope,
json,
no_tagpath,
tagpath_strict,
}) => cmd_path(
&from,
&to,
&path,
scope.as_deref(),
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::Explain {
symbol,
path,
scope,
limit,
json,
max_items,
max_bytes,
budget,
no_tagpath,
tagpath_strict,
}) => cmd_explain_with_budget(
&symbol,
&path,
scope.as_deref(),
limit,
json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
absolute,
tabular,
schema,
envelope,
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::Traverse {
node,
to,
path,
scope,
depth,
limit,
format,
convex_snapshot,
}) => cmd_traverse(
node.as_deref(),
to.as_deref(),
&path,
scope.as_deref(),
depth,
limit,
format,
pretty,
terse,
schema,
convex_snapshot.as_deref(),
),
Some(Commands::ConvexSync {
path,
scope,
snapshot,
chunk_size,
remote_snapshot,
apply,
endpoint,
auth_token_env,
json,
}) => cmd_convex_sync(
ConvexSyncOptions {
path: &path,
scope: scope.as_deref(),
snapshot: snapshot.as_deref(),
chunk_size,
remote_snapshot,
apply,
endpoint: endpoint.as_deref(),
auth_token_env: &auth_token_env,
},
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::GraphDb {
path,
scope,
backend,
convex_snapshot,
json,
query,
}) => cmd_graph_db(
&path,
scope.as_deref(),
backend,
convex_snapshot.as_deref(),
query,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::SourceRead {
file,
path,
start,
lines,
end,
scope,
json,
max_items,
max_bytes,
budget,
}) => cmd_source_read(
&file,
&path,
start,
lines,
end,
scope.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
absolute,
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
),
Some(Commands::MarkdownAst {
file,
path,
node,
json,
max_items,
max_bytes,
budget,
}) => cmd_markdown_ast(
&file,
&path,
node.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
absolute,
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
),
Some(Commands::SymbolRead {
symbol,
file,
path,
scope,
json,
max_items,
max_bytes,
budget,
}) => cmd_symbol_read(
&symbol,
file.as_deref(),
&path,
scope.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
absolute,
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
),
Some(Commands::Audit {
skills_dir,
manifest,
usage,
cleanup,
report,
json,
}) => cmd_audit(
&skills_dir,
manifest,
usage,
cleanup,
report,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::AuditTagpath { path, scope, json }) => cmd_audit_tagpath(
&path,
scope.as_deref(),
json || terse || schema || envelope,
pretty,
terse,
schema,
),
Some(Commands::Init {
path,
codex,
opencode,
workspace,
}) => cmd_init(&path, codex, opencode, workspace),
Some(Commands::Lint {
file,
index,
entities_from,
json,
}) => cmd_lint(
&file,
index,
entities_from,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::Summarize {
symbol,
file,
extract,
diff,
stats,
path,
json,
}) => cmd_summarize(
symbol,
file,
extract,
diff,
stats,
&path,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::Semantic {
query,
path,
scope,
limit,
kind,
json,
}) => cmd_semantic_related(
&query,
&path,
scope.as_deref(),
limit,
kind,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::DiffDigest {
path,
cached,
revision,
max_parsed_files,
json,
}) => cmd_diff_digest(
&path,
cached,
revision.as_deref(),
max_parsed_files,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::Impact {
path,
cached,
revision,
scope,
limit,
json,
}) => cmd_impact(
&path,
cached,
revision.as_deref(),
scope.as_deref(),
limit,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::TestDigest {
path,
input,
runner,
json,
}) => cmd_test_digest(
&path,
input.as_deref(),
runner.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::LogDigest { path, input, json }) => cmd_log_digest(
&path,
input.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::ContextPack {
path,
test_input,
runner,
log_input,
json,
max_items,
max_bytes,
budget,
convex_snapshot,
}) => cmd_context_pack(
&path,
test_input.as_deref(),
runner.as_deref(),
log_input.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
convex_snapshot.as_deref(),
),
Some(Commands::ConflictMatrix {
targets,
path,
scope,
depth,
limit,
impact_limit,
json,
}) => cmd_conflict_matrix(
&path,
scope.as_deref(),
&targets,
depth,
limit,
impact_limit,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::DispatchTrace {
targets,
path,
scope,
depth,
limit,
impact_limit,
format,
json,
}) => cmd_dispatch_trace(
DispatchTraceOptions {
path: &path,
scope: scope.as_deref(),
raw_targets: &targets,
depth,
limit,
impact_limit,
// The `json` flag overrides whatever trace format was selected.
trace_format: if json {
DispatchTraceFormat::Json
} else {
format
},
},
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::DependencyDag {
targets,
path,
scope,
depth,
limit,
json,
}) => cmd_dependency_dag(
&path,
scope.as_deref(),
&targets,
depth,
limit,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::TokenSavings {
fixture,
fail_under,
json,
}) => token_savings::cmd_token_savings(
&fixture,
fail_under,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::MetricDigest {
input,
baseline,
metrics,
lower_is_better,
higher_is_better,
history,
top,
json,
}) => cmd_metric_digest(
MetricDigestOptions {
input_path: input.as_deref(),
baseline_path: baseline.as_deref(),
metrics: &metrics,
lower_is_better: &lower_is_better,
higher_is_better: &higher_is_better,
history,
top,
},
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::DciBenchmark { fixture, json }) => cmd_dci_benchmark(
&fixture,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::TokenGate { command }) => {
// Token-gate output is always requested as JSON, regardless of flags.
cmd_token_gate(command, OutputFormat {
json_output: true,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
})?;
Ok(())
},
Some(Commands::Workflow { topic, json }) => workflow::cmd_workflow(
&topic,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::SessionDigest {
path,
input,
source,
json,
}) => cmd_session_digest(
&path,
input.as_deref(),
source.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::SessionCost {
input,
source,
json,
}) => cmd_session_cost(
input.as_deref(),
source.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
),
Some(Commands::SessionReview {
path,
next_context,
json,
max_items,
max_bytes,
budget,
}) => cmd_session_review_with_budget(
&path,
next_context,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
ultra_terse,
schema,
envelope,
},
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
),
Some(Commands::Status {
path,
fix,
no_fix,
json,
}) => cmd_status(
&path,
StatusCommandOptions {
fix,
no_fix,
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
},
),
Some(Commands::Locks { path, scope, json }) => cmd_locks(
&path,
scope.as_deref(),
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
// No subcommand given: print a version banner and a usage hint.
None => {
println!("tsift v{}", env!("CARGO_PKG_VERSION"));
println!("Run `tsift --help` for usage.");
Ok(())
}
}
}
/// Map a free-form task description onto a model tier.
///
/// The description is lowercased and scanned for keyword substrings. The
/// architecture/design keywords are checked first and win over the
/// edit/write keywords; if neither group matches, the lookup tier is used.
///
/// Returns `(tier_name, model_id)`.
pub fn classify_task(task: &str) -> (&'static str, &'static str) {
    // Architecture/design signals → opus
    const OPUS_SIGNALS: &[&str] = &[
        "architect",
        "architecture",
        "design",
        "plan",
        "strateg",
        "analy",
        "review",
        "evaluate",
        "assess",
    ];
    // Edit/write signals → sonnet ("add " keeps its trailing space)
    const SONNET_SIGNALS: &[&str] = &[
        "edit",
        "write",
        "fix",
        "change",
        "update",
        "create",
        "add ",
        "remove",
        "delete",
        "modify",
        "refactor",
        "implement",
        "build",
    ];

    let normalized = task.to_lowercase();
    let mentions_any =
        |signals: &[&str]| signals.iter().any(|signal| normalized.contains(*signal));

    if mentions_any(OPUS_SIGNALS) {
        ("opus", "claude-opus-4-6")
    } else if mentions_any(SONNET_SIGNALS) {
        ("sonnet", "claude-sonnet-4-6")
    } else {
        // Default: search/lookup → haiku
        ("haiku", "claude-haiku-4-5-20251001")
    }
}
/// Test-only shorthand for [`to_json_schema`] with `ultra_terse` and `schema`
/// both disabled.
#[cfg(test)]
fn to_json<T: serde::Serialize>(val: &T, pretty: bool, terse: bool) -> anyhow::Result<String> {
to_json_schema(val, pretty, terse, false, false)
}
/// Mark a JSON response as having a stale tagpath index.
///
/// When `stale` is `true` and `value` is a JSON object, inserts a top-level
/// `tagpath_index_stale: true` field and, if `reason` is provided, a
/// `tagpath_stale_reason: <reason>` field. This lets JSON consumers
/// (`tsift --envelope` / `--json` callers) react to the same condition the
/// stderr `tagpath_index_stale: …` log surfaces without parsing logs.
///
/// Does nothing when `stale` is `false` or when `value` is not a JSON object.
pub(crate) fn inject_tagpath_stale_into_json(
    value: &mut serde_json::Value,
    stale: bool,
    reason: Option<&str>,
) {
    if !stale {
        return;
    }
    let Some(fields) = value.as_object_mut() else {
        return;
    };
    fields.insert(
        "tagpath_index_stale".to_owned(),
        serde_json::Value::Bool(true),
    );
    if let Some(text) = reason {
        fields.insert(
            "tagpath_stale_reason".to_owned(),
            serde_json::Value::String(text.to_owned()),
        );
    }
}
/// Serialize `val` to a JSON string, optionally applying the terse,
/// ultra-terse, and schema transforms.
///
/// * With neither `terse` nor `schema` set, `val` is serialized directly
///   (pretty-printed when `pretty` is set).
/// * Otherwise `val` is first converted to a `serde_json::Value`, then passed
///   through `terse_transform` (if `terse`), `ultra_terse_transform` and
///   `edge_index_transform` (if `ultra_terse`), and `schema_transform`
///   (if `schema`), in that order.
/// * When `terse` is set the result is wrapped as `{"_s": <schema>, "d": <data>}`
///   where the schema comes from `terse_schema_for`.
///
/// Note that `ultra_terse` has no effect unless `terse` or `schema` is set.
///
/// # Errors
///
/// Propagates any `serde_json` serialization error.
pub(crate) fn to_json_schema<T: serde::Serialize>(
    val: &T,
    pretty: bool,
    terse: bool,
    ultra_terse: bool,
    schema: bool,
) -> anyhow::Result<String> {
    // Plain path: no intermediate `Value`, serialize the input as-is.
    if !(terse || schema) {
        let plain = if pretty {
            serde_json::to_string_pretty(val)?
        } else {
            serde_json::to_string(val)?
        };
        return Ok(plain);
    }

    let mut transformed = serde_json::to_value(val)?;
    if terse {
        transformed = terse_transform(transformed);
    }
    if ultra_terse {
        transformed = ultra_terse_transform(transformed);
        transformed = edge_index_transform(transformed);
    }
    if schema {
        transformed = schema_transform(transformed);
    }

    // Terse output carries its key schema alongside the data.
    let payload = if terse {
        let terse_schema = terse_schema_for(&transformed);
        serde_json::json!({"_s": terse_schema, "d": transformed})
    } else {
        transformed
    };

    let rendered = if pretty {
        serde_json::to_string_pretty(&payload)?
    } else {
        serde_json::to_string(&payload)?
    };
    Ok(rendered)
}
/// Build a `ToolEnvelopeMetric` from a label and any value that can be
/// rendered with `ToString`.
pub(crate) fn envelope_metric(label: &str, value: impl ToString) -> ToolEnvelopeMetric {
    let label = label.to_owned();
    let value = value.to_string();
    ToolEnvelopeMetric { label, value }
}
/// Remove duplicate strings while keeping the first occurrence of each value
/// in its original position.
pub(crate) fn dedupe_preserve_order(values: Vec<String>) -> Vec<String> {
    let mut seen = HashSet::new();
    values
        .into_iter()
        // `insert` returns `true` only the first time a value is seen.
        .filter(|candidate| seen.insert(candidate.clone()))
        .collect()
}
/// Prints `report` to stdout as JSON, optionally wrapped in a `ToolEnvelope`.
///
/// When `format.envelope` is set, the report is wrapped together with `tool`,
/// `view`, `summary`, `truncated`, and the de-duplicated `follow_up` list;
/// otherwise the bare report is printed. In both cases the output is shaped by
/// the `pretty`, `terse`, `ultra_terse`, and `schema` flags of `format`.
///
/// # Errors
/// Returns the serialization error from `to_json_schema`; nothing is printed
/// in that case.
pub(crate) fn print_json_or_envelope<T: Serialize>(
    report: &T,
    format: &OutputFormat,
    tool: &str,
    view: &str,
    summary: ToolEnvelopeSummary,
    truncated: bool,
    follow_up: Vec<String>,
) -> Result<()> {
    let rendered = if format.envelope {
        let envelope = ToolEnvelope {
            tool,
            view,
            summary,
            truncated,
            follow_up: dedupe_preserve_order(follow_up),
            report,
        };
        to_json_schema(
            &envelope,
            format.pretty,
            format.terse,
            format.ultra_terse,
            format.schema,
        )?
    } else {
        to_json_schema(
            report,
            format.pretty,
            format.terse,
            format.ultra_terse,
            format.schema,
        )?
    };
    println!("{}", rendered);
    Ok(())
}
/// Estimates a token count from a byte length at four bytes per token,
/// rounding up.
pub(crate) fn estimated_tokens_from_bytes(bytes: usize) -> usize {
    let whole_tokens = bytes / 4;
    let has_partial_token = bytes % 4 != 0;
    whole_tokens + usize::from(has_partial_token)
}
/// Dispatches a `token-gate` subcommand to its implementation.
///
/// `Sample` forwards to `cmd_token_gate_sample`; `Evaluate` forwards to
/// `cmd_token_gate_evaluate` together with the output `format`. The `json`
/// field of either variant is ignored here.
fn cmd_token_gate(command: cli::TokenGateCommand, format: OutputFormat) -> Result<()> {
    match command {
        cli::TokenGateCommand::Evaluate {
            history,
            allowed_regression_percent,
            json: _,
        } => {
            let history_path = history.as_deref();
            cmd_token_gate_evaluate(history_path, allowed_regression_percent, &format)
        }
        cli::TokenGateCommand::Sample {
            surface,
            path,
            scope,
            target,
            depth,
            sample_index,
            json: _,
        } => {
            let scope_filter = scope.as_deref();
            let target_name = target.as_deref();
            cmd_token_gate_sample(
                &surface,
                &path,
                scope_filter,
                target_name,
                depth,
                sample_index,
            )
        }
    }
}
/// Runs one tsift surface in a child process and prints a token-gate baseline
/// sample for it as pretty-printed JSON on stdout.
///
/// The current executable is re-invoked with `--json` arguments chosen by
/// `surface` (with `TSIFT_QUIET=1` set), and the trimmed stdout of that run is
/// measured: its byte length, the token estimate derived from it, and the
/// wall-clock runtime in microseconds. `target` falls back to `"default"` for
/// surfaces that take a target; `scope` is only forwarded for
/// `conflict_matrix`.
///
/// # Errors
/// Fails when `surface` is not listed in `token_gate::TOKEN_GATE_SURFACES`,
/// when the current executable cannot be located, when the child process
/// cannot be spawned or waited on, or when the sample cannot be serialized.
fn cmd_token_gate_sample(
    surface: &str,
    path: &Path,
    scope: Option<&str>,
    target: Option<&str>,
    depth: usize,
    sample_index: usize,
) -> Result<()> {
    if !token_gate::TOKEN_GATE_SURFACES.contains(&surface) {
        bail!(
            "unknown surface `{}`; expected one of: {}",
            surface,
            token_gate::TOKEN_GATE_SURFACES.join(", ")
        );
    }
    let path_str = path.to_string_lossy().to_string();
    let tsift_bin = std::env::current_exe()?;
    let args: Vec<String> = match surface {
        "context_pack" => vec![
            "context-pack".to_string(),
            "--json".to_string(),
            path_str,
        ],
        "session_review_next_context" => vec![
            "session-review".to_string(),
            "--json".to_string(),
            "--next-context".to_string(),
            path_str,
        ],
        "graph_db_evidence" => {
            let tgt = target.unwrap_or("default").to_string();
            vec![
                "graph-db".to_string(),
                "--json".to_string(),
                "--path".to_string(),
                path_str,
                "evidence".to_string(),
                tgt,
                "--depth".to_string(),
                depth.to_string(),
            ]
        }
        "conflict_matrix" => {
            let tgt = target.unwrap_or("default").to_string();
            let mut a = vec![
                "conflict-matrix".to_string(),
                "--json".to_string(),
                "--path".to_string(),
                path_str,
                "--depth".to_string(),
                depth.to_string(),
            ];
            if let Some(s) = scope {
                a.push("--scope".to_string());
                a.push(s.to_string());
            }
            a.push(tgt);
            a
        }
        "dispatch_trace" => {
            let tgt = target.unwrap_or("default").to_string();
            vec![
                "dispatch-trace".to_string(),
                "--json".to_string(),
                "--path".to_string(),
                path_str,
                tgt,
            ]
        }
        // Reached only if TOKEN_GATE_SURFACES lists a surface this match does
        // not know how to invoke.
        _ => bail!("unhandled surface: {}", surface),
    };
    let start = Instant::now();
    let child = Command::new(&tsift_bin)
        .args(&args)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .env("TSIFT_QUIET", "1")
        .spawn();
    // NOTE(review): the child's exit status and stderr are not inspected, so a
    // failing surface still yields a sample — confirm this is intended.
    let output = match child {
        Ok(c) => c.wait_with_output()?,
        Err(e) => bail!("failed to spawn tsift for surface {}: {}", surface, e),
    };
    let runtime_micros = start.elapsed().as_micros() as f64;
    let stdout = String::from_utf8_lossy(&output.stdout);
    let envelope_bytes = stdout.trim().len() as f64;
    let prompt_tokens = estimated_tokens_from_bytes(stdout.trim().len()) as f64;
    // These three metrics are fixed placeholder values rather than measurements.
    let cache_hit_rate_percent = 0.0;
    let raw_read_avoidance = 0.0;
    let useful_hit_density = if prompt_tokens > 0.0 { 0.5 } else { 0.0 };
    let timestamp = iso_timestamp_now();
    // The first ten characters of the timestamp are its `YYYY-MM-DD` date part.
    let id = format!(
        "{surface}-baseline-{}-sample-{sample_index}",
        &timestamp[..10]
    );
    let label = format!(
        "token-gate baseline {surface} sample {sample_index} for {}",
        path.display()
    );
    let mut metrics = BTreeMap::new();
    metrics.insert("prompt_tokens".to_string(), prompt_tokens);
    metrics.insert("envelope_bytes".to_string(), envelope_bytes);
    metrics.insert("runtime_micros".to_string(), runtime_micros);
    metrics.insert("cache_hit_rate_percent".to_string(), cache_hit_rate_percent);
    metrics.insert("raw_read_avoidance".to_string(), raw_read_avoidance);
    metrics.insert("useful_hit_density".to_string(), useful_hit_density);
    let sample = token_gate::TokenGateSample {
        label,
        id,
        timestamp: Some(timestamp),
        surface: surface.to_string(),
        metrics,
    };
    println!("{}", serde_json::to_string_pretty(&sample)?);
    Ok(())
}
fn cmd_token_gate_evaluate(
history_path: Option<&Path>,
allowed_regression_percent: f64,
format: &OutputFormat,
) -> Result<()> {
let history_path = history_path
.map(PathBuf::from)
.unwrap_or_else(|| {
let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
p.push("../../fixtures/token-gate-history.json");
p
});
let raw = std::fs::read_to_string(&history_path)
.with_context(|| format!("failed to read token gate history: {}", history_path.display()))?;
let samples = token_gate::parse_token_history(&raw)?;
let report = token_gate::evaluate_token_gate(&samples, allowed_regression_percent);
if format.json_output {
println!("{}", to_json_schema(&report, format.pretty, format.terse, false, format.schema)?);
} else {
println!("Token Gate Report");
println!(" min_samples: {}", report.min_samples);
println!(" allowed_regression: {:.1}%", report.allowed_regression_percent);
println!(" decision: {:?}", report.decision);
for eval in &report.surface_evaluations {
println!(
" {} ({} samples): {:?}",
eval.display_name, eval.sample_count, eval.verdict
);
for me in &eval.metric_evaluations {
println!(
" {} ({:?}): {}",
me.metric, me.direction, me.diagnostic
);
}
}
for d in &report.diagnostics {
println!(" ! {}", d);
}
}
Ok(())
}
/// Formats the current system time as `YYYY-MM-DDTHH:MM:SSZ`.
///
/// The time is taken as whole seconds since the Unix epoch; if the system
/// clock reports a time before the epoch, the epoch itself is used.
fn iso_timestamp_now() -> String {
    const SECS_PER_DAY: u64 = 86400;
    let epoch_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let (year, month, day) = days_to_ymd(epoch_secs / SECS_PER_DAY);
    let secs_into_day = epoch_secs % SECS_PER_DAY;
    let hour = secs_into_day / 3600;
    let minute = secs_into_day % 3600 / 60;
    let second = secs_into_day % 60;
    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
/// Converts a count of days since 1970-01-01 into `(year, month, day)`, with
/// `month` and `day` both starting at 1.
fn days_to_ymd(mut days: u64) -> (u64, u8, u8) {
    // Peel off whole years until the remainder fits inside the current year.
    let mut year: u64 = 1970;
    loop {
        let year_length: u64 = if is_leap(year) { 366 } else { 365 };
        if days < year_length {
            break;
        }
        days -= year_length;
        year += 1;
    }

    // Peel off whole months of the final year the same way.
    let february: u64 = if is_leap(year) { 29 } else { 28 };
    let month_lengths: [u64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month: u8 = 1;
    for length in month_lengths {
        if days < length {
            break;
        }
        days -= length;
        month += 1;
    }

    // `days` is now the zero-based offset inside the month.
    (year, month, days as u8 + 1)
}
/// Returns whether `year` is a leap year: divisible by 400, or divisible by 4
/// but not by 100.
fn is_leap(year: u64) -> bool {
    if year.is_multiple_of(400) {
        return true;
    }
    year.is_multiple_of(4) && !year.is_multiple_of(100)
}
/// Writes `body` to `<root>/.tsift/artifacts/<handle>.<suffix>` and returns a
/// reference describing the stored artifact.
///
/// The handle is derived from `prefix` and `key` via `stable_handle`. The
/// returned path is relative to `root`; `bytes` and `lines` describe `body`,
/// and `expand` is passed through unchanged.
///
/// # Errors
/// Fails when the artifacts directory cannot be created or the file cannot be
/// written.
fn persist_transcript_artifact(
    root: &Path,
    prefix: &str,
    suffix: &str,
    key: &str,
    body: &str,
    expand: String,
) -> Result<TranscriptArtifactRef> {
    let handle = stable_handle(prefix, key);

    let artifacts_dir = root.join(".tsift/artifacts");
    fs::create_dir_all(&artifacts_dir).with_context(|| {
        format!(
            "creating transcript artifacts dir: {}",
            artifacts_dir.display()
        )
    })?;

    let artifact_path = artifacts_dir.join(format!("{handle}.{suffix}"));
    fs::write(&artifact_path, body)
        .with_context(|| format!("writing transcript artifact: {}", artifact_path.display()))?;

    let path = relativize_pathbuf(&artifact_path, root).display().to_string();
    let bytes = body.len();
    let lines = body.lines().count();
    Ok(TranscriptArtifactRef {
        handle,
        path,
        bytes,
        lines,
        expand,
    })
}
/// Maps a long JSON key to its terse abbreviation.
///
/// Keys without an entry are returned unchanged. The arms mirror the
/// `TERSE_PAIRS` table, which `terse_schema_for` uses to build the legend that
/// maps the short keys back to long names; the two lists must be kept in sync.
///
/// Two quirks are visible in the table: `"refs"` maps to itself, and both
/// `"success"` and `"healthy"` map to `"ok"`, so those two long keys cannot be
/// told apart after the transform.
fn terse_key(key: &str) -> &str {
match key {
"name" => "n",
"kind" => "k",
"file" => "f",
"line" => "l",
"path" => "p",
"from" => "fr",
"type" => "ty",
"text" => "tx",
"new" => "nw",
"run" => "r",
"use" => "u",
"score" => "sc",
"language" => "la",
"status" => "st",
"state" => "stt",
"error" => "err",
"errors" => "ers",
"hops" => "hp",
"tags" => "tg",
"model" => "ml",
"skill" => "sk",
"count" => "ct",
"total" => "tot",
"column" => "col",
"description" => "dsc",
"end_line" => "el",
"signature" => "sig",
"parent_module" => "pm",
"visibility" => "vis",
"match_type" => "mt",
"caller_file" => "cf",
"caller_name" => "cn",
"caller_line" => "cl",
"callee_name" => "en",
"call_site_line" => "csl",
"members" => "m",
"refs" => "refs",
"role" => "rl",
"peer" => "pr",
"modularity" => "q",
"modularity_contribution" => "mc",
"iterations" => "it",
"node_count" => "nc",
"edge_count" => "ec",
"community_count" => "cc",
"communities" => "cms",
"community" => "cm",
"community_diagnostics" => "cd",
"cache_hit" => "cah",
"tagpath_state" => "tps",
"tagpath_stale_reason" => "tsr",
"annotated_community_count" => "acc",
"annotated_member_count" => "amc",
"ambiguous_member_count" => "ambc",
"ambiguous_members" => "amb",
"candidate_count" => "cand",
"tagpath_candidate_count" => "tcand",
"evidence" => "ev",
"chosen_file" => "chf",
"symbol" => "s",
"symbols" => "sy",
"definitions" => "df",
"callers" => "crs",
"callees" => "ces",
"total_tracked" => "tt",
"modified" => "md",
"deleted" => "dl",
"unchanged" => "uc",
"changes" => "ch",
"prune_stats" => "ps",
"hits" => "h",
"rank" => "rk",
"snippet" => "sn",
"confidence" => "co",
"index" => "ix",
"summaries" => "sms",
"recommendations" => "rec",
"total_files" => "tf",
"stale_files" => "sf",
"last_indexed_secs_ago" => "age",
"cached_files" => "caf",
"total_indexed_files" => "tif",
"coverage_pct" => "cov",
"symbol_name" => "syn",
"file_path" => "fp",
"content_hash" => "hsh",
"summary" => "sum",
"tool" => "tl",
"view" => "vw",
"truncated" => "tr",
"follow_up" => "fu",
"report" => "rp",
"metrics" => "ms",
"label" => "lb",
"value" => "v",
"command" => "cmd",
"exit_code" => "xc",
// NOTE: shares the short key "ok" with "healthy" below.
"success" => "ok",
"artifact" => "art",
"digest" => "dg",
"bytes" => "bt",
"lines" => "lns",
"expand" => "xp",
"entities" => "ent",
"relationships" => "rel",
"concept_labels" => "cls",
"extracted_at" => "at",
"tokens_input" => "ti",
"tokens_output" => "tout",
"total_summaries" => "ts",
"stale_count" => "stc",
"total_tokens_input" => "tti",
"total_tokens_output" => "tto",
"estimated_tokens_saved" => "ets",
"files_processed" => "fps",
"symbols_extracted" => "se",
"skills_dir" => "sd",
// NOTE: shares the short key "ok" with "success" above.
"healthy" => "ok",
"broken" => "brk",
"skills" => "sks",
"manifest_diffs" => "mdf",
"similar_pairs" => "sim",
"usage" => "usg",
"cleanup" => "cln",
"has_skill_md" => "hsm",
"is_symlink" => "isl",
"issues" => "iss",
"invocation_count" => "inv",
"reasons" => "rsn",
"token_estimate" => "te",
"skill_a" => "sa",
"skill_b" => "sb",
"desc_a" => "da",
"desc_b" => "db",
"annotations" => "ann",
"entity" => "ety",
"suggestion" => "sug",
"columns" => "cols",
"row_count" => "rc",
"notnull" => "nn",
"default_value" => "dv",
"replace_all" => "ra",
// Unknown keys pass through untouched.
other => other,
}
}
fn terse_transform(val: serde_json::Value) -> serde_json::Value {
match val {
serde_json::Value::Object(map) => {
let mut new_map = serde_json::Map::new();
for (k, v) in map {
new_map.insert(terse_key(&k).to_string(), terse_transform(v));
}
serde_json::Value::Object(new_map)
}
serde_json::Value::Array(arr) => {
serde_json::Value::Array(arr.into_iter().map(terse_transform).collect())
}
other => other,
}
}
/// Recursively strips verbose detail from a JSON value for ultra-terse output.
///
/// For every object encountered:
/// * objects shaped like a graph node (`id`, `k`, `n`) or a graph edge
///   (`from_id`, `to_id`, `k`) lose `properties`, `provenance`, `freshness`;
/// * graph edges get their string `k` value shortened via
///   `abbreviate_edge_kind`;
/// * objects with `mode` plus `total_sector_count` or `dirty_sector_count`
///   lose the per-state sector counters and `active_rebuild`;
/// * string values under `sn` / `snippet` are cut to 80 via
///   `truncate_for_ultra_terse`.
///
/// Arrays and nested objects are processed recursively; scalars are returned
/// unchanged.
fn ultra_terse_transform(val: serde_json::Value) -> serde_json::Value {
    const GRAPH_DETAIL_KEYS: [&str; 3] = ["properties", "provenance", "freshness"];
    const COVERAGE_DETAIL_KEYS: [&str; 6] = [
        "active_rebuild",
        "completed_dirty_sector_count",
        "mounted_sector_count",
        "rebuilding_sector_count",
        "resumed_sector_count",
        "reused_sector_count",
    ];
    const SNIPPET_KEYS: [&str; 2] = ["sn", "snippet"];
    const SNIPPET_LIMIT: usize = 80;

    match val {
        serde_json::Value::Array(items) => {
            serde_json::Value::Array(items.into_iter().map(ultra_terse_transform).collect())
        }
        serde_json::Value::Object(mut fields) => {
            let has_kind = fields.contains_key("k");
            let node_like = has_kind && fields.contains_key("id") && fields.contains_key("n");
            let edge_like =
                has_kind && fields.contains_key("from_id") && fields.contains_key("to_id");
            if node_like || edge_like {
                for key in GRAPH_DETAIL_KEYS {
                    fields.remove(key);
                }
            }
            if edge_like
                && let Some(serde_json::Value::String(kind)) = fields.get_mut("k")
            {
                *kind = abbreviate_edge_kind(kind).to_string();
            }

            let coverage_like = fields.contains_key("mode")
                && (fields.contains_key("total_sector_count")
                    || fields.contains_key("dirty_sector_count"));
            if coverage_like {
                for key in COVERAGE_DETAIL_KEYS {
                    fields.remove(key);
                }
            }

            for key in SNIPPET_KEYS {
                if let Some(serde_json::Value::String(text)) = fields.get_mut(key) {
                    *text = truncate_for_ultra_terse(text, SNIPPET_LIMIT);
                }
            }

            let rebuilt: serde_json::Map<String, serde_json::Value> = fields
                .into_iter()
                .map(|(key, inner)| (key, ultra_terse_transform(inner)))
                .collect();
            serde_json::Value::Object(rebuilt)
        }
        scalar => scalar,
    }
}
fn edge_index_transform(val: serde_json::Value) -> serde_json::Value {
match val {
serde_json::Value::Object(mut map) => {
let node_ids: Option<Vec<String>> = map.get("nodes").and_then(|nodes| {
nodes.as_array().map(|arr| {
arr.iter()
.filter_map(|n| n.get("id").and_then(|v| v.as_str()).map(String::from))
.collect()
})
});
if let Some(ref ids) = node_ids {
let id_map: std::collections::HashMap<&str, usize> = ids
.iter()
.enumerate()
.map(|(i, id)| (id.as_str(), i))
.collect();
if let Some(serde_json::Value::Array(edges)) = map.get_mut("edges") {
for edge in edges.iter_mut() {
if let serde_json::Value::Object(edge_map) = edge {
if let Some(serde_json::Value::String(fid)) = edge_map.remove("from_id") {
if let Some(&idx) = id_map.get(fid.as_str()) {
edge_map.insert("from".to_string(), serde_json::Value::Number(idx.into()));
} else {
edge_map.insert("from_id".to_string(), serde_json::Value::String(fid));
}
}
if let Some(serde_json::Value::String(tid)) = edge_map.remove("to_id") {
if let Some(&idx) = id_map.get(tid.as_str()) {
edge_map.insert("to".to_string(), serde_json::Value::Number(idx.into()));
} else {
edge_map.insert("to_id".to_string(), serde_json::Value::String(tid));
}
}
}
}
}
}
let new_map: serde_json::Map<String, serde_json::Value> = map
.into_iter()
.map(|(k, v)| (k, edge_index_transform(v)))
.collect();
serde_json::Value::Object(new_map)
}
serde_json::Value::Array(arr) => {
serde_json::Value::Array(arr.into_iter().map(edge_index_transform).collect())
}
other => other,
}
}
/// Shortens `s` to at most `max_len` characters for ultra-terse output.
///
/// Strings of up to `max_len` characters are returned unchanged. Longer
/// strings keep their first `max_len - 3` characters (saturating at zero)
/// followed by `"..."`.
///
/// The limit is measured in characters, matching the character-based cut, so
/// multi-byte text that is already within the limit is not given a spurious
/// ellipsis.
fn truncate_for_ultra_terse(s: &str, max_len: usize) -> String {
    // A byte length within the limit implies the char count is too, which
    // avoids counting characters for the common short case.
    if s.len() <= max_len || s.chars().count() <= max_len {
        return s.to_string();
    }
    let truncated: String = s.chars().take(max_len.saturating_sub(3)).collect();
    format!("{truncated}...")
}
fn terse_schema_for(val: &serde_json::Value) -> serde_json::Value {
let mut keys = HashSet::new();
collect_terse_keys(val, &mut keys);
let mut schema = serde_json::Map::new();
for (long, short) in TERSE_PAIRS {
if keys.contains(*short) {
schema.insert(
short.to_string(),
serde_json::Value::String(long.to_string()),
);
}
}
serde_json::Value::Object(schema)
}
/// Adds every object key found anywhere inside `val` (at any nesting depth)
/// to `keys`.
fn collect_terse_keys(val: &serde_json::Value, keys: &mut HashSet<String>) {
    // Explicit work list instead of recursion; the result is a set, so the
    // visiting order does not matter.
    let mut pending = vec![val];
    while let Some(current) = pending.pop() {
        match current {
            serde_json::Value::Array(items) => pending.extend(items.iter()),
            serde_json::Value::Object(fields) => {
                for (name, inner) in fields {
                    keys.insert(name.clone());
                    pending.push(inner);
                }
            }
            _ => {}
        }
    }
}
/// Rewrites arrays of uniformly shaped objects into a columnar form.
///
/// An array with at least two elements whose elements are all objects with the
/// same key set (see `homogeneous_keys`) becomes
/// `{"_c": [<keys>], "_r": [[<values in key order>], ...]}`. The row values
/// are copied as they are and are not transformed further. Any other array or
/// object is processed recursively; scalars are returned unchanged.
fn schema_transform(val: serde_json::Value) -> serde_json::Value {
    match val {
        serde_json::Value::Array(items) => {
            let columns = if items.len() >= 2 {
                homogeneous_keys(&items)
            } else {
                None
            };
            let Some(columns) = columns else {
                return serde_json::Value::Array(
                    items.into_iter().map(schema_transform).collect(),
                );
            };

            let mut rows: Vec<serde_json::Value> = Vec::with_capacity(items.len());
            for item in items {
                let row = match item {
                    serde_json::Value::Object(fields) => {
                        let cells: Vec<serde_json::Value> = columns
                            .iter()
                            .map(|column| {
                                fields
                                    .get(column)
                                    .cloned()
                                    .unwrap_or(serde_json::Value::Null)
                            })
                            .collect();
                        serde_json::Value::Array(cells)
                    }
                    // `homogeneous_keys` only succeeds when every element is
                    // an object, so this arm is a pass-through safeguard.
                    non_object => non_object,
                };
                rows.push(row);
            }
            let header: Vec<serde_json::Value> = columns
                .into_iter()
                .map(serde_json::Value::String)
                .collect();
            serde_json::json!({"_c": header, "_r": rows})
        }
        serde_json::Value::Object(fields) => {
            let rebuilt: serde_json::Map<String, serde_json::Value> = fields
                .into_iter()
                .map(|(key, inner)| (key, schema_transform(inner)))
                .collect();
            serde_json::Value::Object(rebuilt)
        }
        scalar => scalar,
    }
}
/// Returns the key list of the first element when every element of `arr` is a
/// JSON object with exactly that key set.
///
/// Returns `None` for an empty slice, when any element is not an object, or
/// when any element's keys differ from the first element's.
fn homogeneous_keys(arr: &[serde_json::Value]) -> Option<Vec<String>> {
    let (head, rest) = arr.split_first()?;
    let columns: Vec<String> = head.as_object()?.keys().cloned().collect();

    // Same number of keys plus "contains every column" means the key sets are
    // identical.
    let uniform = rest.iter().all(|item| {
        item.as_object().is_some_and(|fields| {
            fields.len() == columns.len()
                && columns.iter().all(|column| fields.contains_key(column))
        })
    });
    uniform.then_some(columns)
}
/// `(long key, short key)` pairs used by `terse_schema_for` to build the
/// legend that maps short keys in terse output back to their long names.
///
/// The entries mirror the arms of `terse_key`; the two lists must be kept in
/// sync. `"success"` and `"healthy"` both use the short key `"ok"`, so the
/// legend entry for `"ok"` ends up naming whichever of them is listed last.
const TERSE_PAIRS: &[(&str, &str)] = &[
("name", "n"),
("kind", "k"),
("file", "f"),
("line", "l"),
("path", "p"),
("from", "fr"),
("type", "ty"),
("text", "tx"),
("new", "nw"),
("run", "r"),
("use", "u"),
("score", "sc"),
("language", "la"),
("status", "st"),
("state", "stt"),
("error", "err"),
("errors", "ers"),
("hops", "hp"),
("tags", "tg"),
("model", "ml"),
("skill", "sk"),
("count", "ct"),
("total", "tot"),
("column", "col"),
("description", "dsc"),
("end_line", "el"),
("signature", "sig"),
("parent_module", "pm"),
("visibility", "vis"),
("match_type", "mt"),
("caller_file", "cf"),
("caller_name", "cn"),
("caller_line", "cl"),
("callee_name", "en"),
("call_site_line", "csl"),
("members", "m"),
("refs", "refs"),
("role", "rl"),
("peer", "pr"),
("modularity", "q"),
("modularity_contribution", "mc"),
("iterations", "it"),
("node_count", "nc"),
("edge_count", "ec"),
("community_count", "cc"),
("communities", "cms"),
("community", "cm"),
("community_diagnostics", "cd"),
("cache_hit", "cah"),
("tagpath_state", "tps"),
("tagpath_stale_reason", "tsr"),
("annotated_community_count", "acc"),
("annotated_member_count", "amc"),
("ambiguous_member_count", "ambc"),
("ambiguous_members", "amb"),
("candidate_count", "cand"),
("tagpath_candidate_count", "tcand"),
("evidence", "ev"),
("chosen_file", "chf"),
("symbol", "s"),
("symbols", "sy"),
("definitions", "df"),
("callers", "crs"),
("callees", "ces"),
("total_tracked", "tt"),
("modified", "md"),
("deleted", "dl"),
("unchanged", "uc"),
("changes", "ch"),
("prune_stats", "ps"),
("hits", "h"),
("rank", "rk"),
("snippet", "sn"),
("confidence", "co"),
("index", "ix"),
("summaries", "sms"),
("recommendations", "rec"),
("total_files", "tf"),
("stale_files", "sf"),
("last_indexed_secs_ago", "age"),
("cached_files", "caf"),
("total_indexed_files", "tif"),
("coverage_pct", "cov"),
("symbol_name", "syn"),
("file_path", "fp"),
("content_hash", "hsh"),
("summary", "sum"),
("tool", "tl"),
("view", "vw"),
("truncated", "tr"),
("follow_up", "fu"),
("report", "rp"),
("metrics", "ms"),
("label", "lb"),
("value", "v"),
("command", "cmd"),
("exit_code", "xc"),
// NOTE: shares the short key "ok" with "healthy" below.
("success", "ok"),
("artifact", "art"),
("digest", "dg"),
("bytes", "bt"),
("lines", "lns"),
("expand", "xp"),
("entities", "ent"),
("relationships", "rel"),
("concept_labels", "cls"),
("extracted_at", "at"),
("tokens_input", "ti"),
("tokens_output", "tout"),
("total_summaries", "ts"),
("stale_count", "stc"),
("total_tokens_input", "tti"),
("total_tokens_output", "tto"),
("estimated_tokens_saved", "ets"),
("files_processed", "fps"),
("symbols_extracted", "se"),
("skills_dir", "sd"),
// NOTE: shares the short key "ok" with "success" above.
("healthy", "ok"),
("broken", "brk"),
("skills", "sks"),
("manifest_diffs", "mdf"),
("similar_pairs", "sim"),
("usage", "usg"),
("cleanup", "cln"),
("has_skill_md", "hsm"),
("is_symlink", "isl"),
("issues", "iss"),
("invocation_count", "inv"),
("reasons", "rsn"),
("token_estimate", "te"),
("skill_a", "sa"),
("skill_b", "sb"),
("desc_a", "da"),
("desc_b", "db"),
("annotations", "ann"),
("entity", "ety"),
("suggestion", "sug"),
("columns", "cols"),
("row_count", "rc"),
("notnull", "nn"),
("default_value", "dv"),
("replace_all", "ra"),
];
/// Strips the `<root>/` prefix from `path` when present.
///
/// Trailing slashes on `root` are ignored. The comparison is purely textual;
/// paths that do not start with `<root>/` are returned unchanged.
pub(crate) fn relativize(path: &str, root: &std::path::Path) -> String {
    let mut prefix = root.to_string_lossy().trim_end_matches('/').to_owned();
    prefix.push('/');
    match path.strip_prefix(prefix.as_str()) {
        Some(relative) => relative.to_owned(),
        None => path.to_owned(),
    }
}
/// Finds the directory under which transcript artifacts for `path` are stored.
///
/// `path` is canonicalized; the search starts at that directory (or at the
/// parent directory when `path` is not a directory) and walks up the
/// ancestors, returning the first one that contains a `.git` entry or a
/// `.gitmodules` file. When no ancestor matches, the starting directory is
/// returned.
///
/// # Errors
/// Fails when `path` cannot be canonicalized.
fn transcript_artifact_root(path: &Path) -> Result<PathBuf> {
    let resolved = path
        .canonicalize()
        .with_context(|| format!("canonicalizing {}", path.display()))?;

    let search_start = if resolved.is_dir() {
        resolved
    } else {
        match resolved.parent() {
            Some(parent) => parent.to_path_buf(),
            None => resolved.clone(),
        }
    };

    let marked_ancestor = search_start
        .ancestors()
        .find(|dir| dir.join(".git").exists() || dir.join(".gitmodules").is_file())
        .map(Path::to_path_buf);
    Ok(marked_ancestor.unwrap_or(search_start))
}
/// Returns `path` relative to `root`, or `path` itself when it does not lie
/// under `root`.
pub(crate) fn relativize_pathbuf(path: &std::path::Path, root: &std::path::Path) -> PathBuf {
    match path.strip_prefix(root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}
/// Rewrites the `caller_file` of every edge to be relative to `root` (see
/// `relativize`).
pub(crate) fn relativize_edges(edges: &mut [index::StoredEdge], root: &std::path::Path) {
    edges.iter_mut().for_each(|edge| {
        let relative = relativize(&edge.caller_file, root);
        edge.caller_file = relative;
    });
}
/// Rewrites the `file` of every symbol to be relative to `root` (see
/// `relativize`).
pub(crate) fn relativize_symbols(symbols: &mut [index::StoredSymbol], root: &std::path::Path) {
    symbols.iter_mut().for_each(|symbol| {
        let relative = relativize(&symbol.file, root);
        symbol.file = relative;
    });
}
/// Rewrites the `file` of every symbol hit to be relative to `root` (see
/// `relativize`).
pub(crate) fn relativize_symbol_hits(hits: &mut [index::SymbolHit], root: &std::path::Path) {
    hits.iter_mut().for_each(|hit| {
        let relative = relativize(&hit.file, root);
        hit.file = relative;
    });
}
/// Which endpoint of a `StoredEdge` is the row's primary symbol — caller
/// (caller list) or callee (callee list).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EdgeSide {
/// The caller endpoint is the row's primary symbol (caller list).
Caller,
/// The callee endpoint is the row's primary symbol (callee list).
Callee,
}
/// Object keys whose string values `relativize_json_inner` treats as file
/// paths and strips the project-root prefix from.
const JSON_PATH_KEYS: &[&str] = &["file", "path", "caller_file", "file_path"];
/// Strips the `<root>/` prefix from every path-like string field in `val`.
///
/// Only string values stored under the keys in `JSON_PATH_KEYS` are touched;
/// the whole JSON tree is visited. Trailing slashes on `root` are ignored.
pub(crate) fn relativize_json_paths(val: &mut serde_json::Value, root: &std::path::Path) {
    let mut prefix = root.to_string_lossy().trim_end_matches('/').to_owned();
    prefix.push('/');
    relativize_json_inner(val, &prefix);
}
/// Recursive worker for `relativize_json_paths`.
///
/// Walks `val` and, for each string stored under one of `JSON_PATH_KEYS`,
/// removes the leading `prefix` when present. Strings under other keys and
/// all non-string scalars are left unchanged.
fn relativize_json_inner(val: &mut serde_json::Value, prefix: &str) {
    match val {
        serde_json::Value::Object(fields) => {
            for (key, child) in fields.iter_mut() {
                match child {
                    // Strings are leaves: rewrite in place, nothing to descend into.
                    serde_json::Value::String(text) => {
                        if JSON_PATH_KEYS.contains(&key.as_str())
                            && let Some(relative) = text.strip_prefix(prefix)
                        {
                            *text = relative.to_string();
                        }
                    }
                    nested => relativize_json_inner(nested, prefix),
                }
            }
        }
        serde_json::Value::Array(items) => {
            for item in items.iter_mut() {
                relativize_json_inner(item, prefix);
            }
        }
        _ => {}
    }
}
/// Formats a score with two decimal places in compact mode and four
/// otherwise.
pub(crate) fn format_score(score: f64, compact: bool) -> String {
    let decimals: usize = if compact { 2 } else { 4 };
    format!("{score:.decimals$}")
}
/// Trims `input` and limits it to `max_chars` characters.
///
/// Text that fits is returned trimmed. Longer text keeps its first
/// `max_chars - 3` characters (saturating at zero) followed by `"..."`.
pub(crate) fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    let text = input.trim();
    // A character at index `max_chars` exists only when the text is too long.
    if text.chars().nth(max_chars).is_none() {
        return text.to_owned();
    }
    let kept_chars = max_chars.saturating_sub(3);
    // Byte offset right after the last kept character.
    let cut = text
        .char_indices()
        .nth(kept_chars)
        .map_or(text.len(), |(offset, _)| offset);
    format!("{}...", &text[..cut])
}
/// Returns the first non-blank line of `snippet`, trimmed and limited to 100
/// characters via `truncate_for_compact`, or `None` when every line is blank.
pub(crate) fn compact_snippet(snippet: &str) -> Option<String> {
    for line in snippet.lines() {
        if !line.trim().is_empty() {
            return Some(truncate_for_compact(line, 100));
        }
    }
    None
}
/// Joins member names with `", "`, listing at most `limit` of them.
///
/// When there are more than `limit` members, the remainder is summarized as
/// `" (+N more)"`.
pub(crate) fn compact_members(members: &[graph::CommunityMember], limit: usize) -> String {
    let listed = members
        .iter()
        .take(limit)
        .map(|member| member.name.as_str())
        .collect::<Vec<&str>>()
        .join(", ");
    if members.len() <= limit {
        listed
    } else {
        format!("{} (+{} more)", listed, members.len() - limit)
    }
}
/// Derives a deterministic handle of the form `<prefix>-<10 hex chars>`.
///
/// The hex part is the start of the BLAKE3 hash of `prefix`, a zero byte
/// separator, and `key`.
pub(crate) fn stable_handle(prefix: &str, key: &str) -> String {
    let digest = blake3::Hasher::new()
        .update(prefix.as_bytes())
        // The separator keeps ("ab", "c") and ("a", "bc") from hashing alike.
        .update(&[0])
        .update(key.as_bytes())
        .finalize();
    let hex = digest.to_hex();
    let short = &hex[..10];
    format!("{prefix}-{short}")
}
/// A tag family reduced to the two strings this module needs: the canonical
/// name reported by `tagpath_family` and a compact alias built from its tags.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CanonicalTagFamily {
/// Canonical name, copied from the `canonical` field of the source family.
canonical: String,
/// Alias string: the family's tags joined with `/`, or — when the family has
/// dimensions — each dimension's tags joined with `.` and the dimensions
/// joined with `/`.
tag_alias: String,
}
/// Converts a `tagpath_family::TagFamily` into a `CanonicalTagFamily`.
///
/// The alias is the family's tags joined with `/` when the family has no
/// dimensions; otherwise each non-empty dimension's tags are joined with `.`
/// and the dimensions with `/`. Returns `None` when the alias would be empty.
fn canonical_family_from_tagpath_family(
    family: tagpath_family::TagFamily,
) -> Option<CanonicalTagFamily> {
    let tag_alias = if family.dimensions.is_empty() {
        family.tags.join("/")
    } else {
        let mut segments: Vec<String> = Vec::new();
        for dimension in &family.dimensions {
            if !dimension.tags.is_empty() {
                segments.push(dimension.tags.join("."));
            }
        }
        segments.join("/")
    };

    if tag_alias.is_empty() {
        return None;
    }
    Some(CanonicalTagFamily {
        canonical: family.canonical,
        tag_alias,
    })
}
/// Derives the canonical tag family for a symbol name.
///
/// The name is trimmed first; a blank name yields `None`.
fn canonical_tag_family_from_name(name: &str) -> Option<CanonicalTagFamily> {
    let symbol_name = name.trim();
    if symbol_name.is_empty() {
        None
    } else {
        let family = tagpath_family::generate_family(symbol_name);
        canonical_family_from_tagpath_family(family)
    }
}
/// Derives the canonical tag family from a comma-separated tag list.
///
/// The tags are trimmed, blank entries are dropped, and the rest are joined
/// with `_` before being handed to `tagpath_family::generate_family`. Returns
/// `None` when no non-blank tag remains.
fn canonical_tag_family_from_tags(tags: &str) -> Option<CanonicalTagFamily> {
    let mut joined = String::new();
    for tag in tags.split(',').map(str::trim) {
        if tag.is_empty() {
            continue;
        }
        if !joined.is_empty() {
            joined.push('_');
        }
        joined.push_str(tag);
    }

    if joined.is_empty() {
        return None;
    }
    canonical_family_from_tagpath_family(tagpath_family::generate_family(&joined))
}
/// Derives the canonical tag family for a symbol, preferring its explicit
/// `tags` and falling back to its `name` when the tags are absent or yield no
/// family.
pub(crate) fn canonical_tag_family_from_symbol(
    name: &str,
    tags: Option<&str>,
) -> Option<CanonicalTagFamily> {
    if let Some(family) = tags.and_then(canonical_tag_family_from_tags) {
        return Some(family);
    }
    canonical_tag_family_from_name(name)
}
/// Returns the tag alias of the family derived from `name`, if any.
fn tag_alias_from_name(name: &str) -> Option<String> {
    let family = canonical_tag_family_from_name(name)?;
    Some(family.tag_alias)
}
/// Returns the tag alias of the family derived from `tags`, falling back to
/// `name` (see `canonical_tag_family_from_symbol`), if any.
fn tag_alias_from_tags(name: &str, tags: Option<&str>) -> Option<String> {
    let family = canonical_tag_family_from_symbol(name, tags)?;
    Some(family.tag_alias)
}
/// Turns a tag alias into a space-separated query string.
///
/// The alias is split on `/` and `.`, each part is trimmed, blank parts are
/// dropped, and the rest are joined with single spaces. Returns `None` when
/// nothing remains.
pub(crate) fn family_query_from_tag_alias(tag_alias: &str) -> Option<String> {
    let mut query = String::new();
    for part in tag_alias.split(['/', '.']).map(str::trim) {
        if part.is_empty() {
            continue;
        }
        if !query.is_empty() {
            query.push(' ');
        }
        query.push_str(part);
    }
    if query.is_empty() {
        None
    } else {
        Some(query)
    }
}
/// Compact, serializable reference to one ontology tag, as produced by
/// `ontology_refs_for_alias`.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactOntologyRefPreview {
/// Stable handle derived from the tag name and its path.
handle: String,
/// The ontology tag's name.
tag: String,
/// Path of the ontology entry, relative to the project root when it lies
/// under it, with `\` replaced by `/`.
path: String,
/// Optional title of the ontology tag; omitted from the output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>,
/// Optional domain of the ontology tag; omitted from the output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
domain: Option<String>,
}
/// Ontology data loaded once (see `load_tag_ontology_preview_context`) and
/// reused while building symbol previews.
#[derive(Clone, Debug)]
struct TagOntologyPreviewContext {
/// Project path reported by the ontology loader; used to relativize tag paths.
project_root: PathBuf,
/// Ontology tags keyed by tag name.
tags: BTreeMap<String, tagpath_ontology::OntologyTag>,
}
/// Compact, serializable reference to a symbol, as produced by
/// `build_compact_symbol_ref_with_ontology`.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactSymbolRefPreview {
/// Stable handle derived from the caller-supplied prefix and key.
handle: String,
/// Symbol name, truncated to the byte budget.
name: String,
/// Tag alias of the symbol's family, truncated to the byte budget; omitted
/// from the output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
tag_alias: Option<String>,
/// Ontology tags matching the alias; omitted from the output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
ontology_refs: Vec<CompactOntologyRefPreview>,
}
fn build_compact_symbol_ref(
prefix: &str,
key: &str,
name: &str,
tags: Option<&str>,
max_bytes: usize,
) -> CompactSymbolRefPreview {
build_compact_symbol_ref_with_ontology(prefix, key, name, tags, max_bytes, None)
}
/// Builds a `CompactSymbolRefPreview` for a symbol.
///
/// The handle comes from `stable_handle(prefix, key)`. The tag alias is
/// derived from `tags` (falling back to `name`), and ontology references are
/// looked up for the untruncated alias. `name` and the alias are then
/// truncated to `max_bytes`.
fn build_compact_symbol_ref_with_ontology(
    prefix: &str,
    key: &str,
    name: &str,
    tags: Option<&str>,
    max_bytes: usize,
    ontology: Option<&TagOntologyPreviewContext>,
) -> CompactSymbolRefPreview {
    let full_alias = tag_alias_from_tags(name, tags);
    let ontology_refs = match full_alias.as_deref() {
        Some(alias) => ontology_refs_for_alias(ontology, alias),
        None => Vec::new(),
    };
    let handle = stable_handle(prefix, key);
    let name = truncate_for_budget(name, max_bytes);
    let tag_alias = full_alias.map(|alias| truncate_for_budget(&alias, max_bytes));
    CompactSymbolRefPreview {
        handle,
        name,
        tag_alias,
        ontology_refs,
    }
}
fn load_tag_ontology_preview_context(root: &Path) -> Option<TagOntologyPreviewContext> {
let report = tagpath_ontology::load_project(root).ok()?;
if report.tags.is_empty() {
return None;
}
Some(TagOntologyPreviewContext {
project_root: report.project_path,
tags: report
.tags
.into_iter()
.map(|tag| (tag.tag.clone(), tag))
.collect(),
})
}
/// Looks up the ontology tags referenced by a tag alias.
///
/// The alias is split on `/` and then `.`; each part is trimmed, lowercased,
/// and looked up in the ontology. Each distinct lowercased part is considered
/// once, and matches are returned in the order they appear in the alias.
/// Returns an empty list when no ontology context is supplied.
fn ontology_refs_for_alias(
    ontology: Option<&TagOntologyPreviewContext>,
    alias: &str,
) -> Vec<CompactOntologyRefPreview> {
    let Some(ontology) = ontology else {
        return Vec::new();
    };

    let mut refs = Vec::new();
    let mut visited = BTreeSet::new();
    for dimension in alias.split('/') {
        for raw_tag in dimension.split('.') {
            let tag = raw_tag.trim();
            if tag.is_empty() {
                continue;
            }
            let key = tag.to_ascii_lowercase();
            if visited.contains(&key) {
                continue;
            }
            let entry = ontology.tags.get(&key);
            // Record the part even when it has no ontology entry.
            visited.insert(key);
            let Some(ontology_tag) = entry else {
                continue;
            };

            let path = relativize_ontology_path(&ontology_tag.path, &ontology.project_root);
            let handle = stable_handle("tont", &format!("{}:{path}", ontology_tag.tag));
            refs.push(CompactOntologyRefPreview {
                handle,
                tag: ontology_tag.tag.clone(),
                path,
                title: ontology_tag.title.clone(),
                domain: ontology_tag.domain.clone(),
            });
        }
    }
    refs
}
/// Renders `path` relative to `root` (or unchanged when it is not under
/// `root`) as a string with every `\` replaced by `/`.
fn relativize_ontology_path(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(inside_root) => inside_root,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
/// Formats a symbol preview as `<handle> <name>`, followed by ` tag:<alias>`
/// when a tag alias is present.
fn format_symbol_preview_line(handle: &str, name: &str, tag_alias: Option<&str>) -> String {
    let mut line = format!("{handle} {name}");
    if let Some(alias) = tag_alias {
        line.push_str(" tag:");
        line.push_str(alias);
    }
    line
}
/// Formats a summary reference as `<handle> <symbol> expand:<expand>`, with
/// ` tag:<alias>` inserted before `expand:` when a tag alias is present.
fn format_summary_ref_line(summary: &ContextPackSummaryRefPreview) -> String {
    let tag_part = summary
        .tag_alias
        .as_deref()
        .map(|alias| format!(" tag:{alias}"))
        .unwrap_or_default();
    format!(
        "{} {}{} expand:{}",
        summary.handle, summary.symbol, tag_part, summary.expand
    )
}
/// Renders a symbol reference as `<handle>@<label>`, where the label is the
/// tag alias when present and the symbol name otherwise.
fn compact_symbol_ref_token(symbol: &CompactSymbolRefPreview) -> String {
    let label = symbol
        .tag_alias
        .as_deref()
        .unwrap_or(symbol.name.as_str());
    format!("{}@{}", symbol.handle, label)
}
/// Trims `input` and limits it to `max_bytes` bytes of UTF-8.
///
/// Text that fits is returned trimmed. With a budget of three bytes or fewer
/// the result is that many `.` characters. Otherwise the text is cut at the
/// last character boundary that leaves room for a trailing `"..."`.
pub(crate) fn truncate_for_budget(input: &str, max_bytes: usize) -> String {
    const ELLIPSIS: &str = "...";

    let text = input.trim();
    if text.len() <= max_bytes {
        return text.to_owned();
    }
    if max_bytes <= ELLIPSIS.len() {
        return ".".repeat(max_bytes);
    }

    // `text.len() > max_bytes > cut`, so `cut` is always a valid offset, and
    // offset 0 is always a boundary, so the loop terminates.
    let mut cut = max_bytes - ELLIPSIS.len();
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}{}", &text[..cut], ELLIPSIS)
}
/// Result of `build_token_capped_preview`: the lines that fit within the token
/// cap plus information about where the preview stopped.
struct TokenCappedPreview {
/// Preview lines that were included, in source order.
preview: Vec<SourceLinePreview>,
/// Line number of the last included line when the preview was capped;
/// otherwise the requested end line.
capped_end: usize,
/// Whether the token cap stopped the preview before the requested end.
was_capped: bool,
}
/// Builds a line preview for lines `start..=end` (1-based) of `all_lines`,
/// stopping once the estimated token total would exceed `token_cap`.
///
/// Each line is truncated to `max_bytes` and costed with
/// `estimated_tokens_from_bytes`. The first line is always included, even if
/// it alone exceeds the cap.
///
/// # Panics
/// Panics when `start` is zero or the range does not lie within `all_lines`,
/// because the range is sliced directly.
fn build_token_capped_preview(
    all_lines: &[&str],
    start: usize,
    end: usize,
    max_bytes: usize,
    token_cap: usize,
) -> TokenCappedPreview {
    let mut preview = Vec::new();
    let mut spent_tokens = 0usize;

    for (line_number, raw_line) in (start..).zip(&all_lines[(start - 1)..end]) {
        let text = truncate_for_budget(raw_line, max_bytes);
        let cost = estimated_tokens_from_bytes(text.len());
        if spent_tokens + cost > token_cap && !preview.is_empty() {
            // Stop before this line; the previous line is the last one kept.
            return TokenCappedPreview {
                preview,
                capped_end: line_number - 1,
                was_capped: true,
            };
        }
        spent_tokens += cost;
        preview.push(SourceLinePreview {
            line: line_number,
            text,
        });
    }

    TokenCappedPreview {
        preview,
        capped_end: end,
        was_capped: false,
    }
}
/// Returns the short display form of a symbol kind.
///
/// Kinds without a shorter form (including `struct`, `trait`, `impl`, `mod`,
/// `alias`, and any unknown kind) are returned unchanged.
pub(crate) fn abbreviate_kind(kind: &str) -> &str {
    const SHORT_FORMS: &[(&str, &str)] = &[
        ("function", "fn"),
        ("method", "meth"),
        ("module", "mod"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
    ];
    SHORT_FORMS
        .iter()
        .find(|(long, _)| *long == kind)
        .map_or(kind, |&(_, short)| short)
}
/// Returns the short code for a graph edge kind; unknown kinds are returned
/// unchanged.
pub(crate) fn abbreviate_edge_kind(kind: &str) -> &str {
    const EDGE_CODES: &[(&str, &str)] = &[
        ("calls", "c"),
        ("defines", "d"),
        ("contains", "ct"),
        ("imports", "i"),
        ("mentions", "m"),
        ("mentions_concept", "mc"),
        ("mentions_entity", "me"),
        ("semantic_relation", "sr"),
        ("belongs_to", "bt"),
        ("scopes_context", "sctx"),
        ("scopes_source", "ssrc"),
        ("requests_context", "rctx"),
        ("explains_result", "er"),
        ("tagged_concept", "tc"),
        ("tagged_entity", "te"),
        ("related_concept", "relc"),
        ("handled_by", "hb"),
        ("defines_route", "dr"),
        ("handles_route", "hr"),
        ("targets", "tgt"),
        ("has_vector_handle", "hv"),
        ("parent", "p"),
        ("child", "ch"),
        ("uses", "u"),
        ("projects_source", "psrc"),
        ("records_memory_source", "rms"),
        ("records_memory_event", "rme"),
        ("has_ast_span", "ha"),
        ("represents_symbol", "rs"),
        ("contains_embedded_symbol", "ces"),
        ("embedded_in_fence", "ef"),
        ("contains_markdown_block", "cmb"),
        ("contains_embedded_code", "cec"),
        ("enclosing_module", "em"),
        ("enclosing_section", "es"),
        ("previous_sibling", "psib"),
        ("next_sibling", "nsib"),
        ("explicit_depends_on", "edo"),
        ("worker_result_follow_up", "wrf"),
        ("shared_resource", "shr"),
        ("community_member", "cm"),
    ];
    EDGE_CODES
        .iter()
        .find(|(long, _)| *long == kind)
        .map_or(kind, |&(_, code)| code)
}
/// Returns the short display form of a match type: `exact_name` becomes
/// `exact`, `partial_tags` becomes `partial`, and everything else (including
/// `all_tags`) is returned unchanged.
pub(crate) fn abbreviate_match_type(mt: &str) -> &str {
    if mt == "exact_name" {
        "exact"
    } else if mt == "partial_tags" {
        "partial"
    } else {
        mt
    }
}
/// Renders a symbol path as the node names joined with `" -> "`.
pub(crate) fn symbol_path_summary(path: &[graph::PathNode]) -> String {
    let mut summary = String::new();
    for (position, node) in path.iter().enumerate() {
        if position > 0 {
            summary.push_str(" -> ");
        }
        summary.push_str(node.name.as_str());
    }
    summary
}
/// Maximum number of distinct sample lines `group_search_hits` keeps per
/// `SearchHitGroup`.
const SEARCH_GROUP_SAMPLE_LIMIT: usize = 2;
/// Search hits for one file, aggregated by `group_search_hits`.
struct SearchHitGroup {
/// File path of the group (absolute or root-relative, as requested).
path: String,
/// Lowest rank among the group's hits.
first_rank: usize,
/// Highest score among the group's hits.
top_score: f64,
/// Debug rendering of the confidence of the first hit seen for this path.
confidence: String,
/// Number of hits in the group.
hits: usize,
/// Up to `SEARCH_GROUP_SAMPLE_LIMIT` distinct sample lines from the hits.
samples: Vec<String>,
}
/// Builds a one-line sample for a search hit: the compacted snippet, prefixed
/// with `<location>: ` when the hit has a location. Returns `None` when the
/// snippet has no non-blank line.
fn format_search_sample(hit: &sift::SearchHit) -> Option<String> {
    let snippet = compact_snippet(&hit.snippet)?;
    let sample = if let Some(location) = hit.location.as_deref() {
        format!("{location}: {snippet}")
    } else {
        snippet
    };
    Some(sample)
}
/// Groups search hits by file path.
///
/// Paths are made relative to `root` unless `absolute` is set. Each group
/// tracks its hit count, lowest rank, highest score, the confidence of the
/// first hit seen for the path, and up to `SEARCH_GROUP_SAMPLE_LIMIT` distinct
/// sample lines. Groups are returned sorted by their lowest rank; groups with
/// equal rank keep first-seen order.
pub(crate) fn group_search_hits(
    hits: &[sift::SearchHit],
    root: &Path,
    absolute: bool,
) -> Vec<SearchHitGroup> {
    let mut slot_by_path: BTreeMap<String, usize> = BTreeMap::new();
    let mut groups: Vec<SearchHitGroup> = Vec::new();

    for hit in hits {
        let path = if absolute {
            hit.path.clone()
        } else {
            relativize(&hit.path, root)
        };

        let slot = match slot_by_path.get(&path) {
            Some(&existing) => existing,
            None => {
                let created = groups.len();
                groups.push(SearchHitGroup {
                    path: path.clone(),
                    first_rank: hit.rank,
                    top_score: hit.score,
                    confidence: format!("{:?}", hit.confidence),
                    hits: 0,
                    samples: Vec::new(),
                });
                slot_by_path.insert(path, created);
                created
            }
        };

        let group = &mut groups[slot];
        group.hits += 1;
        if hit.rank < group.first_rank {
            group.first_rank = hit.rank;
        }
        if hit.score > group.top_score {
            group.top_score = hit.score;
        }
        if group.samples.len() < SEARCH_GROUP_SAMPLE_LIMIT
            && let Some(sample) = format_search_sample(hit)
            && !group.samples.contains(&sample)
        {
            group.samples.push(sample);
        }
    }

    groups.sort_by_key(|group| group.first_rank);
    groups
}
/// Decides whether search hits should be shown grouped per file.
///
/// True when any file has three or more hits, or when there are at least six
/// hits and at least two of them share a file.
pub(crate) fn should_collapse_search_hits(
    hits: &[sift::SearchHit],
    root: &Path,
    absolute: bool,
) -> bool {
    let groups = group_search_hits(hits, root, absolute);
    if groups.iter().any(|group| group.hits >= 3) {
        return true;
    }
    hits.len() >= 6 && groups.len() < hits.len()
}
/// Formats edges as one line per caller file, listing the distinct symbol
/// names found in that file.
///
/// Edges are always bucketed by `caller_file`; `use_callers` only selects
/// whether the listed name is the caller's or the callee's. Lines come out in
/// ascending file order, names in first-seen order.
pub(crate) fn format_edge_groups(edges: &[index::StoredEdge], use_callers: bool) -> Vec<String> {
    let mut names_by_file: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
    for edge in edges {
        let symbol = match use_callers {
            true => edge.caller_name.as_str(),
            false => edge.callee_name.as_str(),
        };
        let bucket = names_by_file.entry(edge.caller_file.as_str()).or_default();
        if bucket.iter().all(|known| *known != symbol) {
            bucket.push(symbol);
        }
    }
    let mut lines = Vec::with_capacity(names_by_file.len());
    for (file, names) in names_by_file {
        lines.push(format!(" {} ({}): {}", file, names.len(), names.join(", ")));
    }
    lines
}
/// Decides whether edges should be shown grouped per caller file.
///
/// True when any caller file has three or more edges, or when there are at
/// least six edges and at least two of them share a caller file.
pub(crate) fn should_collapse_edge_groups(edges: &[index::StoredEdge]) -> bool {
    let mut edges_per_file: BTreeMap<&str, usize> = BTreeMap::new();
    for edge in edges {
        *edges_per_file.entry(edge.caller_file.as_str()).or_insert(0) += 1;
    }
    let has_crowded_file = edges_per_file.values().any(|&count| count >= 3);
    has_crowded_file || (edges.len() >= 6 && edges_per_file.len() < edges.len())
}
/// Picks the index database that a read-only query should run against.
///
/// Resolution order (first match wins):
/// 1. an explicit `scope` that names a configured submodule, then a Cargo
///    package; if neither matches, `Config::resolve_submodule` is called and
///    any error it returns is propagated;
/// 2. a submodule inferred from `path_hint`;
/// 3. a Cargo package inferred from `path_hint`;
/// 4. a submodule found by `infer_agent_doc_task_submodule`;
/// 5. the root index at `<root>/.tsift/index.db`, used when that file exists
///    or when the workspace has no submodule scopes at all.
///
/// # Errors
/// Propagates configuration/lookup errors. Fails with a message listing the
/// available and already-indexed scopes when there is no root index but the
/// workspace does define submodule scopes.
fn resolve_query_index_target(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
) -> Result<SearchIndexTarget> {
let cfg = config::Config::load(root)?;
// Explicit scope: try submodules first, then Cargo packages.
if let Some(scope_name) = scope {
if let Some(scope) = config::Config::find_submodule(root, scope_name)? {
return Ok(SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
});
}
if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
return Ok(cargo_package_index_target(root, package));
}
// The Ok value is discarded: only an error propagates here (presumably the
// "unknown scope" case — confirm against `resolve_submodule`). On Ok,
// resolution continues with path-based inference below.
config::Config::resolve_submodule(root, scope_name)?;
}
// No usable explicit scope: infer one from the path hint.
if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
return Ok(SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
});
}
if let Some(package) = multiplicity::infer_cargo_package_from_path(root, path_hint)? {
return Ok(cargo_package_index_target(root, package));
}
if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
return Ok(SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
});
}
// Fall back to the shared root index when it already exists on disk.
let db_path = root.join(".tsift/index.db");
if db_path.exists() {
return Ok(SearchIndexTarget {
label: "index".to_string(),
db_path,
source_root: root.to_path_buf(),
scope_name: None,
reindex_cmd: format!("tsift index {}", root.display()),
});
}
// No root index yet: a workspace without submodule scopes still targets the
// (not yet created) root index path.
let scopes = config::Config::submodule_dirs(root)?;
if scopes.is_empty() {
return Ok(SearchIndexTarget {
label: "index".to_string(),
db_path,
source_root: root.to_path_buf(),
scope_name: None,
reindex_cmd: format!("tsift index {}", root.display()),
});
}
// Scoped workspace without a root index: report which scopes exist and
// which of them already have an index database on disk.
let available_scopes = scopes
.iter()
.map(|scope| scope.id.as_str())
.collect::<Vec<_>>()
.join(", ");
let indexed_scopes = scopes
.iter()
.filter(|scope| cfg.db_path_for(root, &scope.id).exists())
.map(|scope| scope.id.as_str())
.collect::<Vec<_>>();
let indexed_label = if indexed_scopes.is_empty() {
"none".to_string()
} else {
indexed_scopes.join(", ")
};
bail!(
"workspace root {} has no shared root index at {}. Read-only graph queries require `--scope <scope>` when the workspace is indexed into `.tsift/indexes/*/index.db`. Available scopes: {}. Indexed scopes: {}.",
root.display(),
db_path.display(),
available_scopes,
indexed_label
);
}
/// Resolves the query index target for `root`/`path_hint`/`scope` and returns
/// only its database path.
///
/// # Errors
/// Propagates any error from `resolve_query_index_target`.
pub(crate) fn resolve_query_db_path(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<PathBuf> {
    resolve_query_index_target(root, path_hint, scope).map(|target| target.db_path)
}
/// Brings the query index up to date before a graph query, when needed.
///
/// Does nothing when `index_reason_for_state` reports no reason to update.
/// Otherwise applies the index update and invalidates the inspected-scope
/// cache. If the update fails with an active-writer lock error while an index
/// file already exists, a note is printed to stderr and the existing index is
/// used as-is.
///
/// # Errors
/// Propagates inspection errors and any update error that is not the
/// tolerated active-writer case.
fn ensure_query_index_current(root: &Path, target: &SearchIndexTarget) -> Result<()> {
    let reason = match index_reason_for_state(inspect_search_index(target)?) {
        Some(reason) => reason,
        None => return Ok(()),
    };
    let err = match apply_search_index_update(root, target) {
        Ok(_) => {
            index::inspect_scope_invalidate_all();
            return Ok(());
        }
        Err(err) => err,
    };
    // Only a writer-lock failure with an existing index is survivable.
    let can_use_stale_snapshot = is_active_writer_lock_error(&err) && target.db_path.exists();
    if !can_use_stale_snapshot {
        return Err(err);
    }
    eprintln!(
        "note: active tsift writer detected; skipping graph-query autoindex because {}. \
         Continuing with the current read-only index snapshot; graph results may lag. \
         Retry `{}` after the active writer finishes for fresh graph results.",
        index_reason_detail(target, reason),
        target.reindex_cmd
    );
    Ok(())
}
/// Opens the index database for `path` read-only, refreshing it first if
/// needed.
///
/// The project root is resolved from `path`, the index target is chosen from
/// the root, `path` and optional `scope`, and the index is brought up to date
/// before opening.
///
/// # Errors
/// Propagates resolution and refresh errors, and fails when the resolved
/// database file does not exist.
pub(crate) fn open_index_db(path: &std::path::Path, scope: Option<&str>) -> Result<index::IndexDb> {
    let root = lint::resolve_project_root_or_canonical_path(path)?;
    let target = resolve_query_index_target(&root, path, scope)?;
    ensure_query_index_current(&root, &target)?;
    let db_path = target.db_path;
    if db_path.exists() {
        return index::IndexDb::open_read_only_resilient(&db_path);
    }
    bail!(
        "no index found at {}. Run `tsift index` first.",
        db_path.display()
    )
}
/// Chooses the source root that tagpath queries should use.
///
/// An explicit `scope` is matched against submodules, then Cargo packages.
/// Without a match, the root is inferred from `path_hint` (submodule first,
/// then Cargo package), falling back to `root` itself.
///
/// # Errors
/// Propagates lookup errors, including whatever `Config::resolve_submodule`
/// returns for a scope that matched neither a submodule nor a package.
pub(crate) fn query_tagpath_root(
    root: &std::path::Path,
    path_hint: &std::path::Path,
    scope: Option<&str>,
) -> Result<PathBuf> {
    if let Some(scope_name) = scope {
        if let Some(submodule) = config::Config::find_submodule(root, scope_name)? {
            return Ok(submodule.source_root);
        }
        if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
            return Ok(package.package_root);
        }
        // Only the error matters here; an Ok value is dropped and inference
        // from the path hint takes over.
        config::Config::resolve_submodule(root, scope_name)?;
    }
    let inferred_submodule = config::Config::infer_submodule_from_path(root, path_hint)?;
    if let Some(submodule) = inferred_submodule {
        return Ok(submodule.source_root);
    }
    let inferred_package = multiplicity::infer_cargo_package_from_path(root, path_hint)?;
    Ok(match inferred_package {
        Some(package) => package.package_root,
        None => root.to_path_buf(),
    })
}
/// A node of the traversal graph, identified by `handle`.
///
/// Serialized with serde; `None` optional fields and an empty `properties`
/// map are omitted from the output.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalNode {
/// Unique key of the node; `TraversalGraphBuild` stores nodes under it.
handle: String,
/// Node category, e.g. the projection code filters on `"file"` and `"symbol"`.
kind: String,
label: String,
#[serde(skip_serializing_if = "Option::is_none")]
ref_id: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
line: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
detail: Option<String>,
/// Extra key/value pairs copied verbatim onto the projected graph node.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
properties: BTreeMap<String, String>,
// NOTE(review): presumably a follow-up command that expands this node — confirm with producers.
expand: String,
}
/// A directed, typed edge between two traversal nodes, referenced by handle.
///
/// `label` is omitted from serialized output when it is `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalEdge {
/// Handle of the source node.
from: String,
/// Handle of the target node.
to: String,
/// Relation name; together with `from` and `to` it forms the dedupe key.
relation: String,
#[serde(skip_serializing_if = "Option::is_none")]
label: Option<String>,
weight: usize,
}
/// Accumulator for a traversal graph under construction.
#[derive(Clone, Debug, Default)]
struct TraversalGraphBuild {
/// Nodes keyed by their handle.
nodes: BTreeMap<String, TraversalNode>,
/// Edges in insertion order.
edges: Vec<TraversalEdge>,
/// `(from, to, relation)` triples already present in `edges`, used for deduplication.
edge_keys: BTreeSet<(String, String, String)>,
/// Warning messages gathered while building.
warnings: Vec<String>,
}
/// Version tag of the traversal projection; it is part of the projection
/// content hash and is written to the projection meta node.
pub(crate) const GRAPH_PROJECTION_VERSION: &str = "tsift-traversal-v1";
// Contract version identifiers. Their consumers are outside this section of
// the file; presumably each is stamped into the matching report or packet.
const GRAPH_DB_EVIDENCE_CONTRACT_VERSION: &str = "graph-db-evidence-v1";
const WORKER_PROMPT_PACKET_CONTRACT_VERSION: &str = "worker-prompt-packet-v1";
const CONFLICT_MATRIX_CONTRACT_VERSION: &str = "conflict-matrix-v1";
const CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION: &str =
"context-pack-graph-orchestration-v1";
const SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION: &str = "session-review-follow-up-v1";
const DISPATCH_TRACE_CONTRACT_VERSION: &str = "dispatch-trace-v1";
const DEPENDENCY_DAG_CONTRACT_VERSION: &str = "dependency-dag-v1";
/// Node kind of the single meta node that carries the projection's content hash.
const GRAPH_PROJECTION_META_KIND: &str = "projection_meta";
// Numeric caps for graph DB queries (usage is outside this section).
const GRAPH_DB_RANKED_NEIGHBOR_CAP: usize = 12;
const GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP: usize = 16;
const GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP: usize = 64;
/// Node and edge counts reported alongside a traversal.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalTotals {
nodes: usize,
edges: usize,
}
/// Serializable description of a path between two traversal nodes: its
/// endpoints, hop count, and the nodes and edges along it.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalPathReport {
from: TraversalNode,
to: TraversalNode,
hops: usize,
nodes: Vec<TraversalNode>,
edges: Vec<TraversalEdge>,
}
/// A suggested node to look at next, with a textual reason and a score.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalRecommendation {
handle: String,
kind: String,
label: String,
reason: String,
score: usize,
expand: String,
}
/// Top-level serializable result of a traversal query.
///
/// `scope`, `query`, `target` and `shortest_path` are omitted from the output
/// when `None`; `warnings` is omitted when empty.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
mode: String,
totals: TraversalTotals,
#[serde(skip_serializing_if = "Option::is_none")]
query: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
target: Option<String>,
nodes: Vec<TraversalNode>,
edges: Vec<TraversalEdge>,
#[serde(skip_serializing_if = "Option::is_none")]
shortest_path: Option<TraversalPathReport>,
recommendations: Vec<TraversalRecommendation>,
exploration: ExplorationPacket,
truncated: bool,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Serializable result of a semantic "related items" query.
///
/// `scope` is omitted from the output when `None`; `warnings` when empty.
#[derive(Debug, Serialize, PartialEq)]
struct SemanticRelatedReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
query: String,
embedding_model: String,
count: usize,
items: Vec<SemanticRelatedItem>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// One scored entry of a `SemanticRelatedReport`.
///
/// `file_path`, `source_symbol` and `detail` are omitted from the output when
/// `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct SemanticRelatedItem {
handle: String,
kind: String,
label: String,
score: f64,
#[serde(skip_serializing_if = "Option::is_none")]
file_path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
source_symbol: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
detail: Option<String>,
expand: String,
}
/// Lookup entry for a symbol: its handle, its traversal node, and a token set.
// NOTE(review): `tokens` is presumably what lookups match against — confirm.
#[derive(Clone)]
struct TraversalSymbolIndexEntry {
handle: String,
node: TraversalNode,
tokens: BTreeSet<String>,
}
/// Lookup entry for a file: its handle, its traversal node, and a token set.
#[derive(Clone)]
struct TraversalFileIndexEntry {
handle: String,
node: TraversalNode,
tokens: BTreeSet<String>,
}
/// Lookup entry for a route: its handle, its traversal node, and a token set.
#[derive(Clone)]
struct TraversalRouteIndexEntry {
handle: String,
node: TraversalNode,
tokens: BTreeSet<String>,
}
/// Lookup entry for an AST span: it links the span's own handle to a symbol
/// handle and, optionally, a file handle, and records the span's byte range.
#[derive(Clone)]
struct TraversalAstSpanIndexEntry {
handle: String,
symbol_handle: String,
file_handle: Option<String>,
file: String,
name: String,
kind: String,
language: String,
node_kind: String,
// NOTE(review): whether `end_byte` is inclusive or exclusive is not visible here — confirm.
start_byte: usize,
end_byte: usize,
parent_module: Option<String>,
markdown: Option<MarkdownSpanMetadata>,
}
/// Lookup entry for a multiplicity node: its handle, its traversal node, and
/// a token set.
#[derive(Clone)]
struct TraversalMultiplicityIndexEntry {
handle: String,
node: TraversalNode,
tokens: BTreeSet<String>,
}
/// Borrowed view over the per-kind index entries, plus hash indexes into them.
// NOTE(review): the `*_index` maps presumably map a token to positions in the
// matching slice, and `file_path_index` a file path to a handle — confirm
// against the constructor, which is outside this section.
struct TraversalCodeLookup<'a> {
symbols: &'a [TraversalSymbolIndexEntry],
files: &'a [TraversalFileIndexEntry],
routes: &'a [TraversalRouteIndexEntry],
multiplicities: &'a [TraversalMultiplicityIndexEntry],
symbol_index: HashMap<String, Vec<usize>>,
file_index: HashMap<String, Vec<usize>>,
route_index: HashMap<String, Vec<usize>>,
multiplicity_index: HashMap<String, Vec<usize>>,
file_path_index: HashMap<String, String>,
}
/// Limits applied to an exploration packet: how many source windows, how many
/// lines per window, and how many relationships.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationBudget {
project_size: String,
max_source_windows: usize,
lines_per_window: usize,
relationship_limit: usize,
}
/// One `from --relation--> to` entry of an exploration relationship map.
///
/// `label` is omitted from serialized output when it is `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationRelation {
from: String,
relation: String,
to: String,
#[serde(skip_serializing_if = "Option::is_none")]
label: Option<String>,
}
/// A range of a source file selected for exploration, with the reason it was
/// chosen.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationSourceWindow {
handle: String,
file: String,
// NOTE(review): presumably line numbers; base and inclusivity are not visible here — confirm.
start: usize,
end: usize,
reason: String,
expand: String,
}
/// A summarized piece of worker context attached to an exploration packet.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationWorkerContext {
handle: String,
target: String,
summary: String,
expand: String,
}
/// Exploration payload embedded in a `TraversalReport`: the budget, the
/// relationship map, the selected source windows, optional worker context,
/// and a guidance string.
///
/// `worker_context` is omitted from serialized output when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationPacket {
budget: ExplorationBudget,
relationship_map: Vec<ExplorationRelation>,
source_windows: Vec<ExplorationSourceWindow>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
worker_context: Vec<ExplorationWorkerContext>,
no_reread_guidance: String,
}
impl TraversalGraphBuild {
    /// Registers `node` under its handle. If a node with the same handle is
    /// already present, the existing one is kept and `node` is dropped.
    fn add_node(&mut self, node: TraversalNode) {
        let handle = node.handle.clone();
        self.nodes.entry(handle).or_insert(node);
    }

    /// Adds a directed edge unless it is a self-loop, references an unknown
    /// node, or duplicates an existing `(from, to, relation)` triple.
    ///
    /// For duplicates the first edge's `label` and `weight` are kept.
    fn add_edge(
        &mut self,
        from: &str,
        to: &str,
        relation: &str,
        label: Option<String>,
        weight: usize,
    ) {
        let is_self_loop = from == to;
        let endpoints_known = self.nodes.contains_key(from) && self.nodes.contains_key(to);
        if is_self_loop || !endpoints_known {
            return;
        }
        let first_occurrence =
            self.edge_keys
                .insert((from.to_owned(), to.to_owned(), relation.to_owned()));
        if !first_occurrence {
            return;
        }
        self.edges.push(TraversalEdge {
            from: from.to_owned(),
            to: to.to_owned(),
            relation: relation.to_owned(),
            label,
            weight,
        });
    }
}
/// Returns the graph database path under `root`.
///
/// Without a scope this is `<root>/.tsift/graph.db`; with a scope it is
/// `<root>/.tsift/indexes/<scope>/graph.db`.
pub(crate) fn graph_substrate_db_path(root: &Path, scope: Option<&str>) -> PathBuf {
    let Some(scope) = scope else {
        return root.join(".tsift/graph.db");
    };
    root.join(".tsift/indexes").join(scope).join("graph.db")
}
/// Builds the id of the projection meta node for `scope`; a missing scope is
/// rendered as `root`.
fn graph_projection_meta_id(scope: Option<&str>) -> String {
    let scope_label = match scope {
        Some(name) => name,
        None => "root",
    };
    format!("projection:tsift-traversal:{}", scope_label)
}
/// Hashes `value` by serializing it to JSON and taking the BLAKE3 digest of
/// the bytes, returned as a hex string.
///
/// # Errors
/// Fails when `value` cannot be serialized to JSON.
pub(crate) fn content_hash<T: Serialize>(value: &T) -> Result<String> {
    let encoded = serde_json::to_vec(value)?;
    let digest = blake3::hash(&encoded);
    Ok(digest.to_hex().to_string())
}
/// Stamps `node` with a content-hash freshness marker.
///
/// The hash is taken over a copy whose `freshness` is cleared, so the result
/// does not depend on any freshness the node carried before.
///
/// # Errors
/// Propagates serialization errors from `content_hash`.
fn node_with_content_freshness(mut node: SubstrateGraphNode) -> Result<SubstrateGraphNode> {
    let digest = {
        let mut unstamped = node.clone();
        unstamped.freshness = None;
        content_hash(&unstamped)?
    };
    node.freshness = Some(GraphFreshness::content_hash(digest));
    Ok(node)
}
/// Stamps `edge` with a content-hash freshness marker.
///
/// The hash is taken over a copy whose `freshness` is cleared, so the result
/// does not depend on any freshness the edge carried before.
///
/// # Errors
/// Propagates serialization errors from `content_hash`.
fn edge_with_content_freshness(mut edge: SubstrateGraphEdge) -> Result<SubstrateGraphEdge> {
    let digest = {
        let mut unstamped = edge.clone();
        unstamped.freshness = None;
        content_hash(&unstamped)?
    };
    edge.freshness = Some(GraphFreshness::content_hash(digest));
    Ok(edge)
}
/// Number of components in a semantic embedding vector.
const SEMANTIC_EMBEDDING_DIM: usize = 32;
/// Model name recorded on nodes whose embedding was produced by `semantic_embedding`.
const SEMANTIC_EMBEDDING_MODEL: &str = "tsift-local-hash-v1";
/// Per-table row limit; the memory projection reads up to three times this many events.
const CLAUDE_MEM_GRAPH_LIMIT_PER_TABLE: usize = 200;
/// Returns the lowercase name of `kind` as used for the `--kind` argument of
/// the `tsift semantic` command.
fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
    match kind {
        SemanticRelatedKind::All => "all",
        SemanticRelatedKind::Entity => "entity",
        SemanticRelatedKind::Concept => "concept",
    }
}
/// Builds the `tsift semantic` command line that looks up items related to
/// `query` under `root`, restricted to `kind` and limited to 10 results.
///
/// The query and the (lossily converted) root path are shell-quoted.
fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
    let quoted_query = shell_quote(query);
    let root_text = root.to_string_lossy();
    let quoted_root = shell_quote(root_text.as_ref());
    let kind_name = semantic_related_kind_name(kind);
    format!(
        "tsift semantic {} --path {} --kind {} --limit 10",
        quoted_query, quoted_root, kind_name
    )
}
/// Computes the local hash-based embedding of `input`.
///
/// Each token from `traversal_tokens` (or, when there are none, the trimmed
/// lowercased input itself if non-empty) is hashed with BLAKE3. The first
/// digest byte picks one of `SEMANTIC_EMBEDDING_DIM` buckets and the lowest
/// bit of the second byte picks +1 or -1. The vector is then scaled to unit
/// length unless it is all zeros.
fn semantic_embedding(input: &str) -> Vec<f64> {
    let mut tokens = traversal_tokens(input);
    if tokens.is_empty() {
        let fallback = input.trim().to_ascii_lowercase();
        if !fallback.is_empty() {
            tokens.insert(fallback);
        }
    }
    let mut vector = vec![0.0_f64; SEMANTIC_EMBEDDING_DIM];
    for token in tokens {
        let digest = blake3::hash(token.as_bytes());
        let bytes = digest.as_bytes();
        let bucket = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
        let contribution = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
        vector[bucket] += contribution;
    }
    let norm = vector.iter().map(|value| value * value).sum::<f64>().sqrt();
    if norm > 0.0 {
        vector.iter_mut().for_each(|value| *value /= norm);
    }
    vector
}
/// Serializes the embedding of `input` as comma-separated components, each
/// formatted with six decimal places.
fn semantic_embedding_property(input: &str) -> String {
    let components: Vec<String> = semantic_embedding(input)
        .into_iter()
        .map(|value| format!("{value:.6}"))
        .collect();
    components.join(",")
}
/// Parses a comma-separated embedding property back into a vector.
///
/// Returns `None` when any component fails to parse as `f64` or when the
/// number of components is not `SEMANTIC_EMBEDDING_DIM`.
fn parse_semantic_embedding_property(value: &str) -> Option<Vec<f64>> {
    let mut components = Vec::new();
    for piece in value.split(',') {
        components.push(piece.trim().parse::<f64>().ok()?);
    }
    if components.len() == SEMANTIC_EMBEDDING_DIM {
        Some(components)
    } else {
        None
    }
}
/// Returns the cosine similarity of two vectors, in `[-1.0, 1.0]`.
///
/// The dot product is divided by both magnitudes, so the result is a true
/// cosine even when the inputs are not exactly unit length (for example
/// embeddings that were rounded when stored as text). For unit-length inputs
/// this equals the plain dot product up to floating-point error.
///
/// Returns `0.0` when the lengths differ, when either vector has zero
/// magnitude (including empty input), or when the computation is not finite.
fn semantic_cosine(left: &[f64], right: &[f64]) -> f64 {
    if left.len() != right.len() {
        return 0.0;
    }
    let (dot, left_sq, right_sq) = left.iter().zip(right.iter()).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(dot, left_sq, right_sq), (l, r)| (dot + l * r, left_sq + l * l, right_sq + r * r),
    );
    // Take the roots separately so the product of squared norms cannot overflow first.
    let magnitude = left_sq.sqrt() * right_sq.sqrt();
    if !magnitude.is_finite() || magnitude <= 0.0 {
        return 0.0;
    }
    let cosine = dot / magnitude;
    if cosine.is_finite() {
        // Rounding can push the ratio marginally outside the valid range.
        cosine.clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
/// Derives the stable handle of a semantic entity from its kind and name.
///
/// Both parts are trimmed and ASCII-lowercased first, so handles are
/// insensitive to surrounding whitespace and ASCII case.
fn semantic_entity_handle(name: &str, kind: &str) -> String {
    let kind_key = kind.trim().to_ascii_lowercase();
    let name_key = name.trim().to_ascii_lowercase();
    let seed = format!("entity:{}:{}", kind_key, name_key);
    stable_handle("gent", &seed)
}
/// Derives the stable handle of a semantic concept from its label.
///
/// The label is trimmed and ASCII-lowercased first, so handles are insensitive
/// to surrounding whitespace and ASCII case.
fn semantic_concept_handle(label: &str) -> String {
    let label_key = label.trim().to_ascii_lowercase();
    let seed = format!("concept:{}", label_key);
    stable_handle("gcon", &seed)
}
fn summary_source_handles(
summary: &summarize::Summary,
file_node_by_path: &BTreeMap<String, String>,
symbol_node_by_file_label: &BTreeMap<(String, String), String>,
) -> Vec<String> {
let mut handles = Vec::new();
if let Some(handle) = file_node_by_path.get(&summary.file_path) {
handles.push(handle.clone());
}
if let Some(handle) =
symbol_node_by_file_label.get(&(summary.file_path.clone(), summary.symbol_name.clone()))
&& !handles.iter().any(|existing| existing == handle)
{
handles.push(handle.clone());
}
handles
}
/// Builds the `semantic_entity` graph node for an entity mentioned in a
/// cached summary.
///
/// The node id is the entity's stable handle. The node carries the entity
/// kind and description, the summary's file and symbol as its source, a local
/// embedding of `"{name} {kind} {description}"`, and an `expand` command that
/// searches for related entities.
fn semantic_entity_node(
    root: &Path,
    summary: &summarize::Summary,
    name: &str,
    kind: &str,
    description: &str,
    provenance: &GraphProvenance,
) -> SubstrateGraphNode {
    let handle = semantic_entity_handle(name, kind);
    let detail = match description.trim() {
        "" => format!("{kind} entity from cached summaries"),
        _ => format!("{kind}: {description}"),
    };
    let embedding = semantic_embedding_property(&format!("{name} {kind} {description}"));
    let expand = semantic_related_command(root, name, SemanticRelatedKind::Entity);
    SubstrateGraphNode::new(handle.clone(), "semantic_entity", name.to_string())
        .with_property("handle", handle)
        .with_property("ref_id", name.to_string())
        .with_property("detail", detail)
        .with_property("entity_kind", kind.to_string())
        .with_property("description", description.to_string())
        .with_property("source_file", summary.file_path.clone())
        .with_property("source_symbol", summary.symbol_name.clone())
        .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
        .with_property("embedding", embedding)
        .with_property("expand", expand)
        .with_provenance(provenance.clone())
}
/// Builds the `semantic_concept` graph node for a concept label taken from a
/// cached summary.
///
/// The node id is the concept's stable handle. The node carries the summary's
/// file and symbol as its source, a local embedding of the label, and an
/// `expand` command that searches for related concepts.
fn semantic_concept_node(
    root: &Path,
    summary: &summarize::Summary,
    label: &str,
    provenance: &GraphProvenance,
) -> SubstrateGraphNode {
    let handle = semantic_concept_handle(label);
    let embedding = semantic_embedding_property(label);
    let expand = semantic_related_command(root, label, SemanticRelatedKind::Concept);
    SubstrateGraphNode::new(handle.clone(), "semantic_concept", label.to_string())
        .with_property("handle", handle)
        .with_property("ref_id", label.to_string())
        .with_property("detail", "concept label from cached summaries".to_string())
        .with_property("source_file", summary.file_path.clone())
        .with_property("source_symbol", summary.symbol_name.clone())
        .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
        .with_property("embedding", embedding)
        .with_property("expand", expand)
        .with_provenance(provenance.clone())
}
/// Inserts `edge` into `edge_map` keyed by `(from_id, to_id, kind)`.
///
/// When an edge with the same key is already present it is kept and the new
/// one is dropped.
fn insert_semantic_edge(
    edge_map: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
    edge: SubstrateGraphEdge,
) {
    let key = (edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone());
    if let std::collections::btree_map::Entry::Vacant(slot) = edge_map.entry(key) {
        slot.insert(edge);
    }
}
/// Returns the key used to derive graph handles for a memory event.
///
/// Imported events with both an origin and an imported id use
/// `<imported_from>:<imported_id>`; all others use the event's stable id.
fn memory_event_key(event: &MemoryEvent) -> String {
    let origin = event.imported_from.as_deref();
    let foreign_id = event.imported_id.as_deref();
    if let (Some(imported_from), Some(imported_id)) = (origin, foreign_id) {
        return format!("{imported_from}:{imported_id}");
    }
    event.stable_id()
}
/// Builds a short display label (compacted to 80) for a memory event.
///
/// The label is based on the first non-blank line of the event text, falling
/// back to the event kind. Imported observations, session summaries and user
/// prompts get a `<type>: ` prefix; observations take the prefix from the
/// `observation_type` metadata entry, defaulting to `observation`.
fn memory_event_label(event: &MemoryEvent) -> String {
    let first_line = event
        .text
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty())
        .unwrap_or(event.kind.as_str());
    let prefixed = match event.kind.as_str() {
        "imported_observation" => {
            let observation_type = event
                .metadata
                .get("observation_type")
                .map(String::as_str)
                .unwrap_or("observation");
            format!("{observation_type}: {first_line}")
        }
        "imported_session_summary" => format!("summary: {first_line}"),
        "imported_user_prompt" => format!("prompt: {first_line}"),
        // Any other kind is labelled by the bare first line.
        _ => return truncate_for_compact(first_line, 80),
    };
    truncate_for_compact(&prefixed, 80)
}
/// Appends graph rows derived from the tsift memory database to `nodes` and
/// `edges`.
///
/// Reads up to `CLAUDE_MEM_GRAPH_LIMIT_PER_TABLE * 3` events from the default
/// memory database under `root`. Nothing is appended (and `Ok(())` is
/// returned) when the database file is missing, cannot be read, or has no
/// events.
///
/// Per event this adds a `memory_event` node, a `source_handle` node and a
/// `semantic_concept` node, plus one `memory_session` node per distinct
/// session id. Edges are deduplicated by `(from, to, kind)` and appended after
/// all nodes. Every appended row is stamped with a content-hash freshness.
///
/// # Errors
/// Propagates errors from content hashing of the appended rows.
fn append_tsift_memory_graph_projection_rows(
root: &Path,
nodes: &mut Vec<SubstrateGraphNode>,
edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<()> {
let memory_db = default_memory_db_path(root);
if !memory_db.exists() {
return Ok(());
}
// Best effort: an unreadable memory database must not fail the projection.
let events = match read_memory_events(&memory_db, CLAUDE_MEM_GRAPH_LIMIT_PER_TABLE * 3) {
Ok(events) => events,
Err(_) => return Ok(()),
};
if events.is_empty() {
return Ok(());
}
let mut seen_sessions = BTreeSet::new();
let mut edge_map = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
for event in &events {
// The event node is keyed by the stable id; the source and semantic handles
// are derived from the event key (import origin + id when available).
let event_id = event.stable_id();
let event_key = memory_event_key(event);
let source_handle = stable_handle("tmemsrc", &event_key);
let semantic_handle = stable_handle("tmemsem", &event_key);
let provenance = GraphProvenance::new("tsift-memory", &event.source_ref);
let imported_from = event.imported_from.as_deref().unwrap_or("native");
if let Some(session_id) = &event.session_id {
let session_handle =
format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
// Emit the session node only the first time this session id is seen.
if seen_sessions.insert(session_id.clone()) {
let session_node = SubstrateGraphNode::new(
session_handle.clone(),
"memory_session",
truncate_for_compact(session_id, 80),
)
.with_property("handle", session_handle.clone())
.with_property("ref_id", session_id.clone())
.with_property("session_id", session_id.clone())
.with_property("provider", "tsift-memory")
.with_property(
"expand",
format!(
"tsift memory status {} --json",
shell_quote(root.to_string_lossy().as_ref())
),
)
.with_provenance(provenance.clone());
nodes.push(node_with_content_freshness(session_node)?);
}
// Link the session to both the event node and its source handle.
insert_semantic_edge(
&mut edge_map,
SubstrateGraphEdge::new(
session_handle.clone(),
event_id.clone(),
"records_memory_event",
)
.with_property("label", "tsift-memory session event")
.with_provenance(provenance.clone()),
);
insert_semantic_edge(
&mut edge_map,
SubstrateGraphEdge::new(
session_handle,
source_handle.clone(),
"records_memory_source",
)
.with_property("label", "tsift-memory session source")
.with_provenance(provenance.clone()),
);
}
let label = memory_event_label(event);
// Event node: labelled with the raw event kind.
let mut event_node =
SubstrateGraphNode::new(event_id.clone(), "memory_event", event.kind.as_str())
.with_property("handle", event_id.clone())
.with_property("ref_id", event.source_ref.clone())
.with_property("source_ref", event.source_ref.clone())
.with_property("provider", "tsift-memory")
.with_property("memory_kind", event.kind.as_str())
.with_property("imported_from", imported_from)
.with_property("text_preview", truncate_for_compact(&event.text, 240))
.with_property("token_estimate", event.token_estimate.to_string())
.with_property(
"expand",
format!(
"tsift memory status {} --json",
shell_quote(root.to_string_lossy().as_ref())
),
)
.with_provenance(provenance.clone());
if let Some(session_id) = &event.session_id {
event_node = event_node.with_property("session_id", session_id.clone());
}
if let Some(observed_at_unix) = event.observed_at_unix {
event_node = event_node.with_property("observed_at_unix", observed_at_unix.to_string());
}
if let Some(imported_id) = &event.imported_id {
event_node = event_node.with_property("imported_id", imported_id.clone());
}
nodes.push(node_with_content_freshness(event_node)?);
// Source-handle node: same properties as the event node, but labelled with
// the human-readable event label.
let mut source_node =
SubstrateGraphNode::new(source_handle.clone(), "source_handle", label.clone())
.with_property("handle", source_handle.clone())
.with_property("ref_id", event.source_ref.clone())
.with_property("source_ref", event.source_ref.clone())
.with_property("provider", "tsift-memory")
.with_property("memory_kind", event.kind.as_str())
.with_property("imported_from", imported_from)
.with_property("text_preview", truncate_for_compact(&event.text, 240))
.with_property("token_estimate", event.token_estimate.to_string())
.with_property(
"expand",
format!(
"tsift memory status {} --json",
shell_quote(root.to_string_lossy().as_ref())
),
)
.with_provenance(provenance.clone());
if let Some(session_id) = &event.session_id {
source_node = source_node.with_property("session_id", session_id.clone());
}
if let Some(observed_at_unix) = event.observed_at_unix {
source_node =
source_node.with_property("observed_at_unix", observed_at_unix.to_string());
}
if let Some(imported_id) = &event.imported_id {
source_node = source_node.with_property("imported_id", imported_id.clone());
}
nodes.push(node_with_content_freshness(source_node)?);
insert_semantic_edge(
&mut edge_map,
SubstrateGraphEdge::new(event_id.clone(), source_handle.clone(), "projects_source")
.with_property("label", "tsift-memory source projection")
.with_provenance(provenance.clone()),
);
// Semantic node: the embedding covers the label followed by the full event text.
let semantic_text = format!("{} {}", label, event.text);
let semantic_node =
SubstrateGraphNode::new(semantic_handle.clone(), "semantic_concept", label.clone())
.with_property("handle", semantic_handle.clone())
.with_property("ref_id", event.source_ref.clone())
.with_property("detail", "semantic row from tsift-memory")
.with_property("source_ref", event.source_ref.clone())
.with_property("provider", "tsift-memory")
.with_property("memory_kind", event.kind.as_str())
.with_property("imported_from", imported_from)
.with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
.with_property("embedding", semantic_embedding_property(&semantic_text))
.with_property(
"expand",
semantic_related_command(root, &label, SemanticRelatedKind::Concept),
)
.with_provenance(provenance.clone());
nodes.push(node_with_content_freshness(semantic_node)?);
insert_semantic_edge(
&mut edge_map,
SubstrateGraphEdge::new(
source_handle.clone(),
semantic_handle.clone(),
"mentions_concept",
)
.with_property("label", "tsift-memory semantic source")
.with_provenance(provenance.clone()),
);
}
// Edges are emitted last, in key order of the dedupe map.
for edge in edge_map.into_values() {
edges.push(edge_with_content_freshness(edge)?);
}
Ok(())
}
/// Appends semantic entity/concept rows derived from cached summaries to
/// `nodes` and `edges`.
///
/// Reads every summary from `<root>/.tsift/summaries.db`; nothing is appended
/// when that file is missing or holds no summaries. For each summary it
/// creates entity and concept nodes and links them:
/// - `mentions_entity` / `mentions_concept` from the summary's file and symbol
///   nodes in `graph`;
/// - `tagged_concept` from each entity to each concept of the same summary;
/// - `related_concept` between concepts of the same summary;
/// - `semantic_relation` for each declared relationship.
///
/// Nodes are deduplicated by id and edges by `(from, to, kind)`, first one
/// wins. All appended rows are stamped with a content-hash freshness.
///
/// # Errors
/// Propagates errors from opening or reading the summaries database and from
/// content hashing.
fn append_summary_semantic_projection_rows(
root: &Path,
graph: &TraversalGraphBuild,
provenance: &GraphProvenance,
nodes: &mut Vec<SubstrateGraphNode>,
edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<()> {
let summaries_db = root.join(".tsift/summaries.db");
if !summaries_db.exists() {
return Ok(());
}
let summary_db = summarize::SummaryDb::open_read_only_resilient(&summaries_db)?;
let summaries = summary_db.all()?;
if summaries.is_empty() {
return Ok(());
}
// Lookup tables from the traversal graph: file path -> file node handle, and
// (file path, symbol label) -> symbol node handle.
let file_node_by_path = graph
.nodes
.values()
.filter(|node| node.kind == "file")
.filter_map(|node| {
node.path
.as_ref()
.map(|path| (path.clone(), node.handle.clone()))
})
.collect::<BTreeMap<_, _>>();
let symbol_node_by_file_label = graph
.nodes
.values()
.filter(|node| node.kind == "symbol")
.filter_map(|node| {
Some((
(node.path.clone()?, node.label.clone()),
node.handle.clone(),
))
})
.collect::<BTreeMap<_, _>>();
let mut semantic_nodes = BTreeMap::<String, SubstrateGraphNode>::new();
let mut semantic_edges = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
for summary in &summaries {
let source_handles =
summary_source_handles(summary, &file_node_by_path, &symbol_node_by_file_label);
// Lowercased entity name -> node id, scoped to this summary; used to resolve
// relationship endpoints below.
let mut entity_ids_by_name = BTreeMap::<String, String>::new();
if let Some(entities) = &summary.entities {
for entity in entities {
let node = semantic_entity_node(
root,
summary,
&entity.name,
&entity.kind,
&entity.description,
provenance,
);
let entity_id = node.id.clone();
// NOTE(review): this key is lowercased but not trimmed, while the entity
// handle itself is derived from the trimmed name — confirm that names never
// carry surrounding whitespace.
entity_ids_by_name.insert(entity.name.to_ascii_lowercase(), entity_id.clone());
semantic_nodes.entry(entity_id.clone()).or_insert(node);
for source_handle in &source_handles {
insert_semantic_edge(
&mut semantic_edges,
SubstrateGraphEdge::new(
source_handle.clone(),
entity_id.clone(),
"mentions_entity",
)
.with_property("label", format!("summary entity: {}", entity.name))
.with_property("source_file", summary.file_path.clone())
.with_provenance(provenance.clone()),
);
}
}
}
let mut concept_ids = Vec::new();
if let Some(labels) = &summary.concept_labels {
// Blank labels are skipped; the rest are used trimmed.
for label in labels
.iter()
.map(|label| label.trim())
.filter(|label| !label.is_empty())
{
let node = semantic_concept_node(root, summary, label, provenance);
let concept_id = node.id.clone();
semantic_nodes.entry(concept_id.clone()).or_insert(node);
concept_ids.push(concept_id.clone());
for source_handle in &source_handles {
insert_semantic_edge(
&mut semantic_edges,
SubstrateGraphEdge::new(
source_handle.clone(),
concept_id.clone(),
"mentions_concept",
)
.with_property("label", format!("summary concept: {label}"))
.with_property("source_file", summary.file_path.clone())
.with_provenance(provenance.clone()),
);
}
}
}
// Tag every entity of this summary with every concept of this summary.
for entity_id in entity_ids_by_name.values() {
for concept_id in &concept_ids {
insert_semantic_edge(
&mut semantic_edges,
SubstrateGraphEdge::new(
entity_id.clone(),
concept_id.clone(),
"tagged_concept",
)
.with_property("label", "entity concept label".to_string())
.with_property("source_file", summary.file_path.clone())
.with_provenance(provenance.clone()),
);
}
}
// Pairwise co-occurrence edges between this summary's concepts.
// NOTE(review): `concept_ids` is not deduplicated, so a label that repeats
// within one summary would yield an edge from a concept to itself — confirm
// whether that can occur.
for idx in 0..concept_ids.len() {
for next_idx in (idx + 1)..concept_ids.len() {
insert_semantic_edge(
&mut semantic_edges,
SubstrateGraphEdge::new(
concept_ids[idx].clone(),
concept_ids[next_idx].clone(),
"related_concept",
)
.with_property("label", format!("co-occurs in {}", summary.symbol_name))
.with_property("source_file", summary.file_path.clone())
.with_provenance(provenance.clone()),
);
}
}
if let Some(relationships) = &summary.relationships {
for relationship in relationships {
// Endpoints not declared as entities in this summary get a placeholder
// entity node of kind "unknown" with an empty description.
let from_id = entity_ids_by_name
.get(&relationship.from.to_ascii_lowercase())
.cloned()
.unwrap_or_else(|| {
let node = semantic_entity_node(
root,
summary,
&relationship.from,
"unknown",
"",
provenance,
);
let id = node.id.clone();
semantic_nodes.entry(id.clone()).or_insert(node);
id
});
let to_id = entity_ids_by_name
.get(&relationship.to.to_ascii_lowercase())
.cloned()
.unwrap_or_else(|| {
let node = semantic_entity_node(
root,
summary,
&relationship.to,
"unknown",
"",
provenance,
);
let id = node.id.clone();
semantic_nodes.entry(id.clone()).or_insert(node);
id
});
insert_semantic_edge(
&mut semantic_edges,
SubstrateGraphEdge::new(from_id, to_id, "semantic_relation")
.with_property("relationship_kind", relationship.kind.clone())
.with_property("label", relationship.kind.clone())
.with_property("source_file", summary.file_path.clone())
.with_property("source_symbol", summary.symbol_name.clone())
.with_provenance(provenance.clone()),
);
}
}
}
// Emit in key order of the dedupe maps: nodes first, then edges.
for node in semantic_nodes.into_values() {
nodes.push(node_with_content_freshness(node)?);
}
for edge in semantic_edges.into_values() {
edges.push(edge_with_content_freshness(edge)?);
}
Ok(())
}
/// Hashes a whole projection: the projection version plus all nodes and edges,
/// in the order given.
///
/// # Errors
/// Propagates serialization errors from `content_hash`.
fn projection_content_hash(
    nodes: &[SubstrateGraphNode],
    edges: &[SubstrateGraphEdge],
) -> Result<String> {
    // Field names and order are part of the hashed JSON; keep them stable.
    #[derive(Serialize)]
    struct HashedProjection<'a> {
        version: &'static str,
        nodes: &'a [SubstrateGraphNode],
        edges: &'a [SubstrateGraphEdge],
    }
    let hashed = HashedProjection {
        version: GRAPH_PROJECTION_VERSION,
        nodes,
        edges,
    };
    content_hash(&hashed)
}
/// Reads the content hash stored on a projection's meta node.
///
/// Returns `None` when the projection has no node of kind
/// `GRAPH_PROJECTION_META_KIND`, or when the first such node has no
/// `content_hash` property.
pub(crate) fn graph_projection_content_hash(projection: &GraphProjection) -> Option<String> {
    let meta = projection
        .nodes
        .iter()
        .find(|node| node.kind == GRAPH_PROJECTION_META_KIND)?;
    meta.properties.get("content_hash").cloned()
}
/// Converts a built traversal graph into a `GraphProjection`.
///
/// Traversal nodes and edges are projected first, each stamped with a
/// content-hash freshness. Rows from the traversal context, the cached
/// summaries and the tsift memory database are appended next, in that order.
/// Finally a meta node is added that carries the projection version and the
/// content hash of every row gathered before it.
///
/// # Errors
/// Propagates hashing errors and errors from the three append steps.
fn traversal_projection_from_graph(
root: &Path,
scope: Option<&str>,
graph: &TraversalGraphBuild,
) -> Result<GraphProjection> {
let provenance = GraphProvenance::new(
"tsift.traverse",
format!("{}:{}", root.display(), scope.unwrap_or("root")),
);
// One extra slot for the meta node pushed at the end.
let mut nodes = Vec::with_capacity(graph.nodes.len() + 1);
for node in graph.nodes.values() {
let mut projected =
SubstrateGraphNode::new(node.handle.clone(), node.kind.clone(), node.label.clone())
.with_property("handle", node.handle.clone())
.with_property("expand", node.expand.clone())
.with_provenance(provenance.clone());
if let Some(ref_id) = &node.ref_id {
projected = projected.with_property("ref_id", ref_id.clone());
}
if let Some(path) = &node.path {
projected = projected.with_property("path", path.clone());
}
if let Some(line) = node.line {
projected = projected.with_property("line", line.to_string());
}
if let Some(detail) = &node.detail {
projected = projected.with_property("detail", detail.clone());
}
// Free-form properties are applied last.
// NOTE(review): presumably a later `with_property` overrides an earlier one
// with the same key — confirm.
for (key, value) in &node.properties {
projected = projected.with_property(key.clone(), value.clone());
}
nodes.push(node_with_content_freshness(projected)?);
}
let mut edges = Vec::with_capacity(graph.edges.len());
for edge in &graph.edges {
let mut projected =
SubstrateGraphEdge::new(edge.from.clone(), edge.to.clone(), edge.relation.clone())
.with_property("weight", edge.weight.to_string())
.with_provenance(provenance.clone());
if let Some(label) = &edge.label {
projected = projected.with_property("label", label.clone());
}
edges.push(edge_with_content_freshness(projected)?);
}
// Order matters: the projection hash below covers rows in this sequence.
append_traversal_context_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
append_summary_semantic_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
append_tsift_memory_graph_projection_rows(root, &mut nodes, &mut edges)?;
// The hash is computed before the meta node exists, so it never covers itself.
let projection_hash = projection_content_hash(&nodes, &edges)?;
let meta = SubstrateGraphNode::new(
graph_projection_meta_id(scope),
GRAPH_PROJECTION_META_KIND,
"tsift traversal projection",
)
.with_property("projection_version", GRAPH_PROJECTION_VERSION)
.with_property("content_hash", projection_hash.clone())
.with_property("root", root.to_string_lossy().to_string())
.with_property("scope", scope.unwrap_or("root"))
// These counts cover the traversal graph only, not the rows appended above.
.with_property("node_count", graph.nodes.len().to_string())
.with_property("edge_count", graph.edges.len().to_string())
.with_provenance(provenance)
.with_freshness(GraphFreshness::content_hash(projection_hash));
nodes.push(meta);
Ok(GraphProjection { nodes, edges })
}
/// Returns the `source_handle` projection node handle for `node`, creating the
/// node and its edges the first time a given source window is seen.
///
/// * `source_handle_by_node` memoizes the answer per traversal node handle.
/// * `seen_windows` memoizes the handle per `(file, start, end)` window so two
///   traversal nodes that share a window also share one projected node.
///
/// Returns `Ok(None)` when `exploration_source_window_for_node` yields no
/// window for `node`.
///
/// # Errors
/// Propagates failures from the node/edge content-freshness helpers.
#[allow(clippy::too_many_arguments)]
fn ensure_traversal_source_handle(
    root: &Path,
    provenance: &GraphProvenance,
    file_node_by_path: &BTreeMap<String, String>,
    node: &TraversalNode,
    budget: &ExplorationBudget,
    source_handle_by_node: &mut BTreeMap<String, String>,
    seen_windows: &mut BTreeMap<(String, usize, usize), String>,
    nodes: &mut Vec<SubstrateGraphNode>,
    edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<Option<String>> {
    if let Some(cached) = source_handle_by_node.get(&node.handle) {
        return Ok(Some(cached.clone()));
    }
    let Some(window) = exploration_source_window_for_node(root, node, budget) else {
        return Ok(None);
    };

    let window_key = (window.file.clone(), window.start, window.end);
    let resolved = match seen_windows.get(&window_key).cloned() {
        Some(existing) => existing,
        None => {
            let window_node = SubstrateGraphNode::new(
                window.handle.clone(),
                "source_handle",
                format!("{}:{}-{}", window.file, window.start, window.end),
            )
            .with_property("handle", window.handle.clone())
            .with_property("file", window.file.clone())
            .with_property("start", window.start.to_string())
            .with_property("end", window.end.to_string())
            .with_property("reason", window.reason.clone())
            .with_property("expand", window.expand.clone())
            .with_provenance(provenance.clone());
            nodes.push(node_with_content_freshness(window_node)?);

            // Link the window to its file node when the file is in the graph.
            if let Some(file_handle) = file_node_by_path.get(&window.file) {
                let to_file = SubstrateGraphEdge::new(
                    window.handle.clone(),
                    file_handle.clone(),
                    "expands_source",
                )
                .with_property("label", window.reason.clone())
                .with_provenance(provenance.clone());
                edges.push(edge_with_content_freshness(to_file)?);
            }
            // File nodes get no `anchors_source` edge; only non-file nodes do.
            if node.kind != "file" {
                let to_anchor = SubstrateGraphEdge::new(
                    window.handle.clone(),
                    node.handle.clone(),
                    "anchors_source",
                )
                .with_property("label", window.reason.clone())
                .with_provenance(provenance.clone());
                edges.push(edge_with_content_freshness(to_anchor)?);
            }

            seen_windows.insert(window_key, window.handle.clone());
            window.handle
        }
    };

    source_handle_by_node.insert(node.handle.clone(), resolved.clone());
    Ok(Some(resolved))
}
/// Appends the handles of code-like nodes that `backlog` mentions to
/// `target_node_handles`, never growing the list beyond `max_handles`.
///
/// Only outgoing `mentions` edges of `backlog` are followed, in the order the
/// caller stored them in `edges_by_from`. A target is accepted when:
///
/// * it exists in `node_by_handle`,
/// * its kind is `file`, `symbol`, `route`, `cargo_package` or
///   `cargo_workspace`,
/// * it is not the backlog's own Markdown file (same path, `.md` suffix), and
/// * its handle has not been recorded in `seen_target_nodes` before.
///
/// The cap is checked before each push, so a list that already holds
/// `max_handles` entries (or a cap of zero) is left untouched.
fn push_traversal_backlog_target_handles<'a>(
    backlog: &TraversalNode,
    edges_by_from: &BTreeMap<&'a str, Vec<&'a TraversalEdge>>,
    node_by_handle: &BTreeMap<&'a str, &'a TraversalNode>,
    max_handles: usize,
    seen_target_nodes: &mut BTreeSet<String>,
    target_node_handles: &mut Vec<String>,
) {
    let mentions = edges_by_from
        .get(backlog.handle.as_str())
        .into_iter()
        .flatten()
        .filter(|edge| edge.relation == "mentions");

    for edge in mentions {
        // Enforce the cap up front so the list can never exceed it.
        if target_node_handles.len() >= max_handles {
            break;
        }
        let Some(target_node) = node_by_handle.get(edge.to.as_str()) else {
            continue;
        };
        if !matches!(
            target_node.kind.as_str(),
            "file" | "symbol" | "route" | "cargo_package" | "cargo_workspace"
        ) {
            continue;
        }
        // A Markdown backlog mentioning its own file is not a code target.
        let mentions_own_markdown = target_node
            .path
            .as_deref()
            .zip(backlog.path.as_deref())
            .is_some_and(|(target_path, backlog_path)| {
                target_path == backlog_path && target_path.ends_with(".md")
            });
        if mentions_own_markdown {
            continue;
        }
        if seen_target_nodes.insert(target_node.handle.clone()) {
            target_node_handles.push(target_node.handle.clone());
        }
    }
}
/// Appends `worker_context` nodes (and the `source_handle` nodes they scope)
/// for every `backlog`, `job_packet` and `worker_result` node in `graph`.
///
/// For each such node the function collects target nodes, resolves each target
/// to a source window via `ensure_traversal_source_handle`, and, when at least
/// one window was resolved, emits:
///
/// * one `worker_context` node,
/// * one `requests_context` edge from the originating node to it, and
/// * one `scopes_source` edge from it to every resolved source handle.
///
/// # Errors
/// Propagates failures from `ensure_traversal_source_handle` and the
/// node/edge content-freshness helpers.
fn append_traversal_context_projection_rows(
root: &Path,
graph: &TraversalGraphBuild,
provenance: &GraphProvenance,
nodes: &mut Vec<SubstrateGraphNode>,
edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<()> {
let budget = exploration_budget_for_counts(graph.nodes.len(), graph.edges.len());
// Lookup: file path -> handle of the `file` node for that path.
let file_node_by_path = graph
.nodes
.values()
.filter(|node| node.kind == "file")
.filter_map(|node| {
node.path
.as_ref()
.map(|path| (path.clone(), node.handle.clone()))
})
.collect::<BTreeMap<_, _>>();
// Lookup: node handle -> node.
let node_by_handle = graph
.nodes
.values()
.map(|node| (node.handle.as_str(), node))
.collect::<BTreeMap<_, _>>();
// Adjacency: source handle -> outgoing edges.
let mut edges_by_from = BTreeMap::<&str, Vec<&TraversalEdge>>::new();
for edge in &graph.edges {
edges_by_from
.entry(edge.from.as_str())
.or_default()
.push(edge);
}
// Order each adjacency list by weight (heaviest first), then relation, then
// target handle, so the capped selections below are deterministic.
for rows in edges_by_from.values_mut() {
rows.sort_by(|left, right| {
right
.weight
.cmp(&left.weight)
.then(left.relation.cmp(&right.relation))
.then(left.to.cmp(&right.to))
});
}
// Memoization shared across all calls to `ensure_traversal_source_handle`.
let mut seen_windows = BTreeMap::<(String, usize, usize), String>::new();
let mut source_handle_by_node = BTreeMap::<String, String>::new();
// Number of contexts emitted so far that were backed by mentioned code
// targets (as opposed to the fallback handles); capped at
// `min(budget.relationship_limit, 8)`.
let mut code_context_count = 0usize;
let code_context_limit = budget.relationship_limit.min(8);
for node in graph.nodes.values() {
if !matches!(
node.kind.as_str(),
"backlog" | "job_packet" | "worker_result"
) {
continue;
}
let mut target_node_handles = Vec::new();
let mut fallback_target_handles = Vec::new();
let mut seen_target_nodes = BTreeSet::new();
if node.kind == "backlog" || node.kind == "worker_result" {
// Backlog / worker-result nodes: use their own `mentions` edges and
// fall back to the node itself.
push_traversal_backlog_target_handles(
node,
&edges_by_from,
&node_by_handle,
budget.max_source_windows,
&mut seen_target_nodes,
&mut target_node_handles,
);
fallback_target_handles.push(node.handle.clone());
} else {
// Job packets: follow `targets` edges and use the mentions of each
// targeted node; the targeted nodes themselves are the fallback.
for edge in edges_by_from
.get(node.handle.as_str())
.into_iter()
.flatten()
.filter(|edge| edge.relation == "targets")
{
let Some(backlog) = node_by_handle.get(edge.to.as_str()) else {
continue;
};
fallback_target_handles.push(backlog.handle.clone());
push_traversal_backlog_target_handles(
backlog,
&edges_by_from,
&node_by_handle,
budget.max_source_windows,
&mut seen_target_nodes,
&mut target_node_handles,
);
if target_node_handles.len() >= budget.max_source_windows {
break;
}
}
// A job packet with no resolvable `targets` edge produces no context.
if fallback_target_handles.is_empty() {
continue;
}
}
// `code_context` records whether real mentioned targets were found; only
// those contexts count against `code_context_limit`. Fallback-only
// contexts are never skipped by the limit.
let code_context = !target_node_handles.is_empty();
if target_node_handles.is_empty() {
target_node_handles = dedupe_preserve_order(fallback_target_handles);
} else if code_context_count >= code_context_limit {
continue;
}
// Resolve each target to a (deduplicated) source handle, capped at
// `budget.max_source_windows`.
let mut worker_source_handles = Vec::new();
let mut seen_worker_handles = BTreeSet::new();
for target_handle in target_node_handles {
if worker_source_handles.len() >= budget.max_source_windows {
break;
}
let Some(target_node) = node_by_handle.get(target_handle.as_str()) else {
continue;
};
let Some(handle) = ensure_traversal_source_handle(
root,
provenance,
&file_node_by_path,
target_node,
&budget,
&mut source_handle_by_node,
&mut seen_windows,
nodes,
edges,
)?
else {
continue;
};
if seen_worker_handles.insert(handle.clone()) {
worker_source_handles.push(handle);
}
}
// No resolvable source window means there is nothing to scope.
if worker_source_handles.is_empty() {
continue;
}
// The context target is the node's path, or the root when it has none.
let target = node
.path
.clone()
.unwrap_or_else(|| root.to_string_lossy().to_string());
let summary = node.detail.clone().unwrap_or_else(|| node.label.clone());
// The handle is derived from target, originating handle and summary.
let handle = stable_handle("xwrk", &format!("{}:{}:{}", target, node.handle, summary));
let projected = SubstrateGraphNode::new(handle.clone(), "worker_context", summary.clone())
.with_property("handle", handle.clone())
.with_property("target", target.clone())
.with_property("summary", summary)
.with_property(
"source_handle_count",
worker_source_handles.len().to_string(),
)
.with_property(
"expand",
format!(
"tsift --envelope context-pack {} --budget normal",
shell_quote(&target)
),
)
.with_provenance(provenance.clone());
nodes.push(node_with_content_freshness(projected)?);
let request_edge =
SubstrateGraphEdge::new(node.handle.clone(), handle.clone(), "requests_context")
.with_property("label", "bounded worker context".to_string())
.with_provenance(provenance.clone());
edges.push(edge_with_content_freshness(request_edge)?);
for source_handle in &worker_source_handles {
let scope_edge =
SubstrateGraphEdge::new(handle.clone(), source_handle.clone(), "scopes_source")
.with_property("label", "bounded worker source window".to_string())
.with_provenance(provenance.clone());
edges.push(edge_with_content_freshness(scope_edge)?);
}
// Count only after the context was actually emitted.
if code_context {
code_context_count += 1;
}
}
Ok(())
}
/// Rebuilds a [`TraversalNode`] from a stored substrate graph node.
///
/// * `handle` comes from the `handle` property, falling back to the node id.
/// * `expand` comes from the `expand` property, falling back to
///   `traversal_expand_command(root, &handle)`.
/// * `ref_id`, `path` and `detail` are copied from the same-named properties.
/// * `line` is the `line` property parsed as `i64`; a value that does not
///   parse yields `None`.
///
/// The full property map is moved into the result unchanged.
fn traversal_node_from_graph_node(root: &Path, node: SubstrateGraphNode) -> TraversalNode {
    let props = &node.properties;

    let handle = match props.get("handle") {
        Some(stored) => stored.clone(),
        None => node.id.clone(),
    };
    let expand = match props.get("expand") {
        Some(stored) => stored.clone(),
        None => traversal_expand_command(root, &handle),
    };
    let ref_id = props.get("ref_id").cloned();
    let path = props.get("path").cloned();
    let line = props
        .get("line")
        .and_then(|raw| raw.parse::<i64>().ok());
    let detail = props.get("detail").cloned();

    TraversalNode {
        expand,
        handle,
        kind: node.kind,
        label: node.label,
        ref_id,
        path,
        line,
        detail,
        properties: node.properties,
    }
}
/// Loads a [`TraversalGraphBuild`] back out of a graph store.
///
/// Nodes of kind `GRAPH_PROJECTION_META_KIND` are skipped. Each stored edge is
/// re-added with its `label` property (if any) and its `weight` property
/// parsed as `usize`; a missing or unparsable weight becomes `1`.
///
/// # Errors
/// Propagates failures from `store.all_nodes()` and `store.all_edges()`.
fn traversal_graph_from_store(root: &Path, store: &impl GraphStore) -> Result<TraversalGraphBuild> {
    let mut graph = TraversalGraphBuild::default();

    let stored_nodes = store.all_nodes()?;
    for stored in stored_nodes
        .into_iter()
        .filter(|stored| stored.kind != GRAPH_PROJECTION_META_KIND)
    {
        graph.add_node(traversal_node_from_graph_node(root, stored));
    }

    for stored in store.all_edges()? {
        let label = stored.properties.get("label").cloned();
        let weight = match stored.properties.get("weight") {
            Some(raw) => raw.parse::<usize>().unwrap_or(1),
            None => 1,
        };
        graph.add_edge(&stored.from_id, &stored.to_id, &stored.kind, label, weight);
    }

    Ok(graph)
}
/// Reads every node and edge from `store` and converts them to Convex rows
/// via `GraphProjection::to_convex_rows`.
///
/// # Errors
/// Propagates failures from `store.all_nodes()` and `store.all_edges()`.
pub(crate) fn convex_rows_from_graph_store(
    store: &impl GraphStore,
) -> Result<ConvexProjectionRows> {
    let nodes = store.all_nodes()?;
    let edges = store.all_edges()?;
    let projection = GraphProjection { nodes, edges };
    Ok(projection.to_convex_rows())
}
/// Describes one index: the table it belongs to, its name, and its fields.
///
/// `convex_required_indexes` builds the set reported in sync output, and
/// `ConvexSnapshotMeta` deserializes the same shape from the backend.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
struct ConvexRequiredIndex {
/// Table name (`nodes` or `edges` in `convex_required_indexes`).
table: String,
/// Index name, e.g. `by_external_id`.
name: String,
/// Field names covered by the index, in order.
fields: Vec<String>,
}
/// One planned batch of a sync operation, as produced by `push_sync_chunks`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncChunk {
/// Operation name; the sync planner emits `delete_edges`, `upsert_nodes`,
/// `upsert_edges` and `delete_nodes`.
operation: String,
/// 1-based position of this chunk among the chunks of the same operation.
chunk: usize,
/// Number of entries in `keys`.
count: usize,
/// Row keys handled by this chunk (node `external_id`s or edge keys).
keys: Vec<String>,
/// Upper bound on transport attempts used by `ConvexHttpTransport::apply_chunk`.
max_attempts: usize,
/// Human-readable description of the retry policy.
retry_policy: String,
}
/// Serializable summary of the transport configuration; built by
/// `ConvexHttpTransport::summary`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportSummary {
/// Name of the environment variable consulted for the endpoint.
endpoint_env: String,
/// Whether an endpoint is configured.
endpoint_configured: bool,
/// Name of the environment variable consulted for the auth token.
auth_token_env: String,
/// Whether an auth token was found.
auth_configured: bool,
/// Caller-supplied flag; presumably whether a remote snapshot was fetched.
remote_snapshot: bool,
/// Caller-supplied count of chunks applied through the transport.
applied_chunks: usize,
}
/// Outcome of applying one sync chunk; built by
/// `ConvexHttpTransport::apply_chunk`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportReceipt {
/// Operation name copied from the chunk.
operation: String,
/// Chunk number copied from the chunk.
chunk: usize,
/// 1-based attempt on which the transport call succeeded.
attempt: usize,
/// Status reported by the backend; `ok` when the response carried none.
status: String,
/// Optional message reported by the backend.
message: Option<String>,
}
/// JSON body posted to the Convex graph transport endpoint.
///
/// Keys are serialized in camelCase. `projection_meta_id`, `cursor` and
/// `limit` are omitted from the payload when they are `None`;
/// `projection_hash` has no such attribute and is always serialized.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportRequest<'a> {
/// Operation name, e.g. `snapshot`, `snapshot_meta`, `upsert_nodes`.
operation: &'a str,
/// Chunk number; snapshot requests in this file send `0`.
chunk: usize,
/// Projection version the caller is working with.
projection_version: &'a str,
/// Local projection hash, when the caller has one to send.
projection_hash: Option<&'a str>,
/// Id of the projection meta node; sent with `snapshot_meta`.
#[serde(skip_serializing_if = "Option::is_none")]
projection_meta_id: Option<&'a str>,
/// Node rows to upsert (empty for other operations).
node_rows: Vec<ConvexNodeRow>,
/// Edge rows to upsert (empty for other operations).
edge_rows: Vec<ConvexEdgeRow>,
/// Row keys the operation applies to.
keys: Vec<String>,
/// Pagination cursor for the snapshot page operations.
#[serde(skip_serializing_if = "Option::is_none")]
cursor: Option<String>,
/// Optional page-size limit; every request built in this file leaves it unset.
#[serde(skip_serializing_if = "Option::is_none")]
limit: Option<usize>,
}
/// JSON body returned by the Convex graph transport endpoint.
///
/// Keys are read in camelCase. Which optional field is populated depends on
/// the operation that was requested.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportResponse {
/// Backend status string; `error` is treated as a failure by the
/// `snapshot_meta` caller.
status: Option<String>,
/// Optional human-readable message.
message: Option<String>,
/// Full row set, read by the legacy single-shot `snapshot` operation.
rows: Option<ConvexProjectionRows>,
/// Snapshot metadata, read by the `snapshot_meta` operation.
#[serde(default)]
meta: Option<ConvexSnapshotMeta>,
/// One page of rows, read by the `snapshot_*_page` operations.
#[serde(default)]
page: Option<ConvexSnapshotPage>,
}
/// Metadata returned by the `snapshot_meta` operation.
///
/// Every field defaults when absent from the response.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotMeta {
// Captured for completeness/debugging; not currently consumed by the
// freshness diff (indexes are already validated against the required set
// via `convex_required_indexes`, and `page_size` is informational only).
/// Indexes reported by the backend.
#[serde(default)]
#[allow(dead_code)]
indexes: Vec<ConvexRequiredIndex>,
/// Remote node count, when reported.
#[serde(default)]
#[allow(dead_code)]
node_count: Option<usize>,
/// Remote edge count, when reported.
#[serde(default)]
#[allow(dead_code)]
edge_count: Option<usize>,
/// Remote projection hash; compared with the local hash so an unchanged
/// projection can skip the paged row download.
#[serde(default)]
projection_hash: Option<String>,
/// Page size reported by the backend (informational).
#[serde(default)]
#[allow(dead_code)]
page_size: Option<usize>,
}
/// Paginated snapshot page response. `rows` is either node rows or edge rows
/// depending on which operation was called; we deserialize as raw values to
/// keep the transport struct shared between both shapes, then narrow per call
/// site.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotPage {
/// Undecoded rows of this page.
rows: Vec<serde_json::Value>,
/// Cursor for the next page; `None` (or absent) ends the pagination loop.
#[serde(default)]
next_cursor: Option<String>,
}
/// Result of comparing the local projection with a Convex snapshot; produced
/// by `convex_projection_freshness`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexProjectionFreshness {
/// `current`, `stale`, or `unchecked` (no snapshot was supplied).
status: String,
/// `true` when the snapshot is stale; callers treat this as a hard failure.
fail_closed: bool,
/// Content hash read from the local projection meta node.
local_hash: Option<String>,
/// Content hash read from the snapshot's projection meta node.
snapshot_hash: Option<String>,
/// Local node ids that are absent from the snapshot.
missing_nodes: Vec<String>,
/// Local node ids whose snapshot row differs.
stale_nodes: Vec<String>,
/// Local edge keys that are absent from the snapshot.
missing_edges: Vec<String>,
/// Local edge keys whose snapshot row differs.
stale_edges: Vec<String>,
/// Human-readable explanations of the status.
diagnostics: Vec<String>,
}
/// Name of the environment variable that `ConvexHttpTransport::from_options`
/// reads for the endpoint when no explicit endpoint is passed.
const DEFAULT_CONVEX_GRAPH_URL_ENV: &str = "TSIFT_CONVEX_GRAPH_URL";
impl ConvexProjectionFreshness {
    /// Builds the report for a snapshot that matches the local projection:
    /// status `current`, not fail-closed, no missing or stale rows and no
    /// diagnostics. The two hashes are stored as given.
    fn current(local_hash: Option<String>, snapshot_hash: Option<String>) -> Self {
        Self {
            status: String::from("current"),
            fail_closed: false,
            local_hash,
            snapshot_hash,
            missing_nodes: Vec::default(),
            stale_nodes: Vec::default(),
            missing_edges: Vec::default(),
            stale_edges: Vec::default(),
            diagnostics: Vec::default(),
        }
    }
}
/// Full sync plan and outcome; assembled by
/// `build_convex_sync_report_with_snapshot`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncReport {
/// Resolved project root.
root: String,
/// Optional scope; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
/// Path of the local graph database the rows were read from.
graph_db: String,
/// Whether the report describes a dry run.
dry_run: bool,
/// Projection version (`GRAPH_PROJECTION_VERSION`).
projection_version: String,
/// Content hash of the local projection, when its meta node has one.
projection_hash: Option<String>,
/// Indexes listed by `convex_required_indexes`.
required_indexes: Vec<ConvexRequiredIndex>,
/// Local node rows that are new or differ from the snapshot.
node_upserts: Vec<ConvexNodeRow>,
/// Local edge rows that are new or differ from the snapshot.
edge_upserts: Vec<ConvexEdgeRow>,
/// Snapshot node ids that no longer exist locally.
node_tombstones: Vec<String>,
/// Snapshot edge keys that no longer exist locally.
edge_tombstones: Vec<String>,
/// Planned chunks in application order.
chunks: Vec<ConvexSyncChunk>,
/// Freshness comparison between local rows and the snapshot.
freshness: ConvexProjectionFreshness,
/// Transport summary; the report builder leaves it `None`.
transport: Option<ConvexTransportSummary>,
/// Transport receipts; the report builder leaves it empty.
receipts: Vec<ConvexTransportReceipt>,
/// Planner notes for the operator.
diagnostics: Vec<String>,
/// Warnings carried over from the traversal graph build.
warnings: Vec<String>,
}
/// Lists the indexes reported as required in every sync report: two on
/// `nodes` and three on `edges`, always in the same order.
fn convex_required_indexes() -> Vec<ConvexRequiredIndex> {
    // (table, index name, indexed fields)
    const REQUIRED: [(&str, &str, &[&str]); 5] = [
        ("nodes", "by_external_id", &["externalId"]),
        ("nodes", "by_kind", &["kind"]),
        ("edges", "by_edge_key", &["edgeKey"]),
        ("edges", "by_from_kind", &["fromExternalId", "kind"]),
        ("edges", "by_to_kind", &["toExternalId", "kind"]),
    ];

    REQUIRED
        .iter()
        .map(|&(table, name, fields)| ConvexRequiredIndex {
            table: table.to_string(),
            name: name.to_string(),
            fields: fields.iter().map(|field| (*field).to_string()).collect(),
        })
        .collect()
}
/// Reads a Convex projection snapshot from a JSON file.
///
/// # Errors
/// Fails, with the path in the error context, when the file cannot be read or
/// its contents do not deserialize into `ConvexProjectionRows`.
pub(crate) fn load_convex_projection_rows(path: &Path) -> Result<ConvexProjectionRows> {
    let snapshot_json = fs::read_to_string(path)
        .with_context(|| format!("reading Convex projection snapshot {}", path.display()))?;
    let rows: ConvexProjectionRows = serde_json::from_str(&snapshot_json)
        .with_context(|| format!("parsing Convex projection snapshot {}", path.display()))?;
    Ok(rows)
}
/// Collects structural problems in a set of Convex projection rows.
///
/// Diagnostics are emitted in this order:
/// 1. duplicate node `externalId`s,
/// 2. per edge, in row order: a missing `from` node, a missing `to` node, and
///    an `edgeKey` that differs from `ConvexEdgeRow::stable_key`,
/// 3. duplicate `edgeKey`s.
///
/// An empty result means no problem was found.
fn convex_projection_row_diagnostics(rows: &ConvexProjectionRows) -> Vec<String> {
    let mut findings = Vec::new();

    // Occurrence count per node id; the key set doubles as "known node ids".
    let mut node_tally = BTreeMap::<&str, usize>::new();
    for node in &rows.nodes {
        *node_tally.entry(node.external_id.as_str()).or_default() += 1;
    }
    findings.extend(
        node_tally
            .iter()
            .filter(|(_, seen)| **seen > 1)
            .map(|(external_id, count)| {
                format!(
                    "Convex snapshot contains duplicate node externalId {external_id} ({count} rows)"
                )
            }),
    );

    let mut edge_tally = BTreeMap::<&str, usize>::new();
    for edge in &rows.edges {
        *edge_tally.entry(edge.edge_key.as_str()).or_default() += 1;

        let from_known = node_tally.contains_key(edge.from_external_id.as_str());
        if !from_known {
            findings.push(format!(
                "Convex snapshot edge {} references missing from node {}",
                edge.edge_key, edge.from_external_id
            ));
        }
        let to_known = node_tally.contains_key(edge.to_external_id.as_str());
        if !to_known {
            findings.push(format!(
                "Convex snapshot edge {} references missing to node {}",
                edge.edge_key, edge.to_external_id
            ));
        }

        let canonical_key =
            ConvexEdgeRow::stable_key(&edge.from_external_id, &edge.to_external_id, &edge.kind);
        if canonical_key != edge.edge_key {
            findings.push(format!(
                "Convex snapshot edge {} has non-canonical key; expected {} for ({}, {}, {})",
                edge.edge_key, canonical_key, edge.from_external_id, edge.kind, edge.to_external_id
            ));
        }
    }

    findings.extend(
        edge_tally
            .iter()
            .filter(|(_, seen)| **seen > 1)
            .map(|(edge_key, count)| {
                format!("Convex snapshot contains duplicate edgeKey {edge_key} ({count} rows)")
            }),
    );

    findings
}
/// Fails when `convex_projection_row_diagnostics` reports any problem.
///
/// # Errors
/// Returns a single error whose message joins all diagnostics with `"; "`.
pub(crate) fn validate_convex_projection_rows(rows: &ConvexProjectionRows) -> Result<()> {
    let problems = convex_projection_row_diagnostics(rows);
    if !problems.is_empty() {
        bail!("{}", problems.join("; "));
    }
    Ok(())
}
/// HTTP client configuration for the Convex graph transport endpoint.
pub(crate) struct ConvexHttpTransport {
/// URL every request is posted to.
endpoint: String,
/// Name of the environment variable the auth token was read from.
auth_token_env: String,
/// Bearer token sent in the `Authorization` header, when one was found.
auth_token: Option<String>,
}
impl ConvexHttpTransport {
/// Builds a transport from an optional explicit endpoint and the name of the
/// environment variable that holds the auth token.
///
/// The endpoint is taken from `endpoint` when given, otherwise from the
/// `TSIFT_CONVEX_GRAPH_URL` environment variable. Blank (empty or
/// whitespace-only) values are treated as unset for the endpoint, the same way
/// a blank auth token is, so a blank value produces the configuration error
/// below instead of a failed request against an empty URL.
///
/// # Errors
/// Fails when neither source provides a non-blank endpoint.
fn from_options(endpoint: Option<&str>, auth_token_env: &str) -> Result<Self> {
    let endpoint = endpoint
        .filter(|value| !value.trim().is_empty())
        .map(str::to_string)
        .or_else(|| {
            env::var(DEFAULT_CONVEX_GRAPH_URL_ENV)
                .ok()
                .filter(|value| !value.trim().is_empty())
        })
        .context("Convex transport requires --endpoint or TSIFT_CONVEX_GRAPH_URL")?;
    // The token is optional: requests are simply sent without an
    // `Authorization` header when it is missing or blank.
    let auth_token = env::var(auth_token_env)
        .ok()
        .filter(|value| !value.trim().is_empty());
    Ok(Self {
        endpoint,
        auth_token_env: auth_token_env.to_string(),
        auth_token,
    })
}
/// Describes this transport for inclusion in a sync report.
///
/// The endpoint is always reported as configured; `remote_snapshot` and
/// `applied_chunks` are passed through from the caller.
fn summary(&self, remote_snapshot: bool, applied_chunks: usize) -> ConvexTransportSummary {
    let auth_configured = self.auth_token.is_some();
    ConvexTransportSummary {
        endpoint_env: String::from(DEFAULT_CONVEX_GRAPH_URL_ENV),
        endpoint_configured: true,
        auth_token_env: self.auth_token_env.clone(),
        auth_configured,
        remote_snapshot,
        applied_chunks,
    }
}
/// Posts `request` as JSON to the configured endpoint and decodes the JSON
/// response.
///
/// An `Authorization: Bearer …` header is attached only when an auth token is
/// configured.
///
/// # Errors
/// Fails, with the endpoint in the error context, when the HTTP call fails or
/// the response body does not decode into `ConvexTransportResponse`.
fn post(&self, request: &ConvexTransportRequest<'_>) -> Result<ConvexTransportResponse> {
    let unauthenticated = ureq::post(&self.endpoint);
    let builder = match &self.auth_token {
        Some(token) => unauthenticated.header("Authorization", &format!("Bearer {token}")),
        None => unauthenticated,
    };
    let mut response = builder
        .send_json(request)
        .with_context(|| format!("calling Convex graph transport {}", self.endpoint))?;
    response
        .body_mut()
        .read_json::<ConvexTransportResponse>()
        .with_context(|| format!("parsing Convex graph transport response {}", self.endpoint))
}
/// Fetch a full snapshot of the Convex graph backend.
///
/// The paginated path (`snapshot_meta` + `snapshot_nodes_page` +
/// `snapshot_edges_page`) is tried first so the call works on tables larger
/// than ~5k rows (the single-shot `snapshot` query hits Convex's 15s
/// per-request syscall budget at that scale; see `#convexsnapshotscale`).
///
/// If that fails and the rendered error text contains `unknown operation`,
/// `snapshot_meta` or `404`, the backend is assumed to predate the paginated
/// operations and the legacy single-shot `snapshot` operation is used
/// instead. Every other failure is returned unchanged so the operator sees
/// the real cause.
///
/// The second tuple element carries informational notes from the paginated
/// path; the legacy path returns none.
fn fetch_snapshot(
    &self,
    projection_version: &str,
    scope: Option<&str>,
    local_hash: Option<&str>,
    local_rows: Option<&ConvexProjectionRows>,
) -> Result<(ConvexProjectionRows, Vec<String>)> {
    let paginated_err =
        match self.fetch_snapshot_paginated(projection_version, scope, local_hash, local_rows) {
            Ok(snapshot) => return Ok(snapshot),
            Err(err) => err,
        };

    // Substring heuristic over the full error chain.
    let rendered = format!("{paginated_err:#}");
    let looks_like_old_backend = ["unknown operation", "snapshot_meta", "404"]
        .iter()
        .any(|marker| rendered.contains(*marker));
    if !looks_like_old_backend {
        return Err(paginated_err);
    }

    let rows = self.fetch_snapshot_legacy(projection_version)?;
    Ok((rows, Vec::new()))
}
fn fetch_snapshot_legacy(&self, projection_version: &str) -> Result<ConvexProjectionRows> {
let response = self.post(&ConvexTransportRequest {
operation: "snapshot",
chunk: 0,
projection_version,
projection_hash: None,
projection_meta_id: None,
node_rows: Vec::new(),
edge_rows: Vec::new(),
keys: Vec::new(),
cursor: None,
limit: None,
})?;
response
.rows
.context("Convex snapshot response did not include rows")
}
fn fetch_snapshot_paginated(
&self,
projection_version: &str,
scope: Option<&str>,
local_hash: Option<&str>,
local_rows: Option<&ConvexProjectionRows>,
) -> Result<(ConvexProjectionRows, Vec<String>)> {
let projection_meta_id = graph_projection_meta_id(scope);
let meta_response = self.post(&ConvexTransportRequest {
operation: "snapshot_meta",
chunk: 0,
projection_version,
projection_hash: None,
projection_meta_id: Some(&projection_meta_id),
node_rows: Vec::new(),
edge_rows: Vec::new(),
keys: Vec::new(),
cursor: None,
limit: None,
})?;
if matches!(meta_response.status.as_deref(), Some("error")) {
anyhow::bail!(
"Convex snapshot_meta returned error: {}",
meta_response.message.unwrap_or_default()
);
}
let meta = meta_response
.meta
.context("Convex snapshot_meta response did not include meta")?;
if let (Some(remote_hash), Some(local_hash), Some(local_rows)) =
(meta.projection_hash.as_deref(), local_hash, local_rows)
&& remote_hash == local_hash
{
return Ok((
local_rows.clone(),
vec![
"remote projection hash matched local graph; skipped full row-page snapshot diff"
.to_string(),
],
));
}
let mut nodes: Vec<ConvexNodeRow> = Vec::with_capacity(meta.node_count.unwrap_or_default());
let mut node_cursor: Option<String> = None;
loop {
let response = self.post(&ConvexTransportRequest {
operation: "snapshot_nodes_page",
chunk: 0,
projection_version,
projection_hash: None,
projection_meta_id: None,
node_rows: Vec::new(),
edge_rows: Vec::new(),
keys: Vec::new(),
cursor: node_cursor.clone(),
limit: None,
})?;
let page = response
.page
.context("Convex snapshot_nodes_page response did not include page")?;
for raw in page.rows {
let row: ConvexNodeRow =
serde_json::from_value(raw).context("decoding Convex snapshot node row")?;
nodes.push(row);
}
match page.next_cursor {
Some(next) => node_cursor = Some(next),
None => break,
}
}
let mut edges: Vec<ConvexEdgeRow> = Vec::with_capacity(meta.edge_count.unwrap_or_default());
let mut edge_cursor: Option<String> = None;
loop {
let response = self.post(&ConvexTransportRequest {
operation: "snapshot_edges_page",
chunk: 0,
projection_version,
projection_hash: None,
projection_meta_id: None,
node_rows: Vec::new(),
edge_rows: Vec::new(),
keys: Vec::new(),
cursor: edge_cursor.clone(),
limit: None,
})?;
let page = response
.page
.context("Convex snapshot_edges_page response did not include page")?;
for raw in page.rows {
let row: ConvexEdgeRow =
serde_json::from_value(raw).context("decoding Convex snapshot edge row")?;
edges.push(row);
}
match page.next_cursor {
Some(next) => edge_cursor = Some(next),
None => break,
}
}
Ok((ConvexProjectionRows { nodes, edges }, Vec::new()))
}
fn apply_chunk(
&self,
report: &ConvexSyncReport,
chunk: &ConvexSyncChunk,
) -> Result<ConvexTransportReceipt> {
let node_rows = if chunk.operation == "upsert_nodes" {
report
.node_upserts
.iter()
.filter(|row| chunk.keys.contains(&row.external_id))
.cloned()
.collect()
} else {
Vec::new()
};
let edge_rows = if chunk.operation == "upsert_edges" {
report
.edge_upserts
.iter()
.filter(|row| chunk.keys.contains(&row.edge_key))
.cloned()
.collect()
} else {
Vec::new()
};
let request = ConvexTransportRequest {
operation: &chunk.operation,
chunk: chunk.chunk,
projection_version: &report.projection_version,
projection_hash: report.projection_hash.as_deref(),
projection_meta_id: None,
node_rows,
edge_rows,
keys: chunk.keys.clone(),
cursor: None,
limit: None,
};
let mut last_error = None;
for attempt in 1..=chunk.max_attempts {
match self.post(&request) {
Ok(response) => {
return Ok(ConvexTransportReceipt {
operation: chunk.operation.clone(),
chunk: chunk.chunk,
attempt,
status: response.status.unwrap_or_else(|| "ok".to_string()),
message: response.message,
});
}
Err(err) => {
last_error = Some(err);
if attempt < chunk.max_attempts {
std::thread::sleep(Duration::from_millis(100 * attempt as u64));
}
}
}
}
Err(last_error.unwrap_or_else(|| anyhow::anyhow!("Convex transport chunk failed")))
.with_context(|| format!("applying Convex {} chunk {}", chunk.operation, chunk.chunk))
}
}
/// Reads the `content_hash` property of the projection meta row for `scope`.
///
/// The meta row is the first node row whose `external_id` equals
/// `graph_projection_meta_id(scope)` and whose kind is
/// `GRAPH_PROJECTION_META_KIND`. Returns `None` when there is no such row or
/// it has no `content_hash` property.
fn convex_projection_hash(rows: &ConvexProjectionRows, scope: Option<&str>) -> Option<String> {
    let meta_id = graph_projection_meta_id(scope);
    for row in &rows.nodes {
        if row.kind == GRAPH_PROJECTION_META_KIND && row.external_id == meta_id {
            return row.properties.get("content_hash").cloned();
        }
    }
    None
}
/// Compares the local projection rows with an optional Convex snapshot.
///
/// * No snapshot: status `unchecked`, not fail-closed.
/// * Snapshot whose meta hash equals a present local hash, and which contains
///   every local row unchanged: status `current`.
/// * Anything else: status `stale`, fail-closed, with the missing and stale
///   row keys listed and at least one diagnostic explaining why.
///
/// Only local rows are checked against the snapshot; rows that exist only in
/// the snapshot are not examined here.
fn convex_projection_freshness(
    local: &ConvexProjectionRows,
    snapshot: Option<&ConvexProjectionRows>,
    scope: Option<&str>,
) -> ConvexProjectionFreshness {
    let local_hash = convex_projection_hash(local, scope);
    let Some(snapshot) = snapshot else {
        return ConvexProjectionFreshness {
            status: "unchecked".to_string(),
            fail_closed: false,
            local_hash,
            snapshot_hash: None,
            missing_nodes: Vec::new(),
            stale_nodes: Vec::new(),
            missing_edges: Vec::new(),
            stale_edges: Vec::new(),
            diagnostics: vec![
                "no Convex snapshot supplied; sync output is a local dry-run plan".to_string(),
            ],
        };
    };
    let snapshot_hash = convex_projection_hash(snapshot, scope);
    let snapshot_nodes = snapshot
        .nodes
        .iter()
        .map(|row| (row.external_id.as_str(), row))
        .collect::<BTreeMap<_, _>>();
    let snapshot_edges = snapshot
        .edges
        .iter()
        .map(|row| (row.edge_key.as_str(), row))
        .collect::<BTreeMap<_, _>>();

    // Classify every local row as unchanged, stale (differs) or missing.
    let mut missing_nodes = Vec::new();
    let mut stale_nodes = Vec::new();
    for row in &local.nodes {
        match snapshot_nodes.get(row.external_id.as_str()) {
            Some(snapshot_row) if *snapshot_row == row => {}
            Some(_) => stale_nodes.push(row.external_id.clone()),
            None => missing_nodes.push(row.external_id.clone()),
        }
    }
    let mut missing_edges = Vec::new();
    let mut stale_edges = Vec::new();
    for row in &local.edges {
        match snapshot_edges.get(row.edge_key.as_str()) {
            Some(snapshot_row) if *snapshot_row == row => {}
            Some(_) => stale_edges.push(row.edge_key.clone()),
            None => missing_edges.push(row.edge_key.clone()),
        }
    }

    // A missing local hash can never count as current.
    let hash_current = local_hash.is_some() && local_hash == snapshot_hash;
    let rows_current = missing_nodes.is_empty()
        && stale_nodes.is_empty()
        && missing_edges.is_empty()
        && stale_edges.is_empty();
    if hash_current && rows_current {
        return ConvexProjectionFreshness::current(local_hash, snapshot_hash);
    }

    let mut diagnostics = Vec::new();
    if local_hash != snapshot_hash {
        diagnostics.push(format!(
            "projection hash mismatch: local={} snapshot={}",
            local_hash.as_deref().unwrap_or("missing"),
            snapshot_hash.as_deref().unwrap_or("missing")
        ));
    } else if local_hash.is_none() {
        // Both hashes are absent: without this note a fail-closed result with
        // matching rows would carry no explanation at all.
        diagnostics.push(
            "projection hash is missing from both the local graph and the Convex snapshot; freshness cannot be confirmed"
                .to_string(),
        );
    }
    if !missing_nodes.is_empty() || !missing_edges.is_empty() {
        diagnostics.push(format!(
            "Convex snapshot is missing {} node(s) and {} edge(s)",
            missing_nodes.len(),
            missing_edges.len()
        ));
    }
    if !stale_nodes.is_empty() || !stale_edges.is_empty() {
        diagnostics.push(format!(
            "Convex snapshot has {} stale node row(s) and {} stale edge row(s)",
            stale_nodes.len(),
            stale_edges.len()
        ));
    }
    ConvexProjectionFreshness {
        status: "stale".to_string(),
        fail_closed: true,
        local_hash,
        snapshot_hash,
        missing_nodes,
        stale_nodes,
        missing_edges,
        stale_edges,
        diagnostics,
    }
}
/// Checks that the snapshot file at `snapshot_path` is current with respect to
/// the local graph database for `root` / `scope`.
///
/// # Errors
/// Fails when the local store cannot be opened or read, when the snapshot
/// cannot be loaded or fails `validate_convex_projection_rows`, or when the
/// freshness comparison is fail-closed (the freshness diagnostics are joined
/// into the message).
pub(crate) fn verify_convex_projection_snapshot(
    root: &Path,
    scope: Option<&str>,
    snapshot_path: &Path,
) -> Result<()> {
    let graph_db = graph_substrate_db_path(root, scope);
    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
    let local_rows = convex_rows_from_graph_store(&store)?;

    let snapshot_rows = load_convex_projection_rows(snapshot_path)?;
    validate_convex_projection_rows(&snapshot_rows)?;

    let freshness = convex_projection_freshness(&local_rows, Some(&snapshot_rows), scope);
    if !freshness.fail_closed {
        return Ok(());
    }
    bail!(
        "Convex graph projection is not current for {}: {}",
        root.display(),
        freshness.diagnostics.join("; ")
    )
}
fn convex_rows_diff(
local: &ConvexProjectionRows,
snapshot: Option<&ConvexProjectionRows>,
) -> (
Vec<ConvexNodeRow>,
Vec<ConvexEdgeRow>,
Vec<String>,
Vec<String>,
) {
let Some(snapshot) = snapshot else {
return (
local.nodes.clone(),
local.edges.clone(),
Vec::new(),
Vec::new(),
);
};
let local_nodes = local
.nodes
.iter()
.map(|row| (row.external_id.as_str(), row))
.collect::<BTreeMap<_, _>>();
let local_edges = local
.edges
.iter()
.map(|row| (row.edge_key.as_str(), row))
.collect::<BTreeMap<_, _>>();
let snapshot_nodes = snapshot
.nodes
.iter()
.map(|row| (row.external_id.as_str(), row))
.collect::<BTreeMap<_, _>>();
let snapshot_edges = snapshot
.edges
.iter()
.map(|row| (row.edge_key.as_str(), row))
.collect::<BTreeMap<_, _>>();
let node_upserts = local
.nodes
.iter()
.filter(|row| {
snapshot_nodes
.get(row.external_id.as_str())
.is_none_or(|snapshot_row| *snapshot_row != *row)
})
.cloned()
.collect::<Vec<_>>();
let edge_upserts = local
.edges
.iter()
.filter(|row| {
snapshot_edges
.get(row.edge_key.as_str())
.is_none_or(|snapshot_row| *snapshot_row != *row)
})
.cloned()
.collect::<Vec<_>>();
let node_tombstones = snapshot
.nodes
.iter()
.filter(|row| !local_nodes.contains_key(row.external_id.as_str()))
.map(|row| row.external_id.clone())
.collect::<Vec<_>>();
let edge_tombstones = snapshot
.edges
.iter()
.filter(|row| !local_edges.contains_key(row.edge_key.as_str()))
.map(|row| row.edge_key.clone())
.collect::<Vec<_>>();
(node_upserts, edge_upserts, node_tombstones, edge_tombstones)
}
/// Splits `keys` into batches of at most `size` and appends one
/// [`ConvexSyncChunk`] per batch to `chunks`.
///
/// Chunk numbers start at 1 and restart for every call, so they are relative
/// to `operation`. Every chunk allows three attempts. Nothing is appended when
/// `keys` is empty.
///
/// A `size` of zero is treated as one: `slice::chunks` panics on a zero chunk
/// size, and this helper should not depend on every caller validating it.
fn push_sync_chunks(
    chunks: &mut Vec<ConvexSyncChunk>,
    operation: &str,
    keys: Vec<String>,
    size: usize,
) {
    let batch_size = size.max(1);
    // `chunks()` over an empty slice yields nothing, so no explicit
    // empty-input guard is required.
    chunks.extend(
        keys.chunks(batch_size)
            .enumerate()
            .map(|(idx, batch)| ConvexSyncChunk {
                operation: operation.to_string(),
                chunk: idx + 1,
                count: batch.len(),
                keys: batch.to_vec(),
                max_attempts: 3,
                retry_policy:
                    "retry the whole chunk; rows are idempotent by externalId/edgeKey, stop on a repeated partial failure"
                        .to_string(),
            }),
    );
}
/// Refreshes the local traversal graph store for `path` and plans a sync
/// against an optional Convex snapshot.
///
/// The plan lists upserts and tombstones from `convex_rows_diff`, split into
/// chunks of at most `chunk_size` keys in this order: edge deletes, node
/// upserts, edge upserts, node deletes. No network call is made here;
/// `transport` and `receipts` are left empty.
///
/// # Errors
/// Fails when `chunk_size` is zero, when the project root cannot be resolved,
/// or when writing or reading the graph store fails.
pub(crate) fn build_convex_sync_report_with_snapshot(
    path: &Path,
    scope: Option<&str>,
    snapshot: Option<ConvexProjectionRows>,
    chunk_size: usize,
    dry_run: bool,
) -> Result<ConvexSyncReport> {
    if chunk_size == 0 {
        bail!("--chunk-size must be greater than zero");
    }

    let root = lint::resolve_project_root_or_canonical_path(path)?;
    let (graph, _refresh) = write_traversal_graph_store(&root, path, scope)?;
    let graph_db = graph_substrate_db_path(&root, scope);
    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
    let local = convex_rows_from_graph_store(&store)?;

    let remote = snapshot.as_ref();
    let freshness = convex_projection_freshness(&local, remote, scope);
    let projection_hash = convex_projection_hash(&local, scope);
    let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
        convex_rows_diff(&local, remote);

    let node_upsert_keys: Vec<String> = node_upserts
        .iter()
        .map(|row| row.external_id.clone())
        .collect();
    let edge_upsert_keys: Vec<String> = edge_upserts
        .iter()
        .map(|row| row.edge_key.clone())
        .collect();

    // Application order: drop stale edges, write nodes, write edges, drop nodes.
    let planned = [
        ("delete_edges", edge_tombstones.clone()),
        ("upsert_nodes", node_upsert_keys),
        ("upsert_edges", edge_upsert_keys),
        ("delete_nodes", node_tombstones.clone()),
    ];
    let mut chunks = Vec::new();
    for (operation, keys) in planned {
        push_sync_chunks(&mut chunks, operation, keys, chunk_size);
    }

    let mut diagnostics = vec![String::from(
        "apply node upserts before edge upserts; apply edge tombstones before node tombstones",
    )];
    if dry_run {
        diagnostics.push(String::from(
            "dry-run only: no Convex network mutation was attempted",
        ));
    }
    if freshness.fail_closed {
        diagnostics.push(String::from(
            "Convex-backed traverse/context-pack reads must fail closed until this plan is applied",
        ));
    }

    Ok(ConvexSyncReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        graph_db: graph_db.to_string_lossy().to_string(),
        dry_run,
        projection_version: GRAPH_PROJECTION_VERSION.to_string(),
        projection_hash,
        required_indexes: convex_required_indexes(),
        node_upserts,
        edge_upserts,
        node_tombstones,
        edge_tombstones,
        chunks,
        freshness,
        transport: None,
        receipts: Vec::new(),
        diagnostics,
        warnings: graph.warnings,
    })
}
/// Test-only wrapper: loads the snapshot from `snapshot_path` (when given) and
/// builds a dry-run sync report.
#[cfg(test)]
fn build_convex_sync_report(
    path: &Path,
    scope: Option<&str>,
    snapshot_path: Option<&Path>,
    chunk_size: usize,
) -> Result<ConvexSyncReport> {
    let snapshot = match snapshot_path {
        Some(file) => Some(load_convex_projection_rows(file)?),
        None => None,
    };
    build_convex_sync_report_with_snapshot(path, scope, snapshot, chunk_size, true)
}
/// Prints a human-readable rendering of a Convex sync report to stdout.
///
/// With `compact` set, a single summary line is printed and nothing else.
/// Otherwise the report is printed as a header, counts, optional transport
/// details, one line per receipt, and finally every diagnostic (the report's own
/// diagnostics followed by the freshness diagnostics) as a `- ` bullet.
pub(crate) fn print_convex_sync_human(report: &ConvexSyncReport, compact: bool) {
    let node_upsert_count = report.node_upserts.len();
    let edge_upsert_count = report.edge_upserts.len();
    let node_tombstone_count = report.node_tombstones.len();
    let edge_tombstone_count = report.edge_tombstones.len();
    let chunk_count = report.chunks.len();
    let freshness = &report.freshness;

    if compact {
        println!(
            "convex-sync nodes:+{} -{} edges:+{} -{} chunks:{} freshness:{}",
            node_upsert_count,
            node_tombstone_count,
            edge_upsert_count,
            edge_tombstone_count,
            chunk_count,
            freshness.status
        );
        return;
    }

    let mode = if report.dry_run { "dry-run" } else { "apply" };
    println!("Convex graph sync {}", mode);
    println!("root: {}", report.root);
    println!("graph_db: {}", report.graph_db);
    println!(
        "upserts: {} node(s), {} edge(s)",
        node_upsert_count, edge_upsert_count
    );
    println!(
        "tombstones: {} node(s), {} edge(s)",
        node_tombstone_count, edge_tombstone_count
    );
    println!("chunks: {}", chunk_count);
    println!("freshness: {}", freshness.status);

    // Transport details exist only when a transport was recorded on the report.
    if let Some(transport) = report.transport.as_ref() {
        println!(
            "transport: endpoint_env={} auth_env={} applied_chunks={}",
            transport.endpoint_env, transport.auth_token_env, transport.applied_chunks
        );
    }

    report.receipts.iter().for_each(|receipt| {
        println!(
            "receipt: {} chunk {} attempt {} {}",
            receipt.operation, receipt.chunk, receipt.attempt, receipt.status
        );
    });

    let report_diagnostics = report.diagnostics.iter();
    let freshness_diagnostics = freshness.diagnostics.iter();
    for diagnostic in report_diagnostics.chain(freshness_diagnostics) {
        println!("- {}", diagnostic);
    }
}
/// Borrowed bundle of options for a Convex sync run; every reference shares the
/// lifetime `'a`.
///
/// NOTE(review): the field meanings below are inferred from their names only —
/// presumably they mirror the `convex-sync` command-line flags; confirm against
/// the code that constructs this struct.
pub(crate) struct ConvexSyncOptions<'a> {
path: &'a Path,
scope: Option<&'a str>,
snapshot: Option<&'a Path>,
chunk_size: usize,
remote_snapshot: bool,
apply: bool,
endpoint: Option<&'a str>,
// Holds the *name* of an environment variable, judging by the `_env` suffix — TODO confirm.
auth_token_env: &'a str,
}
/// Serializable descriptor of a single schema field: its name, value type and a
/// human-readable description. All three are `'static` string slices.
#[derive(Serialize)]
struct GraphDbSchemaField {
name: &'static str,
value_type: &'static str,
description: &'static str,
}
/// Serializable descriptor of one schema operation: a command string and its
/// description, both `'static`.
#[derive(Serialize)]
struct GraphDbSchemaOperation {
command: &'static str,
description: &'static str,
}
/// Serializable descriptor of a named, versioned contract with a description.
/// All fields are `'static` string slices.
#[derive(Serialize)]
struct GraphDbSchemaContract {
name: &'static str,
version: &'static str,
description: &'static str,
}
/// Serializable schema description: contract versions, node and edge field
/// descriptors, and the list of operations.
#[derive(Serialize)]
struct GraphDbSchema {
contract_versions: Vec<GraphDbSchemaContract>,
node_fields: Vec<GraphDbSchemaField>,
edge_fields: Vec<GraphDbSchemaField>,
operations: Vec<GraphDbSchemaOperation>,
}
/// Freshness verdict embedded in the graph-db reports in this file.
///
/// Carries a status string, a `fail_closed` flag, optional projection version /
/// content hash / source watermark, and free-form diagnostics.
/// NOTE(review): the set of valid `status` values is not visible here — confirm
/// against the code that builds this report.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFreshnessReport {
status: String,
fail_closed: bool,
projection_version: Option<String>,
content_hash: Option<String>,
source_watermark: Option<String>,
diagnostics: Vec<String>,
}
/// Crate-visible, serializable readiness verdict: a status, a `fail_closed` flag,
/// a reason, diagnostics, and suggested follow-up commands.
#[derive(Clone, Debug, Serialize)]
pub(crate) struct GraphEffectivenessReadiness {
pub(crate) status: String,
pub(crate) fail_closed: bool,
pub(crate) reason: String,
pub(crate) diagnostics: Vec<String>,
pub(crate) next_commands: Vec<String>,
}
/// A key/value pair used as a property filter in graph-db queries and echoed back
/// in page reports.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPropertyFilter {
key: String,
value: String,
}
/// Optional query modifiers: a pagination cursor, a row limit, and property
/// filters. `Default` yields no cursor, no limit and no filters.
#[derive(Clone, Debug, Default)]
struct GraphDbQueryOptions {
cursor: Option<String>,
limit: Option<usize>,
property_filters: Vec<GraphDbPropertyFilter>,
}
/// Serializable pagination summary for a graph-db query result.
///
/// `cursor`, `limit` and `next_cursor` are omitted from the serialized output
/// when `None`; `diagnostics` is omitted when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPageReport {
#[serde(skip_serializing_if = "Option::is_none")]
cursor: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
limit: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
next_cursor: Option<String>,
returned_nodes: usize,
returned_edges: usize,
truncated: bool,
property_filters: Vec<GraphDbPropertyFilter>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
diagnostics: Vec<String>,
}
/// Local alias for `resolution::RankedNeighbor`.
type GraphDbRankedNeighbor = resolution::RankedNeighbor;
/// Serializable tally of how communities were affected by truncation: total,
/// fully kept, partially pruned and fully pruned counts, plus the kinds and top
/// labels of pruned communities.
/// NOTE(review): exact counting rules live in the producer, which is not visible here.
#[derive(Clone, Debug, Serialize)]
struct CommunityTruncationSummary {
total_communities: usize,
fully_kept: usize,
partially_pruned: usize,
fully_pruned: usize,
pruned_community_kinds: Vec<String>,
pruned_community_top_labels: Vec<String>,
}
/// Serializable metrics comparing a ranked neighborhood traversal with its
/// unranked counterpart (sizes, latency, overlap, density, duplicate names and
/// handle coverage).
///
/// `community_truncation_summary` is omitted from the serialized output when `None`.
/// NOTE(review): metric definitions are set by the producer — confirm there
/// before relying on units beyond what the field names state.
#[derive(Clone, Debug, Serialize)]
struct GraphDbRankedNeighborhoodComparison {
traversal_nodes: usize,
traversal_edges: usize,
pruned_count: usize,
total_discovered: usize,
latency_micros: u128,
overlap_with_unranked_pct: f64,
useful_hit_density_ranked: f64,
useful_hit_density_unranked: f64,
duplicate_name_count_ranked: usize,
duplicate_name_count_unranked: usize,
handle_coverage_ranked_pct: f64,
handle_coverage_unranked_pct: f64,
#[serde(skip_serializing_if = "Option::is_none")]
community_truncation_summary: Option<CommunityTruncationSummary>,
diagnostics: Vec<String>,
}
/// One entry describing output dropped because of a budget: the item, its kind,
/// how many were dropped, and the reason. Round-trips through serde.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
struct GraphDbDroppedByBudget {
item: String,
kind: String,
dropped: usize,
reason: String,
}
/// Output-budget accounting: the token ceiling, the estimated tokens, selected
/// versus candidate node/edge counts, what was dropped, and diagnostics.
/// Round-trips through serde.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
struct GraphDbOutputBudgetReport {
max_tokens: usize,
estimated_tokens: usize,
selected_nodes: usize,
selected_edges: usize,
candidate_nodes: usize,
candidate_edges: usize,
dropped_by_budget: Vec<GraphDbDroppedByBudget>,
diagnostics: Vec<String>,
}
/// Serializable summary of a knowledge-retrieval query: the mode and query text,
/// seed parameters, traversal depth/limit, resulting node/edge counts, a
/// truncation flag, and descriptive strings for traversal and boundaries.
/// NOTE(review): the allowed values of the string fields are not visible here.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbKnowledgeRetrieval {
mode: String,
query: String,
seed_kind: String,
seed_limit: usize,
seed_count: usize,
depth: usize,
limit: usize,
node_count: usize,
edge_count: usize,
truncated: bool,
traversal: String,
freshness_boundary: String,
privacy_boundary: String,
diagnostics: Vec<String>,
}
/// In-memory subgraph result (not serializable): full substrate nodes and edges,
/// a truncation flag, and diagnostics.
struct GraphDbSemanticSeededSubgraph {
nodes: Vec<SubstrateGraphNode>,
edges: Vec<SubstrateGraphEdge>,
truncated: bool,
diagnostics: Vec<String>,
}
/// Local alias for `resolution::NeighborhoodRankingGate`.
type GraphDbNeighborhoodRankingGate = resolution::NeighborhoodRankingGate;
/// Top-level serializable report for a graph-db query.
///
/// `root`, `backend`, `query` and `freshness` are always serialized. Every
/// `Option` field is omitted when `None`, and every `Vec` field is omitted when
/// empty, so the serialized shape depends on which sections were populated.
#[derive(Serialize)]
struct GraphDbReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
backend: String,
query: String,
freshness: GraphDbFreshnessReport,
#[serde(skip_serializing_if = "Option::is_none")]
readiness: Option<GraphEffectivenessReadiness>,
#[serde(skip_serializing_if = "Option::is_none")]
schema: Option<GraphDbSchema>,
// Single-item results.
#[serde(skip_serializing_if = "Option::is_none")]
node: Option<SubstrateTerseGraphNode>,
#[serde(skip_serializing_if = "Option::is_none")]
edge: Option<SubstrateTerseGraphEdge>,
// Multi-item results.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
nodes: Vec<SubstrateTerseGraphNode>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
edges: Vec<SubstrateTerseGraphEdge>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
ranked_neighbors: Vec<GraphDbRankedNeighbor>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
semantic_related: Vec<SemanticRelatedItem>,
#[serde(skip_serializing_if = "Option::is_none")]
neighborhood_ranking_gate: Option<GraphDbNeighborhoodRankingGate>,
#[serde(skip_serializing_if = "Option::is_none")]
ranked_neighborhood_comparison: Option<GraphDbRankedNeighborhoodComparison>,
#[serde(skip_serializing_if = "Option::is_none")]
knowledge_retrieval: Option<GraphDbKnowledgeRetrieval>,
#[serde(skip_serializing_if = "Option::is_none")]
output_budget: Option<GraphDbOutputBudgetReport>,
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<substrate::GraphPath>,
#[serde(skip_serializing_if = "Option::is_none")]
page: Option<GraphDbPageReport>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// In-memory, read-only graph store built from Convex projection rows (see
/// `from_rows`).
struct ExperimentalReadOnlyGraphStore {
// Backend identity; its `name()` is used in the read-only error messages.
backend: GraphDbExperimentalBackend,
// Nodes keyed by node id.
nodes: BTreeMap<String, SubstrateGraphNode>,
// Edges keyed by the value of `graph_db_edge_key` for the edge.
edges: BTreeMap<String, SubstrateGraphEdge>,
// Secondary index: node kind -> sorted node ids.
node_ids_by_kind: BTreeMap<String, Vec<String>>,
// Secondary index: source node id -> edge keys, ordered by (to_id, kind, key).
outgoing_edge_keys_by_from: BTreeMap<String, Vec<String>>,
}
impl ExperimentalReadOnlyGraphStore {
    /// Builds the in-memory store and its secondary indexes from projection rows.
    ///
    /// The rows are validated first; a validation failure is returned unchanged.
    /// Nodes are keyed by their external id and edges by `graph_db_edge_key`.
    /// If two rows map to the same key, the later row wins.
    fn from_rows(backend: GraphDbExperimentalBackend, rows: &ConvexProjectionRows) -> Result<Self> {
        validate_convex_projection_rows(rows)?;

        // Primary node table: external id -> node.
        let mut nodes = BTreeMap::new();
        for row in rows.nodes.iter() {
            let node = SubstrateGraphNode {
                id: row.external_id.clone(),
                kind: row.kind.clone(),
                label: row.label.clone(),
                properties: row.properties.clone(),
                provenance: row.provenance.clone(),
                freshness: row.freshness.clone(),
            };
            nodes.insert(node.id.clone(), node);
        }

        // Primary edge table: derived edge key -> edge.
        let mut edges = BTreeMap::new();
        for row in rows.edges.iter() {
            let edge = SubstrateGraphEdge {
                id: row.edge_key.clone(),
                from_id: row.from_external_id.clone(),
                to_id: row.to_external_id.clone(),
                kind: row.kind.clone(),
                properties: row.properties.clone(),
                provenance: row.provenance.clone(),
                freshness: row.freshness.clone(),
            };
            let key = graph_db_edge_key(&edge);
            edges.insert(key, edge);
        }

        // Index: node kind -> sorted ids of nodes of that kind.
        let mut node_ids_by_kind: BTreeMap<String, Vec<String>> = BTreeMap::new();
        for node in nodes.values() {
            let bucket = node_ids_by_kind.entry(node.kind.clone()).or_default();
            bucket.push(node.id.clone());
        }
        node_ids_by_kind.values_mut().for_each(|ids| ids.sort());

        // Index: source node id -> keys of its outgoing edges.
        let mut outgoing_edge_keys_by_from: BTreeMap<String, Vec<String>> = BTreeMap::new();
        for edge in edges.values() {
            let bucket = outgoing_edge_keys_by_from
                .entry(edge.from_id.clone())
                .or_default();
            bucket.push(graph_db_edge_key(edge));
        }
        // Order each adjacency list by (target id, edge kind, edge key). Every key
        // here was derived from an edge stored in `edges`, so the lookups succeed.
        for edge_keys in outgoing_edge_keys_by_from.values_mut() {
            edge_keys.sort_by(|left_key, right_key| {
                let left = &edges[left_key];
                let right = &edges[right_key];
                (&left.to_id, &left.kind, left_key).cmp(&(&right.to_id, &right.kind, right_key))
            });
        }

        Ok(Self {
            backend,
            nodes,
            edges,
            node_ids_by_kind,
            outgoing_edge_keys_by_from,
        })
    }
}
/// Read-only `GraphStore` implementation served entirely from the in-memory maps
/// held by `ExperimentalReadOnlyGraphStore`.
///
/// Every mutating operation fails with a "read-only" error naming the backend;
/// every read is answered from the `BTreeMap` tables and indexes.
impl GraphStore for ExperimentalReadOnlyGraphStore {
    /// Always fails: this adapter does not accept writes.
    fn upsert_node(&self, _node: &SubstrateGraphNode) -> Result<()> {
        bail!("{} backend-eval adapter is read-only", self.backend.name())
    }

    /// Always fails: this adapter does not accept writes.
    fn upsert_edge(&self, _edge: &SubstrateGraphEdge) -> Result<()> {
        bail!("{} backend-eval adapter is read-only", self.backend.name())
    }

    /// Always fails: this adapter does not accept writes.
    fn delete_node(&self, _id: &str) -> Result<usize> {
        bail!("{} backend-eval adapter is read-only", self.backend.name())
    }

    /// Always fails: this adapter does not accept writes.
    fn delete_edge(&self, _from_id: &str, _to_id: &str, _kind: &str) -> Result<usize> {
        bail!("{} backend-eval adapter is read-only", self.backend.name())
    }

    /// Returns a clone of the node with the given id, if present.
    fn node(&self, id: &str) -> Result<Option<SubstrateGraphNode>> {
        Ok(self.nodes.get(id).cloned())
    }

    /// Returns clones of all nodes in node-id order.
    fn all_nodes(&self) -> Result<Vec<SubstrateGraphNode>> {
        Ok(self.nodes.values().cloned().collect())
    }

    /// Returns clones of all edges ordered by (from_id, kind, to_id).
    fn all_edges(&self) -> Result<Vec<SubstrateGraphEdge>> {
        let mut edges = self.edges.values().cloned().collect::<Vec<_>>();
        edges.sort_by(|left, right| {
            left.from_id
                .cmp(&right.from_id)
                .then(left.kind.cmp(&right.kind))
                .then(left.to_id.cmp(&right.to_id))
        });
        Ok(edges)
    }

    /// Returns `(node count, edge count)`.
    fn graph_counts(&self) -> Result<(usize, usize)> {
        Ok((self.nodes.len(), self.edges.len()))
    }

    /// Returns the smallest non-self-loop edge by (from_id, kind, to_id),
    /// optionally restricted to edges of `kind`.
    ///
    /// The minimum is selected in a single pass so only the chosen edge is cloned;
    /// ties resolve to the first candidate in key order, the same result a stable
    /// sort followed by taking the first element would give.
    fn sample_edge(&self, kind: Option<&str>) -> Result<Option<SubstrateGraphEdge>> {
        Ok(self
            .edges
            .values()
            .filter(|edge| edge.from_id != edge.to_id)
            .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
            .min_by(|left, right| {
                left.from_id
                    .cmp(&right.from_id)
                    .then(left.kind.cmp(&right.kind))
                    .then(left.to_id.cmp(&right.to_id))
            })
            .cloned())
    }

    /// Returns a non-self-loop edge that has at least one property, together with
    /// a filter built from the first property yielded by iterating that edge's
    /// properties.
    ///
    /// Among all candidates the one with the smallest (filter key, filter value,
    /// edge key) is chosen.
    fn sample_edge_with_property(
        &self,
    ) -> Result<Option<(SubstrateGraphEdge, GraphPropertyFilter)>> {
        Ok(self
            .edges
            .values()
            .filter(|edge| edge.from_id != edge.to_id)
            .filter_map(|edge| {
                edge.properties.iter().next().map(|(key, value)| {
                    (
                        edge,
                        GraphPropertyFilter {
                            key: key.clone(),
                            value: value.clone(),
                        },
                    )
                })
            })
            .min_by(|(left_edge, left_filter), (right_edge, right_filter)| {
                left_filter
                    .key
                    .cmp(&right_filter.key)
                    .then(left_filter.value.cmp(&right_filter.value))
                    .then_with(|| graph_db_edge_key(left_edge).cmp(&graph_db_edge_key(right_edge)))
            })
            .map(|(edge, filter)| (edge.clone(), filter)))
    }

    /// Returns clones of all nodes of the given kind, in the kind index's id order.
    /// An unknown kind yields an empty vector.
    fn nodes_by_kind(&self, kind: &str) -> Result<Vec<SubstrateGraphNode>> {
        Ok(self
            .node_ids_by_kind
            .get(kind)
            .into_iter()
            .flatten()
            .filter_map(|id| self.nodes.get(id).cloned())
            .collect())
    }

    /// Returns clones of the edges leaving `from_id`, in adjacency-index order,
    /// optionally restricted to edges of `kind`.
    fn outgoing_edges(&self, from_id: &str, kind: Option<&str>) -> Result<Vec<SubstrateGraphEdge>> {
        Ok(self
            .outgoing_edge_keys_by_from
            .get(from_id)
            .into_iter()
            .flatten()
            .filter_map(|key| self.edges.get(key))
            .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
            .cloned()
            .collect())
    }

    /// Returns clones of all edges whose endpoints are both in `node_ids`.
    fn edges_between_nodes(&self, node_ids: &BTreeSet<String>) -> Result<Vec<SubstrateGraphEdge>> {
        Ok(self
            .edges
            .values()
            .filter(|edge| node_ids.contains(&edge.from_id) && node_ids.contains(&edge.to_id))
            .cloned()
            .collect())
    }

    /// Breadth-first search for a path from `from_id` to `to_id` along outgoing
    /// edges, optionally restricted to edges of `kind`.
    ///
    /// Returns `Ok(None)` when the target is unreachable. When `from_id == to_id`
    /// a zero-hop path is returned without looking the node up.
    fn shortest_path(
        &self,
        from_id: &str,
        to_id: &str,
        kind: Option<&str>,
    ) -> Result<Option<substrate::GraphPath>> {
        if from_id == to_id {
            return Ok(Some(substrate::GraphPath {
                nodes: vec![from_id.to_string()],
                hops: 0,
            }));
        }
        let mut queue = VecDeque::new();
        // Maps each discovered node to its predecessor; the start node maps to the
        // empty string, which terminates path reconstruction below.
        let mut parent = BTreeMap::<String, String>::new();
        parent.insert(from_id.to_string(), String::new());
        queue.push_back(from_id.to_string());
        while let Some(current) = queue.pop_front() {
            for edge in self.outgoing_edges(&current, kind)? {
                if parent.contains_key(&edge.to_id) {
                    continue;
                }
                parent.insert(edge.to_id.clone(), current.clone());
                if edge.to_id == to_id {
                    // Walk predecessors back to the start, then reverse.
                    let mut nodes = vec![to_id.to_string()];
                    let mut cursor = to_id;
                    while let Some(previous) = parent.get(cursor) {
                        if previous.is_empty() {
                            break;
                        }
                        nodes.push(previous.clone());
                        cursor = previous;
                    }
                    nodes.reverse();
                    return Ok(Some(substrate::GraphPath {
                        hops: nodes.len().saturating_sub(1),
                        nodes,
                    }));
                }
                queue.push_back(edge.to_id);
            }
        }
        Ok(None)
    }

    /// Breadth-first expansion from `from_id` over outgoing edges of any kind, up
    /// to `depth` hops, collecting reached nodes whose kind is in `kinds`.
    ///
    /// The result has one entry per requested kind (possibly empty). Each entry is
    /// ordered by (hops, label, id) and truncated to `limit` when `limit > 0`.
    /// An empty `kinds` slice yields an empty map. Edge targets with no node in the
    /// store are marked visited but neither reported nor expanded.
    fn reachable_nodes_by_kinds(
        &self,
        from_id: &str,
        kinds: &[&str],
        depth: usize,
        limit: usize,
    ) -> Result<BTreeMap<String, Vec<(SubstrateGraphNode, substrate::GraphPath)>>> {
        let requested = kinds.iter().copied().collect::<BTreeSet<_>>();
        if requested.is_empty() {
            return Ok(BTreeMap::new());
        }
        // Pre-seed every requested kind so kinds with no hits still appear.
        let mut rows = requested
            .iter()
            .map(|kind| {
                (
                    (*kind).to_string(),
                    BTreeMap::<String, (SubstrateGraphNode, substrate::GraphPath)>::new(),
                )
            })
            .collect::<BTreeMap<_, _>>();
        let mut seen = BTreeSet::from([from_id.to_string()]);
        let mut queue = VecDeque::from([(from_id.to_string(), vec![from_id.to_string()])]);
        while let Some((current, path)) = queue.pop_front() {
            let current_depth = path.len().saturating_sub(1);
            if current_depth >= depth {
                continue;
            }
            for edge in self.outgoing_edges(&current, None)? {
                if !seen.insert(edge.to_id.clone()) {
                    continue;
                }
                let Some(node) = self.nodes.get(&edge.to_id).cloned() else {
                    continue;
                };
                let mut next_path = path.clone();
                next_path.push(edge.to_id.clone());
                let graph_path = substrate::GraphPath {
                    hops: next_path.len().saturating_sub(1),
                    nodes: next_path.clone(),
                };
                if requested.contains(node.kind.as_str()) {
                    rows.entry(node.kind.clone())
                        .or_default()
                        .entry(node.id.clone())
                        .or_insert((node, graph_path));
                }
                queue.push_back((edge.to_id, next_path));
            }
        }
        Ok(rows
            .into_iter()
            .map(|(kind, values)| {
                let mut values = values.into_values().collect::<Vec<_>>();
                values.sort_by(|(left_node, left_path), (right_node, right_path)| {
                    left_path
                        .hops
                        .cmp(&right_path.hops)
                        .then(left_node.label.cmp(&right_node.label))
                        .then(left_node.id.cmp(&right_node.id))
                });
                if limit > 0 && values.len() > limit {
                    values.truncate(limit);
                }
                (kind, values)
            })
            .collect())
    }
}
// Backend-eval tuning constants and cache version tags.
// NOTE(review): the descriptions below are inferred from the constant names;
// confirm against the code that consumes them.

/// Maximum hop count for backend-eval path probes (presumably the default cap).
pub(crate) const GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS: usize = 64;
/// Larger hop budgets, presumably evaluated as extended tiers beyond the default.
pub(crate) const GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS: [usize; 3] = [128, 256, 512];
/// Hop budget for direct (single-hop) path probes.
pub(crate) const GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS: usize = 1;
/// Regression allowance, in percent.
const GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT: f64 = 10.0;
/// Row unit used when normalizing metrics (presumably "per 1000 rows").
pub(crate) const GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT: f64 = 1000.0;
/// Minimum number of sample runs.
const GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS: usize = 3;
/// Version tag for the conflict-matrix preparation cache.
const CONFLICT_MATRIX_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-prep-v1";
/// Version tag for the conflict-matrix graph preparation cache.
const CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-graph-prep-v1";
/// Version tag for the backend-eval full-projection cache.
const GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION: &str = "backend-eval-full-projection-v5";
/// One named phase timing: duration in microseconds plus a free-form detail
/// string. Also embedded in `GraphDbRefreshSummary::phase_timings`.
#[derive(Clone, Serialize, Deserialize)]
pub(crate) struct GraphDbBackendEvalPhaseTiming {
name: String,
duration_micros: u128,
detail: String,
}
/// Serde-round-trippable cache record holding a full graph projection together
/// with its version, key, source watermark and warnings.
/// NOTE(review): presumably `version`/`key`/`source_watermark` are compared on
/// load to decide cache validity — confirm in the cache reader.
#[derive(Serialize, Deserialize)]
struct GraphDbBackendEvalFullProjectionCache {
version: String,
key: String,
source_watermark: String,
projection: GraphProjection,
warnings: Vec<String>,
}
/// Bookkeeping for a full-projection cache access: hit flag, byte sizes, and
/// pruning counters. `Default` yields a miss with all counters at zero.
#[derive(Clone, Default)]
struct GraphDbBackendEvalFullProjectionCacheStats {
hit: bool,
disk_bytes: u64,
json_bytes: u64,
pruned_files: usize,
pruned_bytes: u64,
}
/// Serializable row describing one source file for watermarking: its path, size
/// in bytes, and content hash.
#[derive(Serialize)]
struct GraphDbBackendEvalRawSourceWatermarkRow {
path: String,
bytes: u64,
content_hash: String,
}
/// A source watermark value paired with a human-readable detail string.
#[derive(Clone)]
struct GraphDbBackendEvalFullProjectionSourceWatermark {
value: String,
detail: String,
}
/// Serializable snapshot of the configuration used for a backend evaluation run:
/// synthetic dataset sizes, traversal depth/limits, path hop budgets and policy
/// strings, full-projection settings, and the normalization row unit.
/// NOTE(review): field semantics are inferred from names; confirm in the builder.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalConfig {
high_degree_nodes: usize,
high_degree_fanout: usize,
deep_chain_nodes: usize,
deep_chain_fanout: usize,
depth: usize,
limit: usize,
impact_limit: usize,
path_max_hops: usize,
path_direct_hop_budget: usize,
path_deep_chain_hop_budget: usize,
path_extended_hop_budgets: Vec<usize>,
path_hop_policy: String,
path_probe_strategy: String,
path_query_plan_checks: Vec<String>,
full_projection_enabled: bool,
full_projection_profile: String,
normalization_row_unit: usize,
}
/// An operation name paired with a JSON value.
/// NOTE(review): presumably the value is the operation's result signature used
/// for parity comparison between backends — confirm at the use site.
#[derive(Clone)]
struct GraphDbBackendEvalSignature {
operation: String,
value: serde_json::Value,
}
/// Serializable outcome of one evaluated operation: name, support flag, status,
/// and duration in microseconds. `rows` and `error` are omitted from the
/// serialized output when `None`.
#[derive(Serialize)]
struct GraphDbBackendEvalOperation {
name: String,
supported: bool,
status: String,
duration_micros: u128,
#[serde(skip_serializing_if = "Option::is_none")]
rows: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
}
/// Serializable parity verdict against SQLite, with supporting diagnostics.
#[derive(Serialize)]
struct GraphDbBackendEvalParity {
matches_sqlite: bool,
diagnostics: Vec<String>,
}
/// Serializable per-backend evaluation result: backend/adapter identity, the
/// operations run with their total time in microseconds, the parity verdict, and
/// descriptive notes on projection loading, locking, and install portability.
#[derive(Serialize)]
struct GraphDbBackendEvalBackendReport {
backend: String,
adapter: String,
read_only: bool,
projection_load: String,
operations: Vec<GraphDbBackendEvalOperation>,
total_micros: u128,
parity: GraphDbBackendEvalParity,
lock_behavior: String,
install_portability: String,
}
/// Serializable evaluation dataset entry: its name, target count, node/edge
/// counts, and the per-backend reports produced for it.
#[derive(Serialize)]
struct GraphDbBackendEvalDataset {
name: String,
target_count: usize,
nodes: usize,
edges: usize,
backends: Vec<GraphDbBackendEvalBackendReport>,
}
/// Serializable promotion decision for one backend: the decision string, the
/// reasons behind it, and the gate it was evaluated against.
#[derive(Serialize)]
struct GraphDbBackendPromotionDecision {
backend: String,
decision: String,
reasons: Vec<String>,
gate: GraphDbBackendPromotionGate,
}
/// Serializable description of the performance gate applied to backend
/// evaluation: baseline/profile names, regression allowance, minimum sample
/// runs, required metrics, helper commands, and the nested hop-cap and
/// adapter-spike gates.
#[derive(Serialize)]
struct GraphDbBackendEvalPerformanceGate {
baseline_fixture: String,
ci_profile: String,
opt_in_real_profile: String,
full_projection_cache_hit_gate: String,
allowed_regression_percent: f64,
minimum_sample_runs: usize,
normalized_metric_unit: String,
required_metrics: Vec<String>,
digest_command: String,
repeated_sample_command: String,
hop_cap_promotion: GraphDbHopCapPromotionGate,
backend_adapter_spike: GraphDbBackendAdapterSpikeGate,
}
/// Serializable gate describing the conditions for promoting a larger hop cap:
/// current default, candidate tiers, required backend/workloads/metrics,
/// regression allowance, minimum sample runs, and the decision rule text.
#[derive(Serialize)]
struct GraphDbHopCapPromotionGate {
status: String,
current_default_hops: usize,
candidate_hop_tiers: Vec<usize>,
required_backend: String,
required_workloads: Vec<String>,
required_metrics: Vec<String>,
allowed_regression_percent: f64,
minimum_sample_runs: usize,
decision_rule: String,
}
/// Serializable gate for backend-adapter spikes: status, candidate backends,
/// required workloads and checks, the decision rule, and an evidence plan.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeGate {
status: String,
candidate_backends: Vec<GraphDbBackendAdapterSpikeCandidate>,
required_workloads: Vec<String>,
required_checks: Vec<String>,
decision_rule: String,
evidence_plan: String,
}
/// Serializable description of one candidate backend for an adapter spike.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeCandidate {
backend: String,
adapter_label: String,
projection_load: String,
lock_behavior: String,
install_portability: String,
}
/// Top-level serializable report for a backend evaluation run.
///
/// `scope` is omitted from the serialized output when `None`. `metrics` is a
/// `BTreeMap`, so metric names serialize in sorted order.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
label: String,
baseline_backend: String,
candidates: Vec<String>,
targets: Vec<String>,
config: GraphDbBackendEvalConfig,
phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
datasets: Vec<GraphDbBackendEvalDataset>,
promotion: Vec<GraphDbBackendPromotionDecision>,
performance_gate: GraphDbBackendEvalPerformanceGate,
metrics: BTreeMap<String, f64>,
metric_digest_command: String,
warnings: Vec<String>,
}
/// Result of one doctor check.
///
/// When built through `graph_db_doctor_check`, `fail_closed` is true exactly
/// when `diagnostics` is non-empty, `status` is `"fail_closed"` or `"ok"`
/// accordingly, and `repair_commands` is empty for passing checks.
#[derive(Clone, Debug, Serialize)]
struct GraphDbDoctorCheck {
name: String,
status: String,
fail_closed: bool,
diagnostics: Vec<String>,
repair_commands: Vec<String>,
}
/// Aggregated doctor report over a set of checks.
///
/// `status`, `fail_closed` and `repair_commands` are recomputed from `checks` by
/// `finalize`. `scope` and `convex_snapshot` are omitted from the serialized
/// output when `None`; `required_indexes` is omitted when empty.
#[derive(Serialize)]
pub(crate) struct GraphDbDoctorReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
backend: String,
graph_db: String,
#[serde(skip_serializing_if = "Option::is_none")]
convex_snapshot: Option<String>,
status: String,
fail_closed: bool,
checks: Vec<GraphDbDoctorCheck>,
repair_commands: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
required_indexes: Vec<ConvexRequiredIndex>,
}
/// Serializable count-only summary of drift categories (upserts, tombstones,
/// stale rows, stale projection metadata, duplicate/orphan failures, and missing
/// required indexes).
#[derive(Serialize)]
struct GraphDbDriftSummary {
node_upserts: usize,
edge_upserts: usize,
node_tombstones: usize,
edge_tombstones: usize,
stale_nodes: usize,
stale_edges: usize,
stale_projection_metadata: usize,
duplicate_failures: usize,
orphan_failures: usize,
missing_required_indexes: usize,
}
/// Serializable drift report between the local graph database and a Convex
/// snapshot: hashes, a count summary, the identifiers in each drift category,
/// diagnostics, and suggested next commands.
///
/// `scope` is omitted from the serialized output when `None`; `warnings` is
/// omitted when empty.
#[derive(Serialize)]
struct GraphDbDriftReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
graph_db: String,
convex_snapshot: String,
status: String,
graph_reads_allowed: bool,
projection_version: String,
local_hash: Option<String>,
snapshot_hash: Option<String>,
summary: GraphDbDriftSummary,
node_upserts: Vec<String>,
edge_upserts: Vec<String>,
node_tombstones: Vec<String>,
edge_tombstones: Vec<String>,
stale_nodes: Vec<String>,
stale_edges: Vec<String>,
diagnostics: Vec<String>,
next_commands: Vec<String>,
required_indexes: Vec<ConvexRequiredIndex>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Tombstone row counts split by row kind.
///
/// As populated by `sqlite_tombstone_counts` and `sqlite_graph_counts_from_cache`,
/// `total` equals `nodes + edges`.
#[derive(Clone, Serialize)]
struct GraphDbTombstoneCounts {
nodes: usize,
edges: usize,
total: usize,
}
/// Operator-facing counts for a graph database: live nodes and edges, tombstones,
/// and (when available) the database file size and freelist size in bytes.
///
/// The two byte fields are omitted from the serialized output when `None`.
#[derive(Clone, Serialize)]
struct GraphDbOperatorCounts {
nodes: usize,
edges: usize,
tombstones: GraphDbTombstoneCounts,
#[serde(skip_serializing_if = "Option::is_none")]
file_size_bytes: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
freelist_bytes: Option<u64>,
}
/// Compaction recommendation produced by `graph_db_compaction_policy`.
///
/// That function sets `status` to `"recommended"` or `"not_needed"`, copies the
/// size fields from the counts it was given, mirrors its `prune_confirmed`
/// argument into `safe_to_prune_tombstones`, and sets
/// `requires_convex_reconciliation` when tombstones exist and pruning was not
/// confirmed.
#[derive(Clone, Serialize)]
struct GraphDbCompactionPolicy {
status: String,
tombstone_scan_rows: usize,
live_rows: usize,
file_size_bytes: Option<u64>,
freelist_bytes: Option<u64>,
safe_to_prune_tombstones: bool,
requires_convex_reconciliation: bool,
recommendations: Vec<String>,
proof: Vec<String>,
}
/// Serializable summary of a graph-db refresh: scope, projection version, mode,
/// and per-category row/property counters.
///
/// `source_watermark` and the two file-size fields are omitted from the
/// serialized output when `None`; `phase_timings` is omitted when empty.
#[derive(Serialize)]
pub(crate) struct GraphDbRefreshSummary {
scope: String,
projection_version: String,
mode: String,
#[serde(skip_serializing_if = "Option::is_none")]
source_watermark: Option<String>,
tombstoned_nodes: usize,
tombstoned_edges: usize,
upserted_nodes: usize,
upserted_edges: usize,
unchanged_nodes: usize,
unchanged_edges: usize,
upserted_properties: usize,
unchanged_properties: usize,
deleted_properties: usize,
deleted_nodes: usize,
deleted_edges: usize,
pruned_tombstones: usize,
#[serde(skip_serializing_if = "Option::is_none")]
file_size_bytes_before: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
file_size_bytes_after: Option<u64>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
}
/// Serializable operator report for a graph-db operation: identity, status,
/// freshness and readiness verdicts, counts, compaction policy, and next commands.
///
/// `scope`, `refresh` and `recovery` are omitted from the serialized output when
/// `None`; `warnings` is omitted when empty.
#[derive(Serialize)]
struct GraphDbOperatorReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
graph_db: String,
operation: String,
status: String,
materialized: bool,
freshness: GraphDbFreshnessReport,
readiness: GraphEffectivenessReadiness,
counts: GraphDbOperatorCounts,
#[serde(skip_serializing_if = "Option::is_none")]
refresh: Option<GraphDbRefreshSummary>,
compaction: GraphDbCompactionPolicy,
#[serde(skip_serializing_if = "Option::is_none")]
recovery: Option<index::ReadOnlyRecovery>,
next_commands: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Serializable before/after report for a compaction run.
///
/// `reclaimed_bytes` is signed, so it can represent a database that grew.
/// `scope` is omitted from the serialized output when `None`; `warnings` is
/// omitted when empty.
#[derive(Serialize)]
pub(crate) struct GraphDbCompactionReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
graph_db: String,
applied: bool,
pruned_tombstones: usize,
counts_before: GraphDbOperatorCounts,
counts_after: GraphDbOperatorCounts,
compaction_before: GraphDbCompactionPolicy,
compaction_after: GraphDbCompactionPolicy,
reclaimed_bytes: i64,
next_commands: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// One evidence path entry: the destination, its kind and label, and optionally
/// the graph path and an expansion string. `path` and `expand` are omitted from
/// the serialized output when `None`.
/// NOTE(review): `expand` presumably holds a follow-up command — confirm at the
/// construction site.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidencePath {
to: String,
kind: String,
label: String,
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<substrate::GraphPath>,
#[serde(skip_serializing_if = "Option::is_none")]
expand: Option<String>,
}
/// Names the test, the fixture, and the assertions that cover an evidence report.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFixtureCoverage {
test: String,
fixture: String,
assertions: Vec<String>,
}
/// Serde-round-trippable evidence packet for a single target node: freshness,
/// the target itself, related node groups, shortest paths, budgeting, pagination,
/// and command lists.
///
/// `scope`, `projection_hash`, `output_budget` and `next_cursor` are omitted
/// from the serialized output when `None`; `warnings` is omitted when empty.
/// On deserialization, a missing `truncated` defaults to `false` and a missing
/// `warnings` defaults to empty.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidenceReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
backend: String,
contract_version: String,
target: String,
packet_id: String,
#[serde(skip_serializing_if = "Option::is_none")]
projection_hash: Option<String>,
freshness: GraphDbFreshnessReport,
target_node: SubstrateTerseGraphNode,
worker_context: Vec<SubstrateTerseGraphNode>,
source_handles: Vec<SubstrateTerseGraphNode>,
worker_results: Vec<SubstrateTerseGraphNode>,
semantic_related: Vec<SubstrateTerseGraphNode>,
shortest_paths: Vec<GraphDbEvidencePath>,
#[serde(skip_serializing_if = "Option::is_none")]
output_budget: Option<GraphDbOutputBudgetReport>,
#[serde(default)]
truncated: bool,
#[serde(skip_serializing_if = "Option::is_none")]
next_cursor: Option<String>,
next_commands: Vec<String>,
replay_commands: Vec<String>,
repair_commands: Vec<String>,
fixture_coverage: GraphDbFixtureCoverage,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Input bundle for building an evidence report, generic over the `GraphStore`
/// implementation it reads from.
///
/// Borrowed fields share the lifetime `'a`; `freshness` and `warnings` are owned.
pub(crate) struct GraphDbEvidenceInput<'a, S: GraphStore> {
root: &'a Path,
scope: Option<&'a str>,
backend: &'a str,
target: &'a str,
depth: usize,
limit: usize,
cursor: Option<&'a str>,
store: &'a S,
freshness: GraphDbFreshnessReport,
warnings: Vec<String>,
}
impl GraphDbDoctorReport {
    /// Creates an empty report in the `"ok"` state with no checks, no repair
    /// commands and no required indexes. Paths are stored as lossy UTF-8 strings.
    fn new(
        root: &Path,
        scope: Option<&str>,
        backend: &str,
        graph_db: &Path,
        convex_snapshot: Option<&Path>,
    ) -> Self {
        let lossy = |path: &Path| path.to_string_lossy().to_string();
        Self {
            root: lossy(root),
            scope: scope.map(|value| value.to_string()),
            backend: String::from(backend),
            graph_db: lossy(graph_db),
            convex_snapshot: convex_snapshot.map(lossy),
            status: String::from("ok"),
            fail_closed: false,
            checks: Vec::new(),
            repair_commands: Vec::new(),
            required_indexes: Vec::new(),
        }
    }

    /// Appends a check result; call `finalize` afterwards to refresh the
    /// aggregate fields.
    fn push_check(&mut self, check: GraphDbDoctorCheck) {
        self.checks.push(check);
    }

    /// Recomputes `fail_closed`, `status` and `repair_commands` from the checks.
    ///
    /// The report fails closed if any check does. Repair commands from all checks
    /// are merged, de-duplicated and sorted.
    fn finalize(&mut self) {
        let any_failing = self.checks.iter().any(|check| check.fail_closed);
        self.fail_closed = any_failing;
        self.status = String::from(if any_failing { "fail_closed" } else { "ok" });

        let merged: BTreeSet<String> = self
            .checks
            .iter()
            .flat_map(|check| check.repair_commands.iter().cloned())
            .collect();
        self.repair_commands = merged.into_iter().collect();
    }

    /// Joins up to the first three diagnostics of failing checks with `"; "`.
    fn summary(&self) -> String {
        let mut picked: Vec<String> = Vec::new();
        'checks: for check in &self.checks {
            if !check.fail_closed {
                continue;
            }
            for diagnostic in &check.diagnostics {
                if picked.len() == 3 {
                    break 'checks;
                }
                picked.push(diagnostic.clone());
            }
        }
        picked.join("; ")
    }
}
/// Builds a doctor check from its diagnostics.
///
/// A check fails closed exactly when `diagnostics` is non-empty. Passing checks
/// discard the supplied `repair_commands`.
fn graph_db_doctor_check(
    name: impl Into<String>,
    diagnostics: Vec<String>,
    repair_commands: Vec<String>,
) -> GraphDbDoctorCheck {
    let failing = !diagnostics.is_empty();
    let status = if failing { "fail_closed" } else { "ok" };
    let repair_commands = match failing {
        true => repair_commands,
        false => Vec::new(),
    };
    GraphDbDoctorCheck {
        name: name.into(),
        status: String::from(status),
        fail_closed: failing,
        diagnostics,
        repair_commands,
    }
}
/// Renders the optional scope as a command-line fragment.
///
/// Returns `" --scope <quoted scope>"` (with a leading space) when a scope is
/// given, and an empty string otherwise.
pub(crate) fn graph_db_scope_arg(scope: Option<&str>) -> String {
    match scope {
        Some(value) => format!(" --scope {}", shell_quote(value)),
        None => String::new(),
    }
}
/// Builds the `tsift graph-db ... refresh --json` command line for `root`, with
/// the root path shell-quoted and the optional scope argument appended.
fn graph_db_refresh_command(root: &Path, scope: Option<&str>) -> String {
    let root_text = root.to_string_lossy();
    let quoted_root = shell_quote(root_text.as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    format!(
        "tsift graph-db --path {}{} refresh --json",
        quoted_root, scope_arg
    )
}
/// Returns the command line used to rebuild the graph database.
///
/// This delegates directly to `graph_db_refresh_command`, so the rebuild and
/// refresh commands are currently the same string.
fn graph_db_rebuild_command(root: &Path, scope: Option<&str>) -> String {
graph_db_refresh_command(root, scope)
}
/// Builds a shell command that moves the graph database aside to
/// `<graph_db>.bak` and then runs the rebuild command. Both paths are
/// shell-quoted.
fn graph_db_backup_rebuild_command(root: &Path, scope: Option<&str>, graph_db: &Path) -> String {
    let db_text = graph_db.to_string_lossy();
    let backup = format!("{}.bak", db_text);
    let quoted_db = shell_quote(db_text.as_ref());
    let quoted_backup = shell_quote(&backup);
    let rebuild = graph_db_rebuild_command(root, scope);
    format!("mv {} {} && {}", quoted_db, quoted_backup, rebuild)
}
/// Builds the `tsift convex-sync ... --remote-snapshot --apply --json` command
/// line for `root`, with the root path shell-quoted and the optional scope
/// argument appended.
fn convex_refresh_command(root: &Path, scope: Option<&str>) -> String {
    let root_text = root.to_string_lossy();
    let quoted_root = shell_quote(root_text.as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    format!(
        "tsift convex-sync {}{} --remote-snapshot --apply --json",
        quoted_root, scope_arg
    )
}
/// Opens the graph database at `graph_db` for reading.
///
/// Thin wrapper over `substrate::open_graph_read_only_connection_resilient`; any
/// error from that call is returned unchanged.
fn open_sqlite_graph_db_readonly(graph_db: &Path) -> Result<substrate::SqliteReadOnlyConnection> {
substrate::open_graph_read_only_connection_resilient(graph_db)
}
/// Reports whether a table named `table` exists, by querying `sqlite_master`.
/// The table name is passed as a bound parameter.
fn sqlite_table_exists(conn: &Connection, table: &str) -> Result<bool> {
    let exists = conn.query_row(
        "SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1)",
        [table],
        |row| row.get::<_, bool>(0),
    )?;
    Ok(exists)
}
/// Counts the rows of one of the known graph tables.
///
/// Only `graph_nodes`, `graph_edges` and `graph_tombstones` are accepted; any
/// other name is rejected with an error. Each accepted name maps to a fixed SQL
/// string, so the table name is never interpolated into the query.
fn sqlite_known_table_count(conn: &Connection, table: &str) -> Result<usize> {
    let count_sql = match table {
        "graph_nodes" => "SELECT COUNT(*) FROM graph_nodes",
        "graph_edges" => "SELECT COUNT(*) FROM graph_edges",
        "graph_tombstones" => "SELECT COUNT(*) FROM graph_tombstones",
        other => bail!("unsupported graph count table {other}"),
    };
    let row_count = conn.query_row(count_sql, [], |row| row.get::<_, usize>(0))?;
    Ok(row_count)
}
/// Counts tombstone rows grouped by `row_kind`.
///
/// Returns all zeros when the `graph_tombstones` table does not exist. Only the
/// `"node"` and `"edge"` kinds are counted; rows of any other kind are ignored
/// and do not contribute to `total`.
fn sqlite_tombstone_counts(conn: &Connection) -> Result<GraphDbTombstoneCounts> {
    let (mut nodes, mut edges) = (0usize, 0usize);
    if sqlite_table_exists(conn, "graph_tombstones")? {
        let mut stmt =
            conn.prepare("SELECT row_kind, COUNT(*) FROM graph_tombstones GROUP BY row_kind")?;
        let grouped = stmt.query_map([], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, usize>(1)?))
        })?;
        for entry in grouped {
            let (row_kind, count) = entry?;
            if row_kind == "node" {
                nodes = count;
            } else if row_kind == "edge" {
                edges = count;
            }
        }
    }
    Ok(GraphDbTombstoneCounts {
        nodes,
        edges,
        total: nodes + edges,
    })
}
/// Reads cached operator counts for `scope` from the `graph_operator_stats`
/// table.
///
/// Returns `Ok(None)` when the table does not exist or has no row for the scope.
/// A cached byte size that is NULL or negative is replaced by a live PRAGMA-based
/// measurement; if that also fails the field is left as `None`.
fn sqlite_graph_counts_from_cache(
    conn: &Connection,
    scope: &str,
) -> Result<Option<GraphDbOperatorCounts>> {
    let stats_table_present = sqlite_table_exists(conn, "graph_operator_stats")?;
    if !stats_table_present {
        return Ok(None);
    }
    let cached = conn
        .query_row(
            r#"
SELECT nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes
FROM graph_operator_stats
WHERE scope = ?1
"#,
            [scope],
            |row| {
                let nodes: usize = row.get(0)?;
                let edges: usize = row.get(1)?;
                let tombstone_nodes: usize = row.get(2)?;
                let tombstone_edges: usize = row.get(3)?;
                let stored_file_size: Option<i64> = row.get(4)?;
                let stored_freelist: Option<i64> = row.get(5)?;
                Ok((
                    nodes,
                    edges,
                    tombstone_nodes,
                    tombstone_edges,
                    stored_file_size,
                    stored_freelist,
                ))
            },
        )
        .optional()?;
    let Some((nodes, edges, tombstone_nodes, tombstone_edges, stored_file_size, stored_freelist)) =
        cached
    else {
        return Ok(None);
    };

    // Prefer the cached sizes; fall back to measuring the live database.
    let file_size_bytes = match stored_file_size.and_then(|value| u64::try_from(value).ok()) {
        Some(bytes) => Some(bytes),
        None => sqlite_database_size_bytes(conn).ok(),
    };
    let freelist_bytes = match stored_freelist.and_then(|value| u64::try_from(value).ok()) {
        Some(bytes) => Some(bytes),
        None => sqlite_database_freelist_bytes(conn).ok(),
    };

    Ok(Some(GraphDbOperatorCounts {
        nodes,
        edges,
        tombstones: GraphDbTombstoneCounts {
            nodes: tombstone_nodes,
            edges: tombstone_edges,
            total: tombstone_nodes + tombstone_edges,
        },
        file_size_bytes,
        freelist_bytes,
    }))
}
/// Returns operator counts for `scope`, preferring the cached stats row.
///
/// Without a cached row, the live tables are counted directly. A missing
/// `graph_nodes` or `graph_edges` table counts as zero, and the byte sizes are
/// left as `None` if the PRAGMA queries fail.
fn sqlite_graph_counts(conn: &Connection, scope: &str) -> Result<GraphDbOperatorCounts> {
    match sqlite_graph_counts_from_cache(conn, scope)? {
        Some(cached) => Ok(cached),
        None => {
            let mut nodes = 0;
            if sqlite_table_exists(conn, "graph_nodes")? {
                nodes = sqlite_known_table_count(conn, "graph_nodes")?;
            }
            let mut edges = 0;
            if sqlite_table_exists(conn, "graph_edges")? {
                edges = sqlite_known_table_count(conn, "graph_edges")?;
            }
            let tombstones = sqlite_tombstone_counts(conn)?;
            let file_size_bytes = sqlite_database_size_bytes(conn).ok();
            let freelist_bytes = sqlite_database_freelist_bytes(conn).ok();
            Ok(GraphDbOperatorCounts {
                nodes,
                edges,
                tombstones,
                file_size_bytes,
                freelist_bytes,
            })
        }
    }
}
/// Counts nodes whose kind is `semantic_concept` or `semantic_entity`.
///
/// Returns 0 when the `graph_nodes` table does not exist. The count is read
/// directly as `usize` through the driver's range-checked integer conversion
/// rather than an unchecked `as` cast, matching `sqlite_known_table_count`.
fn sqlite_graph_semantic_node_count(conn: &Connection) -> Result<usize> {
    if !sqlite_table_exists(conn, "graph_nodes")? {
        return Ok(0);
    }
    let count = conn.query_row(
        "SELECT COUNT(*) FROM graph_nodes WHERE kind IN ('semantic_concept', 'semantic_entity')",
        [],
        |row| row.get::<_, usize>(0),
    )?;
    Ok(count)
}
/// Derives a compaction recommendation from operator counts.
///
/// Compaction is `"recommended"` when tombstones outnumber live rows (live rows
/// being treated as at least 1), or when the freelist is non-empty and at least
/// one twentieth of the file size; otherwise it is `"not_needed"`.
///
/// The recommendation list always holds the Convex refresh, graph-db refresh and
/// `compact --apply` commands. The tombstone-pruning command is added only when
/// `prune_confirmed` is set. `proof` records the numbers the decision used,
/// reporting unknown byte sizes as 0.
pub(crate) fn graph_db_compaction_policy(
    root: &Path,
    scope: Option<&str>,
    counts: &GraphDbOperatorCounts,
    prune_confirmed: bool,
) -> GraphDbCompactionPolicy {
    let root_text = root.to_string_lossy();
    let quoted_root = shell_quote(root_text.as_ref());
    let scope_arg = graph_db_scope_arg(scope);

    let live_rows = counts.nodes + counts.edges;
    let tombstone_scan_rows = counts.tombstones.total;
    let tombstone_heavy = tombstone_scan_rows > live_rows.max(1);
    let freelist_heavy = match (counts.file_size_bytes, counts.freelist_bytes) {
        (Some(file_size), Some(freelist)) => freelist > 0 && freelist >= file_size / 20,
        _ => false,
    };
    let status = String::from(if tombstone_heavy || freelist_heavy {
        "recommended"
    } else {
        "not_needed"
    });

    let mut recommendations = vec![
        convex_refresh_command(root, scope),
        graph_db_refresh_command(root, scope),
        format!(
            "tsift graph-db --path {}{} compact --apply --json",
            quoted_root, scope_arg
        ),
    ];
    if prune_confirmed {
        recommendations.push(format!(
            "tsift graph-db --path {}{} compact --apply --prune-tombstones --confirmed-convex-reconciled --json",
            quoted_root, scope_arg
        ));
    }

    let known_file_size = counts.file_size_bytes.unwrap_or(0);
    let known_freelist = counts.freelist_bytes.unwrap_or(0);
    let proof = vec![
        format!("{live_rows} live graph row(s)"),
        format!("{tombstone_scan_rows} retained tombstone row(s) scanned by status/doctor"),
        format!(
            "graph.db file_size={} byte(s), freelist={} byte(s)",
            known_file_size, known_freelist
        ),
    ];

    GraphDbCompactionPolicy {
        status,
        tombstone_scan_rows,
        live_rows,
        file_size_bytes: counts.file_size_bytes,
        freelist_bytes: counts.freelist_bytes,
        safe_to_prune_tombstones: prune_confirmed,
        requires_convex_reconciliation: tombstone_scan_rows > 0 && !prune_confirmed,
        recommendations,
        proof,
    }
}
/// Computes the database size in bytes as `page_count * page_size`, saturating on overflow.
fn sqlite_database_size_bytes(conn: &Connection) -> Result<u64> {
    let pragma_u64 = |pragma: &str| conn.query_row(pragma, [], |row| row.get::<_, u64>(0));
    let pages = pragma_u64("PRAGMA page_count")?;
    let bytes_per_page = pragma_u64("PRAGMA page_size")?;
    Ok(pages.saturating_mul(bytes_per_page))
}
/// Computes the bytes held by free pages as `freelist_count * page_size`, saturating on overflow.
fn sqlite_database_freelist_bytes(conn: &Connection) -> Result<u64> {
    let free_pages = conn.query_row("PRAGMA freelist_count", [], |row| row.get::<_, u64>(0))?;
    let bytes_per_page = conn.query_row("PRAGMA page_size", [], |row| row.get::<_, u64>(0))?;
    let free_bytes = free_pages.saturating_mul(bytes_per_page);
    Ok(free_bytes)
}
/// Produces warnings about tombstone retention in the graph database.
///
/// Returns no diagnostics when the `graph_tombstones` table is absent. When cached
/// counts exist for `scope` they are used and the live-row scan is skipped; otherwise
/// every tombstone `row_key` is compared against the `node:<id>` / `edge:<edge_key>`
/// keys of the live rows to count tombstones that point at rows which exist again.
/// A second warning is emitted when tombstones outnumber live rows (an empty graph is
/// treated as one live row).
fn sqlite_graph_tombstone_retention_diagnostics(
    conn: &Connection,
    scope: &str,
) -> Result<Vec<String>> {
    if !sqlite_table_exists(conn, "graph_tombstones")? {
        return Ok(Vec::new());
    }

    let cached = sqlite_graph_counts_from_cache(conn, scope)?;
    let from_cache = cached.is_some();
    let counts = match cached {
        Some(cached_counts) => cached_counts,
        None => sqlite_graph_counts(conn, scope)?,
    };
    let live_rows = counts.nodes + counts.edges;

    let mut stale_live_tombstones = 0usize;
    if !from_cache {
        // Collect the keys of all live rows in the same format tombstones use.
        let mut live_keys = BTreeSet::new();
        if sqlite_table_exists(conn, "graph_nodes")? {
            let mut node_stmt = conn.prepare("SELECT id FROM graph_nodes")?;
            for node_id in node_stmt.query_map([], |row| row.get::<_, String>(0))? {
                live_keys.insert(format!("node:{}", node_id?));
            }
        }
        if sqlite_table_exists(conn, "graph_edges")? {
            let mut edge_stmt = conn.prepare("SELECT edge_key FROM graph_edges")?;
            for edge_key in edge_stmt.query_map([], |row| row.get::<_, String>(0))? {
                live_keys.insert(format!("edge:{}", edge_key?));
            }
        }
        let mut tombstone_stmt =
            conn.prepare("SELECT row_key FROM graph_tombstones ORDER BY row_key")?;
        for row_key in tombstone_stmt.query_map([], |row| row.get::<_, String>(0))? {
            let row_key = row_key?;
            if live_keys.contains(&row_key) {
                stale_live_tombstones += 1;
            }
        }
    }

    let mut diagnostics = Vec::new();
    if stale_live_tombstones > 0 {
        diagnostics.push(format!(
            "{stale_live_tombstones} tombstone(s) reference rows that are live again; the next graph-db refresh prunes those stale tombstones before inserting new deletion markers"
        ));
    }
    if counts.tombstones.total > live_rows.max(1) {
        let source = match from_cache {
            true => "cached refresh stats",
            false => "live row scan",
        };
        // Unknown sizes are reported as 0.
        let file_size = counts.file_size_bytes.unwrap_or(0);
        let freelist = counts.freelist_bytes.unwrap_or(0);
        diagnostics.push(format!(
            "tombstone retention exceeds live graph rows: {} tombstone(s) vs {} live row(s) from {}; graph.db file_size={} byte(s), freelist={} byte(s), status/doctor tombstone scans inspect {} extra row(s). Run convex-sync against the remote snapshot before rebuild/compaction if a remote consumer may still need deletion reconciliation.",
            counts.tombstones.total,
            live_rows,
            source,
            file_size,
            freelist,
            counts.tombstones.total
        ));
    }
    Ok(diagnostics)
}
/// Reports whether the projection metadata stored for `scope` is current.
///
/// The report is `"missing"` (fail closed) when the `graph_projection_versions` table
/// or the row for `scope` is absent, `"stale"` (fail closed) when the stored version
/// differs from `GRAPH_PROJECTION_VERSION` or the content hash is absent, and
/// `"current"` otherwise.
fn sqlite_graph_freshness_from_conn(
    conn: &Connection,
    scope: &str,
) -> Result<GraphDbFreshnessReport> {
    // Both "missing" outcomes share everything except the diagnostic text.
    let missing_report = |reason: &str| GraphDbFreshnessReport {
        status: "missing".to_string(),
        fail_closed: true,
        projection_version: None,
        content_hash: None,
        source_watermark: None,
        diagnostics: vec![reason.to_string()],
    };

    if !sqlite_table_exists(conn, "graph_projection_versions")? {
        return Ok(missing_report(
            "graph projection metadata table is missing; refresh graph.db before trusting reads",
        ));
    }

    let stored = conn
        .query_row(
            r#"
SELECT projection_version, content_hash, source_watermark
FROM graph_projection_versions
WHERE scope = ?1
"#,
            [scope],
            |row| {
                let projection_version = row.get::<_, String>(0)?;
                let content_hash = row.get::<_, Option<String>>(1)?;
                let source_watermark = row.get::<_, Option<String>>(2)?;
                Ok((projection_version, content_hash, source_watermark))
            },
        )
        .optional()?;
    let (projection_version, content_hash, source_watermark) = match stored {
        Some(row) => row,
        None => {
            return Ok(missing_report(
                "graph projection metadata is missing; refresh graph.db before trusting reads",
            ));
        }
    };

    let mut diagnostics = Vec::new();
    if projection_version != GRAPH_PROJECTION_VERSION {
        diagnostics.push(format!(
            "projection version mismatch: expected {} got {}",
            GRAPH_PROJECTION_VERSION, projection_version
        ));
    }
    if content_hash.is_none() {
        diagnostics.push("projection content hash is missing".to_string());
    }

    // Any diagnostic at all makes the projection untrustworthy.
    let fail_closed = !diagnostics.is_empty();
    let status = match fail_closed {
        true => "stale",
        false => "current",
    };
    Ok(GraphDbFreshnessReport {
        status: status.to_string(),
        fail_closed,
        projection_version: Some(projection_version),
        content_hash,
        source_watermark,
        diagnostics,
    })
}
/// Lists the follow-up commands suggested to an operator for this root and scope.
///
/// The refresh command comes first when `include_refresh` is set, followed by the
/// doctor, convex-snapshot drift and convex-sync commands, in that order.
fn graph_db_operator_next_commands(
    root: &Path,
    scope: Option<&str>,
    include_refresh: bool,
) -> Vec<String> {
    let root_text = root.to_string_lossy();
    let mut commands: Vec<String> = Vec::with_capacity(4);
    if include_refresh {
        commands.push(graph_db_refresh_command(root, scope));
    }
    let doctor = format!(
        "tsift graph-db --path {}{} doctor --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope)
    );
    let drift = format!(
        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot <rows.json> drift --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope)
    );
    let sync = format!(
        "tsift convex-sync {}{} --remote-snapshot --apply --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope)
    );
    commands.extend([doctor, drift, sync]);
    commands
}
/// Describes, for operators, which read-only recovery path served a graph.db read.
pub(crate) fn graph_db_read_recovery_diagnostic(recovery: index::ReadOnlyRecovery) -> String {
    let message = match recovery {
        index::ReadOnlyRecovery::SnapshotFallback => {
            "graph.db read recovered through snapshot fallback after a rollback-journal lock on the live database"
        }
        index::ReadOnlyRecovery::SnapshotFallbackWal => {
            "graph.db read recovered through WAL-aware snapshot fallback after copying live -wal/-shm sidecars"
        }
    };
    String::from(message)
}
/// Runs `sql` without parameters and gathers the first column of every row into a sorted set.
///
/// The first row that fails to decode as text aborts the query with that error.
fn sqlite_string_set(conn: &Connection, sql: &str) -> Result<BTreeSet<String>> {
    let mut statement = conn.prepare(sql)?;
    let values = statement
        .query_map([], |row| row.get::<_, String>(0))?
        .collect::<std::result::Result<BTreeSet<String>, _>>()?;
    Ok(values)
}
/// Returns the column names of `table` as reported by `PRAGMA table_info`.
///
/// `table` is interpolated into the pragma text unquoted, so callers must pass a
/// trusted identifier.
fn sqlite_column_names(conn: &Connection, table: &str) -> Result<BTreeSet<String>> {
    let pragma = format!("PRAGMA table_info({table})");
    let mut statement = conn.prepare(&pragma)?;
    // Column 1 of each `table_info` row is read as the column name.
    let columns = statement
        .query_map([], |row| row.get::<_, String>(1))?
        .collect::<std::result::Result<BTreeSet<String>, _>>()?;
    Ok(columns)
}
/// Compares the graph.db schema with the layout this build expects.
///
/// Reports, in order: a `user_version` that is newer or older than
/// `SQLITE_GRAPH_SCHEMA_VERSION`, missing tables, missing columns on tables that
/// exist, missing indexes, and missing `graph_edges` foreign keys for `from_id` and
/// `to_id`. An empty result means no drift was found.
fn sqlite_graph_schema_diagnostics(conn: &Connection) -> Result<Vec<String>> {
    const REQUIRED_TABLES: [(&str, &[&str]); 6] = [
        (
            "graph_nodes",
            &[
                "id",
                "kind",
                "label",
                "properties_json",
                "provenance_json",
                "freshness_json",
                "row_hash",
                "source_watermark",
            ],
        ),
        (
            "graph_edges",
            &[
                "edge_key",
                "from_id",
                "to_id",
                "kind",
                "properties_json",
                "provenance_json",
                "freshness_json",
                "row_hash",
                "source_watermark",
            ],
        ),
        (
            "graph_projection_versions",
            &[
                "scope",
                "projection_version",
                "content_hash",
                "source_watermark",
                "observed_at_unix",
            ],
        ),
        (
            "graph_tombstones",
            &["row_key", "row_kind", "deleted_at_unix"],
        ),
        ("graph_node_properties", &["node_id", "key", "value"]),
        ("graph_edge_properties", &["edge_key", "key", "value"]),
    ];
    const REQUIRED_INDEXES: [&str; 6] = [
        "idx_graph_nodes_kind",
        "idx_graph_edges_from_kind",
        "idx_graph_edges_to_kind",
        "idx_graph_edges_edge_key",
        "idx_graph_node_properties_key_value_node",
        "idx_graph_edge_properties_key_value_edge",
    ];
    const REQUIRED_EDGE_FOREIGN_KEYS: [(&str, &str); 2] = [("from_id", "id"), ("to_id", "id")];

    let mut diagnostics = Vec::new();

    let user_version: i64 =
        conn.pragma_query_value(None, "user_version", |row| row.get::<_, i64>(0))?;
    match user_version.cmp(&SQLITE_GRAPH_SCHEMA_VERSION) {
        std::cmp::Ordering::Greater => diagnostics.push(format!(
            "graph.db schema version {user_version} is newer than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
        )),
        std::cmp::Ordering::Less => diagnostics.push(format!(
            "graph.db schema version {user_version} is older than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
        )),
        std::cmp::Ordering::Equal => {}
    }

    let tables = sqlite_string_set(
        conn,
        "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name",
    )?;
    for (table, required_columns) in REQUIRED_TABLES {
        if !tables.contains(table) {
            // Columns cannot be inspected on a table that does not exist.
            diagnostics.push(format!("graph.db schema drift: missing table {table}"));
            continue;
        }
        let columns = sqlite_column_names(conn, table)?;
        for &column in required_columns {
            if !columns.contains(column) {
                diagnostics.push(format!(
                    "graph.db schema drift: missing column {table}.{column}"
                ));
            }
        }
    }

    let indexes = sqlite_string_set(
        conn,
        "SELECT name FROM sqlite_master WHERE type = 'index' ORDER BY name",
    )?;
    for index in REQUIRED_INDEXES {
        if !indexes.contains(index) {
            diagnostics.push(format!("graph.db schema drift: missing index {index}"));
        }
    }

    if tables.contains("graph_edges") {
        let mut fk_stmt = conn.prepare("PRAGMA foreign_key_list(graph_edges)")?;
        // Columns 3 and 4 are read as the (child column, parent column) pair.
        let fks = fk_stmt
            .query_map([], |row| {
                Ok((row.get::<_, String>(3)?, row.get::<_, String>(4)?))
            })?
            .collect::<std::result::Result<BTreeSet<(String, String)>, _>>()?;
        for (child_column, parent_column) in REQUIRED_EDGE_FOREIGN_KEYS {
            let expected = (child_column.to_string(), parent_column.to_string());
            if !fks.contains(&expected) {
                diagnostics.push(format!(
                    "graph.db schema drift: missing graph_edges foreign key {} -> graph_nodes.{}",
                    expected.0, expected.1
                ));
            }
        }
    }

    Ok(diagnostics)
}
/// Runs a parameterless query whose first column is a ready-made diagnostic message
/// and returns those messages in row order.
fn sqlite_query_diagnostics(conn: &Connection, sql: &str) -> Result<Vec<String>> {
    let mut statement = conn.prepare(sql)?;
    let messages = statement
        .query_map([], |row| row.get::<_, String>(0))?
        .collect::<std::result::Result<Vec<String>, _>>()?;
    Ok(messages)
}
/// Reports duplicated graph rows.
///
/// Three checks run in order: repeated `graph_nodes.id`, repeated
/// `(from_id, to_id, kind)` edge triples, and repeated `graph_edges.edge_key`. The
/// first failing query aborts the whole inspection.
fn sqlite_graph_duplicate_diagnostics(conn: &Connection) -> Result<Vec<String>> {
    let duplicate_queries = [
        r#"
SELECT 'duplicate graph_nodes.id ' || id || ' (' || COUNT(*) || ' rows)'
FROM graph_nodes
GROUP BY id
HAVING COUNT(*) > 1
ORDER BY id
"#,
        r#"
SELECT 'duplicate graph_edges key ' || from_id || ' -' || kind || '-> ' || to_id || ' (' || COUNT(*) || ' rows)'
FROM graph_edges
GROUP BY from_id, to_id, kind
HAVING COUNT(*) > 1
ORDER BY from_id, kind, to_id
"#,
        r#"
SELECT 'duplicate graph_edges.edge_key ' || edge_key || ' (' || COUNT(*) || ' rows)'
FROM graph_edges
GROUP BY edge_key
HAVING COUNT(*) > 1
ORDER BY edge_key
"#,
    ];

    let mut diagnostics = Vec::new();
    for sql in duplicate_queries {
        diagnostics.extend(sqlite_query_diagnostics(conn, sql)?);
    }
    Ok(diagnostics)
}
/// Reports edges whose `from_id` or `to_id` has no matching row in `graph_nodes`.
///
/// An edge missing both endpoints yields two messages, one per endpoint; the combined
/// result is ordered by message text.
fn sqlite_graph_orphan_diagnostics(conn: &Connection) -> Result<Vec<String>> {
    let orphan_sql = r#"
SELECT 'orphan edge missing from node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
FROM graph_edges e
LEFT JOIN graph_nodes n ON n.id = e.from_id
WHERE n.id IS NULL
UNION ALL
SELECT 'orphan edge missing to node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
FROM graph_edges e
LEFT JOIN graph_nodes n ON n.id = e.to_id
WHERE n.id IS NULL
ORDER BY 1
"#;
    sqlite_query_diagnostics(conn, orphan_sql)
}
/// Checks that the JSON columns of every node and edge row deserialize.
///
/// For each row, `properties_json` must parse as a string-to-string map,
/// `provenance_json` as a list of `GraphProvenance`, and `freshness_json` (when it is
/// not NULL) as `GraphFreshness`. Each failure adds one diagnostic naming the row;
/// nodes are visited before edges.
fn sqlite_graph_json_diagnostics(conn: &Connection) -> Result<Vec<String>> {
    let mut diagnostics = Vec::new();

    let mut node_stmt = conn.prepare(
        "SELECT id, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
    )?;
    let node_rows = node_stmt.query_map([], |row| {
        let id = row.get::<_, String>(0)?;
        let properties_json = row.get::<_, String>(1)?;
        let provenance_json = row.get::<_, String>(2)?;
        let freshness_json = row.get::<_, Option<String>>(3)?;
        Ok((id, properties_json, provenance_json, freshness_json))
    })?;
    for node_row in node_rows {
        let (id, properties_json, provenance_json, freshness_json) = node_row?;
        if let Some(err) =
            serde_json::from_str::<BTreeMap<String, String>>(&properties_json).err()
        {
            diagnostics.push(format!(
                "graph_nodes {id} properties_json is invalid: {err}"
            ));
        }
        if let Some(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json).err() {
            diagnostics.push(format!(
                "graph_nodes {id} provenance_json is invalid: {err}"
            ));
        }
        // A NULL freshness column is accepted; only present values are parsed.
        if let Some(raw_freshness) = freshness_json.as_deref() {
            if let Err(err) = serde_json::from_str::<GraphFreshness>(raw_freshness) {
                diagnostics.push(format!("graph_nodes {id} freshness_json is invalid: {err}"));
            }
        }
    }

    let mut edge_stmt = conn.prepare(
        "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
    )?;
    let edge_rows = edge_stmt.query_map([], |row| {
        let edge_key = row.get::<_, String>(0)?;
        let from_id = row.get::<_, String>(1)?;
        let to_id = row.get::<_, String>(2)?;
        let kind = row.get::<_, String>(3)?;
        let properties_json = row.get::<_, String>(4)?;
        let provenance_json = row.get::<_, String>(5)?;
        let freshness_json = row.get::<_, Option<String>>(6)?;
        Ok((
            edge_key,
            from_id,
            to_id,
            kind,
            properties_json,
            provenance_json,
            freshness_json,
        ))
    })?;
    for edge_row in edge_rows {
        let (edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json) =
            edge_row?;
        // Label used to identify the edge in every diagnostic about it.
        let edge = format!("{edge_key} {from_id} -{kind}-> {to_id}");
        if let Some(err) =
            serde_json::from_str::<BTreeMap<String, String>>(&properties_json).err()
        {
            diagnostics.push(format!(
                "graph_edges {edge} properties_json is invalid: {err}"
            ));
        }
        if let Some(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json).err() {
            diagnostics.push(format!(
                "graph_edges {edge} provenance_json is invalid: {err}"
            ));
        }
        if let Some(raw_freshness) = freshness_json.as_deref() {
            if let Err(err) = serde_json::from_str::<GraphFreshness>(raw_freshness) {
                diagnostics.push(format!(
                    "graph_edges {edge} freshness_json is invalid: {err}"
                ));
            }
        }
    }

    Ok(diagnostics)
}
/// Cross-checks the projection metadata row against its `projection_meta` node.
///
/// `scope` defaults to `"root"` for the `graph_projection_versions` lookup. Inspection
/// stops early, returning what was found so far, when the version row or the meta node
/// is missing. Otherwise the stored version and content hash are validated, and the
/// meta node's `projection_version` and `content_hash` properties must match them.
///
/// # Errors
///
/// Fails when a query fails or when the meta node's `properties_json` does not parse
/// as a string-to-string map.
fn sqlite_graph_projection_metadata_diagnostics(
    conn: &Connection,
    scope: Option<&str>,
) -> Result<Vec<String>> {
    let scope_key = scope.unwrap_or("root");
    let mut diagnostics = Vec::new();

    let stored = conn
        .query_row(
            r#"
SELECT projection_version, content_hash, source_watermark
FROM graph_projection_versions
WHERE scope = ?1
"#,
            [scope_key],
            |row| {
                let projection_version = row.get::<_, String>(0)?;
                let content_hash = row.get::<_, Option<String>>(1)?;
                let source_watermark = row.get::<_, Option<String>>(2)?;
                Ok((projection_version, content_hash, source_watermark))
            },
        )
        .optional()?;
    let (projection_version, content_hash) = match stored {
        // The watermark is decoded by the query but is not inspected here.
        Some((projection_version, content_hash, _)) => (projection_version, content_hash),
        None => {
            diagnostics.push(format!(
                "graph projection metadata is missing for scope {scope_key}"
            ));
            return Ok(diagnostics);
        }
    };

    if projection_version != GRAPH_PROJECTION_VERSION {
        diagnostics.push(format!(
            "projection version mismatch: expected {GRAPH_PROJECTION_VERSION} got {projection_version}"
        ));
    }
    if content_hash.is_none() {
        diagnostics.push("projection content hash is missing".to_string());
    }

    let meta_id = graph_projection_meta_id(scope);
    let meta_lookup = conn
        .query_row(
            "SELECT properties_json FROM graph_nodes WHERE id = ?1 AND kind = ?2",
            (&meta_id, GRAPH_PROJECTION_META_KIND),
            |row| row.get::<_, String>(0),
        )
        .optional()?;
    let meta_properties = match meta_lookup {
        Some(raw_properties) => raw_properties,
        None => {
            diagnostics.push(format!("projection_meta node {meta_id} is missing"));
            return Ok(diagnostics);
        }
    };

    let properties = serde_json::from_str::<BTreeMap<String, String>>(&meta_properties)
        .with_context(|| format!("parsing projection_meta properties for {meta_id}"))?;
    let recorded_version = properties.get("projection_version").map(String::as_str);
    if recorded_version != Some(GRAPH_PROJECTION_VERSION) {
        diagnostics.push(format!(
            "projection_meta node {meta_id} has stale projection_version"
        ));
    }
    let recorded_hash = properties.get("content_hash");
    if recorded_hash != content_hash.as_ref() {
        diagnostics.push(format!(
            "projection_meta node {meta_id} content_hash does not match graph_projection_versions"
        ));
    }
    Ok(diagnostics)
}
pub(crate) fn sqlite_convex_rows_from_conn(conn: &Connection) -> Result<ConvexProjectionRows> {
let mut node_stmt = conn.prepare(
"SELECT id, kind, label, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
)?;
let node_rows = node_stmt.query_map([], |row| {
let properties_json: String = row.get(3)?;
let provenance_json: String = row.get(4)?;
let freshness_json: Option<String> = row.get(5)?;
Ok((
row.get::<_, String>(0)?,
row.get::<_, String>(1)?,
row.get::<_, String>(2)?,
properties_json,
provenance_json,
freshness_json,
))
})?;
let mut nodes = Vec::new();
for row in node_rows {
let (external_id, kind, label, properties_json, provenance_json, freshness_json) = row?;
nodes.push(ConvexNodeRow {
external_id,
kind,
label,
properties: serde_json::from_str(&properties_json)?,
provenance: serde_json::from_str(&provenance_json)?,
freshness: freshness_json
.map(|value| serde_json::from_str(&value))
.transpose()?,
});
}
let mut edge_stmt = conn.prepare(
"SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
)?;
let edge_rows = edge_stmt.query_map([], |row| {
let properties_json: String = row.get(4)?;
let provenance_json: String = row.get(5)?;
let freshness_json: Option<String> = row.get(6)?;
Ok((
row.get::<_, String>(0)?,
row.get::<_, String>(1)?,
row.get::<_, String>(2)?,
row.get::<_, String>(3)?,
properties_json,
provenance_json,
freshness_json,
))
})?;
let mut edges = Vec::new();
for row in edge_rows {
let (
edge_key,
from_external_id,
to_external_id,
kind,
properties_json,
provenance_json,
freshness_json,
) = row?;
edges.push(ConvexEdgeRow {
edge_key,
from_external_id,
to_external_id,
kind,
properties: serde_json::from_str(&properties_json)?,
provenance: serde_json::from_str(&provenance_json)?,
freshness: freshness_json
.map(|value| serde_json::from_str(&value))
.transpose()?,
});
}
Ok(ConvexProjectionRows { nodes, edges })
}
/// Formats an index as `table.name(field1,field2,...)` for use in diagnostics.
fn convex_required_index_label(index: &ConvexRequiredIndex) -> String {
    let field_list = index.fields.join(",");
    format!("{}.{}({})", index.table, index.name, field_list)
}
/// Locates the index metadata inside a snapshot document.
///
/// The keys are tried in order: top-level `indexes`, top-level `requiredIndexes`, then
/// `metadata.indexes`. Returns `None` when none of them is present.
fn convex_snapshot_index_value(value: &serde_json::Value) -> Option<&serde_json::Value> {
    if let Some(indexes) = value.get("indexes") {
        return Some(indexes);
    }
    if let Some(required_indexes) = value.get("requiredIndexes") {
        return Some(required_indexes);
    }
    value.get("metadata")?.get("indexes")
}
/// Parses the indexes a snapshot declares.
///
/// Returns `Ok(None)` when the snapshot carries no index metadata, and an error when
/// the metadata is present but is not a list of `ConvexRequiredIndex`.
fn convex_snapshot_declared_indexes(
    value: &serde_json::Value,
) -> Result<Option<Vec<ConvexRequiredIndex>>> {
    match convex_snapshot_index_value(value) {
        None => Ok(None),
        Some(raw_indexes) => {
            serde_json::from_value::<Vec<ConvexRequiredIndex>>(raw_indexes.clone())
                .context("parsing Convex snapshot index metadata")
                .map(Some)
        }
    }
}
fn convex_snapshot_index_diagnostics(value: &serde_json::Value) -> Result<Vec<String>> {
let required = convex_required_indexes();
let Some(declared) = convex_snapshot_declared_indexes(value)? else {
return Ok(vec![format!(
"Convex snapshot index metadata is missing; required indexes not confirmed: {}",
required
.iter()
.map(convex_required_index_label)
.collect::<Vec<_>>()
.join(", ")
)]);
};
let declared = declared.into_iter().collect::<BTreeSet<_>>();
let missing = required
.iter()
.filter(|index| !declared.contains(*index))
.map(convex_required_index_label)
.collect::<Vec<_>>();
if missing.is_empty() {
Ok(Vec::new())
} else {
Ok(vec![format!(
"Convex snapshot is missing required index metadata: {}",
missing.join(", ")
)])
}
}
pub(crate) fn load_convex_projection_snapshot_value(
snapshot_path: &Path,
) -> Result<(ConvexProjectionRows, serde_json::Value)> {
let content = fs::read_to_string(snapshot_path).with_context(|| {
format!(
"reading Convex projection snapshot {}",
snapshot_path.display()
)
})?;
let value = serde_json::from_str::<serde_json::Value>(&content).with_context(|| {
format!(
"parsing Convex projection snapshot {}",
snapshot_path.display()
)
})?;
let rows = serde_json::from_value::<ConvexProjectionRows>(value.clone())
.with_context(|| format!("parsing Convex projection rows {}", snapshot_path.display()))?;
Ok((rows, value))
}
/// Runs the SQLite doctor checks for `graph_db` and appends their results to `report`.
///
/// Checks are pushed in a fixed order: existence, open, optional read recovery,
/// schema, projection metadata, duplicate ids, orphan edges, row JSON, tombstone
/// retention and compaction policy. An inspection that errors is recorded as a
/// diagnostic on its own check instead of aborting the run.
///
/// Returns the read-only connection so callers can reuse it, or `None` when the
/// database file is missing or cannot be opened (no later checks run in that case).
pub(crate) fn append_sqlite_graph_doctor_checks(
    report: &mut GraphDbDoctorReport,
    root: &Path,
    scope: Option<&str>,
    graph_db: &Path,
) -> Option<substrate::SqliteReadOnlyConnection> {
    let rebuild = graph_db_rebuild_command(root, scope);
    let backup_rebuild = graph_db_backup_rebuild_command(root, scope, graph_db);
    let scope_key = scope.unwrap_or("root");

    let db_present = graph_db.exists();
    if !db_present {
        let missing = format!("graph.db is missing at {}", graph_db.display());
        report.push_check(graph_db_doctor_check(
            "sqlite_graph_db_exists",
            vec![missing],
            vec![rebuild],
        ));
        return None;
    }
    report.push_check(graph_db_doctor_check(
        "sqlite_graph_db_exists",
        Vec::new(),
        vec![rebuild.clone()],
    ));

    let conn = match open_sqlite_graph_db_readonly(graph_db) {
        Ok(opened) => opened,
        Err(open_err) => {
            report.push_check(graph_db_doctor_check(
                "sqlite_graph_db_open",
                vec![open_err.to_string()],
                vec![backup_rebuild],
            ));
            return None;
        }
    };
    report.push_check(graph_db_doctor_check(
        "sqlite_graph_db_open",
        Vec::new(),
        vec![rebuild.clone()],
    ));

    // A recovered read is informational only: it never fails closed.
    if let Some(recovery) = conn.recovery() {
        let recovered = GraphDbDoctorCheck {
            name: "sqlite_graph_db_read_recovery".to_string(),
            status: "recovered".to_string(),
            fail_closed: false,
            diagnostics: vec![graph_db_read_recovery_diagnostic(recovery)],
            repair_commands: Vec::new(),
        };
        report.push_check(recovered);
    }

    let schema_diagnostics = match sqlite_graph_schema_diagnostics(conn.conn()) {
        Ok(found) => found,
        Err(err) => vec![format!("graph.db schema inspection failed: {err}")],
    };
    report.push_check(graph_db_doctor_check(
        "sqlite_schema",
        schema_diagnostics,
        vec![backup_rebuild.clone()],
    ));

    let metadata_diagnostics =
        match sqlite_graph_projection_metadata_diagnostics(conn.conn(), scope) {
            Ok(found) => found,
            Err(err) => vec![format!(
                "graph projection metadata inspection failed: {err}"
            )],
        };
    report.push_check(graph_db_doctor_check(
        "sqlite_projection_metadata",
        metadata_diagnostics,
        vec![rebuild.clone()],
    ));

    let duplicate_diagnostics = match sqlite_graph_duplicate_diagnostics(conn.conn()) {
        Ok(found) => found,
        Err(err) => vec![format!("duplicate id inspection failed: {err}")],
    };
    report.push_check(graph_db_doctor_check(
        "sqlite_duplicate_ids",
        duplicate_diagnostics,
        vec![backup_rebuild.clone()],
    ));

    let orphan_diagnostics = match sqlite_graph_orphan_diagnostics(conn.conn()) {
        Ok(found) => found,
        Err(err) => vec![format!("orphan edge inspection failed: {err}")],
    };
    report.push_check(graph_db_doctor_check(
        "sqlite_orphan_edges",
        orphan_diagnostics,
        vec![rebuild.clone()],
    ));

    let json_diagnostics = match sqlite_graph_json_diagnostics(conn.conn()) {
        Ok(found) => found,
        Err(err) => vec![format!("graph row JSON inspection failed: {err}")],
    };
    report.push_check(graph_db_doctor_check(
        "sqlite_row_json",
        json_diagnostics,
        vec![backup_rebuild],
    ));

    // Tombstone retention is advisory: it warns but never fails closed.
    let tombstone_diagnostics =
        match sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope_key) {
            Ok(found) => found,
            Err(err) => vec![format!(
                "graph tombstone retention inspection failed: {err}"
            )],
        };
    let tombstone_status = match tombstone_diagnostics.is_empty() {
        true => "ok",
        false => "warning",
    };
    report.push_check(GraphDbDoctorCheck {
        name: "sqlite_tombstone_retention".to_string(),
        status: tombstone_status.to_string(),
        fail_closed: false,
        diagnostics: tombstone_diagnostics,
        repair_commands: Vec::new(),
    });

    let (compaction_status, compaction_diagnostics, compaction_repairs) =
        match sqlite_graph_counts(conn.conn(), scope_key) {
            Ok(counts) => {
                let policy = graph_db_compaction_policy(root, scope, &counts, false);
                // Repair commands are only surfaced when compaction is recommended.
                let repairs = if policy.status == "recommended" {
                    policy.recommendations
                } else {
                    Vec::new()
                };
                (policy.status, policy.proof, repairs)
            }
            Err(err) => (
                "warning".to_string(),
                vec![format!("graph compaction policy inspection failed: {err}")],
                Vec::new(),
            ),
        };
    report.push_check(GraphDbDoctorCheck {
        name: "sqlite_compaction_policy".to_string(),
        status: compaction_status,
        fail_closed: false,
        diagnostics: compaction_diagnostics,
        repair_commands: compaction_repairs,
    });

    Some(conn)
}
/// Runs the Convex snapshot doctor checks and appends their results to `report`.
///
/// Checks are pushed in order: snapshot present, snapshot parse, snapshot rows,
/// required indexes and projection freshness. The run stops after the first of the
/// "present" or "parse" checks that fails. `report.required_indexes` is set once the
/// snapshot has parsed. Freshness is compared against `local_rows` when they are
/// available; otherwise the freshness check records that it could not be verified.
pub(crate) fn append_convex_snapshot_doctor_checks(
    report: &mut GraphDbDoctorReport,
    root: &Path,
    scope: Option<&str>,
    local_rows: Option<&ConvexProjectionRows>,
    snapshot_path: Option<&Path>,
) {
    let repair = convex_refresh_command(root, scope);

    let snapshot_path = match snapshot_path {
        Some(path) => path,
        None => {
            let export_command = format!(
                "tsift convex-sync {}{} --json > convex-rows.json",
                shell_quote(root.to_string_lossy().as_ref()),
                graph_db_scope_arg(scope)
            );
            report.push_check(graph_db_doctor_check(
                "convex_snapshot_present",
                vec!["--backend convex-snapshot requires --convex-snapshot <rows.json>".to_string()],
                vec![export_command],
            ));
            return;
        }
    };
    report.push_check(graph_db_doctor_check(
        "convex_snapshot_present",
        Vec::new(),
        vec![repair.clone()],
    ));

    let (snapshot, snapshot_value) = match load_convex_projection_snapshot_value(snapshot_path) {
        Ok(loaded) => loaded,
        Err(load_err) => {
            report.push_check(graph_db_doctor_check(
                "convex_snapshot_parse",
                vec![load_err.to_string()],
                vec![repair],
            ));
            return;
        }
    };
    report.push_check(graph_db_doctor_check(
        "convex_snapshot_parse",
        Vec::new(),
        vec![repair.clone()],
    ));

    let row_diagnostics = convex_projection_row_diagnostics(&snapshot);
    report.push_check(graph_db_doctor_check(
        "convex_snapshot_rows",
        row_diagnostics,
        vec![repair.clone()],
    ));

    let index_diagnostics = match convex_snapshot_index_diagnostics(&snapshot_value) {
        Ok(found) => found,
        Err(err) => vec![err.to_string()],
    };
    report.required_indexes = convex_required_indexes();
    let index_repair =
        "Add the indexes from examples/convex-graph/schema.ts, then redeploy the Convex app"
            .to_string();
    report.push_check(graph_db_doctor_check(
        "convex_required_indexes",
        index_diagnostics,
        vec![index_repair],
    ));

    let (freshness_diagnostics, freshness_repairs) = match local_rows {
        Some(local_rows) => {
            let freshness = convex_projection_freshness(local_rows, Some(&snapshot), scope);
            (freshness.diagnostics, vec![repair])
        }
        // Without local rows there is nothing to compare the snapshot against.
        None => (
            vec![
                "local SQLite graph.db could not be read, so Convex freshness cannot be verified"
                    .to_string(),
            ],
            vec![graph_db_rebuild_command(root, scope)],
        ),
    };
    report.push_check(graph_db_doctor_check(
        "convex_projection_freshness",
        freshness_diagnostics,
        freshness_repairs,
    ));
}
/// Builds the `tsift graph-db ... doctor --json` command line for a Convex snapshot,
/// passing the root and snapshot paths through `shell_quote`.
fn graph_db_convex_snapshot_doctor_command(
    root: &Path,
    scope: Option<&str>,
    snapshot_path: &Path,
) -> String {
    let root_text = root.to_string_lossy();
    let snapshot_text = snapshot_path.to_string_lossy();
    format!(
        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} doctor --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope),
        shell_quote(snapshot_text.as_ref())
    )
}
/// Builds the `tsift graph-db ... schema --json` command line that reads through a
/// Convex snapshot, passing the root and snapshot paths through `shell_quote`.
fn graph_db_convex_snapshot_read_command(
    root: &Path,
    scope: Option<&str>,
    snapshot_path: &Path,
) -> String {
    let root_text = root.to_string_lossy();
    let snapshot_text = snapshot_path.to_string_lossy();
    format!(
        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} schema --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope),
        shell_quote(snapshot_text.as_ref())
    )
}
/// Builds the `tsift convex-sync ... --snapshot <path> --json` command line, passing
/// the root and snapshot paths through `shell_quote`.
fn convex_sync_snapshot_diff_command(
    root: &Path,
    scope: Option<&str>,
    snapshot_path: &Path,
) -> String {
    let root_text = root.to_string_lossy();
    let snapshot_text = snapshot_path.to_string_lossy();
    format!(
        "tsift convex-sync {}{} --snapshot {} --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope),
        shell_quote(snapshot_text.as_ref())
    )
}
/// Borrowed inputs consumed by `graph_db_drift_report`.
pub(crate) struct GraphDbDriftInput<'a> {
/// Project root; echoed in the report and used to build follow-up commands.
root: &'a Path,
/// Optional scope; echoed in the report and passed to the freshness comparison.
scope: Option<&'a str>,
/// Path of the local graph database; only echoed in the report.
graph_db: &'a Path,
/// Path of the Convex snapshot file; echoed in the report and used in follow-up commands.
snapshot_path: &'a Path,
/// Projection rows from the local side of the comparison.
local: &'a ConvexProjectionRows,
/// Projection rows parsed from the snapshot.
snapshot: &'a ConvexProjectionRows,
/// Raw snapshot document, inspected for index metadata.
snapshot_value: &'a serde_json::Value,
/// Warnings gathered by the caller; passed through to the report unchanged.
warnings: Vec<String>,
}
/// Compares the local projection with a Convex snapshot and summarizes the drift.
///
/// The status is `"fail_closed"` when the snapshot has duplicate rows, orphan
/// references or index diagnostics; `"drift"` when freshness fails closed or any
/// upsert/tombstone is pending; and `"current"` otherwise. Graph reads are only
/// allowed for `"current"`. Follow-up commands always start with the snapshot doctor
/// command, followed by the snapshot read command when current, or by the diff and
/// refresh commands otherwise.
pub(crate) fn graph_db_drift_report(input: GraphDbDriftInput<'_>) -> GraphDbDriftReport {
    let GraphDbDriftInput {
        root,
        scope,
        graph_db,
        snapshot_path,
        local,
        snapshot,
        snapshot_value,
        warnings,
    } = input;

    let freshness = convex_projection_freshness(local, Some(snapshot), scope);
    let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
        convex_rows_diff(local, Some(snapshot));
    let row_diagnostics = convex_projection_row_diagnostics(snapshot);
    let index_diagnostics = match convex_snapshot_index_diagnostics(snapshot_value) {
        Ok(found) => found,
        Err(err) => vec![format!("Convex snapshot index metadata failed: {err}")],
    };

    let local_hash = freshness.local_hash.clone();
    let snapshot_hash = freshness.snapshot_hash.clone();
    let stale_nodes = freshness.stale_nodes.clone();
    let stale_edges = freshness.stale_edges.clone();

    // Row failures are classified by the wording of the row diagnostics.
    let count_rows_mentioning = |needle: &str| {
        row_diagnostics
            .iter()
            .filter(|diagnostic| diagnostic.contains(needle))
            .count()
    };
    let duplicate_failures = count_rows_mentioning("duplicate");
    let orphan_failures = count_rows_mentioning("references missing");
    // NOTE(review): this counts index diagnostics, not indexes. The index check in this
    // file emits at most one message, so the value is presumably only ever 0 or 1 —
    // confirm that consumers of the summary expect that.
    let missing_required_indexes = index_diagnostics.len();
    let hashes_disagree = local_hash != snapshot_hash || snapshot_hash.is_none();
    let stale_projection_metadata = usize::from(hashes_disagree);

    let hard_failures = duplicate_failures + orphan_failures + missing_required_indexes;
    let rows_in_sync = node_upserts.is_empty()
        && edge_upserts.is_empty()
        && node_tombstones.is_empty()
        && edge_tombstones.is_empty();
    let has_drift = freshness.fail_closed || !rows_in_sync;
    let status = match (hard_failures > 0, has_drift) {
        (true, _) => "fail_closed",
        (false, true) => "drift",
        (false, false) => "current",
    }
    .to_string();
    let graph_reads_allowed = status == "current";

    let mut diagnostics = row_diagnostics;
    diagnostics.extend(index_diagnostics);
    diagnostics.extend(freshness.diagnostics.clone());
    if has_drift {
        diagnostics.push(format!(
            "projection diff: {} node upsert(s), {} edge upsert(s), {} node tombstone(s), {} edge tombstone(s)",
            node_upserts.len(),
            edge_upserts.len(),
            node_tombstones.len(),
            edge_tombstones.len()
        ));
    }

    let mut next_commands = Vec::with_capacity(3);
    next_commands.push(graph_db_convex_snapshot_doctor_command(
        root,
        scope,
        snapshot_path,
    ));
    if graph_reads_allowed {
        next_commands.push(graph_db_convex_snapshot_read_command(
            root,
            scope,
            snapshot_path,
        ));
    } else {
        next_commands.extend([
            convex_sync_snapshot_diff_command(root, scope, snapshot_path),
            convex_refresh_command(root, scope),
        ]);
    }

    // Take the counts before the row lists are consumed below.
    let summary = GraphDbDriftSummary {
        node_upserts: node_upserts.len(),
        edge_upserts: edge_upserts.len(),
        node_tombstones: node_tombstones.len(),
        edge_tombstones: edge_tombstones.len(),
        stale_nodes: stale_nodes.len(),
        stale_edges: stale_edges.len(),
        stale_projection_metadata,
        duplicate_failures,
        orphan_failures,
        missing_required_indexes,
    };
    let node_upsert_ids = node_upserts
        .into_iter()
        .map(|row| row.external_id)
        .collect();
    let edge_upsert_keys = edge_upserts.into_iter().map(|row| row.edge_key).collect();

    GraphDbDriftReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        graph_db: graph_db.to_string_lossy().to_string(),
        convex_snapshot: snapshot_path.to_string_lossy().to_string(),
        status,
        graph_reads_allowed,
        projection_version: GRAPH_PROJECTION_VERSION.to_string(),
        local_hash,
        snapshot_hash,
        summary,
        node_upserts: node_upsert_ids,
        edge_upserts: edge_upsert_keys,
        node_tombstones,
        edge_tombstones,
        stale_nodes,
        stale_edges,
        diagnostics,
        next_commands,
        required_indexes: convex_required_indexes(),
        warnings,
    }
}
/// Prints a drift report to stdout in human-readable form: status line, paths, upsert
/// and tombstone counts, then one line per diagnostic and per follow-up command.
pub(crate) fn print_graph_db_drift_human(report: &GraphDbDriftReport) {
    let summary = &report.summary;
    println!(
        "graph-db drift status: {} reads_allowed: {}",
        report.status, report.graph_reads_allowed
    );
    println!("graph_db: {}", report.graph_db);
    println!("convex_snapshot: {}", report.convex_snapshot);
    println!(
        "upserts: {} node(s), {} edge(s)",
        summary.node_upserts, summary.edge_upserts
    );
    println!(
        "tombstones: {} node(s), {} edge(s)",
        summary.node_tombstones, summary.edge_tombstones
    );
    report
        .diagnostics
        .iter()
        .for_each(|diagnostic| println!("diagnostic: {diagnostic}"));
    report
        .next_commands
        .iter()
        .for_each(|command| println!("next: {command}"));
}
/// Prints a doctor report to stdout in human-readable form: backend and status, the
/// database path, the snapshot path when one is set, each check with its indented
/// diagnostics, and finally the repair commands.
pub(crate) fn print_graph_db_doctor_human(report: &GraphDbDoctorReport) {
    println!(
        "graph-db doctor backend: {} status: {}",
        report.backend, report.status
    );
    println!("graph_db: {}", report.graph_db);
    match &report.convex_snapshot {
        Some(snapshot) => println!("convex_snapshot: {snapshot}"),
        None => {}
    }
    for check in report.checks.iter() {
        println!("check: {} {}", check.name, check.status);
        check
            .diagnostics
            .iter()
            .for_each(|diagnostic| println!("  diagnostic: {diagnostic}"));
    }
    report
        .repair_commands
        .iter()
        .for_each(|command| println!("repair: {command}"));
}
/// Builds the operator-facing report for `graph_db` by inspecting the file on disk.
///
/// When the database file does not exist, a fail-closed `"missing"` report with
/// zeroed counts is returned. Otherwise the database is opened read-only and
/// freshness, schema diagnostics, row counts, the semantic row count, and
/// tombstone-retention diagnostics are collected from it.
///
/// `scope` falls back to `"root"` for the scoped SQLite queries. `refresh` is
/// passed through unchanged; `warnings` is extended with any read-recovery and
/// tombstone-retention diagnostics.
///
/// # Errors
///
/// Propagates failures from opening the database, reading freshness, and
/// reading counts. Failures of the schema, semantic-count, and retention
/// inspections are downgraded to diagnostics (or `None`) instead.
pub(crate) fn graph_db_operator_report_from_disk(
    root: &Path,
    scope: Option<&str>,
    graph_db: &Path,
    operation: &str,
    refresh: Option<GraphDbRefreshSummary>,
    warnings: Vec<String>,
) -> Result<GraphDbOperatorReport> {
    let root_text = root.to_string_lossy().to_string();
    let graph_db_text = graph_db.to_string_lossy().to_string();
    let scope_text = scope.map(str::to_string);

    if !graph_db.exists() {
        let next_commands = graph_db_operator_next_commands(root, scope, true);
        let counts = GraphDbOperatorCounts {
            nodes: 0,
            edges: 0,
            tombstones: GraphDbTombstoneCounts {
                nodes: 0,
                edges: 0,
                total: 0,
            },
            file_size_bytes: None,
            freelist_bytes: None,
        };
        let freshness = GraphDbFreshnessReport {
            status: "missing".to_string(),
            fail_closed: true,
            projection_version: None,
            content_hash: None,
            source_watermark: None,
            diagnostics: vec![
                "graph.db is missing; run graph-db refresh before trusting graph reads"
                    .to_string(),
            ],
        };
        let readiness = graph_effectiveness_blocked(
            "graph_db_missing",
            vec![
                "graph.db is missing; materialize the projection before relying on graph effectiveness".to_string(),
            ],
            next_commands.clone(),
        );
        // The policy only borrows the counts, so compute it before they move into the report.
        let compaction = graph_db_compaction_policy(root, scope, &counts, false);
        return Ok(GraphDbOperatorReport {
            root: root_text,
            scope: scope_text,
            graph_db: graph_db_text,
            operation: operation.to_string(),
            status: "missing".to_string(),
            materialized: false,
            freshness,
            readiness,
            counts,
            refresh,
            compaction,
            recovery: None,
            next_commands,
            warnings,
        });
    }

    let scope_name = scope.unwrap_or("root");
    let conn = open_sqlite_graph_db_readonly(graph_db)?;
    let recovery = conn.recovery();
    let mut warnings = warnings;
    if let Some(details) = recovery {
        warnings.push(graph_db_read_recovery_diagnostic(details));
    }

    let mut freshness = sqlite_graph_freshness_from_conn(conn.conn(), scope_name)?;
    let schema_diagnostics = match sqlite_graph_schema_diagnostics(conn.conn()) {
        Ok(found) => found,
        Err(err) => vec![format!("graph.db schema inspection failed: {err}")],
    };
    // Any schema finding (including a failed inspection) forces the report to fail closed.
    if !schema_diagnostics.is_empty() {
        freshness.diagnostics.extend(schema_diagnostics);
        freshness.fail_closed = true;
        freshness.status = "stale".to_string();
    }

    let counts = sqlite_graph_counts(conn.conn(), scope_name)?;
    let semantic_row_count = sqlite_graph_semantic_node_count(conn.conn()).ok();
    let retention_diagnostics =
        match sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope_name) {
            Ok(found) => found,
            Err(err) => vec![format!(
                "graph tombstone retention inspection failed: {err}"
            )],
        };
    warnings.extend(retention_diagnostics);

    let status = String::from(if freshness.fail_closed {
        "stale"
    } else {
        "current"
    });
    let readiness = graph_db_semantic_readiness(root, scope, semantic_row_count);
    let compaction = graph_db_compaction_policy(root, scope, &counts, false);
    let next_commands = graph_db_operator_next_commands(root, scope, false);
    Ok(GraphDbOperatorReport {
        root: root_text,
        scope: scope_text,
        graph_db: graph_db_text,
        operation: operation.to_string(),
        status,
        materialized: true,
        freshness,
        readiness,
        compaction,
        counts,
        refresh,
        recovery,
        next_commands,
        warnings,
    })
}
/// Writes a human-readable rendering of a graph-db operator report to stdout.
///
/// Sections appear in a fixed order: header, projection metadata, row counts,
/// readiness, storage (only when a file size is known), refresh delta (only
/// when a refresh summary is attached), compaction, recovery, then the
/// diagnostic, warning, and follow-up command lists.
fn print_graph_db_operator_human(report: &GraphDbOperatorReport) {
    let freshness = &report.freshness;
    let counts = &report.counts;
    let readiness = &report.readiness;
    let compaction = &report.compaction;

    println!(
        "graph-db {} status: {} materialized: {}",
        report.operation, report.status, report.materialized
    );
    println!("graph_db: {}", report.graph_db);

    // Absent projection metadata is rendered with a placeholder instead of being omitted.
    let projection_version = freshness.projection_version.as_deref().unwrap_or("<missing>");
    let content_hash = freshness.content_hash.as_deref().unwrap_or("<missing>");
    let source_watermark = freshness.source_watermark.as_deref().unwrap_or("<missing>");
    println!(
        "projection: version={} hash={} watermark={}",
        projection_version, content_hash, source_watermark
    );
    println!(
        "rows: {} node(s), {} edge(s), {} tombstone(s)",
        counts.nodes, counts.edges, counts.tombstones.total
    );
    println!(
        "readiness: {} reason: {} fail_closed: {}",
        readiness.status, readiness.reason, readiness.fail_closed
    );
    if let Some(file_size) = counts.file_size_bytes {
        let free_bytes = counts.freelist_bytes.unwrap_or(0);
        println!(
            "storage: {} byte(s), {} free byte(s)",
            file_size, free_bytes
        );
    }
    if let Some(refresh) = report.refresh.as_ref() {
        println!(
            "refresh: {} tombstoned node(s), {} tombstoned edge(s)",
            refresh.tombstoned_nodes, refresh.tombstoned_edges
        );
        println!(
            "delta: {} node upsert(s), {} edge upsert(s), {} property row upsert(s), {} unchanged node(s), {} unchanged edge(s), {} unchanged property row(s), {} deleted property row(s), {} pruned tombstone(s)",
            refresh.upserted_nodes,
            refresh.upserted_edges,
            refresh.upserted_properties,
            refresh.unchanged_nodes,
            refresh.unchanged_edges,
            refresh.unchanged_properties,
            refresh.deleted_properties,
            refresh.pruned_tombstones
        );
    }
    println!(
        "compaction: {} tombstone_scan_rows={} live_rows={}",
        compaction.status, compaction.tombstone_scan_rows, compaction.live_rows
    );
    compaction
        .proof
        .iter()
        .for_each(|proof| println!("compaction proof: {proof}"));
    if let Some(recovery) = report.recovery {
        println!("recovery: {}", graph_db_read_recovery_diagnostic(recovery));
    }
    freshness
        .diagnostics
        .iter()
        .for_each(|diagnostic| println!("diagnostic: {diagnostic}"));
    readiness
        .diagnostics
        .iter()
        .for_each(|diagnostic| println!("readiness diagnostic: {diagnostic}"));
    report
        .warnings
        .iter()
        .for_each(|warning| println!("warning: {warning}"));
    readiness
        .next_commands
        .iter()
        .for_each(|command| println!("readiness next: {command}"));
    report
        .next_commands
        .iter()
        .for_each(|command| println!("next: {command}"));
}
/// Emits a graph-db operator report in the requested output format.
///
/// When `format.json_output` is unset the human renderer is used and the call
/// cannot fail. Otherwise the report is handed to `print_json_or_envelope`
/// together with a one-line summary and headline metrics.
///
/// # Errors
///
/// Returns whatever `print_json_or_envelope` reports for the JSON path.
pub(crate) fn print_graph_db_operator_report(
    report: &GraphDbOperatorReport,
    format: OutputFormat,
) -> Result<()> {
    if !format.json_output {
        print_graph_db_operator_human(report);
        return Ok(());
    }

    let counts = &report.counts;
    let summary = ToolEnvelopeSummary {
        text: format!(
            "Graph DB {} status {} with {} node(s), {} edge(s), {} tombstone(s)",
            report.operation, report.status, counts.nodes, counts.edges, counts.tombstones.total
        ),
        metrics: vec![
            envelope_metric("operation", &report.operation),
            envelope_metric("status", &report.status),
            envelope_metric("nodes", counts.nodes),
            envelope_metric("edges", counts.edges),
            envelope_metric("tombstones", counts.tombstones.total),
            envelope_metric("compaction", &report.compaction.status),
            envelope_metric("readiness", &report.readiness.status),
        ],
    };
    print_json_or_envelope(
        report,
        &format,
        "graph-db",
        &report.operation,
        summary,
        false,
        report.next_commands.clone(),
    )
}
/// Returns `run` with any trailing parenthesised note removed.
///
/// Everything from the first `" ("` onward is dropped; a string without that
/// separator is returned unchanged.
fn status_run_command_without_notes(run: &str) -> &str {
    match run.find(" (") {
        Some(notes_start) => &run[..notes_start],
        None => run,
    }
}
/// Picks the `summarize --extract` step out of a recommended command line.
///
/// Trailing notes are stripped first; the remainder is split on `" && "` and
/// the first step containing `summarize --extract` is returned. When no step
/// matches, the whole note-free command line is returned.
fn status_summarize_extract_command(run: &str) -> &str {
    let command_line = status_run_command_without_notes(run);
    for step in command_line.split(" && ") {
        if step.contains("summarize --extract") {
            return step;
        }
    }
    command_line
}
fn graph_db_status_summarize_command(report: &status::StatusReport) -> String {
report
.recommendations
.run
.as_deref()
.filter(|command| command.contains("summarize --extract"))
.map(status_summarize_extract_command)
.unwrap_or("tsift summarize --extract .")
.to_string()
}
/// Builds a "ready" readiness value that records how many semantic rows were
/// found and which `source` they were counted from.
fn graph_db_semantic_rows_readiness(row_count: usize, source: &str) -> GraphEffectivenessReadiness {
    let availability_note = format!(
        "graph projection has {row_count} semantic_concept/semantic_entity row(s) from {source}; graph semantic rows are available"
    );
    let mut readiness = graph_effectiveness_ready("semantic_rows_available");
    readiness.diagnostics.push(availability_note);
    readiness
}
/// Decides whether semantic graph evidence can be relied on.
///
/// A positive `semantic_row_count` short-circuits to "ready". Otherwise the
/// summary-cache state from `status::check_status` decides:
///
/// * available summaries -> ready, with a coverage diagnostic;
/// * empty summary cache -> blocked, with index (if recommended), summarize,
///   and refresh repair commands;
/// * unavailable summaries -> blocked, with the recommended command (if any),
///   the generic summarize command, and the refresh command;
/// * status check failure -> blocked, with only the refresh command.
fn graph_db_semantic_readiness(
    root: &Path,
    scope: Option<&str>,
    semantic_row_count: Option<usize>,
) -> GraphEffectivenessReadiness {
    if let Some(row_count) = semantic_row_count.filter(|count| *count > 0) {
        return graph_db_semantic_rows_readiness(row_count, "materialized graph projection");
    }

    let report = match status::check_status(root) {
        Ok(report) => report,
        Err(err) => {
            let diagnostic = format!(
                "semantic readiness could not inspect summary cache after graph-db refresh: {err:#}"
            );
            return graph_effectiveness_blocked(
                "status_check_unavailable",
                vec![diagnostic],
                vec![graph_db_refresh_command(root, scope)],
            );
        }
    };

    match &report.summaries {
        status::SummaryStatus::Available {
            cached_files,
            total_indexed_files,
            coverage_pct,
            ..
        } => {
            let coverage_note = format!(
                "summary cache has {cached_files}/{total_indexed_files} indexed file(s) cached ({coverage_pct}% coverage); graph semantic rows are available"
            );
            let mut readiness = graph_effectiveness_ready("semantic_rows_available");
            readiness.diagnostics.push(coverage_note);
            readiness
        }
        status::SummaryStatus::None { .. } => {
            let summarize = graph_db_status_summarize_command(&report);
            let refresh = graph_db_refresh_command(root, scope);
            let diagnostic = format!(
                "summary cache empty: graph-db materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
                summarize,
                root.display(),
                refresh
            );
            // Repair order: (re)index when recommended, then summarize, then refresh.
            let mut repair: Vec<String> = report
                .recommendations
                .run
                .iter()
                .filter(|cmd| cmd.contains("index"))
                .cloned()
                .collect();
            repair.push(summarize);
            repair.push(refresh);
            graph_effectiveness_blocked("summary_cache_empty", vec![diagnostic], repair)
        }
        status::SummaryStatus::Unavailable => {
            let mut repair: Vec<String> = report.recommendations.run.iter().cloned().collect();
            repair.push("tsift summarize --extract .".to_string());
            repair.push(graph_db_refresh_command(root, scope));
            graph_effectiveness_blocked(
                "summary_cache_unavailable",
                vec![
                    "summary cache unavailable because the source index is missing; build the index, extract summaries, and refresh the graph before relying on semantic graph evidence".to_string(),
                ],
                repair,
            )
        }
    }
}
/// Collects status-derived warnings to attach to an operator report.
///
/// Starts from the status reminders and, when the summary cache is empty,
/// appends a warning naming the summarize and refresh commands to run. The
/// result is de-duplicated with first-occurrence order preserved. If the status
/// check itself fails, a single warning describing that failure is returned.
pub(crate) fn graph_db_operator_status_warnings(root: &Path, scope: Option<&str>) -> Vec<String> {
    let report = match status::check_status(root) {
        Ok(report) => report,
        Err(err) => {
            return vec![format!(
                "status check unavailable after graph-db refresh: {err:#}"
            )];
        }
    };

    // Resolve the summarize command while `report` is still whole; the
    // reminders are moved out of it right below.
    let summary_cache_empty = matches!(report.summaries, status::SummaryStatus::None { .. });
    let summarize_run = summary_cache_empty.then(|| graph_db_status_summarize_command(&report));

    let mut warnings = report.reminders;
    if let Some(run) = summarize_run {
        warnings.push(format!(
            "summary cache empty: graph-db refresh materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
            run,
            root.display(),
            graph_db_refresh_command(root, scope)
        ));
    }
    dedupe_preserve_order(warnings)
}
/// Writes a human-readable rendering of a graph-db compaction report to stdout.
///
/// Shows the headline outcome, the database path, the before/after counts
/// (unknown file or freelist sizes print as `0`), then the post-compaction
/// proof lines, warnings, and follow-up commands.
pub(crate) fn print_graph_db_compaction_human(report: &GraphDbCompactionReport) {
    let before = &report.counts_before;
    let after = &report.counts_after;

    println!(
        "graph-db compact applied:{} pruned_tombstones:{} reclaimed:{} byte(s)",
        report.applied, report.pruned_tombstones, report.reclaimed_bytes
    );
    println!("graph_db: {}", report.graph_db);
    println!(
        "before: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
        before.nodes,
        before.edges,
        before.tombstones.total,
        before.file_size_bytes.unwrap_or(0),
        before.freelist_bytes.unwrap_or(0)
    );
    println!(
        "after: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
        after.nodes,
        after.edges,
        after.tombstones.total,
        after.file_size_bytes.unwrap_or(0),
        after.freelist_bytes.unwrap_or(0)
    );
    report
        .compaction_after
        .proof
        .iter()
        .for_each(|proof| println!("proof: {proof}"));
    report
        .warnings
        .iter()
        .for_each(|warning| println!("warning: {warning}"));
    report
        .next_commands
        .iter()
        .for_each(|command| println!("next: {command}"));
}
/// Parses raw `KEY=VALUE` strings into property filters.
///
/// Each entry is split at its first `=`; key and value are trimmed.
///
/// # Errors
///
/// Fails on the first entry that has no `=` or whose trimmed key or value is
/// empty.
fn parse_graph_db_property_filters(raw: &[String]) -> Result<Vec<GraphDbPropertyFilter>> {
    let mut filters = Vec::with_capacity(raw.len());
    for value in raw {
        let (raw_key, raw_value) = value
            .split_once('=')
            .with_context(|| format!("graph-db --property expects KEY=VALUE, got {value:?}"))?;
        let (key, filter_value) = (raw_key.trim(), raw_value.trim());
        if key.is_empty() || filter_value.is_empty() {
            bail!("graph-db --property expects non-empty KEY=VALUE, got {value:?}");
        }
        filters.push(GraphDbPropertyFilter {
            key: key.to_string(),
            value: filter_value.to_string(),
        });
    }
    Ok(filters)
}
/// Assembles query options from raw CLI inputs.
///
/// A `limit` of zero is treated the same as no limit. Property filters are
/// parsed from their `KEY=VALUE` form.
///
/// # Errors
///
/// Fails when any property filter is malformed.
fn graph_db_query_options(
    cursor: Option<String>,
    limit: Option<usize>,
    property_filters: &[String],
) -> Result<GraphDbQueryOptions> {
    let property_filters = parse_graph_db_property_filters(property_filters)?;
    let limit = match limit {
        Some(0) | None => None,
        positive => positive,
    };
    Ok(GraphDbQueryOptions {
        cursor,
        limit,
        property_filters,
    })
}
/// Converts CLI-level query options into the store's option type, copying the
/// cursor, limit, and every property filter.
fn graph_db_query_options_for_store(options: &GraphDbQueryOptions) -> GraphQueryOptions {
    let mut property_filters = Vec::with_capacity(options.property_filters.len());
    for filter in &options.property_filters {
        property_filters.push(GraphPropertyFilter {
            key: filter.key.clone(),
            value: filter.value.clone(),
        });
    }
    GraphQueryOptions {
        cursor: options.cursor.clone(),
        limit: options.limit,
        property_filters,
    }
}
fn graph_db_page_report_from_store(
page: GraphQueryPage,
property_filters: Vec<GraphDbPropertyFilter>,
) -> GraphDbPageReport {
GraphDbPageReport {
cursor: page.cursor,
limit: page.limit,
next_cursor: page.next_cursor,
returned_nodes: page.returned_nodes,
returned_edges: page.returned_edges,
truncated: page.truncated,
property_filters,
diagnostics: page.diagnostics,
}
}
/// Describes the gate that keeps ranked neighborhood output from becoming the
/// default ordering.
///
/// The workloads, metrics, and thresholds come from the `metric_digest`
/// community-search constants; `ranked_neighbor_cap` is only echoed in the
/// diagnostics.
fn graph_db_neighborhood_ranking_gate(
    ranked_neighbor_cap: usize,
) -> GraphDbNeighborhoodRankingGate {
    let mut required_workloads = Vec::new();
    for workload in metric_digest::COMMUNITY_SEARCH_WORKLOADS.iter() {
        required_workloads.push((*workload).to_string());
    }
    let mut required_metrics = Vec::new();
    for metric in metric_digest::COMMUNITY_SEARCH_REQUIRED_METRICS.iter() {
        required_metrics.push((*metric).to_string());
    }
    let cap_note = format!(
        "ranked_neighbors is score-capped at {ranked_neighbor_cap} entries so previews stay bounded while cursor pagination remains exhaustive"
    );
    let diagnostics = vec![
        "ranked_neighbors is additive; neighborhood nodes remain ordered by stable node id for cursor pagination".to_string(),
        cap_note,
        "changing the default neighborhood order requires the community-search gate to pass for every required workload".to_string(),
    ];
    GraphDbNeighborhoodRankingGate {
        status: "held_default_order_unchanged".to_string(),
        ranked_output_default: false,
        default_order: "stable_node_id".to_string(),
        default_change_gate: "community_search_quality_metrics".to_string(),
        required_workloads,
        required_metrics,
        max_duration_regression_percent: metric_digest::COMMUNITY_MAX_DURATION_REGRESSION_PERCENT,
        min_handle_coverage_pct: metric_digest::COMMUNITY_MIN_HANDLE_COVERAGE_PCT,
        min_duplicate_name_precision: metric_digest::COMMUNITY_MIN_DUPLICATE_NAME_PRECISION,
        min_top_community_stability: metric_digest::COMMUNITY_MIN_TOP_COMMUNITY_STABILITY,
        diagnostics,
    }
}
/// Resolves how many ranked neighbors to report.
///
/// A missing or zero `limit` yields `GRAPH_DB_RANKED_NEIGHBOR_CAP`; any other
/// value is clamped into `1..=GRAPH_DB_RANKED_NEIGHBOR_CAP`.
fn graph_db_ranked_neighbor_cap(limit: Option<usize>) -> usize {
    limit
        .filter(|requested| *requested > 0)
        .map_or(GRAPH_DB_RANKED_NEIGHBOR_CAP, |requested| {
            requested.clamp(1, GRAPH_DB_RANKED_NEIGHBOR_CAP)
        })
}
/// Returns the ranked neighbors of `center_id` within the given subgraph.
///
/// Thin adapter: all work is delegated to `resolution::ranked_neighbors_capped`,
/// with `cap` forwarded unchanged.
fn graph_db_ranked_neighbors(
center_id: &str,
nodes: &[SubstrateGraphNode],
edges: &[SubstrateGraphEdge],
cap: usize,
) -> Vec<GraphDbRankedNeighbor> {
resolution::ranked_neighbors_capped(center_id, nodes, edges, cap)
}
/// Runs the store's ranked neighborhood traversal and compares it with an
/// already-computed unranked neighborhood.
///
/// The traversal uses edge-kind-weighted scoring, optionally restricted to
/// `edge_kind`, and is capped at 200 nodes when `limit` is missing or zero,
/// otherwise at `limit` clamped to `10..=500`. The call is timed in
/// microseconds.
///
/// Returns `Ok(None)` when the store yields no ranked neighborhood for
/// `center_id`. Otherwise the comparison carries traversal sizes, overlap with
/// the unranked node ids, duplicate-label counts, handle coverage, a
/// "useful hit density" heuristic for both sides, and an optional community
/// truncation summary.
///
/// # Errors
///
/// Propagates any error from `store.ranked_neighborhood`.
fn graph_db_ranked_neighborhood_comparison<S: GraphStore>(
center_id: &str,
depth: usize,
edge_kind: Option<&str>,
limit: Option<usize>,
unranked_nodes: &[SubstrateGraphNode],
unranked_edges: &[SubstrateGraphEdge],
store: &S,
) -> Result<Option<GraphDbRankedNeighborhoodComparison>> {
use std::time::Instant;
// Node cap for the ranked traversal: default 200, otherwise limit clamped to 10..=500.
let max_nodes = match limit {
Some(0) | None => 200,
Some(n) => n.clamp(10, 500),
};
let mut options = RankedNeighborhoodOptions::new(depth, max_nodes)
.with_scoring(NeighborhoodScoring::EdgeKindWeighted);
if let Some(kind) = edge_kind {
options = options.with_edge_kind(kind);
}
// Only the store call itself is timed.
let start = Instant::now();
let result = store.ranked_neighborhood(center_id, &options)?;
let latency = start.elapsed().as_micros();
let Some(ranked) = result else {
return Ok(None);
};
// Overlap is measured on node ids and normalised by the larger of the two sets.
let unranked_ids: BTreeSet<_> = unranked_nodes.iter().map(|n| n.id.as_str()).collect();
let ranked_ids: BTreeSet<_> = ranked.nodes.iter().map(|n| n.id.as_str()).collect();
let overlap_count = ranked_ids.intersection(&unranked_ids).count();
let overlap_pct = if unranked_ids.is_empty() || ranked_ids.is_empty() {
0.0
} else {
(overlap_count as f64 / unranked_ids.len().max(ranked_ids.len()) as f64) * 100.0
};
// Number of distinct labels that are shared by more than one node.
let count_duplicates = |nodes: &[SubstrateGraphNode]| -> usize {
let mut name_count = BTreeMap::<&str, usize>::new();
for n in nodes {
*name_count.entry(&n.label).or_default() += 1;
}
name_count.values().filter(|&&c| c > 1).count()
};
// Percentage of nodes carrying a "handle" or "ref_id" property; an empty set counts as 100%.
let count_handle_coverage = |nodes: &[SubstrateGraphNode]| -> f64 {
if nodes.is_empty() {
return 100.0;
}
let with_handle = nodes
.iter()
.filter(|n| n.properties.contains_key("handle") || n.properties.contains_key("ref_id"))
.count();
(with_handle as f64 / nodes.len() as f64) * 100.0
};
// Heuristic density: weighted mix of "useful" node kinds, node-kind diversity,
// and edge-kind diversity, divided by the node count. Empty input scores 0.
let useful_density = |nodes: &[SubstrateGraphNode], edges: &[SubstrateGraphEdge]| -> f64 {
if nodes.is_empty() {
return 0.0;
}
let semantic_kinds = [
"semantic_concept",
"semantic_entity",
"symbol",
"file",
"source_handle",
];
let useful = nodes
.iter()
.filter(|n| semantic_kinds.contains(&n.kind.as_str()))
.count();
let edge_diversity = edges.iter().map(|e| &e.kind).collect::<BTreeSet<_>>().len();
let kind_diversity = nodes.iter().map(|n| &n.kind).collect::<BTreeSet<_>>().len();
(useful as f64 * 0.5 + kind_diversity as f64 * 0.3 + edge_diversity as f64 * 0.2)
/ nodes.len() as f64
};
// The community summary is only computed when something was pruned and the
// ranked result still has edges to build communities from.
let community_truncation_summary = if ranked.pruned_count > 0 && !ranked.edges.is_empty() {
let edge_pairs: Vec<(String, String)> = ranked
.edges
.iter()
.map(|e| (e.from_id.clone(), e.to_id.clone()))
.collect();
let cr = tsift_graph::detect_communities(&edge_pairs);
// NOTE(review): communities are built from edge endpoint *ids*, but the kept
// set below is built from node *labels*. Presumably member names equal the
// endpoint strings passed in; if so this only matches when label == id.
// TODO confirm against `tsift_graph::detect_communities`.
let kept_labels: BTreeSet<&str> = ranked.nodes.iter().map(|n| n.label.as_str()).collect();
let mut fully_kept = 0usize;
let mut partially_pruned = 0usize;
let mut fully_pruned = 0usize;
let mut pruned_kinds = BTreeSet::new();
let mut pruned_labels = Vec::new();
for comm in &cr.communities {
let kept_in_comm: Vec<&str> = comm
.members
.iter()
.filter(|m| kept_labels.contains(m.name.as_str()))
.map(|m| m.name.as_str())
.collect();
if kept_in_comm.len() == comm.members.len() {
fully_kept += 1;
} else if kept_in_comm.is_empty() {
fully_pruned += 1;
for m in &comm.members {
// NOTE(review): this branch is reached only when no member name is in
// `kept_labels` (the labels of `ranked.nodes`), so this label lookup in
// `ranked.nodes` cannot match and `pruned_kinds` stays empty.
if let Some(n) = ranked.nodes.iter().find(|n| n.label == m.name) {
pruned_kinds.insert(n.kind.clone());
}
pruned_labels.push(m.name.clone());
}
} else {
partially_pruned += 1;
}
}
// Keep only the first five labels as a preview.
pruned_labels.truncate(5);
Some(CommunityTruncationSummary {
total_communities: cr.communities.len(),
fully_kept,
partially_pruned,
fully_pruned,
pruned_community_kinds: pruned_kinds.into_iter().collect(),
pruned_community_top_labels: pruned_labels,
})
} else {
None
};
// Percentages are rounded to two decimals and densities to three.
Ok(Some(GraphDbRankedNeighborhoodComparison {
traversal_nodes: ranked.nodes.len(),
traversal_edges: ranked.edges.len(),
pruned_count: ranked.pruned_count,
total_discovered: ranked.total_discovered,
latency_micros: latency,
overlap_with_unranked_pct: (overlap_pct * 100.0).round() / 100.0,
useful_hit_density_ranked: (useful_density(&ranked.nodes, &ranked.edges) * 1000.0).round()
/ 1000.0,
useful_hit_density_unranked: (useful_density(unranked_nodes, unranked_edges) * 1000.0)
.round()
/ 1000.0,
duplicate_name_count_ranked: count_duplicates(&ranked.nodes),
duplicate_name_count_unranked: count_duplicates(unranked_nodes),
handle_coverage_ranked_pct: (count_handle_coverage(&ranked.nodes) * 100.0).round() / 100.0,
handle_coverage_unranked_pct: (count_handle_coverage(unranked_nodes) * 100.0).round()
/ 100.0,
community_truncation_summary,
diagnostics: vec![
format!(
"ranked_neighborhood traversed {} node(s), {} edge(s) with {} pruned of {} discovered in {}µs",
ranked.nodes.len(),
ranked.edges.len(),
ranked.pruned_count,
ranked.total_discovered,
latency
),
format!(
"overlap with unranked BFS: {:.1}% ({} shared of {} unranked, {} ranked)",
overlap_pct,
overlap_count,
unranked_ids.len(),
ranked_ids.len()
),
"comparison is diagnostic; promotion requires community-search quality gate to pass for every required workload".to_string(),
],
}))
}
/// Result of applying the output budget to a candidate subgraph.
struct GraphDbBudgetedSubgraph {
/// Nodes that survived the per-kind quotas and the estimated token cap.
nodes: Vec<SubstrateGraphNode>,
/// Edges that survived the budget; both endpoints are among `nodes`.
edges: Vec<SubstrateGraphEdge>,
/// Budget accounting: caps, estimates, candidate counts, drops, diagnostics.
report: GraphDbOutputBudgetReport,
/// True when at least one candidate node was left out of this page.
truncated: bool,
/// Node id to pass as the cursor for the next page; set only when truncated.
next_cursor: Option<String>,
}
/// Estimated-token cap used when no positive limit is supplied.
const GRAPH_DB_OUTPUT_DEFAULT_TOKEN_CAP: usize = 6_000;
/// Lower bound for a limit-derived token cap.
const GRAPH_DB_OUTPUT_MIN_TOKEN_CAP: usize = 1_200;
/// Upper bound for a limit-derived token cap.
const GRAPH_DB_OUTPUT_MAX_TOKEN_CAP: usize = 12_000;

/// Derives the estimated-token cap for graph-db output from `limit`.
///
/// A missing or zero limit selects the default cap. Otherwise the cap is
/// `limit * 320` (saturating), bounded to the min/max caps.
fn graph_db_output_token_cap(limit: Option<usize>) -> usize {
    let Some(requested) = limit.filter(|value| *value != 0) else {
        return GRAPH_DB_OUTPUT_DEFAULT_TOKEN_CAP;
    };
    requested
        .saturating_mul(320)
        .max(GRAPH_DB_OUTPUT_MIN_TOKEN_CAP)
        .min(GRAPH_DB_OUTPUT_MAX_TOKEN_CAP)
}
/// Returns how many nodes of `kind` may be selected for one output page.
///
/// Without a positive `limit` each kind has a fixed default quota. With a
/// positive limit the quota is the limit plus a per-kind headroom (saturating).
fn graph_db_node_kind_quota(kind: &str, limit: Option<usize>) -> usize {
    // (default quota, headroom added to an explicit limit)
    let (default_quota, headroom) = match kind {
        "source_handle" => (10, 4),
        "worker_context" | "worker_result" => (8, 2),
        "semantic_concept" | "semantic_entity" => (10, 4),
        "file" | "symbol" | "route" => (12, 4),
        _ => (8, 1),
    };
    match limit.filter(|value| *value > 0) {
        Some(base) => base.saturating_add(headroom),
        None => default_quota,
    }
}
/// Returns how many edges of `kind` may be selected for one output page.
///
/// Without a positive `limit` each kind has a fixed default quota. With a
/// positive limit, mention edges get three times the limit, relation/call/
/// define edges twice the limit, and everything else the limit plus two
/// (all saturating).
fn graph_db_edge_kind_quota(kind: &str, limit: Option<usize>) -> usize {
    match (kind, limit.filter(|value| *value > 0)) {
        ("mentions" | "mentions_concept" | "mentions_entity", None) => 24,
        ("mentions" | "mentions_concept" | "mentions_entity", Some(base)) => {
            base.saturating_mul(3)
        }
        ("semantic_relation" | "calls" | "defines", None) => 20,
        ("semantic_relation" | "calls" | "defines", Some(base)) => base.saturating_mul(2),
        (_, None) => 16,
        (_, Some(base)) => base.saturating_add(2),
    }
}
/// Estimates the token cost of `value` as one token per four bytes of its JSON
/// encoding, rounded up and never less than one.
///
/// A value that fails to serialize is costed at one token.
fn graph_db_estimated_tokens<T: Serialize>(value: &T) -> usize {
    match serde_json::to_vec(value) {
        Ok(encoded) => encoded.len().div_ceil(4).max(1),
        Err(_) => 1,
    }
}
/// Builds the space-separated text used to match a node against a query.
///
/// The text starts with the node's kind and label, followed by the values of
/// a fixed list of descriptive properties, in that list's order, skipping any
/// that are absent.
fn graph_db_node_search_text(node: &SubstrateGraphNode) -> String {
    const TEXT_PROPERTY_KEYS: [&str; 7] = [
        "detail",
        "description",
        "source_ref",
        "path",
        "source_file",
        "source_symbol",
        "text_preview",
    ];
    let mut parts = vec![node.kind.clone(), node.label.clone()];
    parts.extend(
        TEXT_PROPERTY_KEYS
            .iter()
            .filter_map(|key| node.properties.get(*key))
            .cloned(),
    );
    parts.join(" ")
}
/// Scores semantic nodes against `query` by cosine similarity of embeddings.
///
/// Returns an empty map when `query` is absent or blank. Only
/// `semantic_concept` / `semantic_entity` nodes whose `embedding` property
/// parses are scored; the map is keyed by node id.
fn graph_db_semantic_scores_for_query(
    query: Option<&str>,
    nodes: &[SubstrateGraphNode],
) -> BTreeMap<String, f64> {
    let query = match query {
        Some(text) if !text.trim().is_empty() => text,
        _ => return BTreeMap::new(),
    };
    let query_embedding = semantic_embedding(query);

    let mut scores = BTreeMap::new();
    for node in nodes {
        if !matches!(node.kind.as_str(), "semantic_concept" | "semantic_entity") {
            continue;
        }
        let Some(embedding) = node
            .properties
            .get("embedding")
            .and_then(|value| parse_semantic_embedding_property(value))
        else {
            continue;
        };
        scores.insert(
            node.id.clone(),
            semantic_cosine(&query_embedding, &embedding),
        );
    }
    scores
}
fn graph_db_depth_by_id(
origin_ids: &[String],
edges: &[SubstrateGraphEdge],
) -> BTreeMap<String, usize> {
let mut adjacency = BTreeMap::<String, Vec<String>>::new();
for edge in edges {
adjacency
.entry(edge.from_id.clone())
.or_default()
.push(edge.to_id.clone());
adjacency
.entry(edge.to_id.clone())
.or_default()
.push(edge.from_id.clone());
}
let mut depth_by_id = BTreeMap::<String, usize>::new();
let mut queue = VecDeque::<String>::new();
for origin in origin_ids {
if depth_by_id.insert(origin.clone(), 0).is_none() {
queue.push_back(origin.clone());
}
}
while let Some(current) = queue.pop_front() {
let depth = depth_by_id.get(¤t).copied().unwrap_or(0);
for next in adjacency.get(¤t).into_iter().flatten() {
if depth_by_id.contains_key(next) {
continue;
}
depth_by_id.insert(next.clone(), depth.saturating_add(1));
queue.push_back(next.clone());
}
}
depth_by_id
}
/// Returns the ids of all `source_handle` nodes plus every node directly
/// connected to one of them by an edge (in either direction).
fn graph_db_source_covered_ids(
    nodes: &[SubstrateGraphNode],
    edges: &[SubstrateGraphEdge],
) -> BTreeSet<String> {
    let mut source_ids = BTreeSet::<&str>::new();
    let mut covered = BTreeSet::<String>::new();
    for node in nodes {
        if node.kind == "source_handle" {
            source_ids.insert(node.id.as_str());
            covered.insert(node.id.clone());
        }
    }
    for edge in edges {
        let from_is_source = source_ids.contains(edge.from_id.as_str());
        let to_is_source = source_ids.contains(edge.to_id.as_str());
        if from_is_source {
            covered.insert(edge.to_id.clone());
        }
        if to_is_source {
            covered.insert(edge.from_id.clone());
        }
    }
    covered
}
fn graph_db_recency_score(node: &SubstrateGraphNode) -> i64 {
for key in [
"observed_at_unix",
"completed_at_unix",
"created_at_unix",
"started_at_unix",
] {
if let Some(value) = node.properties.get(key)
&& let Ok(epoch) = value.parse::<i64>()
{
return epoch.div_euclid(86_400).clamp(0, 40_000);
}
}
0
}
/// Returns the base usefulness score for a node kind; unlisted kinds score 40.
fn graph_db_node_kind_score(kind: &str) -> i64 {
    const KIND_SCORES: &[(&str, i64)] = &[
        ("source_handle", 180),
        ("worker_context", 170),
        ("worker_result", 160),
        ("semantic_concept", 150),
        ("semantic_entity", 150),
        ("backlog", 130),
        ("job_packet", 130),
        ("symbol", 120),
        ("file", 110),
        ("route", 105),
        ("session", 90),
    ];
    KIND_SCORES
        .iter()
        .find(|(name, _)| *name == kind)
        .map_or(40, |(_, score)| *score)
}
/// Returns the base usefulness score for an edge kind; unlisted kinds score 40.
fn graph_db_edge_kind_score(kind: &str) -> i64 {
    const KIND_SCORES: &[(&str, i64)] = &[
        ("mentions_concept", 180),
        ("mentions_entity", 180),
        ("semantic_relation", 170),
        ("mentions", 165),
        ("requests_context", 155),
        ("scopes_context", 155),
        ("scopes_source", 155),
        ("explains_result", 150),
        ("calls", 145),
        ("defines", 130),
        ("handled_by", 130),
        ("defines_route", 130),
        ("contains", 120),
        ("targets", 120),
        ("records_memory_source", 115),
        ("has_vector_handle", 115),
    ];
    KIND_SCORES
        .iter()
        .find(|(name, _)| *name == kind)
        .map_or(40, |(_, score)| *score)
}
/// Scores how useful a node is for budgeted output.
///
/// Origin nodes always score 1,000,000. Any other node scores its kind score,
/// plus its semantic score (negative similarities count as 0) scaled by 1000,
/// plus 120 when it is source-covered or carries a `source_ref`/`path`
/// property, plus its recency score capped at 80, minus 55 per hop of depth
/// (180 when the depth is unknown).
fn graph_db_node_usefulness_score(
    node: &SubstrateGraphNode,
    depth_by_id: &BTreeMap<String, usize>,
    semantic_scores: &BTreeMap<String, f64>,
    source_covered_ids: &BTreeSet<String>,
    origin_ids: &[String],
) -> i64 {
    if origin_ids.contains(&node.id) {
        return 1_000_000;
    }

    let semantic = semantic_scores
        .get(&node.id)
        .map_or(0, |score| (score.max(0.0) * 1_000.0) as i64);
    let depth_penalty = depth_by_id
        .get(&node.id)
        .map_or(180, |depth| (*depth as i64).saturating_mul(55));
    let has_source_coverage = source_covered_ids.contains(&node.id)
        || node.properties.contains_key("source_ref")
        || node.properties.contains_key("path");
    let source_coverage = if has_source_coverage { 120 } else { 0 };
    let recency = graph_db_recency_score(node).min(80);

    graph_db_node_kind_score(&node.kind) + semantic + source_coverage + recency - depth_penalty
}
/// Scores how useful an edge is for budgeted output.
///
/// The score is the edge-kind score, plus one eighth of the higher-scoring
/// endpoint's node score (unscored endpoints count as 0), minus 35 per hop of
/// the shallower endpoint's depth (140 when neither endpoint has a depth).
fn graph_db_edge_usefulness_score(
    edge: &SubstrateGraphEdge,
    node_score_by_id: &BTreeMap<String, i64>,
    depth_by_id: &BTreeMap<String, usize>,
) -> i64 {
    let score_of = |id: &str| node_score_by_id.get(id).copied().unwrap_or_default();
    let endpoint_score = score_of(edge.from_id.as_str()).max(score_of(edge.to_id.as_str()));

    let from_depth = depth_by_id.get(&edge.from_id).copied();
    let to_depth = depth_by_id.get(&edge.to_id).copied();
    let nearest_depth = match (from_depth, to_depth) {
        (Some(from), Some(to)) => Some(from.min(to)),
        (Some(only), None) | (None, Some(only)) => Some(only),
        (None, None) => None,
    };
    let depth_penalty = nearest_depth.map_or(140, |depth| (depth as i64).saturating_mul(35));

    graph_db_edge_kind_score(&edge.kind) + (endpoint_score / 8) - depth_penalty
}
/// Records one budget drop, incrementing the counter for the
/// `(item, kind, reason)` triple and creating it at zero first if needed.
fn graph_db_push_drop(
    drops: &mut BTreeMap<(String, String, String), usize>,
    item: &str,
    kind: &str,
    reason: &str,
) {
    let key = (item.to_string(), kind.to_string(), reason.to_string());
    let counter = drops.entry(key).or_insert(0);
    *counter += 1;
}
/// Flattens the drop counters into report rows, one per
/// `(item, kind, reason)` triple, in the map's key order.
fn graph_db_budget_drop_report(
    drops: BTreeMap<(String, String, String), usize>,
) -> Vec<GraphDbDroppedByBudget> {
    let mut rows = Vec::with_capacity(drops.len());
    for ((item, kind, reason), dropped) in drops {
        rows.push(GraphDbDroppedByBudget {
            item,
            kind,
            reason,
            dropped,
        });
    }
    rows
}
/// Applies the output budget to a candidate subgraph, starting from the first
/// page and using only depths derived from `edges`.
///
/// Convenience wrapper around
/// `graph_db_apply_output_budget_with_depths_and_cursor` that passes no depth
/// overrides and no cursor.
fn graph_db_apply_output_budget(
origin_ids: &[String],
semantic_scores: &BTreeMap<String, f64>,
nodes: Vec<SubstrateGraphNode>,
edges: Vec<SubstrateGraphEdge>,
limit: Option<usize>,
) -> GraphDbBudgetedSubgraph {
graph_db_apply_output_budget_with_depths_and_cursor(
origin_ids,
semantic_scores,
nodes,
edges,
limit,
// No depth overrides.
None,
// No cursor: start at the top of the ranking.
None,
)
}
/// Selects the most useful nodes and edges that fit the output budget.
///
/// Candidate nodes are ranked by usefulness score (descending), then by kind,
/// label, and id. When `cursor` names a candidate, that candidate and
/// everything ranked before it are skipped; an unknown cursor skips nothing.
/// Nodes are then admitted in rank order unless their per-kind quota is
/// exhausted or they would exceed the estimated token cap; origin nodes bypass
/// both checks. Edges are considered only when both endpoints were selected,
/// and are admitted under their own per-kind quotas and the same token cap.
///
/// * `depth_overrides` — optional depths merged with the depths computed from
///   `edges`; the smaller depth wins for ids present in both.
/// * `limit` — drives the token cap and the per-kind quotas.
///
/// The returned nodes and edges keep their input order. `truncated` is set
/// when any post-cursor candidate node was left out, and `next_cursor` is then
/// the last node admitted *in rank order*, which is the order the cursor is
/// resolved in on the next call.
fn graph_db_apply_output_budget_with_depths_and_cursor(
    origin_ids: &[String],
    semantic_scores: &BTreeMap<String, f64>,
    nodes: Vec<SubstrateGraphNode>,
    edges: Vec<SubstrateGraphEdge>,
    limit: Option<usize>,
    depth_overrides: Option<&BTreeMap<String, usize>>,
    cursor: Option<&str>,
) -> GraphDbBudgetedSubgraph {
    let max_tokens = graph_db_output_token_cap(limit);
    let candidate_nodes = nodes.len();
    let candidate_edges = edges.len();

    // Depths come from the edges; overrides can only make a node shallower.
    let mut depth_by_id = graph_db_depth_by_id(origin_ids, &edges);
    if let Some(depth_overrides) = depth_overrides {
        for (id, depth) in depth_overrides {
            depth_by_id
                .entry(id.clone())
                .and_modify(|current| *current = (*current).min(*depth))
                .or_insert(*depth);
        }
    }

    let source_covered_ids = graph_db_source_covered_ids(&nodes, &edges);
    let node_score_by_id = nodes
        .iter()
        .map(|node| {
            (
                node.id.clone(),
                graph_db_node_usefulness_score(
                    node,
                    &depth_by_id,
                    semantic_scores,
                    &source_covered_ids,
                    origin_ids,
                ),
            )
        })
        .collect::<BTreeMap<_, _>>();

    // Rank candidates: best score first, with kind/label/id as deterministic tie-breakers.
    let mut node_candidates = nodes.iter().collect::<Vec<_>>();
    node_candidates.sort_by(|left, right| {
        node_score_by_id
            .get(&right.id)
            .cmp(&node_score_by_id.get(&left.id))
            .then_with(|| left.kind.cmp(&right.kind))
            .then_with(|| left.label.cmp(&right.label))
            .then_with(|| left.id.cmp(&right.id))
    });

    // The cursor is a position in the ranked order: resume right after it.
    let cursor_skip = if let Some(cursor) = cursor {
        node_candidates
            .iter()
            .position(|node| node.id == cursor)
            .map(|pos| pos.saturating_add(1))
            .unwrap_or(0)
    } else {
        0
    };
    if cursor_skip > 0 {
        node_candidates = node_candidates.into_iter().skip(cursor_skip).collect();
    }

    let mut selected_node_ids = BTreeSet::new();
    let mut selected_node_counts = BTreeMap::<String, usize>::new();
    let mut estimated_tokens = 0usize;
    let mut drops = BTreeMap::<(String, String, String), usize>::new();
    // Last node admitted while walking the ranked order; this becomes the next cursor.
    let mut last_selected_in_rank_order: Option<&str> = None;
    for node in node_candidates.iter().copied() {
        let is_origin = origin_ids.iter().any(|origin| origin == &node.id);
        let kind_count = selected_node_counts
            .get(&node.kind)
            .copied()
            .unwrap_or_default();
        if !is_origin && kind_count >= graph_db_node_kind_quota(&node.kind, limit) {
            graph_db_push_drop(&mut drops, "node", &node.kind, "per_kind_quota");
            continue;
        }
        let tokens = graph_db_estimated_tokens(node);
        if !is_origin && estimated_tokens.saturating_add(tokens) > max_tokens {
            graph_db_push_drop(&mut drops, "node", &node.kind, "estimated_token_cap");
            continue;
        }
        selected_node_ids.insert(node.id.clone());
        *selected_node_counts.entry(node.kind.clone()).or_default() += 1;
        estimated_tokens = estimated_tokens.saturating_add(tokens);
        last_selected_in_rank_order = Some(node.id.as_str());
    }
    let has_remaining_candidates = node_candidates
        .iter()
        .any(|node| !selected_node_ids.contains(&node.id));
    // Own the cursor id now: `nodes` is consumed below, ending all borrows of it.
    let last_selected_in_rank_order = last_selected_in_rank_order.map(str::to_string);

    // Output keeps the input order of `nodes`, not the ranked order.
    let mut selected_nodes = nodes
        .into_iter()
        .filter(|node| selected_node_ids.contains(&node.id))
        .collect::<Vec<_>>();

    // Only edges whose endpoints both survived are eligible.
    let mut edge_candidates = edges
        .iter()
        .filter(|edge| {
            selected_node_ids.contains(&edge.from_id) && selected_node_ids.contains(&edge.to_id)
        })
        .collect::<Vec<_>>();
    let edge_score_by_key = edge_candidates
        .iter()
        .map(|edge| {
            (
                graph_db_edge_key(edge),
                graph_db_edge_usefulness_score(edge, &node_score_by_id, &depth_by_id),
            )
        })
        .collect::<BTreeMap<_, _>>();
    edge_candidates.sort_by(|left, right| {
        edge_score_by_key
            .get(&graph_db_edge_key(right))
            .cmp(&edge_score_by_key.get(&graph_db_edge_key(left)))
            .then_with(|| left.kind.cmp(&right.kind))
            .then_with(|| left.from_id.cmp(&right.from_id))
            .then_with(|| left.to_id.cmp(&right.to_id))
    });

    // Edges lost because an endpoint was not selected are reported as one aggregate row.
    let endpoint_dropped_edges = edges
        .iter()
        .filter(|edge| {
            !selected_node_ids.contains(&edge.from_id) || !selected_node_ids.contains(&edge.to_id)
        })
        .count();
    if endpoint_dropped_edges > 0 {
        drops.insert(
            (
                "edge".to_string(),
                "*".to_string(),
                "endpoint_node_dropped".to_string(),
            ),
            endpoint_dropped_edges,
        );
    }

    let mut selected_edge_ids = BTreeSet::new();
    let mut selected_edge_counts = BTreeMap::<String, usize>::new();
    for edge in edge_candidates {
        let kind_count = selected_edge_counts
            .get(&edge.kind)
            .copied()
            .unwrap_or_default();
        if kind_count >= graph_db_edge_kind_quota(&edge.kind, limit) {
            graph_db_push_drop(&mut drops, "edge", &edge.kind, "per_kind_quota");
            continue;
        }
        let tokens = graph_db_estimated_tokens(edge);
        if estimated_tokens.saturating_add(tokens) > max_tokens {
            graph_db_push_drop(&mut drops, "edge", &edge.kind, "estimated_token_cap");
            continue;
        }
        selected_edge_ids.insert(graph_db_edge_key(edge));
        *selected_edge_counts.entry(edge.kind.clone()).or_default() += 1;
        estimated_tokens = estimated_tokens.saturating_add(tokens);
    }
    let selected_edges = edges
        .into_iter()
        .filter(|edge| selected_edge_ids.contains(&graph_db_edge_key(edge)))
        .collect::<Vec<_>>();

    let dropped_by_budget = graph_db_budget_drop_report(drops);
    let truncated = has_remaining_candidates;
    // The cursor must identify a position in the ranked order, because that is
    // how it is resolved on the next call. `selected_nodes` is in input order,
    // so its last element is not a valid resume point.
    // NOTE(review): candidates dropped *before* this position are still not
    // revisited by later pages; they are only visible in `dropped_by_budget`.
    let next_cursor = if truncated {
        last_selected_in_rank_order
    } else {
        None
    };

    let mut diagnostics = vec![
        "budget ranking signals: semantic_match, edge_kind, depth, recency, source_handle_coverage"
            .to_string(),
        format!(
            "selected {} of {} candidate node(s) and {} of {} candidate edge(s) within estimated token cap {}",
            selected_nodes.len(),
            candidate_nodes,
            selected_edges.len(),
            candidate_edges,
            max_tokens
        ),
    ];
    if cursor.is_some() {
        diagnostics.push(format!(
            "cursor skipped {} previously returned candidate(s)",
            cursor_skip
        ));
    }
    if next_cursor.is_some() {
        diagnostics.push(
            "result was truncated; pass next_cursor as --cursor for the next page".to_string(),
        );
    }
    selected_nodes.shrink_to_fit();
    GraphDbBudgetedSubgraph {
        nodes: selected_nodes,
        edges: selected_edges,
        report: GraphDbOutputBudgetReport {
            max_tokens,
            estimated_tokens,
            selected_nodes: selected_node_ids.len(),
            selected_edges: selected_edge_ids.len(),
            candidate_nodes,
            candidate_edges,
            dropped_by_budget,
            diagnostics,
        },
        truncated,
        next_cursor,
    }
}
/// Returns the key used to identify `edge` when selecting edges for output.
///
/// An edge that carries its own non-empty `id` is keyed by that id; otherwise
/// the key is derived from the edge endpoints and kind via
/// `ConvexEdgeRow::stable_key`.
fn graph_db_edge_key(edge: &SubstrateGraphEdge) -> String {
    match edge.id.as_str() {
        "" => substrate::ConvexEdgeRow::stable_key(&edge.from_id, &edge.to_id, &edge.kind),
        explicit_id => explicit_id.to_owned(),
    }
}
fn graph_db_schema() -> GraphDbSchema {
GraphDbSchema {
contract_versions: vec![
GraphDbSchemaContract {
name: "graph_db_evidence",
version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
description: "graph-db evidence JSON packet including packet_id, projection hash, worker context, source handles, worker results, semantic rows, replay commands, and repair commands",
},
GraphDbSchemaContract {
name: "worker_prompt_packet",
version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
description: "conflict-matrix worker prompt packet with owned scope, scheduler fields, stable graph handles, expected tests, expansion commands, token budget, semantic ranking reasons, worker feedback closure controls, and fail-closed prompt text",
},
GraphDbSchemaContract {
name: "conflict_matrix",
version: CONFLICT_MATRIX_CONTRACT_VERSION,
description: "parallel-dispatch decision report keyed by graph evidence packets, scheduler block fields, hard file/symbol/test/config gates, and soft worker-feedback closure ranking",
},
GraphDbSchemaContract {
name: "context_pack_graph_orchestration",
version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
description: "context-pack graph orchestration summary with projection freshness, evidence packet ids, ownership blocks, and follow-up graph commands",
},
GraphDbSchemaContract {
name: "session_review_follow_up",
version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
description: "session-review next-context follow-up command contract for resumable digest/context-pack commands",
},
GraphDbSchemaContract {
name: "dispatch_trace",
version: DISPATCH_TRACE_CONTRACT_VERSION,
description: "operator review trace linking backlog, job packets, worker results, source handles, semantic rows, scheduler fields, evidence packet ids, worker feedback closure controls, and worker prompt packets",
},
GraphDbSchemaContract {
name: "dependency_dag",
version: DEPENDENCY_DAG_CONTRACT_VERSION,
description: "topological planning DAG for agent-doc backlog targets with replayable dependency edges, topo batches, and cycle diagnostics",
},
],
node_fields: vec![
GraphDbSchemaField {
name: "id",
value_type: "string",
description: "Stable provider-neutral node id",
},
GraphDbSchemaField {
name: "kind",
value_type: "string",
description: "Application-defined node family such as file, symbol, or backlog",
},
GraphDbSchemaField {
name: "label",
value_type: "string",
description: "Human-readable label",
},
GraphDbSchemaField {
name: "properties",
value_type: "object<string,string>",
description: "Adapter-specific string properties",
},
GraphDbSchemaField {
name: "provenance",
value_type: "array",
description: "Source system and source reference metadata",
},
GraphDbSchemaField {
name: "freshness",
value_type: "object|null",
description: "Optional content hash and observed timestamp",
},
],
edge_fields: vec![
GraphDbSchemaField {
name: "id",
value_type: "string",
description: "Stable provider-neutral edge id derived from from_id, kind, and to_id",
},
GraphDbSchemaField {
name: "from_id",
value_type: "string",
description: "Source node id",
},
GraphDbSchemaField {
name: "to_id",
value_type: "string",
description: "Target node id",
},
GraphDbSchemaField {
name: "kind",
value_type: "string",
description: "Application-defined edge relation",
},
GraphDbSchemaField {
name: "properties",
value_type: "object<string,string>",
description: "Adapter-specific string properties",
},
GraphDbSchemaField {
name: "provenance",
value_type: "array",
description: "Source system and source reference metadata",
},
GraphDbSchemaField {
name: "freshness",
value_type: "object|null",
description: "Optional content hash and observed timestamp",
},
],
operations: vec![
GraphDbSchemaOperation {
command: "refresh",
description: "Materialize .tsift/graph.db explicitly with delta upserts/deletes, row hash watermarks, tombstone pruning, projection metadata, row counts, and operator next commands",
},
GraphDbSchemaOperation {
command: "status",
description: "Inspect .tsift/graph.db freshness, projection metadata, row counts, tombstone counts, file-size impact, and operator next commands without refreshing",
},
GraphDbSchemaOperation {
command: "doctor",
description: "Validate graph.db or Convex snapshot health and return fail-closed repair diagnostics plus non-fatal SQLite tombstone-retention warnings",
},
GraphDbSchemaOperation {
command: "drift",
description: "Compare local SQLite projection rows with a Convex snapshot and return upsert, tombstone, metadata, duplicate, orphan, and next-command diagnostics",
},
GraphDbSchemaOperation {
command: "compact [--apply] [--prune-tombstones --confirmed-convex-reconciled]",
description: "Return or apply the post-reconciliation SQLite graph compaction policy, including WAL checkpoint/VACUUM proof and guarded tombstone pruning",
},
GraphDbSchemaOperation {
command: "backend-eval [--candidate duckdb-duckpgq|falkordb|ladybug|kuzu|surrealdb] [--target ID] [--full-projection]",
description: "Benchmark experimental read-only GraphStore backend prototypes against SQLite on bounded real, optional full-project, and synthetic projections across refresh/status/path tiers/evidence/conflict-matrix/dispatch-trace and emit promotion hold/eligibility gates",
},
GraphDbSchemaOperation {
command: "evidence <target> [--depth N] [--limit N]",
description: "Return a bounded versioned graph-db handoff packet for a backlog id or job packet handle, including packet_id, projection hash, worker_context rows, source_handle rows, worker_result rows, semantic_concept/entity rows, shortest paths, replay commands, repair commands, and next commands",
},
GraphDbSchemaOperation {
command: "related <phrase> [--kind concept|entity|all] [--depth N] [--seed-limit N] [--limit N]",
description: "Resolve a natural-language phrase to cached semantic concept/entity seed nodes, then return an incident/outgoing GraphStore neighborhood around those seeds for general knowledge retrieval without changing stable neighborhood pagination defaults",
},
GraphDbSchemaOperation {
command: "dispatch-trace [target...] --path <session> [--format json|html]",
description: "Export a compact graph-backed dispatch trace with evidence packet ids, worker-result feedback closure summaries, graph links, and conflict-matrix worker prompt packets",
},
GraphDbSchemaOperation {
command: "dependency-dag [target...] --path <session>",
description: "Extract a versioned agent-doc dependency DAG from backlog ids, explicit depends-on text, shared file/symbol/test/config evidence, semantic overlap, and worker-result follow-up ids",
},
GraphDbSchemaOperation {
command: "schema",
description: "Return record and operation schemas",
},
GraphDbSchemaOperation {
command: "node <id>",
description: "Return one node by stable id",
},
GraphDbSchemaOperation {
command: "edge <id>",
description: "Return one edge by stable edge id",
},
GraphDbSchemaOperation {
command: "edges [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
description: "Return edge records ordered by stable edge id with SQLite-pushed edge-property filtering and cursor pagination",
},
GraphDbSchemaOperation {
command: "incident <id> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
description: "Return incoming and outgoing edges incident to one node, ordered by stable edge id with optional kind and edge-property filters",
},
GraphDbSchemaOperation {
command: "kind <kind> [--property KEY=VALUE] [--cursor ID] [--limit N]",
description: "Return nodes of one kind ordered by id with SQLite-pushed property filtering/cursor pagination and query-plan diagnostics",
},
GraphDbSchemaOperation {
command: "neighborhood <id> --depth <n> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor ID] [--limit N]",
description: "Return a directed outgoing subgraph around a node using batched SQLite recursive traversal plus pushed filters/paging when available; JSON also includes additive ranked_neighbors while default nodes remain stable-id ordered",
},
GraphDbSchemaOperation {
command: "path <from> <to> [--edge-kind <kind>] [--max-hops N]",
description: "Return the shortest directed path by node id, optionally bounded by hop count",
},
],
}
}
/// Reports whether the SQLite graph projection for `scope` can be trusted.
///
/// * No projection metadata: status `"missing"`, fail-closed.
/// * Metadata whose version differs from `GRAPH_PROJECTION_VERSION`, or that
///   has no content hash: status `"stale"`, fail-closed, one diagnostic per
///   problem.
/// * Otherwise: status `"current"`.
///
/// # Errors
/// Propagates any error from `store.projection_version`.
pub(crate) fn sqlite_graph_freshness(
    store: &SqliteGraphStore,
    scope: &str,
) -> Result<GraphDbFreshnessReport> {
    let Some(version) = store.projection_version(scope)? else {
        let missing_diagnostic =
            "graph projection metadata is missing; rebuild the graph before trusting reads"
                .to_string();
        return Ok(GraphDbFreshnessReport {
            status: "missing".to_string(),
            fail_closed: true,
            projection_version: None,
            content_hash: None,
            source_watermark: None,
            diagnostics: vec![missing_diagnostic],
        });
    };

    // Evaluate each staleness condition once; both the flag and the
    // diagnostics are derived from them.
    let version_mismatch = version.projection_version != GRAPH_PROJECTION_VERSION;
    let hash_missing = version.content_hash.is_none();

    let mut diagnostics = Vec::new();
    if version_mismatch {
        diagnostics.push(format!(
            "projection version mismatch: expected {} got {}",
            GRAPH_PROJECTION_VERSION, version.projection_version
        ));
    }
    if hash_missing {
        diagnostics.push("projection content hash is missing".to_string());
    }

    let fail_closed = version_mismatch || hash_missing;
    let status = if fail_closed { "stale" } else { "current" };
    Ok(GraphDbFreshnessReport {
        status: status.to_string(),
        fail_closed,
        projection_version: Some(version.projection_version),
        content_hash: version.content_hash,
        source_watermark: version.source_watermark,
        diagnostics,
    })
}
/// Converts the Convex projection freshness comparison of `local` against
/// `snapshot` into a `GraphDbFreshnessReport`.
///
/// Status, fail-closed flag and diagnostics are taken from
/// `convex_projection_freshness`; the snapshot hash becomes the content hash,
/// the projection version is always `GRAPH_PROJECTION_VERSION`, and no source
/// watermark is reported.
pub(crate) fn convex_graph_freshness(
    local: &ConvexProjectionRows,
    snapshot: &ConvexProjectionRows,
    scope: Option<&str>,
) -> GraphDbFreshnessReport {
    let projection = convex_projection_freshness(local, Some(snapshot), scope);
    let projection_version = Some(GRAPH_PROJECTION_VERSION.to_string());
    GraphDbFreshnessReport {
        status: projection.status,
        fail_closed: projection.fail_closed,
        projection_version,
        content_hash: projection.snapshot_hash,
        source_watermark: None,
        diagnostics: projection.diagnostics,
    }
}
/// Produces the freshness report for the read-only tokensave adapter.
///
/// The report is always `"current"` and never fail-closed; it carries no
/// content hash, uses the database path as the source watermark, and records
/// the node/edge/file counts in a single diagnostic.
///
/// # Errors
/// Propagates errors from `graph_counts` and `file_count`.
pub(crate) fn tokensave_graph_freshness(store: &TokensaveDb) -> Result<GraphDbFreshnessReport> {
    let (nodes, edges) = store.graph_counts()?;
    let files = store.file_count()?;
    let db_location = store.db_path().to_string_lossy().to_string();
    let opened_summary = format!(
        "tokensave read-only adapter opened {} node(s), {} edge(s), {} file(s)",
        nodes, edges, files
    );
    Ok(GraphDbFreshnessReport {
        status: "current".to_string(),
        fail_closed: false,
        projection_version: Some("tokensave-readonly".to_string()),
        content_hash: None,
        source_watermark: Some(db_location),
        diagnostics: vec![opened_summary],
    })
}
/// Appends tokensave database checks to a doctor `report`.
///
/// * Discovery error: a `tokensave_db_open` check built by
///   `graph_db_doctor_check` with the error text.
/// * No database found: a `tokensave_db_exists` check naming the expected
///   `.tokensave/tokensave.db` path under `root`.
/// * Database found: an ok `tokensave_db_open` check, followed by a
///   `tokensave_counts` check that is ok when all three counts can be read and
///   otherwise lists the individual count errors.
pub(crate) fn append_tokensave_graph_doctor_checks(report: &mut GraphDbDoctorReport, root: &Path) {
    // Handle the two "no usable database" outcomes up front.
    let store = match TokensaveDb::discover(root) {
        Ok(Some(store)) => store,
        Ok(None) => {
            let expected_path = root.join(".tokensave").join("tokensave.db");
            report.push_check(graph_db_doctor_check(
                "tokensave_db_exists",
                vec![format!(
                    "tokensave database is missing at {}",
                    expected_path.display()
                )],
                Vec::new(),
            ));
            return;
        }
        Err(err) => {
            report.push_check(graph_db_doctor_check(
                "tokensave_db_open",
                vec![err.to_string()],
                Vec::new(),
            ));
            return;
        }
    };

    let opened_message = format!("opened tokensave database at {}", store.db_path().display());
    report.push_check(GraphDbDoctorCheck {
        name: "tokensave_db_open".to_string(),
        status: "ok".to_string(),
        fail_closed: false,
        diagnostics: vec![opened_message],
        repair_commands: Vec::new(),
    });

    let node_count = store.node_count();
    let edge_count = store.edge_count();
    let file_count = store.file_count();
    match (node_count, edge_count, file_count) {
        (Ok(nodes), Ok(edges), Ok(files)) => {
            let counts_message = format!(
                "tokensave contains {} node(s), {} edge(s), {} file(s)",
                nodes, edges, files
            );
            report.push_check(GraphDbDoctorCheck {
                name: "tokensave_counts".to_string(),
                status: "ok".to_string(),
                fail_closed: false,
                diagnostics: vec![counts_message],
                repair_commands: Vec::new(),
            });
        }
        // At least one count failed; show each error (or None) side by side.
        (nodes, edges, files) => {
            let failure_message = format!(
                "tokensave count inspection failed: nodes={:?} edges={:?} files={:?}",
                nodes.err(),
                edges.err(),
                files.err()
            );
            report.push_check(graph_db_doctor_check(
                "tokensave_counts",
                vec![failure_message],
                Vec::new(),
            ));
        }
    }
}
/// Resolves an evidence `target` string to a graph node, if one exists.
///
/// Delegates to `GraphStore::resolve_evidence_target`, restricting the lookup
/// to the node kinds that may act as evidence targets.
pub(crate) fn graph_db_resolve_evidence_target(
    store: &impl GraphStore,
    target: &str,
) -> Result<Option<SubstrateGraphNode>> {
    // Node kinds accepted as evidence targets, in the order handed to the store.
    const EVIDENCE_TARGET_KINDS: [&str; 5] = [
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
    ];
    store.resolve_evidence_target(target, &EVIDENCE_TARGET_KINDS)
}
/// Pass-through to `GraphStore::reachable_nodes_by_kind`.
///
/// Forwards `from_id`, `kind`, `depth` and `limit` unchanged and returns the
/// store's result: each matching node paired with a `GraphPath`.
/// NOTE(review): presumably the path is the route from `from_id` to that node;
/// confirm against the trait implementation.
///
/// # Errors
/// Returns whatever error the store reports.
fn graph_db_reachable_nodes_by_kind(
store: &impl GraphStore,
from_id: &str,
kind: &str,
depth: usize,
limit: usize,
) -> Result<Vec<(SubstrateGraphNode, substrate::GraphPath)>> {
store.reachable_nodes_by_kind(from_id, kind, depth, limit)
}
/// Detects "queue-head drift" for an evidence target.
///
/// Drift is reported when the target already has a `completed` worker result
/// carrying the same `ref_id`, while the store still holds `job_packet` nodes
/// with that `ref_id` whose label starts with `"do #"`.
///
/// Returns at most one warning. The warning embeds a concrete `agent-doc`
/// repair command when the target has a `path` property, and generic repair
/// advice otherwise.
///
/// A target without a `ref_id` property never yields a warning: there is no
/// key that ties it to results or job packets, and comparing two absent
/// `ref_id`s as equal would match unrelated nodes.
///
/// # Errors
/// Propagates errors from `store.nodes_by_kind`.
fn graph_db_evidence_completed_queue_drift_warnings(
    store: &impl GraphStore,
    target: &SubstrateGraphNode,
    worker_results: &[SubstrateGraphNode],
) -> Result<Vec<String>> {
    let Some(ref_id) = target.properties.get("ref_id").map(String::as_str) else {
        return Ok(Vec::new());
    };
    let shares_ref_id = |node: &SubstrateGraphNode| {
        node.properties.get("ref_id").map(String::as_str) == Some(ref_id)
    };

    let has_completed_result = worker_results.iter().any(|node| {
        node.properties.get("status").map(String::as_str) == Some("completed")
            && shares_ref_id(node)
    });
    if !has_completed_result {
        return Ok(Vec::new());
    }

    // Only the number of still-queued packets is reported, so count them
    // instead of collecting the nodes.
    let active_job_count = store
        .nodes_by_kind("job_packet")?
        .into_iter()
        .filter(|node| shares_ref_id(node) && node.label.starts_with("do #"))
        .count();
    if active_job_count == 0 {
        return Ok(Vec::new());
    }

    let repair = match target.properties.get("path") {
        Some(path) => format!(
            "repair with `agent-doc write --commit {} --done {}` or the next `agent-doc finalize --done {}` closeout",
            shell_quote(path),
            shell_quote(ref_id),
            shell_quote(ref_id)
        ),
        None => {
            "repair by marking the queue item done/reaping it in the agent-doc session".to_string()
        }
    };
    Ok(vec![format!(
        "queue-head drift: target {} has {} active queued do packet(s) but already has a completed worker_result; {repair}; do not redispatch or reactivate the completed item",
        target.label,
        active_job_count
    )])
}
/// Collects the follow-up commands advertised by an evidence packet.
///
/// The result contains every `expand` property found on the target and on the
/// supplied worker-context, source-handle, worker-result and semantic nodes,
/// plus the `status` and `doctor` commands for this root and scope. Commands
/// are de-duplicated and returned in sorted order (they pass through a
/// `BTreeSet`).
fn graph_db_evidence_next_commands(
    root: &Path,
    scope: Option<&str>,
    target: &SubstrateGraphNode,
    worker_context: &[SubstrateGraphNode],
    source_handles: &[SubstrateGraphNode],
    worker_results: &[SubstrateGraphNode],
    semantic_related: &[SubstrateGraphNode],
) -> Vec<String> {
    let mut commands: BTreeSet<String> = std::iter::once(target)
        .chain(worker_context)
        .chain(source_handles)
        .chain(worker_results)
        .chain(semantic_related)
        .filter_map(|node| node.properties.get("expand").cloned())
        .collect();

    let root_arg = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    commands.insert(format!(
        "tsift graph-db --path {}{} status --json",
        root_arg, scope_arg
    ));
    commands.insert(format!(
        "tsift graph-db --path {}{} doctor --json",
        root_arg, scope_arg
    ));

    Vec::from_iter(commands)
}
/// Returns the two repair commands for a graph database at `root`:
/// `refresh` followed by `doctor`, both with the optional scope argument and
/// JSON output.
fn graph_db_repair_commands(root: &Path, scope: Option<&str>) -> Vec<String> {
    let root_arg = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    let refresh = format!(
        "tsift graph-db --path {}{} refresh --json",
        root_arg, scope_arg
    );
    let doctor = format!(
        "tsift graph-db --path {}{} doctor --json",
        root_arg, scope_arg
    );
    vec![refresh, doctor]
}
/// Returns the commands that reproduce an evidence request: the
/// `graph-db evidence` invocation with the same target, depth and limit, and
/// the matching `conflict-matrix` invocation for that target.
///
/// NOTE(review): the cursor of a paged request is not part of the replay
/// command; confirm whether replaying the first page is intended.
fn graph_db_evidence_replay_commands(
    root: &Path,
    scope: Option<&str>,
    target: &str,
    depth: usize,
    limit: usize,
) -> Vec<String> {
    let root_arg = shell_quote(root.to_string_lossy().as_ref());
    let target_arg = shell_quote(target);
    let evidence = format!(
        "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
        root_arg,
        graph_db_scope_arg(scope),
        target_arg,
        depth,
        limit
    );
    let conflict_matrix = format!(
        "tsift conflict-matrix --path {} {} --json",
        root_arg, target_arg
    );
    vec![evidence, conflict_matrix]
}
/// Derives the `gevd` packet handle for an evidence report.
///
/// The handle is computed by `stable_handle` from the evidence contract
/// version, the requested target string, the resolved node id, and the
/// projection content hash (`"no-hash"` when the hash is absent).
fn graph_db_evidence_packet_id(
    target: &str,
    target_node: &SubstrateGraphNode,
    freshness: &GraphDbFreshnessReport,
) -> String {
    let projection_hash = freshness.content_hash.as_deref().unwrap_or("no-hash");
    let seed = format!(
        "{}:{}:{}:{}",
        GRAPH_DB_EVIDENCE_CONTRACT_VERSION, target, target_node.id, projection_hash
    );
    stable_handle("gevd", &seed)
}
/// Builds the `graph-db evidence` handoff packet for one target.
///
/// Steps, in order:
/// 1. Fail (bail) when `freshness.fail_closed` is set, embedding the repair
///    commands in the error text.
/// 2. Record semantic-readiness problems as warnings (these do not abort).
/// 3. Resolve `target` to a node; error if it cannot be found.
/// 4. Gather reachable worker_context / source_handle / worker_result /
///    semantic_concept / semantic_entity nodes within `depth`.
/// 5. Apply the output budget (with cursor paging) and keep only retained
///    nodes in every evidence group and in `shortest_paths`.
/// 6. Attach drift warnings, next/replay/repair commands and the packet id.
///
/// # Errors
/// Fails when freshness is fail-closed, when the target is not found, or when
/// any store call fails.
pub(crate) fn graph_db_evidence_report_from_store<S: GraphStore>(
input: GraphDbEvidenceInput<'_, S>,
) -> Result<GraphDbEvidenceReport> {
let GraphDbEvidenceInput {
root,
scope,
backend,
target,
depth,
limit,
cursor,
store,
freshness,
mut warnings,
} = input;
let repair_commands = graph_db_repair_commands(root, scope);
// Stale or missing projections must never produce an evidence packet.
if freshness.fail_closed {
bail!(
"graph database evidence failed closed for {} backend: {}; repair: {}",
backend,
freshness.diagnostics.join("; "),
repair_commands.join("; ")
);
}
// A failure to count semantic nodes is passed on as `None` rather than as an error.
let semantic_readiness = graph_db_semantic_readiness(
root,
scope,
graph_store_semantic_node_count(store).ok(),
);
// Semantic readiness problems are surfaced as warnings only.
if semantic_readiness.fail_closed {
warnings.push(format!(
"graph evidence semantic readiness blocked: {} — {}",
semantic_readiness.reason,
semantic_readiness.diagnostics.join("; ")
));
warnings.push(format!(
"repair: {}",
semantic_readiness.next_commands.join("; then ")
));
}
let target_node = graph_db_resolve_evidence_target(store, target)?
.with_context(|| format!("graph-db evidence target not found: {target}"))?;
// A limit of 0 lifts the row cap for the reachability query.
// NOTE(review): the budget call below still receives `Some(limit)`, i.e. `Some(0)`
// in that case; confirm the budget helper treats 0 as "no limit" too.
let max_rows = if limit == 0 { usize::MAX } else { limit };
let mut reachable = store.reachable_nodes_by_kinds(
&target_node.id,
&[
"worker_context",
"source_handle",
"worker_result",
"semantic_concept",
"semantic_entity",
],
depth,
max_rows,
)?;
// Split the result by kind; a kind with no hits yields an empty list.
let worker_paths = reachable.remove("worker_context").unwrap_or_default();
let source_paths = reachable.remove("source_handle").unwrap_or_default();
let worker_result_paths = reachable.remove("worker_result").unwrap_or_default();
// Concepts and entities are merged into one semantic list, ordered by
// hop count, then kind, label and id, and capped at `max_rows`.
let mut semantic_paths = reachable.remove("semantic_concept").unwrap_or_default();
semantic_paths.extend(reachable.remove("semantic_entity").unwrap_or_default());
semantic_paths.sort_by(|(left_node, left_path), (right_node, right_path)| {
left_path
.hops
.cmp(&right_path.hops)
.then(left_node.kind.cmp(&right_node.kind))
.then(left_node.label.cmp(&right_node.label))
.then(left_node.id.cmp(&right_node.id))
});
if max_rows != usize::MAX && semantic_paths.len() > max_rows {
semantic_paths.truncate(max_rows);
}
// Candidate nodes for budgeting, in group order: worker context, source
// handles, worker results, semantic rows.
let evidence_nodes = worker_paths
.iter()
.chain(source_paths.iter())
.chain(worker_result_paths.iter())
.chain(semantic_paths.iter())
.map(|(node, _)| node.clone())
.collect::<Vec<_>>();
// Hop count per node id, handed to the budget helper as depth information.
let evidence_depth_by_id = worker_paths
.iter()
.chain(source_paths.iter())
.chain(worker_result_paths.iter())
.chain(semantic_paths.iter())
.map(|(node, path)| (node.id.clone(), path.hops))
.collect::<BTreeMap<_, _>>();
// The target's own search text is the query used to score the candidates.
let target_query = graph_db_node_search_text(&target_node);
let semantic_scores = graph_db_semantic_scores_for_query(Some(&target_query), &evidence_nodes);
// No edges are budgeted for evidence packets (empty edge list).
let budgeted = graph_db_apply_output_budget_with_depths_and_cursor(
std::slice::from_ref(&target_node.id),
&semantic_scores,
evidence_nodes,
Vec::new(),
Some(limit),
Some(&evidence_depth_by_id),
cursor,
);
let output_budget = budgeted.report;
let truncated = budgeted.truncated;
let next_cursor = budgeted.next_cursor;
// Ids that survived budgeting; every group below is filtered against this set.
let retained_evidence_ids = budgeted
.nodes
.iter()
.map(|node| node.id.as_str())
.collect::<BTreeSet<_>>();
let worker_context = worker_paths
.iter()
.filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
.map(|(node, _)| node.clone())
.collect::<Vec<_>>();
let source_handles = source_paths
.iter()
.filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
.map(|(node, _)| node.clone())
.collect::<Vec<_>>();
let worker_results = worker_result_paths
.iter()
.filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
.map(|(node, _)| node.clone())
.collect::<Vec<_>>();
let semantic_related = semantic_paths
.iter()
.filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
.map(|(node, _)| node.clone())
.collect::<Vec<_>>();
// Drift detection only sees the worker results that survived budgeting.
warnings.extend(graph_db_evidence_completed_queue_drift_warnings(
store,
&target_node,
&worker_results,
)?);
// Tell the operator when the target resolved but nothing is linked to it.
if worker_context.is_empty()
&& source_handles.is_empty()
&& worker_results.is_empty()
&& semantic_related.is_empty()
{
warnings.push(format!(
"graph-db evidence target {} resolved to a {} node but has no projection-linked context rows; add source/file tokens to the backlog text or rerun graph-db refresh after the session document is indexed",
target, target_node.kind
));
}
// One path entry per retained evidence node, in the same group order as above.
let shortest_paths = worker_paths
.iter()
.chain(source_paths.iter())
.chain(worker_result_paths.iter())
.chain(semantic_paths.iter())
.filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
.map(|(node, path)| GraphDbEvidencePath {
to: node.id.clone(),
kind: node.kind.clone(),
label: node.label.clone(),
path: Some(path.clone()),
expand: node.properties.get("expand").cloned(),
})
.collect::<Vec<_>>();
let next_commands = graph_db_evidence_next_commands(
root,
scope,
&target_node,
&worker_context,
&source_handles,
&worker_results,
&semantic_related,
);
let replay_commands = graph_db_evidence_replay_commands(root, scope, target, depth, limit);
let packet_id = graph_db_evidence_packet_id(target, &target_node, &freshness);
// Cloned because `freshness` itself is moved into the report below.
let projection_hash = freshness.content_hash.clone();
Ok(GraphDbEvidenceReport {
root: root.to_string_lossy().to_string(),
scope: scope.map(str::to_string),
backend: backend.to_string(),
contract_version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION.to_string(),
target: target.to_string(),
packet_id,
projection_hash,
freshness,
target_node: target_node.into(),
worker_context: worker_context.into_iter().map(Into::into).collect(),
source_handles: source_handles.into_iter().map(Into::into).collect(),
worker_results: worker_results.into_iter().map(Into::into).collect(),
semantic_related: semantic_related.into_iter().map(Into::into).collect(),
shortest_paths,
output_budget: Some(output_budget),
truncated,
next_cursor,
next_commands,
replay_commands,
repair_commands,
// Static pointer to the conformance test and fixture named below.
fixture_coverage: GraphDbFixtureCoverage {
test: "graph_db_evidence_packet_covers_backlog_job_worker_context_and_source_handles"
.to_string(),
fixture: "tests/graph_db_conformance.rs::graph_db_project".to_string(),
assertions: vec![
"backlog id and job packet handle resolve to graph nodes".to_string(),
"worker_context rows are reachable from queued work".to_string(),
"source_handle rows are reachable through bounded shortest paths".to_string(),
"worker_result rows are reachable from completed or blocked work".to_string(),
],
},
warnings,
})
}
/// Prints an evidence report as plain text on stdout.
///
/// Output order: a header line, a row-count summary (with the next cursor when
/// the report is truncated), one `path:` line per evidence path that carries a
/// graph path, then `next:` and `warning:` lines.
fn print_graph_db_evidence_human(report: &GraphDbEvidenceReport) {
    println!(
        "graph-db evidence backend: {} target: {} [{}] packet:{}",
        report.backend, report.target_node.id, report.target_node.kind, report.packet_id
    );

    // Empty unless the report was truncated; "?" stands in for a missing cursor.
    let page_info = report
        .truncated
        .then(|| {
            let cursor = report.next_cursor.as_deref().unwrap_or("?");
            format!(" (truncated, next_cursor: {cursor})")
        })
        .unwrap_or_default();
    println!(
        "evidence: {} worker_context row(s), {} source_handle row(s), {} worker_result row(s), {} semantic row(s), {} path(s){page_info}",
        report.worker_context.len(),
        report.source_handles.len(),
        report.worker_results.len(),
        report.semantic_related.len(),
        report.shortest_paths.len()
    );

    let graph_paths = report
        .shortest_paths
        .iter()
        .filter_map(|entry| entry.path.as_ref());
    for graph_path in graph_paths {
        println!(
            "path: {} hop(s) {}",
            graph_path.hops,
            graph_path.nodes.join(" -> ")
        );
    }

    report
        .next_commands
        .iter()
        .for_each(|command| println!("next: {command}"));
    report
        .warnings
        .iter()
        .for_each(|warning| println!("warning: {warning}"));
}
/// Emits an evidence report in the requested output format.
///
/// With JSON output the report goes through `print_json_or_envelope` together
/// with a one-sentence summary, per-group metrics, the truncation flag and the
/// report's next commands. Otherwise the human-readable printer is used.
///
/// # Errors
/// Returns whatever `print_json_or_envelope` reports; the human path cannot
/// fail.
pub(crate) fn print_graph_db_evidence_report(
    report: &GraphDbEvidenceReport,
    format: OutputFormat,
) -> Result<()> {
    if !format.json_output {
        print_graph_db_evidence_human(report);
        return Ok(());
    }

    let worker_context_rows = report.worker_context.len();
    let source_handle_rows = report.source_handles.len();
    let worker_result_rows = report.worker_results.len();
    let semantic_rows = report.semantic_related.len();
    let path_rows = report.shortest_paths.len();

    // Empty unless the report was truncated; "?" stands in for a missing cursor.
    let page_info = report
        .truncated
        .then(|| {
            let cursor = report.next_cursor.as_deref().unwrap_or("?");
            format!(" (truncated, next_cursor: {cursor})")
        })
        .unwrap_or_default();

    let summary = ToolEnvelopeSummary {
        text: format!(
            "Graph DB evidence for {} returned {} worker context row(s), {} source handle(s), {} worker result row(s), {} semantic row(s), and {} shortest path(s){page_info}",
            report.target,
            worker_context_rows,
            source_handle_rows,
            worker_result_rows,
            semantic_rows,
            path_rows
        ),
        metrics: vec![
            envelope_metric("backend", &report.backend),
            envelope_metric("worker_context", worker_context_rows),
            envelope_metric("source_handles", source_handle_rows),
            envelope_metric("worker_results", worker_result_rows),
            envelope_metric("semantic_related", semantic_rows),
            envelope_metric("paths", path_rows),
        ],
    };
    print_json_or_envelope(
        report,
        &format,
        "graph-db",
        "evidence",
        summary,
        report.truncated,
        report.next_commands.clone(),
    )
}
/// Executes a read-style `graph-db` query against `store` and assembles the
/// resulting `GraphDbReport`.
///
/// The function refuses to read when `freshness.fail_closed` is set. Queries
/// that have a dedicated command path (refresh, status, doctor, drift, compact,
/// backend-eval, evidence, map) are rejected here with an error. All other
/// variants fill the matching report fields: schema, single node/edge, paged
/// edges/nodes, neighborhood (with output budget and ranking), semantic
/// `related` retrieval, or shortest path.
///
/// # Errors
/// Fails on a fail-closed freshness report, on a query variant that must be
/// handled elsewhere, on invalid query options, or on any store error.
pub(crate) fn graph_db_report_from_store(
root: &Path,
scope: Option<&str>,
backend: &str,
query: GraphDbQuery,
store: &impl GraphStore,
freshness: GraphDbFreshnessReport,
warnings: Vec<String>,
) -> Result<GraphDbReport> {
if freshness.fail_closed {
bail!(
"graph database read failed closed for {} backend: {}",
backend,
freshness.diagnostics.join("; ")
);
}
// Start from an empty report; each query arm fills only the fields it owns.
let mut report = GraphDbReport {
root: root.to_string_lossy().to_string(),
scope: scope.map(str::to_string),
backend: backend.to_string(),
// The query is recorded in its Debug form.
query: format!("{query:?}"),
freshness,
readiness: None,
schema: None,
node: None,
edge: None,
nodes: Vec::new(),
edges: Vec::new(),
ranked_neighbors: Vec::new(),
semantic_related: Vec::new(),
neighborhood_ranking_gate: None,
ranked_neighborhood_comparison: None,
knowledge_retrieval: None,
output_budget: None,
path: None,
page: None,
warnings,
};
match query {
// The following variants have dedicated command paths and must never reach
// this generic reader.
GraphDbQuery::Refresh => {
bail!("graph-db refresh must be handled by the refresh command path");
}
GraphDbQuery::Status => {
bail!("graph-db status must be handled by the status command path");
}
GraphDbQuery::Doctor => {
bail!("graph-db doctor must be handled by the doctor command path");
}
GraphDbQuery::Drift => {
bail!("graph-db drift must be handled by the drift command path");
}
GraphDbQuery::Compact { .. } => {
bail!("graph-db compact must be handled by the compact command path");
}
GraphDbQuery::BackendEval { .. } => {
bail!("graph-db backend-eval must be handled by the benchmark command path");
}
GraphDbQuery::Evidence { .. } => {
bail!("graph-db evidence must be handled by the evidence command path");
}
// Semantic retrieval: resolve the phrase to seed nodes, expand a neighborhood
// around them, then apply the output budget.
GraphDbQuery::Related {
query,
kind,
depth,
seed_limit,
limit,
} => {
let semantic =
semantic_related_report_from_store(root, scope, &query, seed_limit, kind, store)?;
let SemanticRelatedReport {
items,
warnings: semantic_warnings,
..
} = semantic;
// Readiness receives the seed count, or `None` when no seed matched.
let readiness = graph_db_semantic_readiness(
root,
scope,
(!items.is_empty()).then_some(items.len()),
);
report.warnings.extend(semantic_warnings);
let seed_ids = items
.iter()
.map(|item| item.handle.clone())
.collect::<Vec<_>>();
let semantic_scores = items
.iter()
.map(|item| (item.handle.clone(), item.score))
.collect::<BTreeMap<_, _>>();
let subgraph = graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit)?;
let seed_count = seed_ids.len();
// `subgraph` is taken apart field by field from here on; `subgraph.truncated`
// is still read further down.
let mut diagnostics = subgraph.diagnostics;
let budgeted = graph_db_apply_output_budget(
&seed_ids,
&semantic_scores,
subgraph.nodes,
subgraph.edges,
Some(limit),
);
let budget_report = budgeted.report;
let dropped_by_budget = !budget_report.dropped_by_budget.is_empty();
diagnostics.extend(budget_report.diagnostics.clone());
diagnostics.extend(readiness.diagnostics.clone());
report.readiness = Some(readiness);
report.semantic_related = items;
// Ranked neighbors are computed around the first seed only.
if let Some(seed_id) = seed_ids.first() {
let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(Some(limit));
report.ranked_neighbors = graph_db_ranked_neighbors(
seed_id,
&budgeted.nodes,
&budgeted.edges,
ranked_neighbor_cap,
);
report.neighborhood_ranking_gate =
Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
}
report.nodes = budgeted.nodes.into_iter().map(Into::into).collect();
report.edges = budgeted.edges.into_iter().map(Into::into).collect();
report.knowledge_retrieval = Some(GraphDbKnowledgeRetrieval {
mode: "semantic_seeded_neighborhood".to_string(),
query,
seed_kind: semantic_related_kind_name(kind).to_string(),
seed_limit,
seed_count,
depth,
limit,
node_count: report.nodes.len(),
edge_count: report.edges.len(),
// Truncated when either the traversal or the budget cut rows.
truncated: subgraph.truncated || dropped_by_budget,
traversal: "incident_plus_outgoing_edges".to_string(),
freshness_boundary:
"semantic rows must come from refreshed summary or tsift-memory graph records"
.to_string(),
privacy_boundary:
"GraphStore stores substrate records only; user consent, deletion policy, persona policy, and LiveKit session state stay in the avatar/agent adapter"
.to_string(),
diagnostics,
});
report.output_budget = Some(budget_report);
}
GraphDbQuery::Schema => {
report.schema = Some(graph_db_schema());
}
// Single-record lookups; a missing record leaves the field as `None`.
GraphDbQuery::Node { id } => {
report.node = store.node(&id)?.map(Into::into);
}
GraphDbQuery::Edge { id } => {
report.edge = store.edge(&id)?.map(Into::into);
}
// Paged listings: validate the options, query the store, attach page info.
GraphDbQuery::Edges {
edge_kind,
cursor,
limit,
property_filters,
} => {
let options = graph_db_query_options(cursor, limit, &property_filters)?;
let paged = store.paged_edges(
edge_kind.as_deref(),
graph_db_query_options_for_store(&options),
)?;
report.edges = paged.edges.into_iter().map(Into::into).collect();
report.page = Some(graph_db_page_report_from_store(
paged.page,
options.property_filters,
));
}
GraphDbQuery::Incident {
id,
edge_kind,
cursor,
limit,
property_filters,
} => {
let options = graph_db_query_options(cursor, limit, &property_filters)?;
let paged = store.paged_incident_edges(
&id,
edge_kind.as_deref(),
graph_db_query_options_for_store(&options),
)?;
report.edges = paged.edges.into_iter().map(Into::into).collect();
report.page = Some(graph_db_page_report_from_store(
paged.page,
options.property_filters,
));
}
GraphDbQuery::Kind {
kind,
cursor,
limit,
property_filters,
} => {
let options = graph_db_query_options(cursor, limit, &property_filters)?;
let paged =
store.paged_nodes_by_kind(&kind, graph_db_query_options_for_store(&options))?;
report.nodes = paged.nodes.into_iter().map(Into::into).collect();
report.edges = paged.edges.into_iter().map(Into::into).collect();
report.page = Some(graph_db_page_report_from_store(
paged.page,
options.property_filters,
));
}
GraphDbQuery::Neighborhood {
id,
depth,
edge_kind,
cursor,
limit,
property_filters,
} => {
let options = graph_db_query_options(cursor, limit, &property_filters)?;
// NOTE(review): when the store returns `None` the report is left with empty
// nodes/edges and no page or warning; confirm that is the intended outcome.
if let Some(paged) = store.paged_neighborhood(
&id,
depth,
edge_kind.as_deref(),
graph_db_query_options_for_store(&options),
)? {
// No semantic scores apply to a plain neighborhood, hence the empty map.
let budgeted = graph_db_apply_output_budget(
std::slice::from_ref(&id),
&BTreeMap::new(),
paged.nodes,
paged.edges,
options.limit,
);
let budget_report = budgeted.report;
let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(options.limit);
let ranked_neighbors = graph_db_ranked_neighbors(
&id,
&budgeted.nodes,
&budgeted.edges,
ranked_neighbor_cap,
);
// Computed before the budgeted nodes/edges are moved into the report.
let comparison = graph_db_ranked_neighborhood_comparison(
&id,
depth,
edge_kind.as_deref(),
options.limit,
&budgeted.nodes,
&budgeted.edges,
store,
)?;
report.nodes = budgeted.nodes.into_iter().map(Into::into).collect();
report.edges = budgeted.edges.into_iter().map(Into::into).collect();
report.ranked_neighbors = ranked_neighbors;
report.neighborhood_ranking_gate =
Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
// Page counts are overwritten with the post-budget sizes, and a budget drop
// also marks the page as truncated.
let mut page =
graph_db_page_report_from_store(paged.page, options.property_filters);
page.returned_nodes = report.nodes.len();
page.returned_edges = report.edges.len();
page.truncated |= !budget_report.dropped_by_budget.is_empty();
page.diagnostics.extend(budget_report.diagnostics.clone());
report.page = Some(page);
report.output_budget = Some(budget_report);
if let Some(comparison) = comparison {
report.ranked_neighborhood_comparison = Some(comparison);
}
}
}
GraphDbQuery::Path {
from,
to,
edge_kind,
max_hops,
} => {
report.path =
store.shortest_path_with_max_hops(&from, &to, edge_kind.as_deref(), max_hops)?;
// Only a hop-bounded search that found nothing produces a warning.
if let Some(max_hops) = max_hops
&& report.path.is_none()
{
report.warnings.push(format!(
"no directed path found within --max-hops {}",
max_hops
));
}
}
GraphDbQuery::Map { .. } => {
bail!("graph-db map must be handled by the map command path");
}
}
Ok(report)
}
/// Renders a graph-db report as plain text on stdout.
///
/// When `compact` is true a single summary line is printed (backend, query,
/// node and edge totals, freshness status) and the function returns.
/// Otherwise every populated section of the report is printed in a fixed
/// order: backend, freshness, readiness, schema, the single node/edge slots,
/// knowledge retrieval, semantic seeds, the node and edge lists, ranked
/// neighbors, the ranking gate, the path, paging details, and warnings.
pub(crate) fn print_graph_db_human(report: &GraphDbReport, compact: bool) {
    if compact {
        // The singular `node` / `edge` slots count toward the totals.
        let node_total = report.nodes.len() + usize::from(report.node.is_some());
        let edge_total = report.edges.len() + usize::from(report.edge.is_some());
        println!(
            "graph-db backend:{} query:{} nodes:{} edges:{} freshness:{}",
            report.backend, report.query, node_total, edge_total, report.freshness.status
        );
        return;
    }

    println!("graph-db backend: {}", report.backend);
    println!("freshness: {}", report.freshness.status);

    if let Some(readiness) = report.readiness.as_ref() {
        println!(
            "readiness: {} reason: {} fail_closed: {}",
            readiness.status, readiness.reason, readiness.fail_closed
        );
        readiness
            .diagnostics
            .iter()
            .for_each(|diagnostic| println!("readiness diagnostic: {diagnostic}"));
        readiness
            .next_commands
            .iter()
            .for_each(|command| println!("readiness next: {command}"));
    }

    if let Some(schema) = report.schema.as_ref() {
        let node_field_count = schema.node_fields.len();
        let edge_field_count = schema.edge_fields.len();
        let operation_count = schema.operations.len();
        println!(
            "schema: {} node fields, {} edge fields, {} operations",
            node_field_count, edge_field_count, operation_count
        );
    }

    if let Some(single_node) = report.node.as_ref() {
        println!(
            "node: {} [{}] {}",
            single_node.id, single_node.kind, single_node.label
        );
    }

    if let Some(single_edge) = report.edge.as_ref() {
        // The edge key is derived from the substrate form of the edge.
        let substrate_edge: SubstrateGraphEdge = single_edge.into();
        println!(
            "edge: {} {} -{}-> {}",
            graph_db_edge_key(&substrate_edge),
            single_edge.from_id,
            single_edge.kind,
            single_edge.to_id
        );
    }

    if let Some(knowledge) = report.knowledge_retrieval.as_ref() {
        println!(
            "knowledge_retrieval: {} seeds:{} depth:{} traversal:{}",
            knowledge.mode, knowledge.seed_count, knowledge.depth, knowledge.traversal
        );
    }

    for seed in &report.semantic_related {
        println!(
            "semantic_seed: {:.3} [{}] {} ({})",
            seed.score, seed.kind, seed.label, seed.handle
        );
    }

    for listed_node in &report.nodes {
        println!(
            "node: {} [{}] {}",
            listed_node.id, listed_node.kind, listed_node.label
        );
    }

    for listed_edge in &report.edges {
        let substrate_edge: SubstrateGraphEdge = listed_edge.into();
        println!(
            "edge: {} {} -{}-> {}",
            graph_db_edge_key(&substrate_edge),
            listed_edge.from_id,
            listed_edge.kind,
            listed_edge.to_id
        );
    }

    for neighbor in &report.ranked_neighbors {
        // A missing depth is rendered as the literal "unknown".
        let depth_label = neighbor
            .depth
            .map_or_else(|| "unknown".to_string(), |depth| depth.to_string());
        println!(
            "ranked_neighbor: #{} score:{} depth:{} {} [{}] {}",
            neighbor.rank,
            neighbor.score,
            depth_label,
            neighbor.node_id,
            neighbor.kind,
            neighbor.label
        );
    }

    if let Some(gate) = report.neighborhood_ranking_gate.as_ref() {
        println!(
            "neighborhood_ranking_gate: {} default_order:{} ranked_output_default:{}",
            gate.status, gate.default_order, gate.ranked_output_default
        );
    }

    if let Some(path) = report.path.as_ref() {
        let route = path.nodes.join(" -> ");
        println!("path: {} hop(s) {}", path.hops, route);
    }

    if let Some(page) = report.page.as_ref() {
        if let Some(next_cursor) = page.next_cursor.as_ref() {
            println!("next_cursor: {next_cursor}");
        }
        page.diagnostics
            .iter()
            .for_each(|diagnostic| println!("page: {diagnostic}"));
    }

    report
        .warnings
        .iter()
        .for_each(|warning| println!("warning: {warning}"));
}
/// Builds one phase-timing record from a phase name, its duration in
/// microseconds, and a free-form detail string (both strings are copied).
pub(crate) fn graph_db_backend_eval_phase_timing(
    name: &str,
    duration_micros: u128,
    detail: &str,
) -> GraphDbBackendEvalPhaseTiming {
    let name = name.to_owned();
    let detail = detail.to_owned();
    GraphDbBackendEvalPhaseTiming {
        name,
        duration_micros,
        detail,
    }
}
/// Runs `run`, appends a timing record for it to `phases`, and returns the
/// closure's result unchanged.
///
/// The timing record is pushed whether `run` succeeded or failed, so a failing
/// phase still shows up in the profile.
pub(crate) fn graph_db_backend_eval_timed_phase<T>(
    phases: &mut Vec<GraphDbBackendEvalPhaseTiming>,
    name: &str,
    detail: &str,
    run: impl FnOnce() -> Result<T>,
) -> Result<T> {
    let clock = Instant::now();
    let outcome = run();
    let elapsed_micros = clock.elapsed().as_micros();
    let timing = graph_db_backend_eval_phase_timing(name, elapsed_micros, detail);
    phases.push(timing);
    outcome
}
/// Sums the durations of all recorded phases except the one named
/// `conflict_matrix_preparation`, which is excluded from the refresh total.
pub(crate) fn graph_db_backend_eval_refresh_total_micros(
    phases: &[GraphDbBackendEvalPhaseTiming],
) -> u128 {
    let mut total_micros = 0u128;
    for phase in phases {
        if phase.name == "conflict_matrix_preparation" {
            continue;
        }
        total_micros += phase.duration_micros;
    }
    total_micros
}
/// Tries to reuse the existing graph.db projection instead of rebuilding it.
///
/// Returns `Ok(None)` whenever reuse is not possible: no source watermark was
/// supplied, the graph.db file does not exist, it cannot be opened read-only,
/// it has user triggers (or that check fails), the freshness report is
/// fail-closed, or the stored watermark differs from `source_watermark`.
/// On reuse, returns a default `TraversalGraphBuild`, a refresh record with
/// all counters zeroed, and synthetic phase timings describing what was
/// skipped.
///
/// # Errors
/// Propagates errors from the freshness query.
pub(crate) fn graph_db_backend_eval_cached_refresh(
    root: &Path,
    scope: Option<&str>,
    source_watermark: Option<&str>,
) -> Result<
    Option<(
        TraversalGraphBuild,
        SqliteProjectionRefresh,
        Vec<GraphDbBackendEvalPhaseTiming>,
    )>,
> {
    let expected_watermark = match source_watermark {
        Some(watermark) => watermark,
        None => return Ok(None),
    };
    let db_path = graph_substrate_db_path(root, scope);
    if !db_path.exists() {
        return Ok(None);
    }

    // The clock covers opening the store and validating freshness.
    let clock = Instant::now();
    let Ok(store) = SqliteGraphStore::open_read_only_resilient(&db_path) else {
        return Ok(None);
    };
    // A failed trigger check is treated like "triggers present".
    if store.has_user_triggers().unwrap_or(true) {
        return Ok(None);
    }

    let scope_label = scope.unwrap_or("root");
    let freshness = sqlite_graph_freshness(&store, scope_label)?;
    let watermark_matches = freshness.source_watermark.as_deref() == Some(expected_watermark);
    if freshness.fail_closed || !watermark_matches {
        return Ok(None);
    }

    let reuse_timing = graph_db_backend_eval_phase_timing(
        "source_graph_build",
        clock.elapsed().as_micros(),
        "reused current graph.db projection because the source watermark matched; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
    );
    let rows_timing = graph_db_backend_eval_phase_timing(
        "projection_rows",
        0,
        "reused cached provider-neutral projection rows from graph.db",
    );
    let open_timing = graph_db_backend_eval_phase_timing(
        "sqlite_open",
        0,
        "reused existing graph.db projection without opening a write transaction",
    );
    let phases = vec![reuse_timing, rows_timing, open_timing];

    let projection_version = freshness
        .projection_version
        .unwrap_or_else(|| GRAPH_PROJECTION_VERSION.to_string());
    let refresh = SqliteProjectionRefresh {
        scope: scope_label.to_string(),
        projection_version,
        source_watermark: Some(expected_watermark.to_string()),
        tombstoned_nodes: Vec::new(),
        tombstoned_edges: Vec::new(),
        upserted_nodes: 0,
        upserted_edges: 0,
        unchanged_nodes: 0,
        unchanged_edges: 0,
        upserted_properties: 0,
        unchanged_properties: 0,
        deleted_properties: 0,
        deleted_nodes: 0,
        deleted_edges: 0,
        pruned_tombstones: 0,
        file_size_bytes_before: None,
        file_size_bytes_after: None,
        phase_timings: Vec::new(),
    };

    Ok(Some((TraversalGraphBuild::default(), refresh, phases)))
}
/// Reports whether the recorded phases show that the existing graph.db
/// projection was reused: a `source_graph_build` phase whose detail contains
/// the "reused current graph.db projection" marker.
pub(crate) fn graph_db_backend_eval_reused_cached_projection(
    phases: &[GraphDbBackendEvalPhaseTiming],
) -> bool {
    for phase in phases {
        let is_source_build = phase.name == "source_graph_build";
        if is_source_build && phase.detail.contains("reused current graph.db projection") {
            return true;
        }
    }
    false
}
/// Recomputes the traversal source watermark and stores it on the projection
/// in graph.db for the given scope (`"root"` when no scope is given).
///
/// Does nothing when no watermark can be computed.
///
/// # Errors
/// Propagates failures from computing the watermark, opening graph.db, or
/// updating the stored watermark.
pub(crate) fn graph_db_backend_eval_update_source_watermark(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<()> {
    match traversal_source_watermark(root, path_hint, scope, false)? {
        None => Ok(()),
        Some(source_watermark) => {
            let db_path = graph_substrate_db_path(root, scope);
            let mut store = SqliteGraphStore::open(&db_path)?;
            let scope_label = scope.unwrap_or("root");
            store.update_projection_source_watermark(scope_label, Some(source_watermark))?;
            Ok(())
        }
    }
}
/// Refreshes the graph.db projection for `scope` and records per-phase timings.
///
/// If the existing projection already matches the current source watermark it
/// is reused and returned immediately. Otherwise the source graph is built,
/// projected into rows, and written to graph.db, with each step timed. The
/// watermark is recomputed just before persisting; if that fails, the value
/// computed at entry is used, and failing that a content hash of the
/// projection.
///
/// # Errors
/// Propagates failures from watermark computation at entry, the cached-refresh
/// check, the graph build, row projection, opening graph.db, or replacing the
/// projection.
pub(crate) fn graph_db_backend_eval_refresh_with_profile(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<(
    TraversalGraphBuild,
    SqliteProjectionRefresh,
    Vec<GraphDbBackendEvalPhaseTiming>,
)> {
    let initial_watermark = traversal_source_watermark(root, path_hint, scope, false)?;
    let reusable =
        graph_db_backend_eval_cached_refresh(root, scope, initial_watermark.as_deref())?;
    if let Some(cached) = reusable {
        return Ok(cached);
    }

    let mut phases = Vec::new();

    let source_graph_detail = match hinted_markdown_file(root, path_hint) {
        Some(_) => {
            "bounded session projection: index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads; skips global call-edge materialization because full-projection is the complete-call-graph regression guard"
        }
        None => {
            "index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads when summaries are cached"
        }
    };
    let source_graph = graph_db_backend_eval_timed_phase(
        &mut phases,
        "source_graph_build",
        source_graph_detail,
        || build_traversal_graph_source_with_options(root, path_hint, scope, false),
    )?;

    let projection = graph_db_backend_eval_timed_phase(
        &mut phases,
        "projection_rows",
        "provider-neutral GraphStore node/edge row construction before SQLite persistence",
        || traversal_projection_from_graph(root, scope, &source_graph),
    )?;

    let db_path = graph_substrate_db_path(root, scope);
    let mut store = graph_db_backend_eval_timed_phase(
        &mut phases,
        "sqlite_open",
        "open the local SQLite graph.db with WAL and busy-timeout settings",
        || SqliteGraphStore::open(&db_path),
    )?;

    // Recompute after the build; a failure here is deliberately ignored in
    // favour of the fallbacks below.
    let persisted_watermark = traversal_source_watermark(root, path_hint, scope, false)
        .ok()
        .flatten()
        .or(initial_watermark)
        .or_else(|| graph_projection_content_hash(&projection));
    let refresh = store.replace_projection_with_version(
        scope.unwrap_or("root"),
        &projection,
        Some(GRAPH_PROJECTION_VERSION),
        persisted_watermark,
    )?;

    // Fold the store's own phase timings into the overall profile.
    for store_phase in &refresh.phase_timings {
        phases.push(GraphDbBackendEvalPhaseTiming {
            name: store_phase.name.clone(),
            duration_micros: store_phase.duration_micros,
            detail: store_phase.detail.clone(),
        });
    }

    Ok((source_graph, refresh, phases))
}
/// Returns the backend-eval disk cache directory beneath `root`.
fn graph_db_backend_eval_disk_cache_dir(root: &Path) -> PathBuf {
    let mut cache_dir = root.to_path_buf();
    cache_dir.push(".tsift/backend-eval-cache");
    cache_dir
}
/// Returns the path of the gzip-compressed cache entry for `key` within the
/// `kind` subdirectory of the backend-eval cache: `<cache>/<kind>/<key>.json.gz`.
fn graph_db_backend_eval_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
    let file_name = format!("{key}.json.gz");
    let mut entry_path = graph_db_backend_eval_disk_cache_dir(root);
    entry_path.push(kind);
    entry_path.push(file_name);
    entry_path
}
/// Returns the path of the legacy uncompressed cache entry for `key` within
/// the `kind` subdirectory of the backend-eval cache: `<cache>/<kind>/<key>.json`.
fn graph_db_backend_eval_legacy_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
    let file_name = format!("{key}.json");
    let mut entry_path = graph_db_backend_eval_disk_cache_dir(root);
    entry_path.push(kind);
    entry_path.push(file_name);
    entry_path
}
/// Timing breakdown collected while reading one backend-eval disk cache entry.
#[derive(Default, Clone)]
struct GraphDbBackendEvalDiskCacheReadProfile {
/// Microseconds spent reading cache bytes from disk; when the reader falls
/// back to the legacy file, that read time is added on top.
file_read_micros: u128,
/// Microseconds spent gunzipping the compressed entry (stays 0 if the
/// compressed file could not be read).
gzip_decode_micros: u128,
/// Microseconds spent in `serde_json` deserialization.
serde_decode_micros: u128,
/// True when the value came from the legacy uncompressed `.json` file.
legacy: bool,
}
/// Loads a cached value for `key` from the `kind` cache directory.
///
/// The gzip-compressed entry is tried first. If it cannot be read, gunzipped,
/// or deserialized, the legacy uncompressed `.json` entry is tried instead.
/// Returns the value, the on-disk byte count, the decoded JSON byte count
/// (both counts are equal for legacy entries), and a timing profile.
/// Returns `None` when neither entry yields a value.
fn graph_db_backend_eval_read_disk_cache<T: for<'de> Deserialize<'de>>(
    root: &Path,
    kind: &str,
    key: &str,
) -> Option<(T, u64, u64, GraphDbBackendEvalDiskCacheReadProfile)> {
    let mut profile = GraphDbBackendEvalDiskCacheReadProfile::default();

    // Preferred format: gzip-compressed JSON.
    let compressed_path = graph_db_backend_eval_disk_cache_path(root, kind, key);
    let read_clock = Instant::now();
    let compressed_read = fs::read(&compressed_path);
    profile.file_read_micros = read_clock.elapsed().as_micros();
    if let Ok(compressed) = compressed_read {
        let gzip_clock = Instant::now();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let mut json = Vec::new();
        let inflated = decoder.read_to_end(&mut json);
        profile.gzip_decode_micros = gzip_clock.elapsed().as_micros();
        if inflated.is_ok() {
            let serde_clock = Instant::now();
            let parsed = serde_json::from_slice::<T>(&json);
            profile.serde_decode_micros = serde_clock.elapsed().as_micros();
            if let Ok(value) = parsed {
                let disk_bytes = compressed.len() as u64;
                let json_bytes = json.len() as u64;
                return Some((value, disk_bytes, json_bytes, profile));
            }
        }
    }

    // Fallback: legacy uncompressed JSON. Timings accumulate on top of the
    // compressed attempt above.
    let legacy_path = graph_db_backend_eval_legacy_disk_cache_path(root, kind, key);
    let legacy_clock = Instant::now();
    let Ok(raw) = fs::read(legacy_path) else {
        return None;
    };
    profile.file_read_micros = profile
        .file_read_micros
        .saturating_add(legacy_clock.elapsed().as_micros());

    let serde_clock = Instant::now();
    let Ok(value) = serde_json::from_slice::<T>(&raw) else {
        return None;
    };
    profile.serde_decode_micros = profile
        .serde_decode_micros
        .saturating_add(serde_clock.elapsed().as_micros());
    profile.legacy = true;

    let raw_bytes = raw.len() as u64;
    Some((value, raw_bytes, raw_bytes, profile))
}
/// Timing breakdown collected while writing one backend-eval disk cache entry.
#[derive(Default, Clone)]
struct GraphDbBackendEvalDiskCacheWriteProfile {
/// Microseconds spent serializing the value with `serde_json`.
serde_encode_micros: u128,
/// Microseconds spent gzip-compressing the serialized bytes.
gzip_encode_micros: u128,
/// Microseconds spent writing the compressed bytes to disk.
file_write_micros: u128,
}
/// Serializes `value` to JSON, gzip-compresses it, and writes it to the cache
/// entry for `key` in the `kind` cache directory (created if missing).
///
/// Returns the compressed byte count, the uncompressed JSON byte count, and a
/// timing profile. Any failure (directory creation, serialization,
/// compression, or file write) yields `None`; the cache is best-effort.
fn graph_db_backend_eval_write_disk_cache<T: Serialize>(
    root: &Path,
    kind: &str,
    key: &str,
    value: &T,
) -> Option<(u64, u64, GraphDbBackendEvalDiskCacheWriteProfile)> {
    let mut profile = GraphDbBackendEvalDiskCacheWriteProfile::default();
    let entry_path = graph_db_backend_eval_disk_cache_path(root, kind, key);
    let entry_dir = entry_path.parent()?;
    fs::create_dir_all(entry_dir).ok()?;

    let serde_clock = Instant::now();
    let json = serde_json::to_vec(value).ok()?;
    profile.serde_encode_micros = serde_clock.elapsed().as_micros();

    let gzip_clock = Instant::now();
    let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
    encoder.write_all(&json).ok()?;
    let compressed = encoder.finish().ok()?;
    profile.gzip_encode_micros = gzip_clock.elapsed().as_micros();

    let write_clock = Instant::now();
    fs::write(&entry_path, &compressed).ok()?;
    profile.file_write_micros = write_clock.elapsed().as_micros();

    let disk_bytes = compressed.len() as u64;
    let json_bytes = json.len() as u64;
    Some((disk_bytes, json_bytes, profile))
}
/// Deletes stale cache entries from the `kind` cache directory, keeping the
/// entry for `keep_key`.
///
/// Only regular files whose names end in `.json` or `.json.gz` are considered.
/// The compressed entry `<keep_key>.json.gz` is always kept. The legacy entry
/// `<keep_key>.json` is kept only while no compressed entry exists for the
/// same key: the reader falls back to that legacy file, so deleting it right
/// after a legacy cache hit would turn the next lookup into a miss. Once a
/// compressed entry exists for the key, the legacy file is pruned like any
/// other sibling.
///
/// Returns `(files_removed, bytes_removed)`. A missing or unreadable directory
/// yields `(0, 0)`; files that cannot be removed are skipped.
fn graph_db_backend_eval_prune_disk_cache(root: &Path, kind: &str, keep_key: &str) -> (usize, u64) {
    let dir = graph_db_backend_eval_disk_cache_dir(root).join(kind);
    let Ok(entries) = fs::read_dir(&dir) else {
        return (0, 0);
    };
    let keep_name = format!("{keep_key}.json.gz");
    let legacy_keep_name = format!("{keep_key}.json");
    // Spare the legacy entry only when it is the sole entry for the kept key.
    let keep_legacy = !dir.join(&keep_name).is_file();
    let mut pruned_files = 0usize;
    let mut pruned_bytes = 0u64;
    for entry in entries.flatten() {
        let path = entry.path();
        if !path.is_file() {
            continue;
        }
        // Names that are not valid UTF-8 cannot be cache entries we wrote.
        let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
            continue;
        };
        if name == keep_name || (keep_legacy && name == legacy_keep_name) {
            continue;
        }
        let is_backend_eval_cache = name.ends_with(".json") || name.ends_with(".json.gz");
        if !is_backend_eval_cache {
            continue;
        }
        // Size is read before removal; an unreadable size counts as 0 bytes.
        let bytes = entry.metadata().map(|metadata| metadata.len()).unwrap_or(0);
        if fs::remove_file(&path).is_ok() {
            pruned_files += 1;
            pruned_bytes = pruned_bytes.saturating_add(bytes);
        }
    }
    (pruned_files, pruned_bytes)
}
/// Builds one watermark row (path, byte length, content hash) per source file
/// under `source_root`, in path order.
///
/// Files classified as generated artifacts or session markdown are skipped.
///
/// # Errors
/// Fails if the directory walk, any file read, or any content hash fails; the
/// first failure aborts the scan.
fn graph_db_backend_eval_full_projection_raw_watermark_rows(
    root: &Path,
    source_root: &Path,
) -> Result<Vec<GraphDbBackendEvalRawSourceWatermarkRow>> {
    let mut entries = walk::walk_files(source_root)?;
    // Path order keeps the resulting rows independent of walk order.
    entries.sort_by(|a, b| a.path.cmp(&b.path));
    entries
        .into_iter()
        .filter(|entry| {
            !traversal_path_is_generated_artifact(root, source_root, &entry.path)
                && !traversal_path_is_session_markdown(root, source_root, &entry.path)
        })
        .map(|entry| -> Result<GraphDbBackendEvalRawSourceWatermarkRow> {
            let contents = fs::read(&entry.path)
                .with_context(|| format!("reading source input {}", entry.path.display()))?;
            Ok(GraphDbBackendEvalRawSourceWatermarkRow {
                path: traversal_watermark_path(root, &entry.path),
                bytes: contents.len() as u64,
                content_hash: content_hash(&contents)?,
            })
        })
        .collect()
}
/// Computes the source watermark used to key the full-projection cache.
///
/// The watermark is a content hash over an ordered list of parts: projection
/// version, cache version, a fixed watermark kind, the scope, the path hint
/// (always `root` here), then either hashes of the index contents (symbols,
/// call edges, routes) or a hash of the raw source file rows, then a fixed
/// session-markdown marker and the summaries watermark part. The order of the
/// parts is part of the hash input, so it must not change.
///
/// The returned `detail` is a space-separated summary of the individual hashes
/// for diagnostics.
///
/// # Errors
/// Propagates failures from opening or reading the index, scanning raw source
/// files, collecting the summaries part, or hashing.
fn graph_db_backend_eval_full_projection_source_watermark(
root: &Path,
scope: Option<&str>,
) -> Result<GraphDbBackendEvalFullProjectionSourceWatermark> {
// The full projection always uses the project root as its path hint.
let path_hint = root;
let mut detail_parts = Vec::new();
let mut parts = vec![
format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
format!("cache_version:{GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION}"),
"watermark_kind:stable_full_projection_inputs".to_string(),
format!("scope:{}", scope.unwrap_or("root")),
format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
];
let gate = prepare_agent_doc_index_gate(root, path_hint, scope, "full-projection cache key");
// Use the index only when the gate names a database file that exists on disk.
match gate.db_path.as_ref().filter(|db_path| db_path.exists()) {
Some(db_path) => {
let db = index::IndexDb::open_read_only_resilient(db_path)?;
parts.push("index_mode:indexed".to_string());
detail_parts.push("mode=indexed".to_string());
parts.push(format!(
"index_source_root:{}",
traversal_watermark_path(root, &gate.source_root)
));
// Symbols from generated artifacts and session markdown are excluded.
let symbols = db
.all_symbols()?
.into_iter()
.filter(|symbol| {
!traversal_path_is_generated_artifact(
root,
&gate.source_root,
Path::new(&symbol.file),
) && !traversal_path_is_session_markdown(
root,
&gate.source_root,
Path::new(&symbol.file),
)
})
.collect::<Vec<_>>();
let symbols_hash = content_hash(&symbols)?;
detail_parts.push(format!("symbols={symbols_hash}"));
parts.push(format!("index_symbols:{symbols_hash}"));
// Call edges are filtered by the caller's file using the same exclusions.
let edges = db
.all_stored_edges()?
.into_iter()
.filter(|edge| {
!traversal_path_is_generated_artifact(
root,
&gate.source_root,
Path::new(&edge.caller_file),
) && !traversal_path_is_session_markdown(
root,
&gate.source_root,
Path::new(&edge.caller_file),
)
})
.collect::<Vec<_>>();
let edges_hash = content_hash(&edges)?;
detail_parts.push(format!("call_edges={edges_hash}"));
parts.push(format!("index_call_edges:{edges_hash}"));
// Routes are filtered by their file using the same exclusions.
let routes = db
.all_routes()?
.into_iter()
.filter(|route| {
!traversal_path_is_generated_artifact(
root,
&gate.source_root,
Path::new(&route.file),
) && !traversal_path_is_session_markdown(
root,
&gate.source_root,
Path::new(&route.file),
)
})
.collect::<Vec<_>>();
let routes_hash = content_hash(&routes)?;
detail_parts.push(format!("routes={routes_hash}"));
parts.push(format!("index_routes:{routes_hash}"));
}
None => {
// No usable index database: hash the raw source files instead.
parts.push("index_mode:raw_fallback".to_string());
detail_parts.push("mode=raw_fallback".to_string());
parts.push(format!(
"raw_source_root:{}",
traversal_watermark_path(root, &gate.source_root)
));
let raw_rows =
graph_db_backend_eval_full_projection_raw_watermark_rows(root, &gate.source_root)?;
let raw_hash = content_hash(&raw_rows)?;
detail_parts.push(format!("raw_source_files={raw_hash}"));
parts.push(format!("raw_source_files:{raw_hash}"));
}
}
// Session markdown contributes only this fixed marker, not file contents.
parts.push("agent_doc_session_markdown:bounded_real_dataset_only".to_string());
detail_parts.push("session_markdown=bounded_real_dataset_only".to_string());
// Remember where the summaries parts begin so they can be hashed separately
// for the diagnostic detail string.
let summaries_start = parts.len();
push_traversal_summaries_watermark_part(root, &mut parts)?;
let summaries_hash = content_hash(&parts[summaries_start..].to_vec())?;
detail_parts.push(format!("summaries={summaries_hash}"));
// The watermark itself is the hash of every part, in order.
let value = content_hash(&parts)?;
detail_parts.push(format!("watermark={value}"));
Ok(GraphDbBackendEvalFullProjectionSourceWatermark {
value,
detail: detail_parts.join(" "),
})
}
fn graph_db_backend_eval_full_projection_cache_key(
root: &Path,
scope: Option<&str>,
) -> Result<(String, String, String)> {
let source_watermark = graph_db_backend_eval_full_projection_source_watermark(root, scope)?;
let key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
root,
scope,
&source_watermark.value,
)?;
Ok((source_watermark.value, key, source_watermark.detail))
}
/// Derives the full-projection cache key by hashing the cache version, the
/// displayed root path, the scope (`"root"` when absent), and the source
/// watermark.
///
/// # Errors
/// Propagates failures from `content_hash`.
fn graph_db_backend_eval_full_projection_cache_key_for_watermark(
    root: &Path,
    scope: Option<&str>,
    source_watermark: &str,
) -> Result<String> {
    let root_display = root.display().to_string();
    let scope_label = scope.unwrap_or("root");
    let key_material = serde_json::json!({
        "version": GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION,
        "root": root_display,
        "scope": scope_label,
        "source_watermark": source_watermark,
    });
    content_hash(&key_material)
}
/// Produces the full-project projection, reusing the on-disk cache when its
/// entry matches the current source watermark, and records per-phase timings.
///
/// On a cache hit the cached projection and warnings are returned, together
/// with timings for the lookup, file read, gunzip, deserialize, and prune
/// steps plus zero-duration placeholders for the skipped build phases.
///
/// On a miss the source graph is built with the project root as path hint and
/// projected into rows. The watermark is then recomputed (falling back to the
/// value computed at entry if that fails) and the result is written to the
/// cache under the key for that watermark. A failed cache write is not an
/// error: its timing phases are omitted and the byte counts in the stats stay
/// at their defaults. Sibling cache files are pruned in both cases.
///
/// Returns `(projection, warnings, phase_timings, cache_stats)`.
///
/// # Errors
/// Propagates failures from cache-key computation, the source graph build,
/// row projection, or write-key hashing.
pub(crate) fn graph_db_backend_eval_full_projection_with_profile(
    root: &Path,
    scope: Option<&str>,
) -> Result<(
    GraphProjection,
    Vec<String>,
    Vec<GraphDbBackendEvalPhaseTiming>,
    GraphDbBackendEvalFullProjectionCacheStats,
)> {
    let (source_watermark, key, source_watermark_detail) =
        graph_db_backend_eval_full_projection_cache_key(root, scope)?;
    let lookup_started = Instant::now();
    // A cache entry only counts as a hit when its version, key, and watermark
    // all agree with the current values.
    if let Some((cached, disk_bytes, json_bytes, read_profile)) =
        graph_db_backend_eval_read_disk_cache::<GraphDbBackendEvalFullProjectionCache>(
            root,
            "full_projection",
            &key,
        )
        && cached.version == GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION
        && cached.key == key
        && cached.source_watermark == source_watermark
    {
        // Report the lookup overhead separately from the read/decode phases,
        // which are listed as their own timings below.
        let lookup_overhead_micros = lookup_started
            .elapsed()
            .as_micros()
            .saturating_sub(read_profile.file_read_micros)
            .saturating_sub(read_profile.gzip_decode_micros)
            .saturating_sub(read_profile.serde_decode_micros);
        let prune_started = Instant::now();
        let (pruned_files, pruned_bytes) =
            graph_db_backend_eval_prune_disk_cache(root, "full_projection", &key);
        let prune_micros = prune_started.elapsed().as_micros();
        let cache_stats = GraphDbBackendEvalFullProjectionCacheStats {
            hit: true,
            disk_bytes,
            json_bytes,
            pruned_files,
            pruned_bytes,
        };
        let read_detail_suffix = if read_profile.legacy {
            " (legacy uncompressed cache path)"
        } else {
            ""
        };
        return Ok((
            cached.projection,
            cached.warnings,
            vec![
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache_lookup",
                    lookup_overhead_micros,
                    &format!(
                        "watermark/version check overhead around the cache load phases; {source_watermark_detail}"
                    ),
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.file_read",
                    read_profile.file_read_micros,
                    &format!(
                        "read compressed cache bytes from .tsift/backend-eval-cache{read_detail_suffix}"
                    ),
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.gzip_decode",
                    read_profile.gzip_decode_micros,
                    "gunzip the compressed projection cache bytes",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.serde_decode",
                    read_profile.serde_decode_micros,
                    "serde_json deserialize the decoded projection cache payload",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.prune",
                    prune_micros,
                    "prune sibling cache files older than the current key",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.source_graph_build",
                    0,
                    "reused cached full-project source graph; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.projection_rows",
                    0,
                    "reused cached provider-neutral full-project projection rows",
                ),
            ],
            cache_stats,
        ));
    }
    let mut cache_stats = GraphDbBackendEvalFullProjectionCacheStats::default();
    let mut phases = vec![graph_db_backend_eval_phase_timing(
        "full_projection.cache_lookup",
        lookup_started.elapsed().as_micros(),
        &format!(
            "no full-project projection cache entry matched the source watermark; {source_watermark_detail}"
        ),
    )];
    let full_source = graph_db_backend_eval_timed_phase(
        &mut phases,
        "full_projection.source_graph_build",
        "opt-in full-project source graph build; uses the project root as the path hint so bounded session projections cannot hide full-graph regressions",
        || build_traversal_graph_source_with_options(root, root, scope, false),
    )?;
    let projection = graph_db_backend_eval_timed_phase(
        &mut phases,
        "full_projection.projection_rows",
        "provider-neutral row construction for the opt-in full-project projection dataset",
        || traversal_projection_from_graph(root, scope, &full_source),
    )?;
    let warnings = full_source.warnings;
    // Recompute the watermark after the build; if that fails, reuse the value
    // computed at entry.
    let refreshed_source_watermark =
        graph_db_backend_eval_full_projection_source_watermark(root, scope)
            .map(|watermark| watermark.value)
            .unwrap_or_else(|_| source_watermark.clone());
    let write_key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
        root,
        scope,
        &refreshed_source_watermark,
    )?;
    // The projection and warnings are moved into the cache payload rather than
    // cloned; they are moved back out for the return value once the payload
    // has been serialized.
    let cache = GraphDbBackendEvalFullProjectionCache {
        version: GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION.to_string(),
        key: write_key.clone(),
        source_watermark: refreshed_source_watermark,
        projection,
        warnings,
    };
    if let Some((disk_bytes, json_bytes, write_profile)) =
        graph_db_backend_eval_write_disk_cache(root, "full_projection", &write_key, &cache)
    {
        cache_stats.disk_bytes = disk_bytes;
        cache_stats.json_bytes = json_bytes;
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.serde_encode",
            write_profile.serde_encode_micros,
            "serde_json serialize the projection cache payload before compression",
        ));
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.gzip_encode",
            write_profile.gzip_encode_micros,
            "gzip-compress the serialized projection cache payload",
        ));
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.file_write",
            write_profile.file_write_micros,
            "write the compressed projection cache bytes to .tsift/backend-eval-cache",
        ));
    }
    let prune_started = Instant::now();
    let (pruned_files, pruned_bytes) =
        graph_db_backend_eval_prune_disk_cache(root, "full_projection", &write_key);
    phases.push(graph_db_backend_eval_phase_timing(
        "full_projection.cache.prune",
        prune_started.elapsed().as_micros(),
        "prune sibling cache files older than the current key",
    ));
    cache_stats.pruned_files = pruned_files;
    cache_stats.pruned_bytes = pruned_bytes;
    Ok((cache.projection, cache.warnings, phases, cache_stats))
}
/// Runs one backend-eval probe and converts its outcome into an operation
/// record plus an optional parity signature.
///
/// A successful probe yields status `"ok"`, `supported: true`, its row count,
/// and a signature carrying the probe's JSON value. A failing probe yields
/// status `"error"`, `supported: false`, the error rendered with its full
/// context chain, and no signature. The elapsed time is recorded either way.
fn graph_db_backend_eval_timed(
    name: &str,
    run: impl FnOnce() -> Result<(Option<usize>, serde_json::Value)>,
) -> (
    GraphDbBackendEvalOperation,
    Option<GraphDbBackendEvalSignature>,
) {
    let clock = Instant::now();
    let outcome = run();
    let duration_micros = clock.elapsed().as_micros();

    let (status, rows, error, signature) = match outcome {
        Ok((rows, value)) => {
            let signature = GraphDbBackendEvalSignature {
                operation: name.to_string(),
                value,
            };
            ("ok", rows, None, Some(signature))
        }
        Err(err) => ("error", None, Some(format!("{err:#}")), None),
    };

    let operation = GraphDbBackendEvalOperation {
        name: name.to_string(),
        supported: error.is_none(),
        status: status.to_string(),
        duration_micros,
        rows,
        error,
    };
    (operation, signature)
}
fn graph_db_backend_eval_parity(
sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
candidate_signatures: &[GraphDbBackendEvalSignature],
) -> GraphDbBackendEvalParity {
let Some(sqlite_signatures) = sqlite_signatures else {
return GraphDbBackendEvalParity {
matches_sqlite: true,
diagnostics: Vec::new(),
};
};
let sqlite = sqlite_signatures
.iter()
.map(|signature| (signature.operation.as_str(), &signature.value))
.collect::<BTreeMap<_, _>>();
let candidate = candidate_signatures
.iter()
.map(|signature| (signature.operation.as_str(), &signature.value))
.collect::<BTreeMap<_, _>>();
let mut diagnostics = Vec::new();
for (operation, sqlite_value) in sqlite {
match candidate.get(operation) {
Some(candidate_value) if *candidate_value == sqlite_value => {}
Some(_) => diagnostics.push(format!("{operation} output differed from SQLite")),
None => diagnostics.push(format!(
"{operation} did not complete for candidate backend"
)),
}
}
GraphDbBackendEvalParity {
matches_sqlite: diagnostics.is_empty(),
diagnostics,
}
}
/// Chooses the targets for a backend-eval run.
///
/// Requested targets that survive normalization are returned as-is. When none
/// do, the first node of kind `backlog` (or, failing that, `job_packet`)
/// supplies a single default target: its `ref_id` property if present,
/// otherwise its node id. With no such node the result is empty.
///
/// # Errors
/// Propagates store errors from the kind lookups.
pub(crate) fn graph_db_backend_eval_targets(
    store: &impl GraphStore,
    requested: &[String],
) -> Result<Vec<String>> {
    let explicit = requested
        .iter()
        .filter_map(|target| normalize_conflict_target(target))
        .collect::<Vec<_>>();
    if !explicit.is_empty() {
        return Ok(explicit);
    }

    for kind in ["backlog", "job_packet"] {
        let candidates = store.nodes_by_kind(kind)?;
        let Some(first) = candidates.first() else {
            continue;
        };
        let fallback = first.properties.get("ref_id").unwrap_or(&first.id).clone();
        return Ok(vec![fallback]);
    }
    Ok(Vec::new())
}
/// Picks the `(from, to, max_hops)` triple for the backend-eval path probe.
///
/// When the synthetic nodes `gsym-synthetic-0000` and
/// `gsym-synthetic-<max_hops, zero-padded to 4>` both exist, the probe runs
/// between them with `max_hops`, unless the first synthetic node has more
/// than one outgoing edge, in which case its first outgoing edge is probed
/// with the direct-path hop count. Without the synthetic nodes, any sampled
/// edge is probed with the direct-path hop count; `None` if the store has no
/// edge to sample.
///
/// # Errors
/// Propagates store errors.
fn graph_db_backend_eval_path_targets(
    store: &impl GraphStore,
    max_hops: usize,
) -> Result<Option<(String, String, usize)>> {
    let synthetic_from = "gsym-synthetic-0000";
    let synthetic_to = format!("gsym-synthetic-{max_hops:04}");

    let has_synthetic_endpoints =
        store.node(synthetic_from)?.is_some() && store.node(&synthetic_to)?.is_some();
    if !has_synthetic_endpoints {
        let sampled = store.sample_edge(None)?;
        return Ok(sampled.map(|edge| {
            (
                edge.from_id,
                edge.to_id,
                GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
            )
        }));
    }

    let outgoing = store.outgoing_edges(synthetic_from, None)?;
    let probe = match outgoing.first() {
        Some(edge) if outgoing.len() > 1 => (
            edge.from_id.clone(),
            edge.to_id.clone(),
            GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
        ),
        _ => (synthetic_from.to_string(), synthetic_to, max_hops),
    };
    Ok(Some(probe))
}
/// Runs the timed shortest-path probe for one configured hop tier.
///
/// The operation is named `path_max_hops` for the default tier and
/// `path_max_hops_<n>` otherwise. The signature records the endpoints, the
/// configured and effective hop limits, the resulting path (if any), and a
/// warning: tiers above the default are flagged as measured-only, and a probe
/// that ran at the configured limit without finding a route is flagged as
/// truncated.
fn graph_db_backend_eval_path_operation<S: GraphStore>(
    store: &S,
    configured_max_hops: usize,
) -> (
    GraphDbBackendEvalOperation,
    Option<GraphDbBackendEvalSignature>,
) {
    let is_default_tier = configured_max_hops == GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS;
    let operation_name = if is_default_tier {
        String::from("path_max_hops")
    } else {
        format!("path_max_hops_{configured_max_hops}")
    };

    graph_db_backend_eval_timed(&operation_name, || {
        let (from, to, effective_max_hops) =
            graph_db_backend_eval_path_targets(store, configured_max_hops)?
                .context("backend-eval path probe requires at least one traversable edge")?;
        let path = store.shortest_path_with_max_hops(&from, &to, None, Some(effective_max_hops))?;
        let found = path.is_some();

        let warning = if configured_max_hops > GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
            Some(format!(
                "{configured_max_hops}-hop tier is measured only; keep user-facing defaults at {} until repeated samples and SQLite query-plan checks pass",
                GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS
            ))
        } else if !found && effective_max_hops == configured_max_hops {
            Some(format!(
                "path probe truncated at {configured_max_hops} hops before a route was found"
            ))
        } else {
            None
        };

        let row_count = path.as_ref().map(|route| route.nodes.len());
        let signature = serde_json::json!({
            "from": from,
            "to": to,
            "configured_max_hops": configured_max_hops,
            "effective_max_hops": effective_max_hops,
            "hops": path.as_ref().map(|route| route.hops),
            "nodes": path.as_ref().map(|route| &route.nodes),
            "found": found,
            "warning": warning,
        });
        Ok((row_count, signature))
    })
}
fn graph_db_backend_eval_neighborhood_operation<S: GraphStore>(
store: &S,
depth: usize,
limit: usize,
) -> (
GraphDbBackendEvalOperation,
Option<GraphDbBackendEvalSignature>,
) {
graph_db_backend_eval_timed("neighborhood", || {
let edge = match store.sample_edge(Some("calls"))? {
Some(edge) => edge,
None => store.sample_edge(None)?.context(
"backend-eval neighborhood probe requires at least one traversable edge",
)?,
};
let page = store
.paged_neighborhood(
&edge.from_id,
depth,
Some(&edge.kind),
GraphQueryOptions {
limit: Some(limit.max(1)),
..GraphQueryOptions::default()
},
)?
.with_context(|| {
format!(
"backend-eval neighborhood target not found: {}",
edge.from_id
)
})?;
Ok((
Some(page.nodes.len() + page.edges.len()),
serde_json::json!({
"center": edge.from_id,
"kind": edge.kind,
"depth": depth,
"limit": limit.max(1),
"node_ids": page.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
"edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"truncated": page.page.truncated,
}),
))
})
}
fn graph_db_backend_eval_related_operation<S: GraphStore>(
root: &Path,
scope: Option<&str>,
store: &S,
depth: usize,
limit: usize,
) -> (
GraphDbBackendEvalOperation,
Option<GraphDbBackendEvalSignature>,
) {
graph_db_backend_eval_timed("related", || {
let query = "backend evaluation";
let semantic = semantic_related_report_from_store(
root,
scope,
query,
3,
SemanticRelatedKind::All,
store,
)?;
let seed_ids = semantic
.items
.iter()
.map(|item| item.handle.clone())
.collect::<Vec<_>>();
let subgraph =
graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit.max(1))?;
Ok((
Some(subgraph.nodes.len() + subgraph.edges.len()),
serde_json::json!({
"query": query,
"seed_ids": seed_ids,
"node_ids": subgraph.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
"edge_ids": subgraph.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"truncated": subgraph.truncated,
"warnings": semantic.warnings,
"diagnostics": subgraph.diagnostics,
}),
))
})
}
/// Reduces an evidence report to the JSON signature used for backend parity
/// checks: the target, the target node's id and kind, the node ids of each
/// evidence group, and the number of shortest paths.
fn graph_db_backend_eval_evidence_signature(report: &GraphDbEvidenceReport) -> serde_json::Value {
    let worker_context = report
        .worker_context
        .iter()
        .map(|node| &node.id)
        .collect::<Vec<_>>();
    let source_handles = report
        .source_handles
        .iter()
        .map(|node| &node.id)
        .collect::<Vec<_>>();
    let worker_results = report
        .worker_results
        .iter()
        .map(|node| &node.id)
        .collect::<Vec<_>>();
    let semantic_related = report
        .semantic_related
        .iter()
        .map(|node| &node.id)
        .collect::<Vec<_>>();
    let path_count = report.shortest_paths.len();

    serde_json::json!({
        "target": report.target,
        "target_node_id": report.target_node.id,
        "target_kind": report.target_node.kind,
        "worker_context": worker_context,
        "source_handles": source_handles,
        "worker_results": worker_results,
        "semantic_related": semantic_related,
        "path_count": path_count,
    })
}
/// Builds the parity signature for target resolution: one JSON object per
/// resolved target holding the requested target and the resolved node's id,
/// kind, and label, in input order.
fn graph_db_backend_eval_target_resolution_signature(
    resolved: &[(String, SubstrateGraphNode)],
) -> serde_json::Value {
    let mut targets = Vec::with_capacity(resolved.len());
    for (target, node) in resolved {
        targets.push(serde_json::json!({
            "target": target,
            "target_node_id": node.id,
            "target_kind": node.kind,
            "target_label": node.label,
        }));
    }
    serde_json::json!({ "targets": targets })
}
/// Reduces a conflict-matrix report to the JSON signature used for backend
/// parity checks: the report-level flags, the fail-closed target names, a
/// summary of every candidate, and every conflicting pair with its risk label.
fn graph_db_backend_eval_conflict_signature(report: &ConflictMatrixReport) -> serde_json::Value {
    let per_target_fail_closed = report
        .per_target_fail_closed
        .iter()
        .map(|target| &target.target)
        .collect::<Vec<_>>();

    let candidates = report
        .candidates
        .iter()
        .map(|candidate| {
            let source_handles = candidate
                .source_handles
                .iter()
                .map(|handle| &handle.handle)
                .collect::<Vec<_>>();
            serde_json::json!({
                "target": candidate.target,
                "risk": conflict_risk_label(candidate.risk),
                "owned_files": candidate.owned_files,
                "owned_symbols": candidate.owned_symbols,
                "source_handles": source_handles,
                "previously_completed": candidate.previously_completed,
                "parallel_safe": candidate.parallel_safe,
            })
        })
        .collect::<Vec<_>>();

    let conflicts = report
        .conflicts
        .iter()
        .map(|pair| {
            serde_json::json!({
                "left": pair.left,
                "right": pair.right,
                "risk": conflict_risk_label(pair.risk),
            })
        })
        .collect::<Vec<_>>();

    serde_json::json!({
        "targets": report.targets,
        "can_parallel": report.can_parallel,
        "fail_closed": report.fail_closed,
        "cross_target_parallel_safe": report.cross_target_parallel_safe,
        "per_target_fail_closed": per_target_fail_closed,
        "candidates": candidates,
        "conflicts": conflicts,
    })
}
fn graph_db_backend_eval_dispatch_signature(report: &DispatchTraceReport) -> serde_json::Value {
serde_json::json!({
"targets": report.targets,
"node_ids": report.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
"edge_keys": report.edges.iter().map(|e| graph_db_edge_key(&SubstrateGraphEdge::from(e))).collect::<Vec<_>>(),
"evidence_packet_ids": report.evidence_packet_ids,
"worker_prompt_targets": report.worker_prompt_packets.iter().map(|packet| &packet.target).collect::<Vec<_>>(),
"truncated": report.truncated,
})
}
/// Picks the edge (and optional property filter) that drives the edge-scan benchmark.
///
/// Prefers an edge the store can sample together with a property filter; when
/// none is available, falls back to any sampled edge with no filters.
///
/// # Errors
///
/// Propagates store errors, and fails when the store has no edge to sample.
fn graph_db_backend_eval_edge_scan_probe(
    store: &impl GraphStore,
) -> Result<(SubstrateGraphEdge, Vec<GraphPropertyFilter>)> {
    match store.sample_edge_with_property()? {
        Some((probe_edge, property_filter)) => Ok((probe_edge, vec![property_filter])),
        None => {
            let fallback_edge = store
                .sample_edge(None)?
                .context("backend-eval edge scan requires at least one edge")?;
            Ok((fallback_edge, Vec::new()))
        }
    }
}
/// Runs the backend-eval benchmark sequence against one `GraphStore` and assembles its report.
///
/// The recorded operations are, in order: the caller-supplied `refresh_operation`, `status`,
/// `edge_lookup`, `edge_property_scan`, `incident_edges`, the neighborhood / related / path
/// helper operations, `evidence_target_resolution`, `evidence`, `conflict_matrix`, and
/// `dispatch_trace`. Every step contributes one operation record and extends the signature
/// list with whatever signature the step produced.
///
/// The later steps are chained: `evidence` stashes its resolved targets and evidence packets
/// for `conflict_matrix`, which in turn stashes its report and graph snapshot for
/// `dispatch_trace`. Reordering these steps would change what each of them measures.
///
/// Returns the per-backend report (`total_micros` is the sum of every recorded operation's
/// duration; `parity` comes from `graph_db_backend_eval_parity` over `sqlite_signatures` and
/// this run's signatures) together with the collected signatures.
// The closures below return `Result`, but this function does not: failures are handed to
// `graph_db_backend_eval_timed`, which presumably records them in the operation record
// (confirm in that helper).
#[allow(clippy::too_many_arguments)]
fn graph_db_backend_eval_report_for_store<S: GraphStore>(
backend: &str,
adapter: &str,
read_only: bool,
root: &Path,
path: &Path,
scope: Option<&str>,
targets: &[String],
depth: usize,
limit: usize,
impact_limit: usize,
store: &S,
freshness: GraphDbFreshnessReport,
refresh_operation: GraphDbBackendEvalOperation,
refresh_signature: Option<GraphDbBackendEvalSignature>,
sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
extra_warnings: Vec<String>,
prepared: &ConflictMatrixPreparedInputs,
projection_load: &str,
lock_behavior: &str,
install_portability: &str,
) -> (
GraphDbBackendEvalBackendReport,
Vec<GraphDbBackendEvalSignature>,
) {
// Seed both lists with the refresh step that the caller already measured.
let mut operations = vec![refresh_operation];
let mut signatures = refresh_signature.into_iter().collect::<Vec<_>>();
// status: node/edge counts plus the freshness status.
let (operation, signature) = graph_db_backend_eval_timed("status", || {
let (nodes, edges) = store.graph_counts()?;
Ok((
Some(nodes + edges),
serde_json::json!({
"freshness": freshness.status,
"nodes": nodes,
"edges": edges,
}),
))
});
operations.push(operation);
signatures.extend(signature);
// edge_lookup: sample one edge, then fetch it back by its key.
let (operation, signature) = graph_db_backend_eval_timed("edge_lookup", || {
let edge = store
.sample_edge(None)?
.context("backend-eval edge lookup requires at least one edge")?;
let edge_id = graph_db_edge_key(&edge);
let found = store
.edge(&edge_id)?
.with_context(|| format!("backend-eval edge lookup missed {edge_id}"))?;
Ok((
Some(1),
serde_json::json!({
"edge_id": edge_id,
"from_id": found.from_id,
"to_id": found.to_id,
"kind": found.kind,
}),
))
});
operations.push(operation);
signatures.extend(signature);
// edge_property_scan: page edges of the probe edge's kind, filtered by the probe's
// property filters (if any). `limit.max(1)` keeps the page size at least one.
let (operation, signature) = graph_db_backend_eval_timed("edge_property_scan", || {
let (edge, filters) = graph_db_backend_eval_edge_scan_probe(store)?;
let page = store.paged_edges(
Some(&edge.kind),
GraphQueryOptions {
limit: Some(limit.max(1)),
property_filters: filters.clone(),
..GraphQueryOptions::default()
},
)?;
Ok((
Some(page.edges.len()),
serde_json::json!({
"kind": edge.kind,
"filters": filters.iter().map(|filter| format!("{}={}", filter.key, filter.value)).collect::<Vec<_>>(),
"edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"truncated": page.page.truncated,
}),
))
});
operations.push(operation);
signatures.extend(signature);
// incident_edges: page edges of the sampled edge's kind that touch its `from_id` node.
let (operation, signature) = graph_db_backend_eval_timed("incident_edges", || {
let edge = store
.sample_edge(None)?
.context("backend-eval incident edge scan requires at least one edge")?;
let page = store.paged_incident_edges(
&edge.from_id,
Some(&edge.kind),
GraphQueryOptions {
limit: Some(limit.max(1)),
..GraphQueryOptions::default()
},
)?;
Ok((
Some(page.edges.len()),
serde_json::json!({
"node_id": edge.from_id,
"kind": edge.kind,
"edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"truncated": page.page.truncated,
}),
))
});
operations.push(operation);
signatures.extend(signature);
// Neighborhood, related, and path steps are delegated to dedicated helpers.
let (operation, signature) = graph_db_backend_eval_neighborhood_operation(store, depth, limit);
operations.push(operation);
signatures.extend(signature);
let (operation, signature) =
graph_db_backend_eval_related_operation(root, scope, store, depth, limit);
operations.push(operation);
signatures.extend(signature);
// One path operation for the base hop cap, then one per extended hop tier.
for configured_max_hops in std::iter::once(GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS)
.chain(GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS)
{
let (operation, signature) =
graph_db_backend_eval_path_operation(store, configured_max_hops);
operations.push(operation);
signatures.extend(signature);
}
// evidence_target_resolution: every requested target must resolve to a node.
let (operation, signature) = graph_db_backend_eval_timed("evidence_target_resolution", || {
let resolved = targets
.iter()
.map(|target| {
let node = graph_db_resolve_evidence_target(store, target)?
.with_context(|| format!("backend-eval target not found: {target}"))?;
Ok((target.clone(), node))
})
.collect::<Result<Vec<_>>>()?;
let signature = graph_db_backend_eval_target_resolution_signature(&resolved);
Ok((Some(resolved.len()), signature))
});
operations.push(operation);
signatures.extend(signature);
// Hand-off slots filled by one timed step and drained by a later one.
let mut evidence_for_report = None;
let mut graph_snapshot_for_trace = None;
let (operation, signature) = graph_db_backend_eval_timed("evidence", || {
let resolved_targets =
resolve_conflict_matrix_targets(store, targets, &prepared.context_pack)?;
let evidence = collect_conflict_matrix_evidence_packets(
root,
scope,
backend,
&resolved_targets,
depth,
limit,
store,
freshness.clone(),
)?;
// NOTE(review): the signature below is built from the FIRST evidence entry only, while
// `rows` sums across all entries; evidence for later targets is therefore not part of
// this step's signature. Confirm that is intended.
let report = &evidence
.first()
.context("backend-eval evidence requires at least one target")?
.report;
let rows = evidence
.iter()
.map(|entry| {
entry.report.worker_context.len()
+ entry.report.source_handles.len()
+ entry.report.worker_results.len()
+ entry.report.semantic_related.len()
})
.sum();
let signature = graph_db_backend_eval_evidence_signature(report);
// Stash the results so the conflict_matrix step can reuse them instead of recollecting.
evidence_for_report = Some((resolved_targets, evidence));
Ok((Some(rows), signature))
});
operations.push(operation);
signatures.extend(signature);
let mut conflict_for_trace = None;
let (operation, signature) = graph_db_backend_eval_timed("conflict_matrix", || {
// Reuse the evidence collected above when that step succeeded; otherwise run the full
// graph orchestration from scratch.
let graph_prepared = if let Some((targets, evidence)) = evidence_for_report.take() {
let graph =
conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
let shared_preparation =
conflict_matrix_shared_preparation_summary(&graph, &evidence, "memory_reuse");
ConflictMatrixGraphPreparedInputs {
targets,
graph,
evidence,
shared_preparation,
}
} else {
prepare_conflict_matrix_graph_orchestration(
root,
scope,
backend,
targets,
prepared,
depth,
limit,
store,
freshness.clone(),
)?
};
let report = build_conflict_matrix_report_from_prepared_graph(
root,
path,
scope,
depth,
limit,
impact_limit,
freshness.clone(),
extra_warnings.clone(),
prepared,
&graph_prepared,
)?;
let signature = graph_db_backend_eval_conflict_signature(&report);
let rows = report.candidates.len() + report.conflicts.len();
// Keep the report and graph snapshot for the dispatch_trace step.
conflict_for_trace = Some(report);
graph_snapshot_for_trace = Some(graph_prepared.graph);
Ok((Some(rows), signature))
});
operations.push(operation);
signatures.extend(signature);
// dispatch_trace: needs both hand-offs from conflict_matrix; errors out if either is missing.
let (operation, signature) = graph_db_backend_eval_timed("dispatch_trace", || {
let conflict = conflict_for_trace
.take()
.context("backend-eval dispatch-trace requires a completed conflict-matrix report")?;
let graph = graph_snapshot_for_trace
.take()
.context("backend-eval dispatch-trace requires conflict-matrix graph preparation")?;
let report = build_dispatch_trace_report_from_conflict_snapshot(
root,
scope,
conflict,
graph.nodes,
graph.edges,
depth,
limit,
Vec::new(),
)?;
Ok((
Some(report.nodes.len() + report.edges.len()),
graph_db_backend_eval_dispatch_signature(&report),
))
});
operations.push(operation);
signatures.extend(signature);
// The total covers every recorded operation, including the caller-supplied refresh.
let total_micros = operations
.iter()
.map(|operation| operation.duration_micros)
.sum();
let parity = graph_db_backend_eval_parity(sqlite_signatures, &signatures);
(
GraphDbBackendEvalBackendReport {
backend: backend.to_string(),
adapter: adapter.to_string(),
read_only,
projection_load: projection_load.to_string(),
operations,
total_micros,
parity,
lock_behavior: lock_behavior.to_string(),
install_portability: install_portability.to_string(),
},
signatures,
)
}
/// Wraps an already-measured refresh into the operation/signature pair used by backend-eval.
///
/// The operation is always reported as supported with status `"ok"` and no
/// error; `value` becomes the signature payload unchanged.
pub(crate) fn graph_db_backend_eval_refresh_operation(
    duration_micros: u128,
    rows: usize,
    value: serde_json::Value,
) -> (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature) {
    const OPERATION_NAME: &str = "refresh";

    let operation = GraphDbBackendEvalOperation {
        name: OPERATION_NAME.to_string(),
        supported: true,
        status: "ok".to_string(),
        duration_micros,
        rows: Some(rows),
        error: None,
    };
    let signature = GraphDbBackendEvalSignature {
        operation: OPERATION_NAME.to_string(),
        value,
    };
    (operation, signature)
}
/// Builds a deterministic synthetic graph projection for backend-eval benchmarks.
///
/// `nodes` is raised to at least 12; nine fixed scaffold nodes are created and
/// the remainder (at least one) become `symbol` nodes. Every symbol gets
/// `calls` edges to the next `fanout` symbols (at least 1, at most the symbol
/// count), wrapping around and skipping self-edges. Every edge is tagged with
/// `dataset=synthetic` and the synthetic provenance.
pub(crate) fn graph_db_backend_eval_synthetic_projection(
    nodes: usize,
    fanout: usize,
) -> GraphProjection {
    /// Id shared by symbol nodes and the edges that reference them.
    fn symbol_node_id(idx: usize) -> String {
        format!("gsym-synthetic-{idx:04}")
    }

    let node_budget = nodes.max(12);
    let symbol_count = node_budget.saturating_sub(9).max(1);
    let provenance = GraphProvenance::new("backend-eval", "synthetic");

    // Fixed scaffold: projection metadata plus one node of each non-symbol kind.
    let mut node_rows = vec![
        SubstrateGraphNode::new(
            "projection:tsift-traversal:synthetic",
            GRAPH_PROJECTION_META_KIND,
            "synthetic projection",
        )
        .with_property("projection_version", GRAPH_PROJECTION_VERSION)
        .with_property(
            "content_hash",
            format!("synthetic-{node_budget}-{fanout}-{symbol_count}"),
        )
        .with_provenance(provenance.clone()),
        SubstrateGraphNode::new("gses-synthetic", "session", "synthetic session")
            .with_property("ref_id", "synthetic-session"),
        SubstrateGraphNode::new("gbak-synthetic", "backlog", "#synthetic")
            .with_property("ref_id", "synthetic")
            .with_property("path", "tasks/software/synthetic.md")
            .with_property("line", "1")
            .with_property(
                "expand",
                "tsift source-read tasks/software/synthetic.md --start 1 --lines 40",
            ),
        SubstrateGraphNode::new("gjob-synthetic", "job_packet", "do #synthetic")
            .with_property("ref_id", "synthetic"),
        SubstrateGraphNode::new("gwctx-synthetic", "worker_context", "synthetic context")
            .with_property("target", "synthetic")
            .with_property("summary", "Synthetic worker owns synthetic.rs")
            .with_property(
                "expand",
                "tsift source-read synthetic.rs --start 1 --lines 80",
            ),
        SubstrateGraphNode::new("gsrc-synthetic", "source_handle", "synthetic.rs:1-80")
            .with_property("file", "synthetic.rs")
            .with_property("start", "1")
            .with_property("end", "80")
            .with_property(
                "expand",
                "tsift source-read synthetic.rs --start 1 --lines 80",
            ),
        SubstrateGraphNode::new("gfil-synthetic", "file", "synthetic.rs")
            .with_property("path", "synthetic.rs"),
        SubstrateGraphNode::new("gsem-synthetic", "semantic_concept", "backend evaluation")
            .with_property("handle", "gsem-synthetic")
            .with_property("label", "backend evaluation")
            .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
            .with_property(
                "embedding",
                semantic_embedding_property("backend evaluation"),
            ),
        SubstrateGraphNode::new("gwres-synthetic", "worker_result", "completed #synthetic")
            .with_property("ref_id", "synthetic")
            .with_property("status", "completed")
            .with_property("touched_files", "synthetic.rs")
            .with_property("expected_tests", "cargo test --test graph_db_conformance"),
    ];

    // One symbol node per index, numbered lines starting at 1.
    node_rows.extend((0..symbol_count).map(|idx| {
        let symbol_label = format!("synthetic_symbol_{idx:04}");
        SubstrateGraphNode::new(symbol_node_id(idx), "symbol", symbol_label.clone())
            .with_property("ref_id", symbol_label)
            .with_property("path", "synthetic.rs")
            .with_property("line", (idx + 1).to_string())
    }));

    let mut edge_rows = vec![
        SubstrateGraphEdge::new("gses-synthetic", "gbak-synthetic", "contains"),
        SubstrateGraphEdge::new("gses-synthetic", "gjob-synthetic", "queues"),
        SubstrateGraphEdge::new("gbak-synthetic", "gwctx-synthetic", "has_context"),
        SubstrateGraphEdge::new("gjob-synthetic", "gwctx-synthetic", "has_context"),
        SubstrateGraphEdge::new("gwctx-synthetic", "gsrc-synthetic", "uses_source"),
        SubstrateGraphEdge::new("gbak-synthetic", "gwres-synthetic", "has_worker_result"),
        SubstrateGraphEdge::new("gbak-synthetic", "gsem-synthetic", "mentions_concept"),
        SubstrateGraphEdge::new("gsrc-synthetic", "gfil-synthetic", "reads_file"),
        SubstrateGraphEdge::new("gfil-synthetic", "gsym-synthetic-0000", "defines"),
    ];

    // Ring of `calls` edges: each symbol calls its next `reach` neighbours (mod symbol_count).
    let reach = fanout.max(1).min(symbol_count);
    edge_rows.extend((0..symbol_count).flat_map(|caller_idx| {
        let caller_id = symbol_node_id(caller_idx);
        (1..=reach)
            .map(move |offset| (caller_idx + offset) % symbol_count)
            .filter(move |&callee_idx| callee_idx != caller_idx)
            .map(move |callee_idx| {
                SubstrateGraphEdge::new(caller_id.clone(), symbol_node_id(callee_idx), "calls")
            })
    }));

    let tagged_edges = edge_rows
        .into_iter()
        .map(|row| {
            row.with_property("dataset", "synthetic")
                .with_provenance(provenance.clone())
        })
        .collect();

    GraphProjection {
        nodes: node_rows,
        edges: tagged_edges,
    }
}
/// Decides, per candidate backend, whether benchmark evidence allows promotion.
///
/// For every dataset the candidate must have run, match SQLite parity, beat
/// SQLite's total time, beat SQLite on every operation both backends report,
/// and have no operation whose status is not `"ok"`. A candidate with a
/// prototype hold reason is always held. Reasons are collected in encounter
/// order and de-duplicated with `dedupe_preserve_order`.
pub(crate) fn graph_db_backend_eval_promotion(
    datasets: &[GraphDbBackendEvalDataset],
    candidates: &[GraphDbExperimentalBackend],
) -> Vec<GraphDbBackendPromotionDecision> {
    candidates
        .iter()
        .map(|candidate| {
            let mut reasons: Vec<String> = Vec::new();
            let mut faster_everywhere = true;
            let mut parity_everywhere = true;

            for dataset in datasets {
                // Without a SQLite baseline nothing about this dataset can be judged.
                let baseline = match dataset
                    .backends
                    .iter()
                    .find(|report| report.backend == "sqlite")
                {
                    Some(found) => found,
                    None => {
                        parity_everywhere = false;
                        faster_everywhere = false;
                        reasons.push(format!(
                            "{} dataset is missing SQLite baseline",
                            dataset.name
                        ));
                        continue;
                    }
                };
                let baseline_total = baseline.total_micros;

                let contender = match dataset
                    .backends
                    .iter()
                    .find(|report| report.backend == candidate.name())
                {
                    Some(found) => found,
                    None => {
                        parity_everywhere = false;
                        reasons.push(format!("{} dataset did not run", dataset.name));
                        continue;
                    }
                };

                if !contender.parity.matches_sqlite {
                    parity_everywhere = false;
                    reasons.push(format!("{} parity differed from SQLite", dataset.name));
                }
                if contender.total_micros >= baseline_total {
                    faster_everywhere = false;
                    reasons.push(format!(
                        "{} total {}us did not beat SQLite {}us",
                        dataset.name, contender.total_micros, baseline_total
                    ));
                }

                // Per-operation comparison, keyed by operation name.
                let baseline_durations = baseline
                    .operations
                    .iter()
                    .map(|entry| (entry.name.as_str(), entry.duration_micros))
                    .collect::<BTreeMap<_, _>>();
                for entry in &contender.operations {
                    match baseline_durations.get(entry.name.as_str()) {
                        Some(sqlite_duration) if entry.duration_micros >= *sqlite_duration => {
                            faster_everywhere = false;
                            reasons.push(format!(
                                "{} {} operation {}us did not beat SQLite {}us",
                                dataset.name, entry.name, entry.duration_micros, sqlite_duration
                            ));
                        }
                        _ => {}
                    }
                }

                let all_ok = contender.operations.iter().all(|entry| entry.status == "ok");
                if !all_ok {
                    parity_everywhere = false;
                    reasons.push(format!("{} has failed benchmark operations", dataset.name));
                }
            }

            let decision = match candidate.prototype_hold_reason() {
                Some(hold_reason) => {
                    reasons.push(hold_reason.to_string());
                    reasons.push(
                        "current bounded prototype timings are benchmark evidence, not a backend switch approval"
                            .to_string(),
                    );
                    "hold"
                }
                None if parity_everywhere && faster_everywhere => {
                    reasons.push(
                        "prototype gate passed; production promotion still requires the real engine adapter to preserve SQLite's bundled install and multi-process lock behavior"
                            .to_string(),
                    );
                    "eligible"
                }
                None => {
                    reasons.push(
                        "production promotion requires SQLite parity plus lower total time for every measured operation on every dataset without worse lock behavior or install portability"
                            .to_string(),
                    );
                    "hold"
                }
            };

            GraphDbBackendPromotionDecision {
                backend: candidate.name().to_string(),
                decision: decision.to_string(),
                reasons: dedupe_preserve_order(reasons),
                gate: candidate.promotion_gate(),
            }
        })
        .collect()
}
/// Flattens backend-eval datasets into a `metric name -> value` map.
///
/// Per dataset: `nodes`, `edges`, `graph_rows`. Per backend (dashes in the
/// backend name become underscores): total duration, raw and normalized. Per
/// operation: duration, raw and normalized, plus `rows` when the operation
/// reported a row count.
pub(crate) fn graph_db_backend_eval_metrics(
    datasets: &[GraphDbBackendEvalDataset],
) -> BTreeMap<String, f64> {
    let mut collected = BTreeMap::new();
    for dataset in datasets {
        let dataset_name = &dataset.name;
        let graph_rows = graph_db_backend_eval_graph_rows(dataset);

        let size_metrics = [
            ("nodes", dataset.nodes as f64),
            ("edges", dataset.edges as f64),
            ("graph_rows", graph_rows as f64),
        ];
        for (suffix, value) in size_metrics {
            collected.insert(format!("{dataset_name}.{suffix}"), value);
        }

        for report in &dataset.backends {
            let backend_key = report.backend.replace('-', "_");
            let prefix = format!("{dataset_name}.{backend_key}");

            collected.insert(
                format!("{prefix}.total_duration_micros"),
                report.total_micros as f64,
            );
            append_graph_db_backend_eval_normalized_duration_metric(
                &mut collected,
                &format!("{prefix}.total_duration_micros_per_1k_graph_rows"),
                report.total_micros,
                graph_rows,
            );

            for entry in &report.operations {
                let operation_prefix = format!("{prefix}.{}", entry.name);
                collected.insert(
                    format!("{operation_prefix}.duration_micros"),
                    entry.duration_micros as f64,
                );
                append_graph_db_backend_eval_normalized_duration_metric(
                    &mut collected,
                    &format!("{operation_prefix}.duration_micros_per_1k_graph_rows"),
                    entry.duration_micros,
                    graph_rows,
                );
                if let Some(row_count) = entry.rows {
                    collected.insert(format!("{operation_prefix}.rows"), row_count as f64);
                }
            }
        }
    }
    collected
}
/// Returns the dataset's graph size in rows: node count plus edge count.
pub(crate) fn graph_db_backend_eval_graph_rows(dataset: &GraphDbBackendEvalDataset) -> usize {
    let node_rows = dataset.nodes;
    let edge_rows = dataset.edges;
    node_rows + edge_rows
}
/// Inserts a duration normalized by graph size under `key`.
///
/// The value is `duration_micros / graph_rows` scaled by
/// `GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT`. Nothing is inserted when
/// `graph_rows` is zero, which avoids a division by zero.
pub(crate) fn append_graph_db_backend_eval_normalized_duration_metric(
    metrics: &mut BTreeMap<String, f64>,
    key: &str,
    duration_micros: u128,
    graph_rows: usize,
) {
    if graph_rows > 0 {
        let micros_per_row = duration_micros as f64 / graph_rows as f64;
        metrics.insert(
            key.to_owned(),
            micros_per_row * GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT,
        );
    }
}
/// Adds raw and normalized duration metrics for each refresh phase of a dataset.
///
/// Keys have the form `{dataset}.refresh_phase.{phase}.duration_micros` and
/// `{dataset}.refresh_phase.{phase}.duration_micros_per_1k_graph_rows`.
pub(crate) fn append_graph_db_backend_eval_phase_metrics(
    metrics: &mut BTreeMap<String, f64>,
    dataset: &str,
    graph_rows: usize,
    phases: &[GraphDbBackendEvalPhaseTiming],
) {
    for timing in phases {
        let key_stem = format!("{dataset}.refresh_phase.{}", timing.name);
        metrics.insert(
            format!("{key_stem}.duration_micros"),
            timing.duration_micros as f64,
        );
        let normalized_key = format!("{key_stem}.duration_micros_per_1k_graph_rows");
        append_graph_db_backend_eval_normalized_duration_metric(
            metrics,
            &normalized_key,
            timing.duration_micros,
            graph_rows,
        );
    }
}
/// Renders the `tsift graph-db ... backend-eval` command line for this root and scope.
///
/// The root path is passed through `shell_quote`; `--full-projection` is
/// appended only when `full_projection` is set.
fn graph_db_backend_eval_base_command(
    root: &Path,
    scope: Option<&str>,
    full_projection: bool,
) -> String {
    let root_text = root.to_string_lossy();
    let quoted_root = shell_quote(root_text.as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    let projection_flag = match full_projection {
        true => " --full-projection",
        false => "",
    };
    format!("tsift graph-db --path {quoted_root}{scope_arg} --json backend-eval{projection_flag}")
}
/// Renders the backend-eval command piped into `tsift metric-digest` against the baseline fixture.
pub(crate) fn graph_db_backend_eval_metric_digest_command(
    root: &Path,
    scope: Option<&str>,
    full_projection: bool,
) -> String {
    let eval_command = graph_db_backend_eval_base_command(root, scope, full_projection);
    format!(
        "{eval_command} | tsift metric-digest --baseline fixtures/graph-db-performance-history.json"
    )
}
/// Renders a shell loop that runs backend-eval three times and pipes all samples into
/// `tsift metric-digest`.
// NOTE(review): the sample count is hard-coded as `1 2 3`; confirm it stays in step with
// GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS, which the performance gate reports separately.
fn graph_db_backend_eval_repeated_sample_command(
    root: &Path,
    scope: Option<&str>,
    full_projection: bool,
) -> String {
    let eval_command = graph_db_backend_eval_base_command(root, scope, full_projection);
    format!(
        "for sample in 1 2 3; do {eval_command}; done | tsift metric-digest --baseline fixtures/graph-db-performance-history.json"
    )
}
/// Describes the gate that must pass before the default path hop cap may be raised.
///
/// For every required workload the gate lists the base `path_max_hops`
/// duration/rows metrics followed by the same pair for each candidate hop tier.
fn graph_db_backend_eval_hop_cap_promotion_gate() -> GraphDbHopCapPromotionGate {
    let mut required_metrics = Vec::new();
    for workload in perf_gate::HOP_CAP_REQUIRED_WORKLOADS.iter() {
        let stem = format!("{workload}.sqlite.path_max_hops");
        required_metrics.push(format!("{stem}.duration_micros"));
        required_metrics.push(format!("{stem}.rows"));
        for hops in perf_gate::HOP_CAP_CANDIDATE_TIERS.iter() {
            required_metrics.extend([
                format!("{stem}_{hops}.duration_micros"),
                format!("{stem}_{hops}.rows"),
            ]);
        }
    }

    let required_workloads = perf_gate::HOP_CAP_REQUIRED_WORKLOADS
        .iter()
        .map(|name| (*name).to_string())
        .collect();

    GraphDbHopCapPromotionGate {
        status: "hold_64_default_until_gate_passes".to_string(),
        current_default_hops: perf_gate::HOP_CAP_CURRENT_DEFAULT,
        candidate_hop_tiers: perf_gate::HOP_CAP_CANDIDATE_TIERS.to_vec(),
        required_backend: perf_gate::BASELINE_BACKEND.to_string(),
        required_workloads,
        required_metrics,
        allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
        minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
        decision_rule:
            "keep 64 as the user-facing default until each candidate tier has repeated real, full_projection, and synthetic_deep_chain SQLite samples within the latency-regression budget and returning useful path rows; full_projection samples are binding only after a cold populate leg proves a cache-hit leg"
                .to_string(),
    }
}
fn graph_db_backend_eval_backend_adapter_spike_gate() -> GraphDbBackendAdapterSpikeGate {
let candidate_backends = [
GraphDbExperimentalBackend::Falkordb,
GraphDbExperimentalBackend::Kuzu,
GraphDbExperimentalBackend::Surrealdb,
]
.into_iter()
.map(|backend| GraphDbBackendAdapterSpikeCandidate {
backend: backend.name().to_string(),
adapter_label: backend.adapter_label().to_string(),
projection_load: backend.projection_load().to_string(),
lock_behavior: backend.lock_behavior().to_string(),
install_portability: backend.install_portability().to_string(),
})
.collect();
GraphDbBackendAdapterSpikeGate {
status: "hold_real_optional_adapter_required".to_string(),
candidate_backends,
required_workloads: perf_gate::GATE_WORKLOAD_PREFIXES
.iter()
.map(|workload| (*workload).to_string())
.collect(),
required_checks: vec![
"real_optional_adapter_behind_graphstore_without_default_build_dependency".to_string(),
"projection_load_writes_provider_neutral_rows_without_sqlite_row_replay".to_string(),
"freshness_and_full_parity_match_sqlite_on_every_graphstore_operation".to_string(),
"lock_semantics_match_or_beat_sqlite_for_writer_and_read_only_workflows".to_string(),
"install_portability_preserves_cargo_build_install_without_external_service_or_native_toolchain"
.to_string(),
"full_projection_cache_hit_sample_before_backend_or_hop_cap_changes".to_string(),
"beats_sqlite_on_every_required_workload_and_metric_in_backend_eval".to_string(),
],
decision_rule:
"do not promote a read-only prototype; FalkorDB, Kuzu, or SurrealDB can only advance after a real optional adapter proves projection writes/load, lock semantics, install portability, full parity, and faster-than-SQLite results across every required workload"
.to_string(),
evidence_plan: "plans/gback-evidence.md".to_string(),
}
}
/// Describes the performance gate for backend-eval runs: the baseline fixture, the metrics a
/// sample must contain, the regression budget, and the commands that produce samples.
///
/// The `real.*` and `synthetic_*` metrics are always required; the
/// `full_projection.*` metrics are added only when `full_projection` is set.
pub(crate) fn graph_db_backend_eval_performance_gate(
    root: &Path,
    scope: Option<&str>,
    full_projection: bool,
) -> GraphDbBackendEvalPerformanceGate {
    const ALWAYS_REQUIRED: &[&str] = &[
        "real.sqlite.refresh.duration_micros",
        "real.sqlite.refresh.duration_micros_per_1k_graph_rows",
        "real.sqlite.edge_lookup.duration_micros_per_1k_graph_rows",
        "real.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows",
        "real.sqlite.incident_edges.duration_micros_per_1k_graph_rows",
        "real.sqlite.neighborhood.duration_micros_per_1k_graph_rows",
        "real.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows",
        "real.sqlite.evidence.duration_micros_per_1k_graph_rows",
        "real.sqlite.total_duration_micros_per_1k_graph_rows",
        "real.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows",
        "real.refresh_phase.sqlite_delta_write.duration_micros",
        "real.refresh_phase.sqlite_property_row_staging.duration_micros",
        "real.refresh_phase.sqlite_edge_property_row_staging.duration_micros",
        "real.sqlite.conflict_matrix.duration_micros",
        "real.sqlite.dispatch_trace.duration_micros",
        "real.sqlite.path_max_hops.duration_micros",
        "real.sqlite.path_max_hops_128.duration_micros",
        "real.sqlite.path_max_hops_256.duration_micros",
        "real.sqlite.path_max_hops_512.duration_micros",
        "real.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows",
        "real.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows",
        "real.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows",
        "synthetic_high_degree.sqlite.total_duration_micros",
        "synthetic_high_degree.sqlite.total_duration_micros_per_1k_graph_rows",
        "synthetic_high_degree.sqlite.neighborhood.duration_micros_per_1k_graph_rows",
        "synthetic_high_degree.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows",
        "synthetic_high_degree.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.incident_edges.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.neighborhood.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops.duration_micros",
        "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros",
        "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros",
        "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros",
        "synthetic_deep_chain.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows",
    ];
    // NOTE(review): the three `full_projection.sqlite.sqlite_delta_write`,
    // `.sqlite_node_staging`, and `.post_write_reads` entries use the backend-operation key
    // shape, while the matching `real.*` entries use `refresh_phase.*`. Confirm some emitter
    // actually produces these keys.
    const FULL_PROJECTION_REQUIRED: &[&str] = &[
        "full_projection.cache.hit",
        "full_projection.cache.disk_bytes",
        "full_projection.cache.compression_ratio",
        "full_projection.refresh_phase.cache_lookup.duration_micros",
        "full_projection.sqlite.total_duration_micros_per_1k_graph_rows",
        "full_projection.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows",
        "full_projection.refresh_phase.projection_rows.duration_micros_per_1k_graph_rows",
        "full_projection.sqlite.sqlite_delta_write.duration_micros",
        "full_projection.sqlite.sqlite_node_staging.duration_micros",
        "full_projection.sqlite.post_write_reads.duration_micros",
        "full_projection.sqlite.neighborhood.duration_micros",
        "full_projection.sqlite.evidence_target_resolution.duration_micros",
        "full_projection.sqlite.evidence.duration_micros",
        "full_projection.sqlite.path_max_hops.duration_micros",
        "full_projection.sqlite.path_max_hops_128.duration_micros",
        "full_projection.sqlite.path_max_hops_256.duration_micros",
        "full_projection.sqlite.path_max_hops_512.duration_micros",
        "full_projection.sqlite.conflict_matrix.duration_micros",
        "full_projection.sqlite.dispatch_trace.duration_micros",
    ];

    let mut required_metrics: Vec<String> = ALWAYS_REQUIRED
        .iter()
        .map(|metric| String::from(*metric))
        .collect();
    if full_projection {
        required_metrics.extend(
            FULL_PROJECTION_REQUIRED
                .iter()
                .map(|metric| String::from(*metric)),
        );
    }

    let full_projection_cache_hit_gate = match full_projection {
        true => {
            "binding full_projection performance evidence requires a cold populate leg followed by cache-leg samples with full_projection.cache.hit=1; cache-miss samples are diagnostics, not backend or hop-cap promotion proof"
                .to_string()
        }
        false => "not evaluated until --full-projection is enabled".to_string(),
    };
    let digest_command = graph_db_backend_eval_metric_digest_command(root, scope, full_projection);
    let repeated_sample_command =
        graph_db_backend_eval_repeated_sample_command(root, scope, full_projection);

    GraphDbBackendEvalPerformanceGate {
        baseline_fixture: "fixtures/graph-db-performance-history.json".to_string(),
        ci_profile: "synthetic_high_degree + synthetic_deep_chain metrics are CI-safe and bounded"
            .to_string(),
        opt_in_real_profile:
            "pass --full-projection to add the full-project dataset when checking for large projection regressions"
                .to_string(),
        full_projection_cache_hit_gate,
        allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
        minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
        normalized_metric_unit: "duration_micros_per_1k_graph_rows".to_string(),
        required_metrics,
        digest_command,
        repeated_sample_command,
        hop_cap_promotion: graph_db_backend_eval_hop_cap_promotion_gate(),
        backend_adapter_spike: graph_db_backend_eval_backend_adapter_spike_gate(),
    }
}
/// Sanitizes `value` so it can be used as a single path component.
///
/// ASCII alphanumerics and `-`, `_`, `.` are kept; every other character
/// (path separators included) is replaced with `_`.
///
/// A result that is empty or made up only of dots would not be a safe
/// standalone component: `""` joins to the parent directory itself, and `"."`
/// / `".."` resolve to the current / parent directory. Those results are
/// rewritten to underscores (one per original character, at least one), so the
/// returned segment always names a real child entry.
#[cfg(feature = "backend-surrealdb")]
fn graph_db_backend_eval_path_segment(value: &str) -> String {
    let sanitized: String = value
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
                ch
            } else {
                '_'
            }
        })
        .collect();
    // `all` is vacuously true for the empty string, so this also covers "".
    if sanitized.chars().all(|ch| ch == '.') {
        return "_".repeat(sanitized.len().max(1));
    }
    sanitized
}
/// Returns the on-disk location of the SurrealDB backend-eval store for a scope and dataset.
///
/// Layout: `<root>/.tsift/backend-eval-cache/surrealdb/<scope>/<dataset>/surrealkv`,
/// where a missing scope is named `root` and both variable segments are
/// sanitized with `graph_db_backend_eval_path_segment`.
#[cfg(feature = "backend-surrealdb")]
fn graph_db_backend_eval_surrealdb_store_path(
    root: &Path,
    scope: Option<&str>,
    dataset: &str,
) -> PathBuf {
    let scope_segment = graph_db_backend_eval_path_segment(scope.unwrap_or("root"));
    let dataset_segment = graph_db_backend_eval_path_segment(dataset);

    let mut store_path = root.join(".tsift/backend-eval-cache/surrealdb");
    store_path.push(scope_segment);
    store_path.push(dataset_segment);
    store_path.push("surrealkv");
    store_path
}
/// Borrowed options for a backend-eval run.
///
/// All fields borrow from the caller for lifetime `'a`. The consumer of this struct is not
/// visible in this part of the file, so the per-field notes below are hedged.
pub(crate) struct GraphDbBackendEvalOptions<'a> {
/// Filesystem path for the run (presumably the project path given on the command line; confirm at the call site).
path: &'a Path,
/// Optional scope name; other backend-eval helpers in this file fall back to `"root"` when it is `None`.
scope: Option<&'a str>,
/// Candidate backend names as strings (presumably parsed into `GraphDbExperimentalBackend` by the consumer; confirm).
candidates: &'a [String],
/// Targets to evaluate.
targets: &'a [String],
/// Whether the full-projection dataset is requested (presumably mirrors `--full-projection`; confirm).
full_projection: bool,
}
/// Evaluates one dataset: the SQLite baseline first, then every candidate backend.
///
/// The SQLite report is produced from `sqlite_store` with the caller-supplied refresh
/// operation and signature. Each candidate store is built from `sqlite_rows`, timed as its own
/// `refresh` operation, and then run through the same benchmark sequence with the SQLite
/// signatures passed along for the parity check. Candidate signatures are discarded.
///
/// The returned dataset reports `targets.len()` and the node/edge counts of the SQLite store.
///
/// # Errors
///
/// Propagates failures from `graph_counts`, candidate store construction, and
/// `sqlite_graph_freshness`; any such failure aborts the whole dataset.
#[allow(clippy::too_many_arguments)]
pub(crate) fn graph_db_backend_eval_dataset(
name: &str,
root: &Path,
path: &Path,
scope: Option<&str>,
targets: &[String],
depth: usize,
limit: usize,
impact_limit: usize,
candidates: &[GraphDbExperimentalBackend],
sqlite_store: &SqliteGraphStore,
sqlite_freshness: GraphDbFreshnessReport,
sqlite_refresh: (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature),
sqlite_rows: ConvexProjectionRows,
extra_warnings: Vec<String>,
prepared: &ConflictMatrixPreparedInputs,
) -> Result<GraphDbBackendEvalDataset> {
let (nodes, edges) = sqlite_store.graph_counts()?;
let (sqlite_operation, sqlite_signature) = sqlite_refresh;
// Baseline run: no SQLite signatures are passed because this run produces them.
let (sqlite_report, sqlite_signatures) = graph_db_backend_eval_report_for_store(
"sqlite",
"SQLite GraphStore correctness baseline",
false,
root,
path,
scope,
targets,
depth,
limit,
impact_limit,
sqlite_store,
sqlite_freshness,
sqlite_operation,
Some(sqlite_signature),
None,
extra_warnings.clone(),
prepared,
"SQLite refresh writes provider-neutral projection rows into graph.db transactionally",
"SQLite WAL correctness store; refresh uses one transactional writer and read-only queries use snapshot recovery",
"bundled rusqlite baseline; no external service or runtime required",
);
let mut backends = vec![sqlite_report];
for candidate in candidates {
// With the `backend-surrealdb` feature enabled, SurrealDB uses the real optional adapter
// instead of the read-only prototype store used for the other candidates.
#[cfg(feature = "backend-surrealdb")]
if *candidate == GraphDbExperimentalBackend::Surrealdb {
// The refresh timing covers resolving the store path, opening or refreshing the store, and
// reading its counts.
let started = Instant::now();
let store_path = graph_db_backend_eval_surrealdb_store_path(root, scope, name);
let (store, warm_start) =
SurrealdbGraphStore::open_or_refresh(&store_path, &sqlite_rows)?;
let (candidate_nodes, candidate_edges) = store.graph_counts()?;
let rows = candidate_nodes + candidate_edges;
let mut refresh_meta = serde_json::json!({
"nodes": candidate_nodes,
"edges": candidate_edges,
});
// Only a cache hit is recorded in the refresh metadata; other outcomes leave it as-is.
if warm_start == tsift_surrealdb::WarmStartOutcome::CacheHit {
refresh_meta["warm_start"] = serde_json::json!("cache_hit");
}
let refresh = graph_db_backend_eval_refresh_operation(
started.elapsed().as_micros(),
rows,
refresh_meta,
);
// Freshness is always read from the SQLite store, even for candidate runs.
let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
candidate.name(),
"SurrealDB SurrealKV optional adapter spike",
false,
root,
path,
scope,
targets,
depth,
limit,
impact_limit,
&store,
freshness,
refresh.0,
Some(refresh.1),
Some(&sqlite_signatures),
extra_warnings.clone(),
prepared,
"provider-neutral rows written into an embedded/file-backed SurrealDB SurrealKV store through the optional tsift-surrealdb adapter; warm-start reuses existing store when row hash matches",
"embedded/file-backed writer through SurrealDB SurrealKV rewrites backend-eval rows before read-only measurements; promotion still requires multi-process/read-only contention samples",
"feature-gated optional tsift-surrealdb crate; default cargo build/install does not pull SurrealDB into the dependency graph",
);
backends.push(candidate_report);
continue;
}
// Prototype path: build a read-only store from the SQLite rows; the refresh timing covers
// construction plus the count query.
let started = Instant::now();
let store = ExperimentalReadOnlyGraphStore::from_rows(*candidate, &sqlite_rows)?;
let (candidate_nodes, candidate_edges) = store.graph_counts()?;
let rows = candidate_nodes + candidate_edges;
let refresh = graph_db_backend_eval_refresh_operation(
started.elapsed().as_micros(),
rows,
serde_json::json!({
"nodes": candidate_nodes,
"edges": candidate_edges,
}),
);
let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
candidate.name(),
candidate.adapter_label(),
true,
root,
path,
scope,
targets,
depth,
limit,
impact_limit,
&store,
freshness,
refresh.0,
Some(refresh.1),
Some(&sqlite_signatures),
extra_warnings.clone(),
prepared,
candidate.projection_load(),
candidate.lock_behavior(),
candidate.install_portability(),
);
backends.push(candidate_report);
}
Ok(GraphDbBackendEvalDataset {
name: name.to_string(),
target_count: targets.len(),
nodes,
edges,
backends,
})
}
/// Prints a graph-db backend evaluation report to stdout as line-oriented text.
///
/// Sections appear in a fixed order: the header (baseline and candidates), phase
/// timings, one group per dataset with its per-backend measurements, the promotion
/// decisions, and finally the metric-digest and repeat-sample commands.
pub(crate) fn print_graph_db_backend_eval_human(report: &GraphDbBackendEvalReport) {
    let candidate_list = report.candidates.join(", ");
    println!(
        "graph-db backend-eval baseline:{} candidates:{}",
        report.baseline_backend, candidate_list
    );

    for timing in &report.phase_timings {
        println!(
            "phase:{} {}us {}",
            timing.name, timing.duration_micros, timing.detail
        );
    }

    for group in &report.datasets {
        let row_total = group.nodes + group.edges;
        println!(
            "dataset:{} targets:{} rows:{}",
            group.name, group.target_count, row_total
        );
        for measured in &group.backends {
            let parity = &measured.parity;
            println!(
                " backend:{} total:{}us parity:{}",
                measured.backend, measured.total_micros, parity.matches_sqlite
            );
            println!(" projection-load: {}", measured.projection_load);
            println!(" lock-behavior: {}", measured.lock_behavior);
            println!(" install-portability: {}", measured.install_portability);
            for step in &measured.operations {
                println!(" {} {} {}us", step.name, step.status, step.duration_micros);
            }
            for diagnostic in &parity.diagnostics {
                println!(" parity: {diagnostic}");
            }
        }
    }

    for verdict in &report.promotion {
        let gate = &verdict.gate;
        println!("promotion {}: {}", verdict.backend, verdict.decision);
        println!(" gate: {}", gate.status);
        for reason in &verdict.reasons {
            println!(" reason: {reason}");
        }
        for check in &gate.required_checks {
            println!(" check: {check}");
        }
    }

    println!("metric-digest: {}", report.metric_digest_command);
    println!(
        "repeat-samples: {}",
        report.performance_gate.repeated_sample_command
    );
}
/// Builds the `tsift traverse` command line that expands `handle` by one level.
///
/// The handle and the lossily converted `root` path are both passed through
/// `shell_quote` before being placed into the command text.
fn traversal_expand_command(root: &Path, handle: &str) -> String {
    let quoted_handle = shell_quote(handle);
    let root_text = root.to_string_lossy();
    let quoted_root = shell_quote(root_text.as_ref());
    format!(
        "tsift traverse {} --path {} --depth 1 --limit 50",
        quoted_handle, quoted_root
    )
}
/// Builds the traversal node that represents a source file.
///
/// `file` is relativized against `root`; the relative path serves as label,
/// `ref_id` and `path`, and seeds the stable `gfil` handle.
fn traversal_file_node(root: &Path, file: &str) -> TraversalNode {
    let display = relativize(file, root);
    let handle = stable_handle("gfil", &format!("file:{display}"));
    let expand = traversal_expand_command(root, &handle);
    TraversalNode {
        handle,
        kind: String::from("file"),
        label: display.clone(),
        ref_id: Some(display.clone()),
        path: Some(display),
        line: None,
        detail: None,
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds a file node flagged as a raw-source fallback.
///
/// Starts from the regular file node; when that node carries a path, the detail
/// text is replaced with the fallback notice and the expand command becomes a
/// source read of the first 80 lines instead of a graph traversal.
fn traversal_raw_source_file_node(root: &Path, file: &str) -> TraversalNode {
    let mut node = traversal_file_node(root, file);
    if let Some(path) = node.path.as_deref() {
        let read_command = source_read_command(root, path, 1, 80);
        node.detail = Some(String::from(
            "raw source fallback; graph evidence unavailable",
        ));
        node.expand = read_command;
    }
    node
}
/// Builds the traversal node for an indexed symbol.
///
/// The stable `gsym` handle is derived from the relative file path, the symbol's
/// line and its name; the detail text is "<language> <kind>".
fn traversal_symbol_node(root: &Path, symbol: &index::StoredSymbol) -> TraversalNode {
    let file = relativize(&symbol.file, root);
    let handle = stable_handle(
        "gsym",
        &format!("symbol:{file}:{}:{}", symbol.line, symbol.name),
    );
    let expand = traversal_expand_command(root, &handle);
    let detail = format!("{} {}", symbol.language, symbol.kind);
    TraversalNode {
        handle,
        kind: String::from("symbol"),
        label: symbol.name.clone(),
        ref_id: Some(symbol.name.clone()),
        path: Some(file),
        line: Some(symbol.line),
        detail: Some(detail),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Picks the expand command for an AST span node.
///
/// Markdown symbols expand through `markdown_ast_command` addressed by the span
/// handle; every other language expands through `source_read_command`, reading
/// from `start_line` for the span's inclusive line count (never fewer than one).
fn traversal_ast_span_expand_command(
    root: &Path,
    file: &str,
    symbol: &index::StoredSymbol,
    span: &AstSpanPreview,
) -> String {
    if symbol.language == "markdown" {
        return markdown_ast_command(root, file, Some(&span.handle));
    }
    let covered_lines = span
        .end_line
        .saturating_sub(span.start_line)
        .saturating_add(1)
        .max(1);
    source_read_command(root, file, span.start_line, covered_lines)
}
/// Builds the AST-span traversal node for `symbol` together with its index entry.
///
/// Returns `None` when `stored_symbol_ast_span` yields no span for the symbol.
/// The node's properties describe the span (byte and line extents, optional body
/// extents, parent/child handles, enclosing module) and, for Markdown spans, the
/// Markdown block metadata. The returned index entry starts with an empty
/// `symbol_handle` and no `file_handle`.
fn traversal_ast_span_node(
    root: &Path,
    symbol: &index::StoredSymbol,
    source: &[u8],
    symbols: &[index::StoredSymbol],
) -> Option<(TraversalNode, TraversalAstSpanIndexEntry)> {
    let span = stored_symbol_ast_span(symbol, source, symbols, usize::MAX)?;
    let file = relativize(&symbol.file, root);

    // Every property is listed once; `None` values are simply not recorded.
    let child_handles =
        (!span.child_handles.is_empty()).then(|| span.child_handles.join(","));
    let mut properties = BTreeMap::new();
    for (key, value) in [
        ("layer", Some("ast_navigation".to_string())),
        ("language", Some(symbol.language.clone())),
        ("symbol_kind", Some(symbol.kind.clone())),
        ("node_kind", Some(span.node_kind.clone())),
        ("start_byte", Some(span.start_byte.to_string())),
        ("end_byte", Some(span.end_byte.to_string())),
        ("end_line", Some(span.end_line.to_string())),
        (
            "body_start_byte",
            span.body_start_byte.map(|value| value.to_string()),
        ),
        (
            "body_end_byte",
            span.body_end_byte.map(|value| value.to_string()),
        ),
        (
            "body_start_line",
            span.body_start_line.map(|value| value.to_string()),
        ),
        (
            "body_end_line",
            span.body_end_line.map(|value| value.to_string()),
        ),
        ("parent_handle", span.parent_handle.clone()),
        ("child_handles", child_handles),
        ("parent_module", symbol.parent_module.clone()),
    ] {
        if let Some(value) = value {
            properties.insert(key.to_string(), value);
        }
    }

    if let Some(markdown) = &span.markdown {
        let section_path =
            (!markdown.section_path.is_empty()).then(|| markdown.section_path.join(" > "));
        for (key, value) in [
            (
                "markdown_block_kind",
                Some(markdown_ast_block_kind(&symbol.kind)),
            ),
            (
                "heading_level",
                markdown.heading_level.map(|value| value.to_string()),
            ),
            ("section_path", section_path),
            ("section_handle", markdown.section_handle.clone()),
            (
                "list_depth",
                markdown.list_depth.map(|value| value.to_string()),
            ),
            ("fence_language", markdown.fence_language.clone()),
        ] {
            if let Some(value) = value {
                properties.insert(key.to_string(), value);
            }
        }
    }

    // Start lines that do not fit into i64 are clamped to i64::MAX.
    let line = i64::try_from(span.start_line).unwrap_or(i64::MAX);
    let expand = traversal_ast_span_expand_command(root, &file, symbol, &span);
    let detail = format!("{} {} AST span", symbol.language, symbol.kind);

    let node = TraversalNode {
        handle: span.handle.clone(),
        kind: String::from("ast_span"),
        label: symbol.name.clone(),
        ref_id: Some(symbol.name.clone()),
        path: Some(file.clone()),
        line: Some(line),
        detail: Some(detail),
        properties,
        expand,
    };
    let entry = TraversalAstSpanIndexEntry {
        handle: span.handle,
        symbol_handle: String::new(),
        file_handle: None,
        file,
        name: symbol.name.clone(),
        kind: symbol.kind.clone(),
        language: symbol.language.clone(),
        node_kind: span.node_kind,
        start_byte: span.start_byte,
        end_byte: span.end_byte,
        parent_module: symbol.parent_module.clone(),
        markdown: span.markdown,
    };
    Some((node, entry))
}
/// Builds a placeholder symbol node for a call target that could not be resolved.
///
/// The node has neither path nor line; its `gsym` handle is derived from the
/// bare name only.
fn traversal_unresolved_symbol_node(root: &Path, name: &str) -> TraversalNode {
    let handle = stable_handle("gsym", &format!("symbol:{name}"));
    let expand = traversal_expand_command(root, &handle);
    TraversalNode {
        handle,
        kind: String::from("symbol"),
        label: String::from(name),
        ref_id: Some(String::from(name)),
        path: None,
        line: None,
        detail: Some(String::from("unresolved call target")),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds the traversal node for an indexed HTTP route.
///
/// A route without an explicit method is keyed and labelled with the method
/// `any` (upper-cased in the label). The `grte` handle is derived from the
/// relative file, line, method and route path.
fn traversal_route_node(root: &Path, route: &index::StoredRoute) -> TraversalNode {
    let file = relativize(&route.file, root);
    let method = match route.method.as_deref() {
        Some(explicit) => explicit,
        None => "any",
    };
    let handle = stable_handle(
        "grte",
        &format!(
            "route:{file}:{}:{}:{}",
            route.line, method, route.route_path
        ),
    );
    let expand = traversal_expand_command(root, &handle);
    let label = format!("{} {}", method.to_uppercase(), route.route_path);
    let detail = format!(
        "{} route handled by {}",
        route.framework, route.handler_name
    );
    TraversalNode {
        handle,
        kind: String::from("route"),
        label,
        ref_id: Some(route.route_path.clone()),
        path: Some(file),
        line: Some(route.line),
        detail: Some(detail),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds the traversal node for a Cargo workspace.
///
/// Manifest and workspace-root paths are relativized against `root` and
/// normalised to forward slashes. A workspace whose relative root is empty is
/// labelled "root cargo workspace".
fn traversal_cargo_workspace_node(
    root: &Path,
    workspace: &multiplicity::CargoWorkspaceInfo,
) -> TraversalNode {
    let manifest = relativize_pathbuf(&workspace.manifest_path, root)
        .to_string_lossy()
        .replace('\\', "/");
    let workspace_root = relativize_pathbuf(&workspace.workspace_root, root)
        .to_string_lossy()
        .replace('\\', "/");
    let handle = stable_handle("gcwk", &format!("cargo-workspace:{manifest}"));
    let expand = traversal_expand_command(root, &handle);

    let mut properties = BTreeMap::new();
    for (key, value) in [
        ("layer", "cargo_workspace".to_string()),
        ("workspace_root", workspace_root.clone()),
        ("members", workspace.members.join(",")),
        ("default_members", workspace.default_members.join(",")),
    ] {
        properties.insert(key.to_string(), value);
    }

    let label = if workspace_root.is_empty() {
        "root cargo workspace".to_string()
    } else {
        workspace_root
    };
    TraversalNode {
        handle,
        kind: String::from("cargo_workspace"),
        label,
        ref_id: Some(workspace.id.clone()),
        path: Some(manifest),
        line: None,
        detail: Some(String::from("Cargo workspace manifest")),
        properties,
        expand,
    }
}
/// Builds the traversal node for a Cargo package.
///
/// Paths are relativized against `root` and normalised to forward slashes.
/// Dependencies are recorded as a comma-separated list of `kind:name` pairs, and
/// a package located directly in `root` is described as living in ".".
fn traversal_cargo_package_node(
    root: &Path,
    package: &multiplicity::CargoPackageInfo,
) -> TraversalNode {
    let manifest = relativize_pathbuf(&package.manifest_path, root)
        .to_string_lossy()
        .replace('\\', "/");
    let package_root = relativize_pathbuf(&package.package_root, root)
        .to_string_lossy()
        .replace('\\', "/");
    let workspace_root = relativize_pathbuf(&package.workspace_root, root)
        .to_string_lossy()
        .replace('\\', "/");
    let handle = stable_handle(
        "gcpk",
        &format!("cargo-package:{manifest}:{}", package.name),
    );
    let expand = traversal_expand_command(root, &handle);

    let location = if package_root.is_empty() {
        "."
    } else {
        package_root.as_str()
    };
    let detail = format!("Cargo package in {}", location);

    let dependency_list = package
        .dependencies
        .iter()
        .map(|dependency| format!("{}:{}", dependency.kind, dependency.name))
        .collect::<Vec<_>>()
        .join(",");

    let mut properties = BTreeMap::new();
    for (key, value) in [
        ("layer", "cargo_package".to_string()),
        ("package_name", package.name.clone()),
        ("normalized_name", package.normalized_name.clone()),
        ("package_root", package_root.clone()),
        ("workspace_root", workspace_root),
        ("features", package.features.join(",")),
        ("targets", package.targets.join(",")),
        ("dependencies", dependency_list),
    ] {
        properties.insert(key.to_string(), value);
    }

    TraversalNode {
        handle,
        kind: String::from("cargo_package"),
        label: package.name.clone(),
        ref_id: Some(package.scope_id.clone()),
        path: Some(manifest),
        line: None,
        detail: Some(detail),
        properties,
        expand,
    }
}
/// Builds the traversal node for an agent-doc session markdown file.
///
/// The label is the session id when one is supplied and the relative file path
/// otherwise; the `gses` handle always derives from the relative path.
fn traversal_session_node(
    root: &Path,
    markdown_path: &Path,
    session_id: Option<&str>,
) -> TraversalNode {
    let display = relativize_pathbuf(markdown_path, root)
        .to_string_lossy()
        .replace('\\', "/");
    let handle = stable_handle("gses", &format!("session:{display}"));
    let expand = traversal_expand_command(root, &handle);
    let label = match session_id {
        Some(id) => id.to_string(),
        None => display.clone(),
    };
    TraversalNode {
        handle,
        kind: String::from("session"),
        label,
        ref_id: session_id.map(String::from),
        path: Some(display),
        line: None,
        detail: Some(String::from("agent-doc session artifact")),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds the traversal node for one backlog item found in a markdown file.
///
/// The node is labelled `#<id>`, carries the item text as detail, and its `gbak`
/// handle derives from the relative markdown path plus the id.
fn traversal_backlog_node(
    root: &Path,
    markdown_path: &Path,
    id: &str,
    text: &str,
    line: i64,
) -> TraversalNode {
    let display = relativize_pathbuf(markdown_path, root)
        .to_string_lossy()
        .replace('\\', "/");
    let handle = stable_handle("gbak", &format!("backlog:{display}:#{id}"));
    let expand = traversal_expand_command(root, &handle);
    TraversalNode {
        handle,
        kind: String::from("backlog"),
        label: format!("#{id}"),
        ref_id: Some(String::from(id)),
        path: Some(display),
        line: Some(line),
        detail: Some(String::from(text)),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds the traversal node for a job packet line in a markdown file.
///
/// The `gjob` handle derives from the relative markdown path, the line number
/// and the label, so the same label on different lines yields distinct nodes.
fn traversal_job_packet_node(
    root: &Path,
    markdown_path: &Path,
    label: &str,
    ref_id: Option<&str>,
    detail: &str,
    line: i64,
) -> TraversalNode {
    let display = relativize_pathbuf(markdown_path, root)
        .to_string_lossy()
        .replace('\\', "/");
    let handle = stable_handle("gjob", &format!("job:{display}:{line}:{label}"));
    let expand = traversal_expand_command(root, &handle);
    TraversalNode {
        handle,
        kind: String::from("job_packet"),
        label: String::from(label),
        ref_id: ref_id.map(String::from),
        path: Some(display),
        line: Some(line),
        detail: Some(String::from(detail)),
        properties: BTreeMap::new(),
        expand,
    }
}
/// One worker outcome extracted from a single markdown line by
/// `parse_worker_result_line`; a line that names several targets yields one
/// value per target.
#[derive(Clone, Debug)]
struct ParsedWorkerResult {
/// Target reference the outcome applies to.
id: String,
/// Outcome keyword; the parser produces either "completed" or "blocked".
status: String,
/// Indexed file paths mentioned on the line.
touched_files: Vec<String>,
/// Backtick code spans on the line whose text contains "test" (case-insensitive).
tests: Vec<String>,
/// Target references on the line that are not themselves result targets.
follow_up_ids: Vec<String>,
}
/// Builds the traversal node for a parsed worker result.
///
/// The `wres` handle derives from the relative markdown path, the result id,
/// its status and the line number. The status is always recorded as a property;
/// touched files, expected tests and follow-up ids are recorded only when
/// non-empty.
fn traversal_worker_result_node(
    root: &Path,
    markdown_path: &Path,
    parsed: &ParsedWorkerResult,
    line_text: &str,
    line: i64,
) -> TraversalNode {
    let display = relativize_pathbuf(markdown_path, root)
        .to_string_lossy()
        .replace('\\', "/");
    let handle = stable_handle(
        "wres",
        &format!(
            "worker-result:{display}:{}:{}:{}",
            parsed.id, parsed.status, line
        ),
    );
    let expand = traversal_expand_command(root, &handle);

    let mut properties = BTreeMap::new();
    properties.insert("status".to_string(), parsed.status.clone());
    for (key, values, separator) in [
        ("touched_files", &parsed.touched_files, ","),
        ("expected_tests", &parsed.tests, " && "),
        ("follow_up_ids", &parsed.follow_up_ids, ","),
    ] {
        if !values.is_empty() {
            properties.insert(key.to_string(), values.join(separator));
        }
    }

    TraversalNode {
        handle,
        kind: String::from("worker_result"),
        label: format!("{} #{}", parsed.status, parsed.id),
        ref_id: Some(parsed.id.clone()),
        path: Some(display),
        line: Some(line),
        detail: Some(line_text.trim().to_string()),
        properties,
        expand,
    }
}
/// Splits `input` into lower-cased search tokens.
///
/// A token is a maximal run of ASCII letters and digits; every other character
/// (including `_`, `-` and all non-ASCII characters) acts as a separator. Runs
/// shorter than three bytes are dropped.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for run in input.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if run.len() >= 3 {
            tokens.insert(run.to_ascii_lowercase());
        }
    }
    tokens
}
/// Reports whether `parent`'s byte range encloses `child`'s within the same file.
///
/// An entry never contains itself (same handle). Containment is inclusive, so
/// two distinct entries with identical byte ranges contain each other.
fn traversal_ast_span_contains(
    parent: &TraversalAstSpanIndexEntry,
    child: &TraversalAstSpanIndexEntry,
) -> bool {
    if parent.handle == child.handle || parent.file != child.file {
        return false;
    }
    let starts_at_or_before = parent.start_byte <= child.start_byte;
    let ends_at_or_after = parent.end_byte >= child.end_byte;
    starts_at_or_before && ends_at_or_after
}
/// Finds the handle of the tightest span in `entries` that contains `entry`.
///
/// Among all containing spans the shortest one wins; ties are broken by start
/// byte, end byte, kind, name and node kind, and finally by position in
/// `entries` (the earliest candidate is kept). Returns `None` when nothing
/// contains `entry`.
fn traversal_ast_parent_handle<'a>(
    entry: &TraversalAstSpanIndexEntry,
    entries: &'a [TraversalAstSpanIndexEntry],
) -> Option<&'a str> {
    let tightest = entries
        .iter()
        .filter(|candidate| traversal_ast_span_contains(candidate, entry))
        .min_by(|left, right| {
            let left_len = left.end_byte.saturating_sub(left.start_byte);
            let right_len = right.end_byte.saturating_sub(right.start_byte);
            left_len
                .cmp(&right_len)
                .then_with(|| left.start_byte.cmp(&right.start_byte))
                .then_with(|| left.end_byte.cmp(&right.end_byte))
                .then_with(|| left.kind.as_str().cmp(right.kind.as_str()))
                .then_with(|| left.name.as_str().cmp(right.name.as_str()))
                .then_with(|| left.node_kind.as_str().cmp(right.node_kind.as_str()))
        })?;
    Some(tightest.handle.as_str())
}
/// Walks up the parent chain of `entry` and returns the handle of the nearest
/// enclosing module span.
///
/// An ancestor qualifies when its kind is `module` or `mod`, or when its name
/// equals `entry.parent_module`. Returns `None` when the chain ends (or reaches a
/// handle missing from `entries_by_handle`) before a qualifying ancestor is found.
///
/// The walk is bounded by the size of `parent_by_handle`: a well-formed chain
/// uses each mapping at most once, so exceeding that bound means the map holds a
/// cycle (two distinct spans with identical byte ranges contain each other and
/// can become each other's parent). Such a cycle yields `None` instead of
/// looping forever.
fn traversal_ast_enclosing_module_handle<'a>(
    entry: &TraversalAstSpanIndexEntry,
    entries_by_handle: &'a BTreeMap<String, TraversalAstSpanIndexEntry>,
    parent_by_handle: &BTreeMap<String, String>,
) -> Option<&'a str> {
    let mut current = parent_by_handle.get(&entry.handle);
    for _ in 0..parent_by_handle.len() {
        // Both a finished chain and a dangling handle end the search.
        let parent = entries_by_handle.get(current?)?;
        let is_module_kind = matches!(parent.kind.as_str(), "module" | "mod");
        let is_declared_parent_module = entry
            .parent_module
            .as_deref()
            .is_some_and(|module| module == parent.name);
        if is_module_kind || is_declared_parent_module {
            return Some(parent.handle.as_str());
        }
        current = parent_by_handle.get(&parent.handle);
    }
    None
}
/// Adds the AST navigation edges for `entries` to `graph`.
///
/// For every entry this links parent/child containment (or file-level
/// containment for spans without a parent), the nearest enclosing module, the
/// enclosing Markdown section for Markdown blocks, and finally next/previous
/// sibling edges between spans that share the same parent.
fn link_ast_navigation_edges(
graph: &mut TraversalGraphBuild,
entries: &[TraversalAstSpanIndexEntry],
) {
let mut entries_by_file = BTreeMap::<String, Vec<TraversalAstSpanIndexEntry>>::new();
let entries_by_handle = entries
.iter()
.map(|entry| (entry.handle.clone(), entry.clone()))
.collect::<BTreeMap<_, _>>();
let mut parent_by_handle = BTreeMap::<String, String>::new();
let mut children_by_parent = BTreeMap::<Option<String>, Vec<TraversalAstSpanIndexEntry>>::new();
// Group the entries per file so parents are only searched within the same file.
for entry in entries {
entries_by_file
.entry(entry.file.clone())
.or_default()
.push(entry.clone());
}
// Resolve each entry's parent and bucket it with its siblings.
for file_entries in entries_by_file.values() {
for entry in file_entries {
let parent = traversal_ast_parent_handle(entry, file_entries).map(str::to_string);
if let Some(parent) = &parent {
parent_by_handle.insert(entry.handle.clone(), parent.clone());
}
// Top-level spans are grouped under their file handle.
// NOTE(review): entries with neither a parent nor a file handle all share the
// `None` key, so such spans from different files would be linked as siblings —
// confirm callers always set `file_handle` before this runs.
let sibling_key = parent.clone().or_else(|| entry.file_handle.clone());
children_by_parent
.entry(sibling_key)
.or_default()
.push(entry.clone());
}
}
for entry in entries {
let parent = parent_by_handle.get(&entry.handle);
if let Some(parent) = parent {
// A parent/child pair gets three edges: `contains` and `child` from the
// parent, `parent` from the child.
graph.add_edge(
parent,
&entry.handle,
"contains",
Some("AST parent contains child span".to_string()),
1,
);
graph.add_edge(
parent,
&entry.handle,
"child",
Some("AST child span".to_string()),
1,
);
graph.add_edge(
&entry.handle,
parent,
"parent",
Some("AST parent span".to_string()),
1,
);
} else if let Some(file_handle) = &entry.file_handle {
// Spans without a parent hang directly off their file node.
graph.add_edge(
file_handle,
&entry.handle,
"contains",
Some("file contains top-level AST span".to_string()),
1,
);
}
if let Some(module_handle) =
traversal_ast_enclosing_module_handle(entry, &entries_by_handle, &parent_by_handle)
{
graph.add_edge(
&entry.handle,
module_handle,
"enclosing_module",
Some("nearest enclosing module AST span".to_string()),
1,
);
}
// Markdown blocks are additionally linked to their section, unless the block
// is the section itself.
if entry.language == "markdown"
&& let Some(markdown) = &entry.markdown
&& let Some(section_handle) = &markdown.section_handle
&& section_handle != &entry.handle
{
graph.add_edge(
section_handle,
&entry.handle,
"contains_markdown_block",
Some("Markdown section contains block".to_string()),
1,
);
graph.add_edge(
&entry.handle,
section_handle,
"enclosing_section",
Some("Markdown enclosing section".to_string()),
1,
);
}
}
// Siblings are ordered by position first; the remaining keys make the order
// fully deterministic when positions coincide.
for siblings in children_by_parent.values_mut() {
siblings.sort_by(|left, right| {
left.start_byte
.cmp(&right.start_byte)
.then(left.end_byte.cmp(&right.end_byte))
.then(left.kind.cmp(&right.kind))
.then(left.name.cmp(&right.name))
.then(left.node_kind.cmp(&right.node_kind))
.then(left.handle.cmp(&right.handle))
});
// Each adjacent pair is linked in both directions.
for pair in siblings.windows(2) {
let previous = &pair[0];
let next = &pair[1];
graph.add_edge(
&previous.handle,
&next.handle,
"next_sibling",
Some("next AST sibling span".to_string()),
1,
);
graph.add_edge(
&next.handle,
&previous.handle,
"previous_sibling",
Some("previous AST sibling span".to_string()),
1,
);
}
}
}
/// Builds the AST-span node for a code symbol embedded in a Markdown fence.
///
/// `entry` is the Markdown block that holds the fence and `markdown` is that
/// block's metadata. The node records the embedded symbol's own extents plus
/// the surrounding block and section, and expands to a source read covering the
/// symbol's lines in the Markdown file.
fn traversal_markdown_embedded_symbol_node(
    root: &Path,
    entry: &TraversalAstSpanIndexEntry,
    markdown: &MarkdownSpanMetadata,
    embedded: &MarkdownEmbeddedSymbol,
) -> TraversalNode {
    let section_path =
        (!markdown.section_path.is_empty()).then(|| markdown.section_path.join(" > "));

    // Every property is listed once; `None` values are simply not recorded.
    let mut properties = BTreeMap::new();
    for (key, value) in [
        ("layer", Some("embedded_code".to_string())),
        ("embedded", Some("true".to_string())),
        ("language", Some(embedded.language.clone())),
        ("symbol_kind", Some(embedded.kind.clone())),
        ("node_kind", Some(embedded.node_kind.clone())),
        ("start_byte", Some(embedded.start_byte.to_string())),
        ("end_byte", Some(embedded.end_byte.to_string())),
        ("end_line", Some(embedded.end_line.to_string())),
        ("markdown_block_handle", Some(entry.handle.clone())),
        (
            "markdown_block_kind",
            Some(markdown_ast_block_kind(&entry.kind)),
        ),
        (
            "body_start_byte",
            embedded.body_start_byte.map(|value| value.to_string()),
        ),
        (
            "body_end_byte",
            embedded.body_end_byte.map(|value| value.to_string()),
        ),
        (
            "body_start_line",
            embedded.body_start_line.map(|value| value.to_string()),
        ),
        (
            "body_end_line",
            embedded.body_end_line.map(|value| value.to_string()),
        ),
        ("fence_language", markdown.fence_language.clone()),
        ("section_path", section_path),
        ("section_handle", markdown.section_handle.clone()),
    ] {
        if let Some(value) = value {
            properties.insert(key.to_string(), value);
        }
    }

    // Inclusive line count of the embedded symbol, never fewer than one.
    let covered_lines = embedded
        .end_line
        .saturating_sub(embedded.start_line)
        .saturating_add(1)
        .max(1);
    let expand = source_read_command(root, &entry.file, embedded.start_line, covered_lines);
    let detail = format!(
        "{} {} embedded in Markdown fence",
        embedded.language, embedded.kind
    );
    // Start lines that do not fit into i64 are clamped to i64::MAX.
    let line = i64::try_from(embedded.start_line).unwrap_or(i64::MAX);

    TraversalNode {
        handle: embedded.handle.clone(),
        kind: String::from("ast_span"),
        label: embedded.name.clone(),
        ref_id: Some(embedded.name.clone()),
        path: Some(entry.file.clone()),
        line: Some(line),
        detail: Some(detail),
        properties,
        expand,
    }
}
/// Adds nodes and edges for code symbols embedded in Markdown fences.
///
/// Entries without Markdown metadata are skipped. Each embedded symbol gets its
/// own node, five edges tying it to the enclosing fence block, and, when the
/// block belongs to a section other than itself, two edges tying it to that
/// section. Edges are added in a fixed order per symbol.
fn link_markdown_embedded_code_edges(
    graph: &mut TraversalGraphBuild,
    root: &Path,
    entries: &[TraversalAstSpanIndexEntry],
) {
    let fences = entries
        .iter()
        .filter_map(|entry| entry.markdown.as_ref().map(|markdown| (entry, markdown)));
    for (fence, markdown) in fences {
        // A block that is its own section gets no section edges.
        let section = markdown
            .section_handle
            .as_ref()
            .filter(|handle| **handle != fence.handle);
        for symbol in &markdown.embedded_symbols {
            graph.add_node(traversal_markdown_embedded_symbol_node(
                root, fence, markdown, symbol,
            ));
            let fence_edges = [
                (
                    &fence.handle,
                    &symbol.handle,
                    "contains",
                    "Markdown fence contains embedded AST symbol",
                ),
                (
                    &fence.handle,
                    &symbol.handle,
                    "child",
                    "embedded code symbol",
                ),
                (
                    &fence.handle,
                    &symbol.handle,
                    "contains_embedded_symbol",
                    "Markdown fence contains embedded code symbol",
                ),
                (
                    &symbol.handle,
                    &fence.handle,
                    "parent",
                    "Markdown fence parent span",
                ),
                (
                    &symbol.handle,
                    &fence.handle,
                    "embedded_in_fence",
                    "embedded code symbol belongs to Markdown fence",
                ),
            ];
            for (from, to, label, detail) in fence_edges {
                graph.add_edge(from, to, label, Some(detail.to_string()), 1);
            }
            if let Some(section_handle) = section {
                graph.add_edge(
                    section_handle,
                    &symbol.handle,
                    "contains_embedded_code",
                    Some("Markdown section contains embedded code symbol".to_string()),
                    1,
                );
                graph.add_edge(
                    &symbol.handle,
                    section_handle,
                    "enclosing_section",
                    Some("Markdown enclosing section".to_string()),
                    1,
                );
            }
        }
    }
}
/// Collects the search tokens of a traversal node.
///
/// Tokens come from the label and, when present, from the `ref_id`, `path` and
/// `detail` texts.
fn traversal_node_tokens(node: &TraversalNode) -> BTreeSet<String> {
    let optional_texts = [
        node.ref_id.as_deref(),
        node.path.as_deref(),
        node.detail.as_deref(),
    ];
    let mut tokens = traversal_tokens(&node.label);
    for text in optional_texts.iter().flatten() {
        tokens.extend(traversal_tokens(text));
    }
    tokens
}
/// Extracts the session id from the first `agent_doc_session:` line that
/// carries a non-empty value.
///
/// Lines are trimmed before matching, and so is the value. A matching line with
/// an empty value is skipped and the search continues with later lines.
fn parse_agent_doc_session_id(content: &str) -> Option<String> {
    for line in content.lines() {
        if let Some(rest) = line.trim().strip_prefix("agent_doc_session:") {
            let value = rest.trim();
            if !value.is_empty() {
                return Some(value.to_string());
            }
        }
    }
    None
}
/// Parses a backlog list item of the form `- [..] [#id] text`.
///
/// The trimmed line must start with `- [`. The id is whatever stands between
/// the first `[#` and the next `]` (trimmed, must be non-empty); the text is the
/// trimmed remainder after that `]`. Returns `(id, text)`.
fn parse_backlog_line(line: &str) -> Option<(String, String)> {
    let item = line.trim();
    if !item.starts_with("- [") {
        return None;
    }
    let (_, after_marker) = item.split_once("[#")?;
    let (raw_id, remainder) = after_marker.split_once(']')?;
    let id = raw_id.trim();
    if id.is_empty() {
        None
    } else {
        Some((id.to_string(), remainder.trim().to_string()))
    }
}
/// Parses a queue line of the form `dispatch <value>` or `preset <value>`.
///
/// The line is trimmed first; the keyword must be followed by a space. Returns
/// the trimmed value, trying `dispatch` before `preset`.
fn parse_queue_dispatch_line(line: &str) -> Option<String> {
    let statement = line.trim();
    for keyword in ["dispatch ", "preset "] {
        if let Some(rest) = statement.strip_prefix(keyword) {
            let value = rest.trim();
            if !value.is_empty() {
                return Some(value.to_string());
            }
        }
    }
    None
}
/// Parses a queue line of the form `- do [#id] ...` and returns the id.
///
/// The line is trimmed first; the id is the trimmed text between `- do [#` and
/// the next `]` and must be non-empty.
fn parse_queue_do_line(line: &str) -> Option<String> {
    let after_marker = line.trim().strip_prefix("- do [#")?;
    let (raw_id, _) = after_marker.split_once(']')?;
    match raw_id.trim() {
        "" => None,
        id => Some(id.to_string()),
    }
}
/// Returns the trimmed contents of the backtick-delimited code spans in `input`.
///
/// Only spans that are actually closed are reported: a trailing backtick without
/// a partner is ordinary text, so whatever follows it is not a code span. Spans
/// that are empty after trimming are dropped. Multi-backtick delimiters are not
/// given special treatment.
fn markdown_code_spans(input: &str) -> Vec<String> {
    // Every closed span consumes two backticks; an odd one out opens nothing.
    let closed_spans = input.matches('`').count() / 2;
    input
        .split('`')
        // Odd-numbered pieces are the ones that sit between two backticks.
        .skip(1)
        .step_by(2)
        .take(closed_spans)
        .map(str::trim)
        .filter(|span| !span.is_empty())
        .map(str::to_string)
        .collect()
}
/// Registers `entry_index` under every token of `tokens` in the inverted index.
///
/// Missing token buckets are created on demand; existing buckets are appended to.
fn push_traversal_token_index(
    index: &mut HashMap<String, Vec<usize>>,
    tokens: &BTreeSet<String>,
    entry_index: usize,
) {
    tokens.iter().cloned().for_each(|token| {
        index.entry(token).or_default().push(entry_index);
    });
}
// Lookup tables over the traversal code indexes (symbols, files, routes and
// multiplicities), keyed by search token and by file path.
impl<'a> TraversalCodeLookup<'a> {
    /// Builds the lookup from the four index slices.
    ///
    /// Each slice gets an inverted token index mapping a token to the positions
    /// of the entries carrying it. Files additionally get a path index holding
    /// every file node path that is present.
    fn new(
        symbols: &'a [TraversalSymbolIndexEntry],
        files: &'a [TraversalFileIndexEntry],
        routes: &'a [TraversalRouteIndexEntry],
        multiplicities: &'a [TraversalMultiplicityIndexEntry],
    ) -> Self {
        let mut symbol_index = HashMap::new();
        symbols.iter().enumerate().for_each(|(position, entry)| {
            push_traversal_token_index(&mut symbol_index, &entry.tokens, position);
        });

        let mut file_index = HashMap::new();
        let mut file_path_index = HashMap::new();
        for (position, entry) in files.iter().enumerate() {
            push_traversal_token_index(&mut file_index, &entry.tokens, position);
            if let Some(path) = &entry.node.path {
                file_path_index.insert(path.clone(), path.clone());
            }
        }

        let mut route_index = HashMap::new();
        routes.iter().enumerate().for_each(|(position, entry)| {
            push_traversal_token_index(&mut route_index, &entry.tokens, position);
        });

        let mut multiplicity_index = HashMap::new();
        multiplicities
            .iter()
            .enumerate()
            .for_each(|(position, entry)| {
                push_traversal_token_index(&mut multiplicity_index, &entry.tokens, position);
            });

        Self {
            symbols,
            files,
            routes,
            multiplicities,
            symbol_index,
            file_index,
            route_index,
            multiplicity_index,
            file_path_index,
        }
    }

    /// Returns the indexed file paths mentioned on `line`, sorted and de-duplicated.
    ///
    /// Candidates are the line's backtick code spans followed by its
    /// whitespace-separated words; each is normalised through
    /// `traversal_path_candidates` and kept only when it matches a known file path.
    fn touched_files_for_line(&self, line: &str) -> Vec<String> {
        let code_spans = markdown_code_spans(line);
        let words = line.split_whitespace().map(str::to_string);
        let known_paths: BTreeSet<String> = code_spans
            .into_iter()
            .chain(words)
            .flat_map(|candidate| traversal_path_candidates(&candidate))
            .filter_map(|path| self.file_path_index.get(&path).cloned())
            .collect();
        known_paths.into_iter().collect()
    }
}
/// Turns a raw word or code span into the path spellings worth looking up.
///
/// Surrounding quotes, brackets and sentence punctuation are stripped first; an
/// empty remainder yields no candidates. The stripped text is always the first
/// candidate. When it ends in `:<digits>` (the digits may be absent) and
/// something precedes the colon, the part before the colon is added as a second
/// candidate.
fn traversal_path_candidates(candidate: &str) -> Vec<String> {
    const WRAPPING: &[char] = &[
        '`', '"', '\'', ',', ';', '.', '!', '?', '(', ')', '[', ']', '{', '}',
    ];
    let trimmed = candidate.trim_matches(WRAPPING);
    if trimmed.is_empty() {
        return Vec::new();
    }
    let without_line_suffix = trimmed
        .rsplit_once(':')
        .filter(|(path, line_suffix)| {
            !path.is_empty() && line_suffix.chars().all(|ch| ch.is_ascii_digit())
        })
        .map(|(path, _)| path.to_string());
    let mut candidates = vec![trimmed.to_string()];
    candidates.extend(without_line_suffix);
    candidates
}
/// Parses a markdown line that reports worker outcomes.
///
/// Returns one `ParsedWorkerResult` per target reference found before the first
/// follow-up marker (`follow-up`, `follow up`, `next:`), or an empty vector when
/// the line is a checkbox-style list item (starts with `- [`), carries no status
/// keyword, or names no target.
///
/// Status detection is case-insensitive: `completed`, `code-complete` or `done`
/// yield "completed"; otherwise any occurrence of `blocked` (which also covers
/// "externally blocked") yields "blocked". All results of one line share the
/// same touched files, tests and follow-up ids.
fn parse_worker_result_line(
    line: &str,
    lookup: &TraversalCodeLookup<'_>,
) -> Vec<ParsedWorkerResult> {
    if line.trim_start().starts_with("- [") {
        return Vec::new();
    }
    let lower = line.to_ascii_lowercase();
    let status =
        if lower.contains("completed") || lower.contains("code-complete") || lower.contains("done")
        {
            "completed"
        } else if lower.contains("blocked") {
            "blocked"
        } else {
            return Vec::new();
        };
    // ASCII lower-casing keeps byte offsets intact, so an offset found in `lower`
    // is a valid slice boundary in `line`.
    let result_prefix_end = ["follow-up", "follow up", "next:"]
        .iter()
        .filter_map(|marker| lower.find(marker))
        .min()
        .unwrap_or(line.len());
    let ids = extract_conflict_target_refs(&line[..result_prefix_end]);
    if ids.is_empty() {
        return Vec::new();
    }
    let result_ids = ids.iter().cloned().collect::<BTreeSet<_>>();
    // Follow-ups are every reference on the line that is not itself a result
    // target; the list is the same for each result, so it is built once.
    let follow_up_ids = extract_conflict_target_refs(line)
        .iter()
        .filter(|other| !result_ids.contains(*other))
        .cloned()
        .collect::<Vec<_>>();
    let touched_files = lookup.touched_files_for_line(line);
    let tests = markdown_code_spans(line)
        .into_iter()
        .filter(|span| span.to_ascii_lowercase().contains("test"))
        .collect::<Vec<_>>();
    ids.iter()
        .map(|id| ParsedWorkerResult {
            id: id.clone(),
            status: status.to_string(),
            touched_files: touched_files.clone(),
            tests: tests.clone(),
            follow_up_ids: follow_up_ids.clone(),
        })
        .collect()
}
/// Resolves `path_hint` to a concrete Markdown file, if it names one.
///
/// Relative hints are joined onto `root`. The result is `Some` only when the
/// resolved path has the extension `md` and is an existing regular file.
fn hinted_markdown_file(root: &Path, path_hint: &Path) -> Option<PathBuf> {
    let candidate = if path_hint.is_absolute() {
        path_hint.to_path_buf()
    } else {
        root.join(path_hint)
    };
    let has_md_extension = candidate.extension().is_some_and(|ext| ext == "md");
    (has_md_extension && candidate.is_file()).then_some(candidate)
}
/// Heuristically decides whether Markdown `content` is an agent session document.
///
/// True when the content declares an `agent_doc_session` id or contains one of
/// the session markers: an agent exchange comment, an agent backlog comment, or
/// a `## Backlog` heading.
fn traversal_markdown_content_looks_like_session(content: &str) -> bool {
    const SESSION_MARKERS: [&str; 3] =
        ["<!-- agent:exchange", "<!-- agent:backlog", "## Backlog"];
    if parse_agent_doc_session_id(content).is_some() {
        return true;
    }
    SESSION_MARKERS
        .iter()
        .any(|marker| content.contains(marker))
}
/// Decides whether `path` is a session Markdown document inside the project.
///
/// Relative paths are resolved against `source_root`. The resolved path must lie
/// under `source_root` or `root`, carry the extension `md` or `mdx`, and have
/// readable content that looks like a session document. An unreadable file
/// counts as "not a session".
fn traversal_path_is_session_markdown(root: &Path, source_root: &Path, path: &Path) -> bool {
    let candidate = if path.is_absolute() {
        path.to_path_buf()
    } else {
        source_root.join(path)
    };
    let inside_project = candidate.starts_with(source_root) || candidate.starts_with(root);
    if !inside_project {
        return false;
    }
    let is_markdown = matches!(
        candidate.extension().and_then(|ext| ext.to_str()),
        Some("md" | "mdx")
    );
    if !is_markdown {
        return false;
    }
    match fs::read_to_string(&candidate) {
        Ok(content) => traversal_markdown_content_looks_like_session(&content),
        Err(_) => false,
    }
}
/// Lists the Markdown files that feed the traversal graph.
///
/// When `path_hint` names an existing `.md` file, that file alone is returned.
/// Otherwise `root` is walked with the hidden-file filter and the git ignore,
/// global-ignore and exclude filters enabled; regular `.md` files that are not
/// generated artifacts are collected and returned in sorted order.
///
/// # Errors
///
/// Fails when the directory walk reports an error for any entry.
fn markdown_files_for_traversal(root: &Path, path_hint: &Path) -> Result<Vec<PathBuf>> {
    if let Some(hinted_path) = hinted_markdown_file(root, path_hint) {
        return Ok(vec![hinted_path]);
    }

    let mut builder = ignore::WalkBuilder::new(root);
    builder
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true);

    let mut files = Vec::new();
    for result in builder.build() {
        let entry =
            result.with_context(|| format!("walking markdown files under {}", root.display()))?;
        let path = entry.path();
        let is_regular_file = entry.file_type().is_some_and(|ft| ft.is_file());
        if !is_regular_file || traversal_path_is_generated_artifact(root, root, path) {
            continue;
        }
        if path.extension().and_then(|ext| ext.to_str()) == Some("md") {
            files.push(path.to_path_buf());
        }
    }
    files.sort();
    Ok(files)
}
/// Renders `path` for use inside a watermark part.
///
/// The path is made relative to `root` when it lies beneath it and is left as-is
/// otherwise; backslashes are replaced with forward slashes.
fn traversal_watermark_path(root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    let text = relative.to_string_lossy();
    text.replace('\\', "/")
}
/// Appends a watermark part describing the on-disk state of `path`.
///
/// For a file whose metadata can be read, the part records its length and its
/// modification time as seconds and nanoseconds since the Unix epoch (both zero
/// when the time is unavailable or precedes the epoch). Otherwise the part marks
/// the file as missing.
fn push_traversal_metadata_watermark_part(
    root: &Path,
    path: &Path,
    label: &str,
    parts: &mut Vec<String>,
) {
    let display = traversal_watermark_path(root, path);
    let part = match fs::metadata(path) {
        Err(_) => format!("{label}:{display}:missing"),
        Ok(metadata) => {
            let since_epoch = metadata
                .modified()
                .ok()
                .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok());
            let (secs, nanos) = match since_epoch {
                Some(duration) => (duration.as_secs(), duration.subsec_nanos()),
                None => (0, 0),
            };
            format!(
                "{label}:{display}:len={}:mtime={secs}.{nanos}",
                metadata.len()
            )
        }
    };
    parts.push(part);
}
/// Borrowed, serializable view of one summary row; the rows are hashed into the
/// `summaries_db` watermark part, so only the fields listed here influence it.
#[derive(Serialize)]
struct TraversalSummaryWatermarkRow<'a> {
// Name of the summarized symbol.
symbol_name: &'a str,
// File path stored with the summary.
file_path: &'a str,
// Optional entity list taken from the summary.
entities: &'a Option<Vec<summarize::Entity>>,
// Optional relationship list taken from the summary.
relationships: &'a Option<Vec<summarize::Relationship>>,
// Optional concept labels taken from the summary.
concept_labels: &'a Option<Vec<String>>,
}
/// Appends the watermark part that tracks `.tsift/summaries.db`.
///
/// * Database file absent: records `summaries_db:absent`.
/// * Database readable: records the row count and a hash over the
///   watermark-relevant fields of every summary.
/// * Database present but unreadable: falls back to a file-metadata part
///   labelled `summaries_db_unreadable`.
///
/// # Errors
///
/// Fails only when hashing the summary rows fails.
fn push_traversal_summaries_watermark_part(root: &Path, parts: &mut Vec<String>) -> Result<()> {
    let summaries_db = root.join(".tsift/summaries.db");
    if !summaries_db.exists() {
        parts.push("summaries_db:absent".to_string());
        return Ok(());
    }

    let loaded = summarize::SummaryDb::open_read_only_resilient(&summaries_db)
        .and_then(|summary_db| summary_db.all());
    let Ok(summaries) = loaded else {
        push_traversal_metadata_watermark_part(
            root,
            &summaries_db,
            "summaries_db_unreadable",
            parts,
        );
        return Ok(());
    };

    let rows: Vec<_> = summaries
        .iter()
        .map(|summary| TraversalSummaryWatermarkRow {
            symbol_name: &summary.symbol_name,
            file_path: &summary.file_path,
            entities: &summary.entities,
            relationships: &summary.relationships,
            concept_labels: &summary.concept_labels,
        })
        .collect();
    let semantic_hash = content_hash(&rows)?;
    parts.push(format!(
        "summaries_db:rows={}:semantic_hash={}",
        rows.len(),
        semantic_hash
    ));
    Ok(())
}
/// Test-only shim that forwards to
/// `resolution::relative_path_is_generated_artifact` for a relative path string.
#[cfg(test)]
fn traversal_relative_path_is_generated_artifact(relative: &str) -> bool {
resolution::relative_path_is_generated_artifact(relative)
}
/// Forwards to `resolution::path_is_generated_artifact` to decide whether `path`
/// should be treated as a generated artifact; `root` and `source_root` are
/// passed through unchanged.
fn traversal_path_is_generated_artifact(root: &Path, source_root: &Path, path: &Path) -> bool {
resolution::path_is_generated_artifact(root, source_root, path)
}
/// Forwards to `resolution::index_snapshot_part_is_generated` to decide whether
/// an index snapshot `part` refers to a generated artifact; `root` and
/// `source_root` are passed through unchanged.
fn traversal_index_snapshot_part_is_generated(root: &Path, source_root: &Path, part: &str) -> bool {
resolution::index_snapshot_part_is_generated(root, source_root, part)
}
/// Computes a content hash that changes whenever the inputs of the traversal
/// graph change.
///
/// The hashed parts are, in order: the projection version, scope, path hint and
/// `session_only` flag; the index identity and its non-generated source snapshot
/// parts (skipped only when `session_only` is set and the hint names a Markdown
/// file); the metadata of every traversal Markdown file; and the summaries
/// database part.
///
/// Returns `Ok(None)` when index data is required but no index target can be
/// resolved or its database cannot be opened.
///
/// # Errors
///
/// Fails when reading the index snapshot parts, listing the Markdown files, or
/// hashing fails.
pub(crate) fn traversal_source_watermark(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    session_only: bool,
) -> Result<Option<String>> {
    let mut parts = vec![
        format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
        format!("scope:{}", scope.unwrap_or("root")),
        format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
        format!("session_only:{session_only}"),
    ];

    let needs_index = !session_only || hinted_markdown_file(root, path_hint).is_none();
    if needs_index {
        let Ok(targets) = resolve_search_index_targets(root, path_hint, scope, false) else {
            return Ok(None);
        };
        let Some(target) = targets.into_iter().next() else {
            return Ok(None);
        };
        let Ok(db) = index::IndexDb::open_read_only_resilient(&target.db_path) else {
            return Ok(None);
        };
        parts.extend([
            format!("index_label:{}", target.label),
            format!(
                "index_scope:{}",
                target.scope_name.as_deref().unwrap_or("root")
            ),
            format!(
                "index_source_root:{}",
                traversal_watermark_path(root, &target.source_root)
            ),
        ]);

        // Generated artifacts are excluded so that rebuilding them does not
        // invalidate the watermark.
        let mut snapshot_rows = 0usize;
        for part in db.source_snapshot_parts()? {
            let generated =
                traversal_index_snapshot_part_is_generated(root, &target.source_root, &part);
            if !generated {
                snapshot_rows += 1;
                parts.push(format!("index_snapshot:{part}"));
            }
        }
        parts.push(format!("index_snapshot_rows:{snapshot_rows}"));
    }

    let markdown_files = markdown_files_for_traversal(root, path_hint)?;
    parts.push(format!("markdown_count:{}", markdown_files.len()));
    markdown_files.iter().for_each(|markdown_path| {
        push_traversal_metadata_watermark_part(root, markdown_path, "markdown", &mut parts);
    });

    push_traversal_summaries_watermark_part(root, &mut parts)?;
    let digest = content_hash(&parts)?;
    Ok(Some(digest))
}
/// Ranks symbol entries by how many distinct query tokens point at them.
///
/// `index` maps a token to positions in `entries`. Each returned pair is
/// `(score, entry)`, ordered by score descending, then node label, then handle.
fn ranked_symbol_matches<'a>(
    query_tokens: &BTreeSet<String>,
    entries: &'a [TraversalSymbolIndexEntry],
    index: &HashMap<String, Vec<usize>>,
) -> Vec<(usize, &'a TraversalSymbolIndexEntry)> {
    // Tally one hit per (token, entry position) pair.
    let mut hits_by_position: BTreeMap<usize, usize> = BTreeMap::new();
    for &position in query_tokens
        .iter()
        .filter_map(|token| index.get(token))
        .flatten()
    {
        *hits_by_position.entry(position).or_insert(0) += 1;
    }

    let mut ranked: Vec<(usize, &'a TraversalSymbolIndexEntry)> = hits_by_position
        .into_iter()
        .map(|(position, hits)| (hits, &entries[position]))
        .collect();
    ranked.sort_by(|a, b| {
        b.0.cmp(&a.0)
            .then_with(|| a.1.node.label.cmp(&b.1.node.label))
            .then_with(|| a.1.handle.cmp(&b.1.handle))
    });
    ranked
}
/// Ranks file entries by how many distinct query tokens point at them.
///
/// `index` maps a token to positions in `entries`. Each returned pair is
/// `(score, entry)`, ordered by score descending, then node label, then handle.
fn ranked_file_matches<'a>(
    query_tokens: &BTreeSet<String>,
    entries: &'a [TraversalFileIndexEntry],
    index: &HashMap<String, Vec<usize>>,
) -> Vec<(usize, &'a TraversalFileIndexEntry)> {
    // Tally one hit per (token, entry position) pair.
    let mut hits_by_position: BTreeMap<usize, usize> = BTreeMap::new();
    for &position in query_tokens
        .iter()
        .filter_map(|token| index.get(token))
        .flatten()
    {
        *hits_by_position.entry(position).or_insert(0) += 1;
    }

    let mut ranked: Vec<(usize, &'a TraversalFileIndexEntry)> = hits_by_position
        .into_iter()
        .map(|(position, hits)| (hits, &entries[position]))
        .collect();
    ranked.sort_by(|a, b| {
        b.0.cmp(&a.0)
            .then_with(|| a.1.node.label.cmp(&b.1.node.label))
            .then_with(|| a.1.handle.cmp(&b.1.handle))
    });
    ranked
}
/// Ranks route entries by how many distinct query tokens point at them.
///
/// `index` maps a token to positions in `entries`. Each returned pair is
/// `(score, entry)`, ordered by score descending, then node label, then handle.
fn ranked_route_matches<'a>(
    query_tokens: &BTreeSet<String>,
    entries: &'a [TraversalRouteIndexEntry],
    index: &HashMap<String, Vec<usize>>,
) -> Vec<(usize, &'a TraversalRouteIndexEntry)> {
    // Tally one hit per (token, entry position) pair.
    let mut hits_by_position: BTreeMap<usize, usize> = BTreeMap::new();
    for &position in query_tokens
        .iter()
        .filter_map(|token| index.get(token))
        .flatten()
    {
        *hits_by_position.entry(position).or_insert(0) += 1;
    }

    let mut ranked: Vec<(usize, &'a TraversalRouteIndexEntry)> = hits_by_position
        .into_iter()
        .map(|(position, hits)| (hits, &entries[position]))
        .collect();
    ranked.sort_by(|a, b| {
        b.0.cmp(&a.0)
            .then_with(|| a.1.node.label.cmp(&b.1.node.label))
            .then_with(|| a.1.handle.cmp(&b.1.handle))
    });
    ranked
}
/// Ranks multiplicity entries by how many distinct query tokens point at them.
///
/// `index` maps a token to positions in `entries`. Each returned pair is
/// `(score, entry)`, ordered by score descending, then node kind, then node
/// label, then handle.
fn ranked_multiplicity_matches<'a>(
    query_tokens: &BTreeSet<String>,
    entries: &'a [TraversalMultiplicityIndexEntry],
    index: &HashMap<String, Vec<usize>>,
) -> Vec<(usize, &'a TraversalMultiplicityIndexEntry)> {
    // Tally one hit per (token, entry position) pair.
    let mut hits_by_position: BTreeMap<usize, usize> = BTreeMap::new();
    for &position in query_tokens
        .iter()
        .filter_map(|token| index.get(token))
        .flatten()
    {
        *hits_by_position.entry(position).or_insert(0) += 1;
    }

    let mut ranked: Vec<(usize, &'a TraversalMultiplicityIndexEntry)> = hits_by_position
        .into_iter()
        .map(|(position, hits)| (hits, &entries[position]))
        .collect();
    ranked.sort_by(|a, b| {
        b.0.cmp(&a.0)
            .then_with(|| a.1.node.kind.cmp(&b.1.node.kind))
            .then_with(|| a.1.node.label.cmp(&b.1.node.label))
            .then_with(|| a.1.handle.cmp(&b.1.handle))
    });
    ranked
}
/// Adds `mentions` edges from `backlog` to the code nodes whose tokens overlap
/// the tokens of `text` (plus the tokens of the node's `ref_id`, when set).
///
/// Up to `limit` symbol matches are linked. File, route and multiplicity
/// matches are each capped at `limit.min(5)`. The edge weight is the number of
/// matching tokens. Nothing is added when the token set is empty.
fn link_backlog_to_code_nodes(
    graph: &mut TraversalGraphBuild,
    backlog: &TraversalNode,
    text: &str,
    lookup: &TraversalCodeLookup<'_>,
    limit: usize,
) {
    const RELATION: &str = "mentions";

    let mut query_tokens = traversal_tokens(text);
    if let Some(ref_id) = backlog.ref_id.as_ref() {
        query_tokens.extend(traversal_tokens(ref_id));
    }
    if query_tokens.is_empty() {
        return;
    }

    // Non-symbol kinds share a tighter cap than symbols.
    let secondary_cap = limit.min(5);

    let symbol_hits = ranked_symbol_matches(&query_tokens, lookup.symbols, &lookup.symbol_index);
    for (score, hit) in symbol_hits.into_iter().take(limit) {
        graph.add_edge(
            &backlog.handle,
            &hit.handle,
            RELATION,
            Some("backlog text matches symbol tokens".to_string()),
            score,
        );
    }

    let file_hits = ranked_file_matches(&query_tokens, lookup.files, &lookup.file_index);
    for (score, hit) in file_hits.into_iter().take(secondary_cap) {
        graph.add_edge(
            &backlog.handle,
            &hit.handle,
            RELATION,
            Some("backlog text matches file tokens".to_string()),
            score,
        );
    }

    let route_hits = ranked_route_matches(&query_tokens, lookup.routes, &lookup.route_index);
    for (score, hit) in route_hits.into_iter().take(secondary_cap) {
        graph.add_edge(
            &backlog.handle,
            &hit.handle,
            RELATION,
            Some("backlog text matches route tokens".to_string()),
            score,
        );
    }

    let multiplicity_hits = ranked_multiplicity_matches(
        &query_tokens,
        lookup.multiplicities,
        &lookup.multiplicity_index,
    );
    for (score, hit) in multiplicity_hits.into_iter().take(secondary_cap) {
        graph.add_edge(
            &backlog.handle,
            &hit.handle,
            RELATION,
            Some("backlog text matches multiplicity tokens".to_string()),
            score,
        );
    }
}
/// Adds session, backlog, job-packet and worker-result nodes (and their edges)
/// to `graph` for every markdown file selected by
/// `markdown_files_for_traversal(root, path_hint)`.
///
/// Each file is processed in three passes over its lines:
/// 1. backlog lines become backlog nodes contained by the session and linked to
///    matching code nodes;
/// 2. lines inside an `<!-- agent:queue` ... `<!-- /agent:queue` region become
///    job-packet nodes (dispatch presets and `do #id` items);
/// 3. worker result lines become result nodes linked to the session, to the
///    backlog item and queued job with the same id, and to matching code nodes.
///
/// Files that cannot be read only add a warning; files whose content does not
/// look like a session are skipped silently.
///
/// # Errors
/// Only a failure to enumerate the markdown files is propagated.
fn load_agent_doc_traversal_nodes(
root: &Path,
path_hint: &Path,
graph: &mut TraversalGraphBuild,
lookup: &TraversalCodeLookup<'_>,
) -> Result<()> {
for markdown_path in markdown_files_for_traversal(root, path_hint)? {
// An unreadable file is recorded as a warning rather than aborting the load.
let content = match fs::read_to_string(&markdown_path) {
Ok(content) => content,
Err(err) => {
graph.warnings.push(format!(
"session artifact unavailable: {}: {err}",
markdown_path.display()
));
continue;
}
};
if !traversal_markdown_content_looks_like_session(&content) {
continue;
}
let session_id = parse_agent_doc_session_id(&content);
let session = traversal_session_node(root, &markdown_path, session_id.as_deref());
graph.add_node(session.clone());
let lines = content.lines().collect::<Vec<_>>();
// Pass 1: backlog items. Remembered by id so later passes can refer to them.
let mut backlog_by_id = BTreeMap::<String, TraversalNode>::new();
for (idx, line) in lines.iter().enumerate() {
let Some((id, text)) = parse_backlog_line(line) else {
continue;
};
// Line numbers passed to node constructors are 1-based.
let backlog = traversal_backlog_node(root, &markdown_path, &id, &text, idx as i64 + 1);
graph.add_node(backlog.clone());
backlog_by_id.insert(id.clone(), backlog.clone());
graph.add_edge(
&session.handle,
&backlog.handle,
"contains",
Some("session backlog item".to_string()),
1,
);
link_backlog_to_code_nodes(graph, &backlog, &text, lookup, 8);
}
// Pass 2: queue entries. Only lines between the queue open/close markers count.
let mut in_queue = false;
let mut job_by_id = BTreeMap::<String, TraversalNode>::new();
for (idx, line) in lines.iter().enumerate() {
let trimmed = line.trim();
if trimmed.starts_with("<!-- agent:queue") {
in_queue = true;
continue;
}
if trimmed.starts_with("<!-- /agent:queue") {
in_queue = false;
continue;
}
if !in_queue {
continue;
}
if let Some(dispatch) = parse_queue_dispatch_line(line) {
// The ref id is the dispatch name without a leading '#', if it has one.
let dispatch_ref = dispatch.strip_prefix('#').unwrap_or(dispatch.as_str());
let node = traversal_job_packet_node(
root,
&markdown_path,
&format!("dispatch {dispatch}"),
Some(dispatch_ref),
"agent-doc dispatch preset",
idx as i64 + 1,
);
graph.add_node(node.clone());
graph.add_edge(
&session.handle,
&node.handle,
"contains",
Some("session queued dispatch".to_string()),
1,
);
continue;
}
if let Some(id) = parse_queue_do_line(line) {
// Reuse the backlog item's detail when the id is known; otherwise use a
// generic description.
let detail = backlog_by_id
.get(&id)
.and_then(|node| node.detail.clone())
.unwrap_or_else(|| "queued backlog item".to_string());
let node = traversal_job_packet_node(
root,
&markdown_path,
&format!("do #{id}"),
Some(&id),
&detail,
idx as i64 + 1,
);
graph.add_node(node.clone());
graph.add_edge(
&session.handle,
&node.handle,
"contains",
Some("session queued job packet".to_string()),
1,
);
if let Some(backlog) = backlog_by_id.get(&id) {
graph.add_edge(
&node.handle,
&backlog.handle,
"targets",
Some("queued backlog item".to_string()),
1,
);
}
// A later `do` line with the same id replaces the earlier entry here.
job_by_id.insert(id, node);
}
}
// Pass 3: worker results. The (id, status, line) triple guards against adding
// the same result twice for one line.
let mut seen_results = BTreeSet::<(String, String, i64)>::new();
for (idx, line) in lines.iter().enumerate() {
for parsed in parse_worker_result_line(line, lookup) {
let line_no = idx as i64 + 1;
if !seen_results.insert((parsed.id.clone(), parsed.status.clone(), line_no)) {
continue;
}
let result =
traversal_worker_result_node(root, &markdown_path, &parsed, line, line_no);
graph.add_node(result.clone());
graph.add_edge(
&session.handle,
&result.handle,
"contains",
Some("session worker result".to_string()),
1,
);
if let Some(backlog) = backlog_by_id.get(&parsed.id) {
graph.add_edge(
&backlog.handle,
&result.handle,
"has_result",
Some(format!("worker result {}", parsed.status)),
1,
);
}
if let Some(job) = job_by_id.get(&parsed.id) {
graph.add_edge(
&job.handle,
&result.handle,
"has_result",
Some(format!("queued worker result {}", parsed.status)),
1,
);
}
// Touched files are appended to the line text so they take part in the
// token match against code nodes.
let mut result_text = line.to_string();
if !parsed.touched_files.is_empty() {
result_text.push(' ');
result_text.push_str(&parsed.touched_files.join(" "));
}
link_backlog_to_code_nodes(graph, &result, &result_text, lookup, 8);
}
}
}
Ok(())
}
/// Outcome of checking (and possibly refreshing) the code index before a
/// packet is built.
#[derive(Debug, Clone)]
struct AgentDocIndexGate {
// Index database to read from; `None` means callers fall back to raw source
// file nodes.
db_path: Option<PathBuf>,
// Source root associated with the resolved index target, or a fallback root
// when no target was resolved.
source_root: PathBuf,
// Human-readable notes about refreshes or fallbacks; empty when the index
// was already fresh.
diagnostics: Vec<String>,
}
/// Key for the in-process index gate cache: one entry per distinct combination
/// of the four arguments passed to gate preparation.
#[derive(Clone, Hash, PartialEq, Eq)]
struct AgentDocIndexGateCacheKey {
// Project root the gate was prepared for.
root: PathBuf,
// Path hint supplied by the caller.
path_hint: PathBuf,
// Optional scope name supplied by the caller.
scope: Option<String>,
// Label of the packet being prepared; part of the key, so different labels
// do not share a cached gate.
packet_label: String,
}
/// Returns the process-wide index gate cache, creating it empty on first use.
fn agent_doc_index_gate_cache() -> &'static std::sync::Mutex<
    std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>,
> {
    use std::collections::HashMap;
    use std::sync::{Mutex, OnceLock};

    type GateCache = Mutex<HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>>;

    static CACHE: OnceLock<GateCache> = OnceLock::new();
    CACHE.get_or_init(GateCache::default)
}
/// Returns the index gate for the given arguments, reusing a cached gate when
/// one exists for the same root, path hint, scope and packet label.
///
/// The second tuple element is a short note saying whether the gate came from
/// the cache or from a fresh preparation. A lock that cannot be acquired is
/// treated as a cache miss on lookup and skips the store afterwards. The lock
/// is not held while the gate is being prepared.
fn prepare_agent_doc_index_gate_cached(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    packet_label: &str,
) -> (AgentDocIndexGate, String) {
    let key = AgentDocIndexGateCacheKey {
        root: root.to_path_buf(),
        path_hint: path_hint.to_path_buf(),
        scope: scope.map(str::to_string),
        packet_label: packet_label.to_string(),
    };

    let cached_gate = agent_doc_index_gate_cache()
        .lock()
        .ok()
        .and_then(|cache| cache.get(&key).cloned());
    if let Some(hit) = cached_gate {
        let note = "reused from in-process index gate cache by root/path_hint/scope key";
        return (hit, note.to_string());
    }

    let fresh_gate = prepare_agent_doc_index_gate(root, path_hint, scope, packet_label);
    if let Ok(mut cache) = agent_doc_index_gate_cache().lock() {
        cache.insert(key, fresh_gate.clone());
    }
    let note = "fresh inspection/refresh — cache miss on this preparation key";
    (fresh_gate, note.to_string())
}
/// Translates an index state into the reason it needs rebuilding.
///
/// A fresh index needs no rebuild and yields `None`.
fn index_reason_for_state(state: SearchIndexState) -> Option<RebuildSearchReason> {
    let reason = match state {
        SearchIndexState::Fresh => return None,
        SearchIndexState::Missing => RebuildSearchReason::Missing,
        SearchIndexState::Stale { stale_files } => RebuildSearchReason::Stale { stale_files },
    };
    Some(reason)
}
/// Renders the rebuild detail text for `target` and `reason` by wrapping them
/// in a `RebuildSearchTarget` and formatting that.
fn index_reason_detail(target: &SearchIndexTarget, reason: RebuildSearchReason) -> String {
    let rebuild_target = RebuildSearchTarget {
        label: target.label.clone(),
        reason,
        reindex_cmd: target.reindex_cmd.clone(),
    };
    rebuild_search_target_detail(&rebuild_target)
}
/// Builds the diagnostic emitted after a successful index refresh.
///
/// The changed-file count is the sum of new, modified and deleted files in
/// `summary`; "file" is pluralised unless that count is exactly one.
fn index_refresh_diagnostic(
    target: &SearchIndexTarget,
    reason: RebuildSearchReason,
    summary: &index::IndexSummary,
    packet_label: &str,
) -> String {
    let changed = summary.new + summary.modified + summary.deleted;
    let detail = index_reason_detail(target, reason);
    let plural = if changed == 1 { "" } else { "s" };
    format!("index refreshed: {detail}; updated {changed} changed file{plural} before {packet_label}")
}
/// Builds the diagnostic emitted when an index refresh fails and the caller
/// falls back to raw source file nodes. The error is rendered in its alternate
/// (`{:#}`) form.
fn index_refresh_fallback_diagnostic(
    target: &SearchIndexTarget,
    reason: RebuildSearchReason,
    err: &anyhow::Error,
    packet_label: &str,
) -> String {
    let detail = index_reason_detail(target, reason);
    format!(
        "{detail}; could not refresh before {packet_label}: {err:#}; falling back to raw source file nodes"
    )
}
/// Picks a source root to use when no index target can be resolved.
///
/// Candidates are tried in this order and the first hit wins:
/// 1. the submodule named by `scope`;
/// 2. the Cargo package named by `scope`;
/// 3. the submodule inferred from `path_hint`;
/// 4. the Cargo package inferred from `path_hint`;
/// 5. the submodule inferred from the agent-doc task at `path_hint`;
/// 6. `root` itself.
///
/// Lookup errors are treated the same as "not found".
fn graph_fallback_source_root(root: &Path, path_hint: &Path, scope: Option<&str>) -> PathBuf {
    if let Some(scope_name) = scope {
        if let Ok(Some(submodule)) = config::Config::find_submodule(root, scope_name) {
            return submodule.source_root;
        }
        if let Ok(Some(package)) = multiplicity::find_cargo_package(root, scope_name) {
            return package.package_root;
        }
    }

    if let Ok(Some(submodule)) = config::Config::infer_submodule_from_path(root, path_hint) {
        return submodule.source_root;
    }
    if let Ok(Some(package)) = multiplicity::infer_cargo_package_from_path(root, path_hint) {
        return package.package_root;
    }
    match infer_agent_doc_task_submodule(root, path_hint) {
        Ok(Some(submodule)) => submodule.source_root,
        _ => root.to_path_buf(),
    }
}
fn prepare_agent_doc_index_gate(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
packet_label: &str,
) -> AgentDocIndexGate {
let fallback_source_root = graph_fallback_source_root(root, path_hint, scope);
let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
Ok(targets) => targets,
Err(err) => {
return AgentDocIndexGate {
db_path: None,
source_root: fallback_source_root,
diagnostics: vec![format!(
"code index unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
)],
};
}
};
let Some(target) = targets.into_iter().next() else {
return AgentDocIndexGate {
db_path: None,
source_root: fallback_source_root,
diagnostics: vec![format!(
"code index unavailable before {packet_label}: no index target resolved; falling back to raw source file nodes"
)],
};
};
let state = match inspect_search_index(&target) {
Ok(state) => state,
Err(err) => {
return AgentDocIndexGate {
db_path: None,
source_root: target.source_root,
diagnostics: vec![format!(
"code index freshness unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
)],
};
}
};
let Some(reason) = index_reason_for_state(state) else {
return AgentDocIndexGate {
db_path: Some(target.db_path),
source_root: target.source_root,
diagnostics: Vec::new(),
};
};
match apply_search_index_update(root, &target) {
Ok(summary) => {
// #gdbgatecold: the index was just rewritten, so any cached
// pre-refresh inspection result for this scope (held by the
// active lazily-backed `InspectScopeGuard`) is stale. Invalidate
// the scope epoch so the next `inspect_read_only` re-reads the
// fresh index.
index::inspect_scope_invalidate_all();
let diagnostics = vec![index_refresh_diagnostic(
&target,
reason,
&summary,
packet_label,
)];
AgentDocIndexGate {
db_path: Some(target.db_path),
source_root: target.source_root,
diagnostics,
}
}
Err(err) => {
let diagnostics = vec![index_refresh_fallback_diagnostic(
&target,
reason,
&err,
packet_label,
)];
AgentDocIndexGate {
db_path: None,
source_root: target.source_root,
diagnostics,
}
}
}
}
/// Adds one raw source file node per file found under `source_root`, in path
/// order, and records a matching entry in `file_entries`.
///
/// # Errors
/// Propagates a failure to walk `source_root`.
fn add_raw_source_file_nodes(
    root: &Path,
    source_root: &Path,
    graph: &mut TraversalGraphBuild,
    file_entries: &mut Vec<TraversalFileIndexEntry>,
) -> Result<()> {
    let mut walked = walk::walk_files(source_root)?;
    walked.sort_by(|a, b| a.path.cmp(&b.path));

    for walked_file in walked {
        let display_path = walked_file.path.to_string_lossy();
        let node = traversal_raw_source_file_node(root, display_path.as_ref());
        let handle = node.handle.clone();
        let tokens = traversal_node_tokens(&node);
        graph.add_node(node.clone());
        file_entries.push(TraversalFileIndexEntry {
            handle,
            tokens,
            node,
        });
    }
    Ok(())
}
/// Reports whether `path` is `scope_root` itself or lies beneath it.
///
/// Both arguments are `/`-separated strings. An empty `scope_root` contains
/// every path. A path that merely shares a textual prefix with the scope
/// (`src2/x` against `src`) is not inside it.
fn relative_path_inside_scope(path: &str, scope_root: &str) -> bool {
    match path.strip_prefix(scope_root) {
        // The remainder must be empty (same path) or begin a new component.
        Some(rest) => scope_root.is_empty() || rest.is_empty() || rest.starts_with('/'),
        None => false,
    }
}
/// Resolves the on-disk location of a symbol's recorded file path.
///
/// Absolute paths are returned unchanged. A relative path is joined to
/// `source_root` when that location exists, and to `root` otherwise.
fn traversal_symbol_source_path(root: &Path, source_root: &Path, file: &str) -> PathBuf {
    let recorded = Path::new(file);
    if recorded.is_absolute() {
        return recorded.to_path_buf();
    }
    Some(source_root.join(recorded))
        .filter(|candidate| candidate.exists())
        .unwrap_or_else(|| root.join(recorded))
}
/// Extracts the first path segment named by a Rust import line.
///
/// Recognised forms, after trimming the line:
/// * `use <path>` and `extern crate <name>`;
/// * the same with a visibility qualifier in front: `pub`, `pub(crate)`,
///   `pub(super)`, `pub(in some::path)`;
/// * paths with a leading `::` (`use ::serde::Serialize;`), where the segment
///   after `::` is returned.
///
/// The keyword must be followed by whitespace, so identifiers such as `user`
/// or `use_it` are not mistaken for imports. The returned segment ends at the
/// first `:`, `;`, space or tab.
///
/// Returns `None` when the line is not an import or the segment is empty.
/// Every line accepted by the previous single-space-only matcher still yields
/// the same result.
fn cargo_import_alias_from_line(line: &str) -> Option<String> {
    let statement = line.trim();

    // Drop an optional visibility qualifier: bare `pub` or `pub(<restriction>)`.
    let statement = match statement.strip_prefix("pub") {
        Some(after_pub) => match after_pub.strip_prefix('(') {
            Some(restricted) => restricted
                .split_once(')')
                .map_or(statement, |(_, tail)| tail.trim_start()),
            None if after_pub.starts_with(char::is_whitespace) => after_pub.trim_start(),
            None => statement,
        },
        None => statement,
    };

    // Require whitespace after the keyword so `user::x` / `use_it()` do not match.
    let rest = statement
        .strip_prefix("use")
        .or_else(|| statement.strip_prefix("extern crate"))
        .filter(|rest| rest.starts_with(char::is_whitespace))?
        .trim_start();

    // A leading `::` is skipped so the segment after it is returned.
    let rest = rest.strip_prefix("::").unwrap_or(rest);

    let alias = rest
        .split([':', ';', ' ', '\t'])
        .next()
        .unwrap_or_default()
        .trim();
    (!alias.is_empty()).then(|| alias.to_string())
}
fn cargo_import_aliases(package: &multiplicity::CargoPackageInfo) -> Result<BTreeSet<String>> {
let mut aliases = BTreeSet::new();
for entry in walk::walk_files(&package.package_root)? {
if entry.path.extension().and_then(|ext| ext.to_str()) != Some("rs") {
continue;
}
let content = fs::read_to_string(&entry.path)
.with_context(|| format!("reading Rust source {}", entry.path.display()))?;
aliases.extend(content.lines().filter_map(cargo_import_alias_from_line));
}
Ok(aliases)
}
/// Adds Cargo workspace and package nodes discovered under `source_root`, plus
/// the edges that tie them to each other and to already-known file nodes.
///
/// Edges added per package:
/// * `contains_package` from its workspace, when the workspace is known;
/// * `owns_file` to every file in `file_handle_by_path` that lies inside the
///   package root;
/// * `declares_dependency` to each declared dependency that resolves to exactly
///   one package;
/// * `uses_crate` and `imports` to each other package referenced by a
///   `use` / `extern crate` line, again only when the name is unambiguous.
///
/// Every workspace and package node is also appended to
/// `multiplicity_entries`.
///
/// # Errors
/// Propagates failures from Cargo inventory discovery and from scanning a
/// package's Rust sources for import aliases.
fn load_multiplicity_traversal_nodes(
    root: &Path,
    source_root: &Path,
    graph: &mut TraversalGraphBuild,
    file_handle_by_path: &HashMap<String, String>,
    multiplicity_entries: &mut Vec<TraversalMultiplicityIndexEntry>,
) -> Result<()> {
    let inventory = multiplicity::discover_cargo_inventory(source_root)?;

    let mut workspace_handle_by_root = BTreeMap::<String, String>::new();
    for workspace in &inventory.workspaces {
        let node = traversal_cargo_workspace_node(root, workspace);
        workspace_handle_by_root.insert(workspace.relative_root.clone(), node.handle.clone());
        multiplicity_entries.push(TraversalMultiplicityIndexEntry {
            handle: node.handle.clone(),
            tokens: traversal_node_tokens(&node),
            node: node.clone(),
        });
        graph.add_node(node);
    }

    let mut package_handle_by_name = BTreeMap::<String, Vec<String>>::new();
    let mut package_nodes = Vec::new();
    for package in &inventory.packages {
        let node = traversal_cargo_package_node(root, package);
        // Register the package under both spellings of its name. When the two
        // spellings are identical the handle must be recorded only once;
        // otherwise the `handles.len() == 1` checks below would treat the
        // package as ambiguous and drop every edge that targets it.
        for key in [&package.name, &package.normalized_name] {
            let handles = package_handle_by_name.entry(key.clone()).or_default();
            if !handles.contains(&node.handle) {
                handles.push(node.handle.clone());
            }
        }
        multiplicity_entries.push(TraversalMultiplicityIndexEntry {
            handle: node.handle.clone(),
            tokens: traversal_node_tokens(&node),
            node: node.clone(),
        });
        graph.add_node(node.clone());
        package_nodes.push((package, node));
    }

    for (package, node) in &package_nodes {
        if let Some(workspace_handle) =
            workspace_handle_by_root.get(&package.relative_workspace_root)
        {
            graph.add_edge(
                workspace_handle,
                &node.handle,
                "contains_package",
                Some("Cargo workspace member package".to_string()),
                1,
            );
        }

        // File paths in the map are compared against a `/`-separated,
        // root-relative package path.
        let package_root = relativize_pathbuf(&package.package_root, root)
            .to_string_lossy()
            .replace('\\', "/");
        for (file, handle) in file_handle_by_path {
            if relative_path_inside_scope(file, &package_root) {
                graph.add_edge(
                    &node.handle,
                    handle,
                    "owns_file",
                    Some("Cargo package owns source file".to_string()),
                    1,
                );
            }
        }

        // Only link a dependency when its name maps to exactly one package.
        for dependency in &package.dependencies {
            if let Some(handles) = package_handle_by_name.get(&dependency.name)
                && handles.len() == 1
            {
                graph.add_edge(
                    &node.handle,
                    &handles[0],
                    "declares_dependency",
                    Some(format!("{} Cargo dependency", dependency.kind)),
                    1,
                );
            }
        }

        // Source-level references: skip ambiguous names and self-references.
        for alias in cargo_import_aliases(package)? {
            if let Some(handles) = package_handle_by_name.get(&alias)
                && handles.len() == 1
                && handles[0] != node.handle
            {
                graph.add_edge(
                    &node.handle,
                    &handles[0],
                    "uses_crate",
                    Some("Rust use/extern crate reference".to_string()),
                    1,
                );
                graph.add_edge(
                    &node.handle,
                    &handles[0],
                    "imports",
                    Some("Rust use/extern crate import".to_string()),
                    1,
                );
            }
        }
    }
    Ok(())
}
/// Builds the in-memory traversal graph from the code index (or raw source
/// files) and from agent-doc session markdown.
///
/// Unless this is a session-only request with a hinted markdown file, the code
/// side is loaded first: the index gate decides whether the index database can
/// be used; if so, files, symbols, AST spans, stored call edges and routes are
/// projected from it, otherwise raw source file nodes are added. Cargo
/// workspace/package nodes are added in both cases. Session nodes are loaded
/// last so they can link to whatever code nodes exist.
///
/// Paths classified as generated artifacts are skipped throughout. Stored call
/// edges are omitted when `path_hint` resolves to a markdown file.
///
/// # Errors
/// Propagates failures from opening or reading the index database, loading the
/// raw source fallback, loading Cargo nodes, and loading session nodes.
fn build_traversal_graph_source_with_options(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
session_only: bool,
) -> Result<TraversalGraphBuild> {
let mut graph = TraversalGraphBuild::default();
let mut symbol_entries = Vec::new();
let mut file_entries = Vec::new();
let mut route_entries = Vec::new();
let mut multiplicity_entries = Vec::new();
// Relative file path -> file node handle, used to attach symbols, routes and
// Cargo packages to their files.
let mut file_handle_by_path = HashMap::<String, String>::new();
// True when the hint points at a markdown file; call edges are skipped then.
let bounded_session_projection = hinted_markdown_file(root, path_hint).is_some();
if !session_only || hinted_markdown_file(root, path_hint).is_none() {
let (gate, _cache_detail) =
prepare_agent_doc_index_gate_cached(root, path_hint, scope, "graph traversal packet");
graph.warnings.extend(gate.diagnostics);
let gate_source_root = gate.source_root.clone();
match gate.db_path {
// Indexed path: the gate named a database and it exists on disk.
Some(db_path) if db_path.exists() => {
let db = index::IndexDb::open_read_only_resilient(&db_path)?;
let file_paths = db.file_paths()?;
for file in file_paths {
if traversal_path_is_generated_artifact(
root,
&gate_source_root,
Path::new(&file),
) {
continue;
}
let node = traversal_file_node(root, &file);
let entry = TraversalFileIndexEntry {
handle: node.handle.clone(),
tokens: traversal_node_tokens(&node),
node: node.clone(),
};
if let Some(path) = entry.node.path.as_ref() {
file_handle_by_path.insert(path.clone(), entry.handle.clone());
}
graph.add_node(node);
file_entries.push(entry);
}
let symbols = db.all_symbols()?;
// Lookup tables: exact "file:line:name" keys, plus first-seen-by-name
// maps used to resolve callees and route handlers by name alone.
let mut symbol_by_file_name_line = HashMap::new();
let mut span_by_file_name_line = HashMap::new();
let mut first_symbol_by_name = BTreeMap::<String, String>::new();
let mut first_span_by_name = BTreeMap::<String, String>::new();
let mut ast_entries = Vec::<TraversalAstSpanIndexEntry>::new();
// Per-file source bytes, read at most once; `None` records a failed read.
let mut source_by_file = HashMap::<String, Option<Vec<u8>>>::new();
for symbol in symbols.iter().filter(|symbol| {
!traversal_path_is_generated_artifact(
root,
&gate_source_root,
Path::new(&symbol.file),
)
}) {
let node = traversal_symbol_node(root, symbol);
let file = relativize(&symbol.file, root);
symbol_by_file_name_line.insert(
format!("{file}:{}:{}", symbol.line, symbol.name),
node.handle.clone(),
);
first_symbol_by_name
.entry(symbol.name.clone())
.or_insert_with(|| node.handle.clone());
let entry = TraversalSymbolIndexEntry {
handle: node.handle.clone(),
tokens: traversal_node_tokens(&node),
node: node.clone(),
};
graph.add_node(node.clone());
if let Some(file_handle) = file_handle_by_path.get(&file) {
graph.add_edge(
file_handle,
&node.handle,
"defines",
Some("file defines symbol".to_string()),
1,
);
}
if !source_by_file.contains_key(&symbol.file) {
let source_path =
traversal_symbol_source_path(root, &gate_source_root, &symbol.file);
source_by_file.insert(symbol.file.clone(), fs::read(source_path).ok());
}
// An AST span is added only when the file's source could be read and
// a span node could be built for the symbol.
if let Some(Some(source)) = source_by_file.get(&symbol.file)
&& let Some((ast_node, mut ast_entry)) =
traversal_ast_span_node(root, symbol, source, &symbols)
{
ast_entry.symbol_handle = node.handle.clone();
ast_entry.file_handle = file_handle_by_path.get(&file).cloned();
span_by_file_name_line.insert(
format!("{file}:{}:{}", symbol.line, symbol.name),
ast_node.handle.clone(),
);
first_span_by_name
.entry(symbol.name.clone())
.or_insert_with(|| ast_node.handle.clone());
graph.add_node(ast_node.clone());
graph.add_edge(
&node.handle,
&ast_node.handle,
"has_ast_span",
Some("symbol projects to indexed AST span".to_string()),
1,
);
graph.add_edge(
&ast_node.handle,
&node.handle,
"represents_symbol",
Some("AST span represents indexed symbol".to_string()),
1,
);
ast_entries.push(ast_entry);
}
symbol_entries.push(entry);
}
link_ast_navigation_edges(&mut graph, &ast_entries);
link_markdown_embedded_code_edges(&mut graph, root, &ast_entries);
// Call edges are projected only when the hint is not a markdown file.
if !bounded_session_projection {
for edge in db.all_stored_edges()? {
if traversal_path_is_generated_artifact(
root,
&gate_source_root,
Path::new(&edge.caller_file),
) {
continue;
}
let caller_file = relativize(&edge.caller_file, root);
let caller_key =
format!("{caller_file}:{}:{}", edge.caller_line, edge.caller_name);
// Edges whose caller is not a known symbol are dropped.
let Some(caller_handle) =
symbol_by_file_name_line.get(&caller_key).cloned()
else {
continue;
};
// The callee resolves to the first symbol with that name, or to a
// placeholder node when no such symbol is indexed.
let callee_handle = if let Some(handle) =
first_symbol_by_name.get(&edge.callee_name)
{
handle.clone()
} else {
let node = traversal_unresolved_symbol_node(root, &edge.callee_name);
let handle = node.handle.clone();
graph.add_node(node);
handle
};
graph.add_edge(
&caller_handle,
&callee_handle,
"calls",
Some(format!("call site {}:{}", caller_file, edge.call_site_line)),
1,
);
// Mirror the call between AST spans when both ends have one.
if let Some(caller_span) = span_by_file_name_line.get(&caller_key)
&& let Some(callee_span) = first_span_by_name.get(&edge.callee_name)
{
graph.add_edge(
caller_span,
callee_span,
"calls",
Some(format!(
"AST call site {}:{}",
caller_file, edge.call_site_line
)),
1,
);
}
}
}
for route in db.all_routes()? {
if traversal_path_is_generated_artifact(
root,
&gate_source_root,
Path::new(&route.file),
) {
continue;
}
let node = traversal_route_node(root, &route);
let entry = TraversalRouteIndexEntry {
handle: node.handle.clone(),
tokens: traversal_node_tokens(&node),
node: node.clone(),
};
graph.add_node(node.clone());
if let Some(path) = node.path.as_ref()
&& let Some(file_handle) = file_handle_by_path.get(path)
{
graph.add_edge(
file_handle,
&node.handle,
"defines_route",
Some("file declares route".to_string()),
1,
);
}
// The handler resolves like a callee: first symbol by name, else a
// placeholder node.
let handler_handle =
if let Some(handle) = first_symbol_by_name.get(&route.handler_name) {
handle.clone()
} else {
let node = traversal_unresolved_symbol_node(root, &route.handler_name);
let handle = node.handle.clone();
graph.add_node(node);
handle
};
graph.add_edge(
&entry.handle,
&handler_handle,
"handled_by",
Some("route handler reference".to_string()),
1,
);
if let Some(handler_span) = first_span_by_name.get(&route.handler_name) {
graph.add_edge(
&entry.handle,
handler_span,
"handled_by",
Some("route handler AST span".to_string()),
1,
);
graph.add_edge(
handler_span,
&entry.handle,
"handles_route",
Some("AST span handles route".to_string()),
1,
);
}
route_entries.push(entry);
}
}
// Fallback path: no usable index database, so project raw source files.
_ => {
add_raw_source_file_nodes(root, &gate_source_root, &mut graph, &mut file_entries)
.with_context(|| {
format!(
"loading raw source fallback nodes from {}",
gate_source_root.display()
)
})?;
for entry in &file_entries {
if let Some(path) = entry.node.path.as_ref() {
file_handle_by_path.insert(path.clone(), entry.handle.clone());
}
}
}
}
load_multiplicity_traversal_nodes(
root,
&gate_source_root,
&mut graph,
&file_handle_by_path,
&mut multiplicity_entries,
)?;
}
// Session nodes are loaded last so they can link to the code nodes above.
let code_lookup = TraversalCodeLookup::new(
&symbol_entries,
&file_entries,
&route_entries,
&multiplicity_entries,
);
load_agent_doc_traversal_nodes(root, path_hint, &mut graph, &code_lookup)?;
Ok(graph)
}
/// Test-only convenience wrapper: builds the traversal graph source with
/// `session_only` set to `false`.
#[cfg(test)]
fn build_traversal_graph_source(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
) -> Result<TraversalGraphBuild> {
build_traversal_graph_source_with_options(root, path_hint, scope, false)
}
/// Builds the traversal graph, projects it, and replaces the stored projection
/// for the scope in the graph substrate database.
///
/// The stored watermark is the source watermark when one can be computed;
/// when that computation fails or yields nothing, the content hash of the
/// projection is used instead.
///
/// Returns the freshly built source graph and the store's refresh report.
///
/// # Errors
/// Propagates failures from building the graph, projecting it, opening the
/// store, and replacing the projection.
pub(crate) fn write_traversal_graph_store_with_options(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    session_only: bool,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
    let source_graph =
        build_traversal_graph_source_with_options(root, path_hint, scope, session_only)?;
    let projection = traversal_projection_from_graph(root, scope, &source_graph)?;

    let graph_db_path = graph_substrate_db_path(root, scope);
    let mut store = SqliteGraphStore::open(&graph_db_path)?;

    let source_watermark = match traversal_source_watermark(root, path_hint, scope, session_only)
    {
        Ok(Some(watermark)) => Some(watermark),
        // No watermark or a failed computation: fall back to the projection hash.
        Ok(None) | Err(_) => graph_projection_content_hash(&projection),
    };

    let scope_label = scope.unwrap_or("root");
    let refresh = store.replace_projection_with_version(
        scope_label,
        &projection,
        Some(GRAPH_PROJECTION_VERSION),
        source_watermark,
    )?;
    Ok((source_graph, refresh))
}
/// Convenience wrapper around `write_traversal_graph_store_with_options` with
/// `session_only` set to `false`.
pub(crate) fn write_traversal_graph_store(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
write_traversal_graph_store_with_options(root, path_hint, scope, false)
}
fn refresh_traversal_graph_store_with_options(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
session_only: bool,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
let (source_graph, refresh) =
write_traversal_graph_store_with_options(root, path_hint, scope, session_only)?;
let graph_db = graph_substrate_db_path(root, scope);
let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
let mut graph = traversal_graph_from_store(root, &store)?;
graph.warnings = source_graph.warnings;
Ok((graph, refresh))
}
/// Convenience wrapper around `refresh_traversal_graph_store_with_options`
/// with `session_only` set to `false`.
fn refresh_traversal_graph_store(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
refresh_traversal_graph_store_with_options(root, path_hint, scope, false)
}
/// Refreshes the stored traversal graph and returns the graph read back from
/// the store, discarding the refresh report.
///
/// # Errors
/// Propagates any failure from the refresh.
pub(crate) fn build_traversal_graph(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<TraversalGraphBuild> {
    refresh_traversal_graph_store(root, path_hint, scope).map(|(graph, _refresh)| graph)
}
/// Returns the tie-break priority of a node kind when resolving a query;
/// lower values win.
///
/// Known kinds map to their position in the list below (0 through 11). Any
/// other kind gets 12.
fn traversal_query_kind_priority(kind: &str) -> usize {
    const KINDS_BY_PRIORITY: [&str; 12] = [
        "backlog",
        "job_packet",
        "worker_result",
        "symbol",
        "ast_span",
        "file",
        "route",
        "cargo_package",
        "cargo_workspace",
        "session",
        "semantic_concept",
        "semantic_entity",
    ];
    KINDS_BY_PRIORITY
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(KINDS_BY_PRIORITY.len())
}
/// Scores how well `node` matches a user query; smaller tuples are better.
///
/// The query is trimmed first; an empty query matches nothing. The first
/// element of the rank is the match tier:
/// * 0: the handle equals the query;
/// * 1: the node path equals the query (file nodes get sub-priority 0, other
///   kinds their kind priority plus one);
/// * 2: the ref id equals the query, with or without leading `#` characters;
/// * 3: the label equals the query, or for symbols the query minus leading `#`.
///
/// The second element is the kind-based sub-priority and the third is the
/// handle. Returns `None` when no tier matches.
fn traversal_node_match_rank(node: &TraversalNode, query: &str) -> Option<(usize, usize, String)> {
    let needle = query.trim();
    if needle.is_empty() {
        return None;
    }
    let kind_priority = traversal_query_kind_priority(&node.kind);
    let needle_without_hash = needle.trim_start_matches('#');

    let (tier, priority) = if node.handle == needle {
        (0, kind_priority)
    } else if node.path.as_deref() == Some(needle) {
        let path_priority = match node.kind == "file" {
            true => 0,
            false => kind_priority.saturating_add(1),
        };
        (1, path_priority)
    } else if matches!(
        node.ref_id.as_deref(),
        Some(ref_id) if ref_id == needle || ref_id == needle_without_hash
    ) {
        (2, kind_priority)
    } else if node.label == needle || (node.kind == "symbol" && node.label == needle_without_hash)
    {
        (3, kind_priority)
    } else {
        return None;
    };
    Some((tier, priority, node.handle.clone()))
}
/// Finds the node in `graph` that best matches `query`, or `None` when no node
/// matches at all.
///
/// "Best" is the smallest match rank. Among equal ranks the node encountered
/// first while iterating `graph.nodes` is kept.
fn resolve_traversal_node<'a>(
    graph: &'a TraversalGraphBuild,
    query: &str,
) -> Option<&'a TraversalNode> {
    let mut best: Option<((usize, usize, String), &'a TraversalNode)> = None;
    for node in graph.nodes.values() {
        let Some(rank) = traversal_node_match_rank(node, query) else {
            continue;
        };
        // Strict comparison keeps the earliest node on ties.
        let improves = match &best {
            Some((best_rank, _)) => rank < *best_rank,
            None => true,
        };
        if improves {
            best = Some((rank, node));
        }
    }
    best.map(|(_, node)| node)
}
/// Builds an undirected adjacency map from `edges`.
///
/// Every edge contributes both directions. Each neighbour list is sorted and
/// free of duplicates, and the map itself is keyed in sorted order.
fn traversal_adjacency(edges: &[TraversalEdge]) -> BTreeMap<String, Vec<String>> {
    let mut neighbor_sets: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    for edge in edges {
        for (source, target) in [(&edge.from, &edge.to), (&edge.to, &edge.from)] {
            neighbor_sets
                .entry(source.clone())
                .or_default()
                .insert(target.clone());
        }
    }

    let mut adjacency = BTreeMap::new();
    for (handle, neighbors) in neighbor_sets {
        adjacency.insert(handle, Vec::from_iter(neighbors));
    }
    adjacency
}
/// Finds a shortest path between two handles, treating every edge as
/// undirected.
///
/// Returns the handles along the path, from `from` to `to` inclusive. When
/// `from == to` the path is that single handle, whether or not it appears in
/// any edge. Returns `None` when either endpoint has no edges or no path
/// connects them.
///
/// The search is breadth-first over sorted neighbour lists, so the same input
/// always yields the same path.
fn traversal_shortest_handles(
    edges: &[TraversalEdge],
    from: &str,
    to: &str,
) -> Option<Vec<String>> {
    if from == to {
        return Some(vec![from.to_string()]);
    }
    let adj = traversal_adjacency(edges);
    if !adj.contains_key(from) || !adj.contains_key(to) {
        return None;
    }

    // Handles are borrowed from `adj` and the arguments, so the search itself
    // allocates no strings.
    let mut visited: BTreeSet<&str> = BTreeSet::from([from]);
    let mut parent: BTreeMap<&str, &str> = BTreeMap::new();
    let mut queue: VecDeque<&str> = VecDeque::from([from]);

    while let Some(current) = queue.pop_front() {
        let Some(neighbors) = adj.get(current) else {
            continue;
        };
        for neighbor in neighbors {
            let neighbor = neighbor.as_str();
            if !visited.insert(neighbor) {
                continue;
            }
            parent.insert(neighbor, current);
            if neighbor == to {
                // Walk the parent links back to `from` (which has no parent),
                // then flip the path into forward order.
                let mut path = vec![to.to_string()];
                let mut cursor: &str = to;
                while let Some(prev) = parent.get(cursor) {
                    path.push(prev.to_string());
                    cursor = *prev;
                }
                path.reverse();
                return Some(path);
            }
            queue.push_back(neighbor);
        }
    }
    None
}
/// Lists the handles adjacent to `current`, best relation first.
///
/// Edges are followed in both directions. A neighbour reached through several
/// edges keeps the highest relation score. The result is ordered by score
/// descending, then handle ascending.
fn traversal_scored_neighbors(edges: &[TraversalEdge], current: &str) -> Vec<String> {
    let mut top_score: BTreeMap<&str, usize> = BTreeMap::new();
    for edge in edges {
        let neighbor = match (edge.from == current, edge.to == current) {
            (true, _) => edge.to.as_str(),
            (false, true) => edge.from.as_str(),
            (false, false) => continue,
        };
        let score = traversal_relation_score(edge, current);
        let slot = top_score.entry(neighbor).or_insert(score);
        *slot = (*slot).max(score);
    }

    let mut ranked: Vec<(&str, usize)> = top_score.into_iter().collect();
    ranked.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));

    let mut handles = Vec::with_capacity(ranked.len());
    for (handle, _score) in ranked {
        handles.push(handle.to_string());
    }
    handles
}
/// Collects the handles reachable from `origin` within `depth` hops, visiting
/// higher-scored neighbours first.
///
/// `origin` is always included. A `limit` of zero means no cap. Otherwise the
/// walk stops as soon as the result holds `limit` handles and another
/// neighbour is about to be considered.
fn traversal_neighborhood_handles(
    edges: &[TraversalEdge],
    origin: &str,
    depth: usize,
    limit: usize,
) -> BTreeSet<String> {
    let mut seen = BTreeSet::new();
    let mut queue = VecDeque::new();
    seen.insert(origin.to_string());
    queue.push_back((origin.to_string(), 0usize));

    while let Some((current, current_depth)) = queue.pop_front() {
        // Nodes at the depth boundary are kept but not expanded further.
        if current_depth >= depth {
            continue;
        }
        for neighbor in traversal_scored_neighbors(edges, &current) {
            // The cap is checked before each insertion, so the set never
            // grows past `limit`.
            if limit > 0 && seen.len() >= limit {
                return seen;
            }
            if seen.insert(neighbor.clone()) {
                queue.push_back((neighbor, current_depth + 1));
            }
        }
    }
    seen
}
/// Returns clones of the edges whose `from` and `to` are both in `handles`,
/// preserving the order of `edges`.
fn traversal_edges_between(
    handles: &BTreeSet<String>,
    edges: &[TraversalEdge],
) -> Vec<TraversalEdge> {
    let mut kept = Vec::new();
    for edge in edges {
        let inside = handles.contains(&edge.from) && handles.contains(&edge.to);
        if inside {
            kept.push(edge.clone());
        }
    }
    kept
}
/// For each consecutive pair of handles in `path`, picks the first edge in
/// `edges` that joins the pair in either direction. Pairs with no matching
/// edge contribute nothing.
fn traversal_path_edges(path: &[String], edges: &[TraversalEdge]) -> Vec<TraversalEdge> {
    path.windows(2)
        .filter_map(|hop| {
            let (a, b) = (&hop[0], &hop[1]);
            edges.iter().find(|edge| {
                let forward = edge.from == *a && edge.to == *b;
                forward || (edge.from == *b && edge.to == *a)
            })
        })
        .cloned()
        .collect()
}
/// Clones the given nodes into a vector ordered by kind, then label, then
/// path, then handle.
fn sorted_traversal_nodes<'a>(
    nodes: impl IntoIterator<Item = &'a TraversalNode>,
) -> Vec<TraversalNode> {
    let mut ordered: Vec<TraversalNode> = nodes.into_iter().cloned().collect();
    ordered.sort_by(|a, b| {
        // Tuples compare lexicographically, matching the field-by-field chain.
        let lhs = (&a.kind, &a.label, &a.path, &a.handle);
        let rhs = (&b.kind, &b.label, &b.path, &b.handle);
        lhs.cmp(&rhs)
    });
    ordered
}
/// Scores an edge as seen from `origin`: a base value chosen by relation name
/// plus the edge's own `weight`.
///
/// `calls` and `defines` score higher when the edge starts at `origin` than
/// when it ends there. Unrecognized relations get a base of 10.
fn traversal_relation_score(edge: &TraversalEdge, origin: &str) -> usize {
    // Picks the first value when the edge leaves `origin`, else the second.
    let by_direction = |outgoing: usize, incoming: usize| -> usize {
        if edge.from == origin {
            outgoing
        } else {
            incoming
        }
    };
    let base = match edge.relation.as_str() {
        "mentions" => 100,
        "contains" => 80,
        "parent" | "child" | "has_ast_span" | "represents_symbol" => 78,
        "contains_embedded_symbol" | "embedded_in_fence" => 77,
        "contains_markdown_block"
        | "contains_embedded_code"
        | "enclosing_module"
        | "enclosing_section" => 76,
        "calls" => by_direction(70, 65),
        "handled_by" | "handles_route" => 68,
        "mentions_concept" | "mentions_entity" => 66,
        "semantic_relation" => 64,
        "defines_route" | "imports" => 62,
        "defines" => by_direction(60, 55),
        "tagged_concept" | "related_concept" => 58,
        "previous_sibling" | "next_sibling" => 54,
        _ => 10,
    };
    base + edge.weight
}
/// Produces the human-readable reason attached to a recommendation for `edge`.
///
/// The text depends on the relation name and, for `defines`, `defines_route`,
/// `handled_by` and `calls`, on whether the edge starts at `origin`.
/// Unrecognized relations fall back to `connected by <relation>`.
fn traversal_recommendation_reason(edge: &TraversalEdge, origin: &str) -> String {
    let outgoing = edge.from == origin;
    let reason = match (edge.relation.as_str(), outgoing) {
        ("mentions", _) => "matched from backlog/session text",
        ("contains", _) => "contained in the selected session artifact",
        ("has_ast_span", _) => "indexed AST span for the selected symbol",
        ("represents_symbol", _) => "indexed symbol represented by the selected AST span",
        ("parent", _) => "parent AST span",
        ("child", _) => "child AST span",
        ("previous_sibling", _) => "previous AST sibling",
        ("next_sibling", _) => "next AST sibling",
        ("contains_markdown_block", _) => "Markdown section block",
        ("contains_embedded_symbol", _) => "embedded code symbol in Markdown fence",
        ("embedded_in_fence", _) => "Markdown fence containing the embedded symbol",
        ("contains_embedded_code", _) => "embedded code symbol in Markdown section",
        ("enclosing_module", _) => "nearest enclosing module",
        ("enclosing_section", _) => "nearest enclosing Markdown section",
        ("defines", true) => "symbol defined in selected file",
        ("defines", false) => "file that defines the selected symbol",
        ("defines_route", true) => "route declared in selected file",
        ("defines_route", false) => "file that declares the selected route",
        ("handled_by", true) => "handler for the selected route",
        ("handled_by", false) => "route handled by the selected symbol",
        ("handles_route", _) => "route handled by the selected AST span",
        ("imports", _) => "import dependency from the selected package",
        ("mentions_concept", _) => "cached summary concept for the selected source",
        ("mentions_entity", _) => "cached summary entity for the selected source",
        ("semantic_relation", _) => "LLM-extracted semantic relationship",
        ("tagged_concept", _) => "concept label attached to the selected entity",
        ("related_concept", _) => "co-occurring cached summary concept",
        ("calls", true) => "callee from the selected symbol",
        ("calls", false) => "caller of the selected symbol",
        (other, _) => return format!("connected by {other}"),
    };
    reason.to_string()
}
fn traversal_recommendations(
graph: &TraversalGraphBuild,
origin: Option<&str>,
shortest_path: Option<&[String]>,
limit: usize,
) -> Vec<TraversalRecommendation> {
let Some(origin) = origin else {
return Vec::new();
};
let mut recommendations = Vec::new();
let mut seen = BTreeSet::new();
if let Some(path) = shortest_path
&& path.len() > 1
&& path.first().is_some_and(|handle| handle == origin)
&& let Some(next) = graph.nodes.get(&path[1])
{
seen.insert(next.handle.clone());
recommendations.push(TraversalRecommendation {
handle: next.handle.clone(),
kind: next.kind.clone(),
label: next.label.clone(),
reason: "next hop on shortest path".to_string(),
score: 1_000,
expand: next.expand.clone(),
});
}
let mut candidates = graph
.edges
.iter()
.filter_map(|edge| {
let neighbor = if edge.from == origin {
edge.to.as_str()
} else if edge.to == origin {
edge.from.as_str()
} else {
return None;
};
let node = graph.nodes.get(neighbor)?;
Some((traversal_relation_score(edge, origin), edge, node))
})
.collect::<Vec<_>>();
candidates.sort_by(|(left_score, _, left), (right_score, _, right)| {
right_score
.cmp(left_score)
.then_with(|| left.kind.cmp(&right.kind))
.then_with(|| left.label.cmp(&right.label))
.then_with(|| left.handle.cmp(&right.handle))
});
let max = if limit == 0 { usize::MAX } else { limit };
for (score, edge, node) in candidates {
if recommendations.len() >= max {
break;
}
if seen.insert(node.handle.clone()) {
recommendations.push(TraversalRecommendation {
handle: node.handle.clone(),
kind: node.kind.clone(),
label: node.label.clone(),
reason: traversal_recommendation_reason(edge, origin),
score,
expand: node.expand.clone(),
});
}
}
recommendations
}
/// Chooses an exploration budget tier from the combined node and edge count.
///
/// Up to 80 is "small", up to 800 is "medium", anything larger is "large";
/// larger tiers get fewer and shorter source windows and fewer relationships.
fn exploration_budget_for_counts(nodes: usize, edges: usize) -> ExplorationBudget {
    let (size, windows, lines, relations) = match nodes.saturating_add(edges) {
        0..=80 => ("small", 8, 96, 40),
        81..=800 => ("medium", 6, 80, 32),
        _ => ("large", 4, 64, 24),
    };
    ExplorationBudget {
        project_size: size.to_string(),
        max_source_windows: windows,
        lines_per_window: lines,
        relationship_limit: relations,
    }
}
fn exploration_node_label(node: &TraversalNode) -> String {
format!("{}:{}", node.kind, node.label)
}
/// Builds a source window around a node, or `None` when the node has no path.
///
/// The anchor is the node's `line` plus one (falling back to 1 when the line
/// is absent or not convertible). The window starts a third of
/// `lines_per_window` before the anchor, never below line 1, and spans
/// `lines_per_window` lines.
fn exploration_source_window_for_node(
    root: &Path,
    node: &TraversalNode,
    budget: &ExplorationBudget,
) -> Option<ExplorationSourceWindow> {
    let file = node.path.as_ref()?;
    let window_len = budget.lines_per_window;
    let anchor = match node.line {
        Some(line) => usize::try_from(line)
            .ok()
            .and_then(|line| line.checked_add(1))
            .unwrap_or(1),
        None => 1,
    };
    let lead = window_len / 3;
    let start = std::cmp::max(anchor.saturating_sub(lead), 1);
    let end = start.saturating_add(window_len).saturating_sub(1);
    let handle_seed = format!("{}:{}:{}:{}", file, start, end, node.handle);
    let handle = stable_handle("xwin", &handle_seed);
    let reason = format!("cluster around {}", exploration_node_label(node));
    let expand = source_read_command(root, file, start, window_len);
    Some(ExplorationSourceWindow {
        handle,
        file: file.clone(),
        start,
        end,
        reason,
        expand,
    })
}
/// Assembles the exploration packet for a traversal selection.
///
/// The budget is derived from the graph totals. The relationship map takes the
/// first `relationship_limit` selected edges and keeps those whose endpoints
/// are both among the selected nodes. Source windows are gathered in node
/// order, skipping nodes without a window and duplicate (file, start, end)
/// ranges, until `max_source_windows` is reached.
fn build_exploration_packet(
    root: &Path,
    totals: &TraversalTotals,
    selected_nodes: &[TraversalNode],
    selected_edges: &[TraversalEdge],
) -> ExplorationPacket {
    let budget = exploration_budget_for_counts(totals.nodes, totals.edges);

    let mut node_by_handle = BTreeMap::new();
    for node in selected_nodes {
        node_by_handle.insert(node.handle.as_str(), node);
    }

    let mut relationship_map = Vec::new();
    for edge in selected_edges.iter().take(budget.relationship_limit) {
        let endpoints = (
            node_by_handle.get(edge.from.as_str()),
            node_by_handle.get(edge.to.as_str()),
        );
        let (Some(from), Some(to)) = endpoints else {
            continue;
        };
        relationship_map.push(ExplorationRelation {
            from: exploration_node_label(from),
            relation: edge.relation.clone(),
            to: exploration_node_label(to),
            label: edge.label.clone(),
        });
    }

    let mut window_keys = BTreeSet::new();
    let mut source_windows = Vec::new();
    for node in selected_nodes {
        if source_windows.len() >= budget.max_source_windows {
            break;
        }
        if let Some(window) = exploration_source_window_for_node(root, node, &budget) {
            let fresh = window_keys.insert((window.file.clone(), window.start, window.end));
            if fresh {
                source_windows.push(window);
            }
        }
    }

    ExplorationPacket {
        budget,
        relationship_map,
        source_windows,
        worker_context: Vec::new(),
        no_reread_guidance: String::from(
            "Use the source_windows expand commands for line-numbered context; avoid whole-file reads unless the needed line is outside every listed window.",
        ),
    }
}
/// Builds a traversal report in one of three modes.
///
/// * `query` and `target` both resolve: mode `path`. The selection is the
///   shortest handle path plus every edge between the handles on it; when no
///   path exists the selection is just the two endpoints with no edges.
/// * only `query` resolves (or `target` is absent): mode `neighborhood`,
///   bounded by `depth` and `limit`.
/// * no `query`: mode `export`, all nodes in sorted order with nodes and edges
///   each cut to `limit` when `limit` is non-zero.
///
/// # Errors
///
/// Fails when `query` or `target` is given but does not resolve to a node.
pub(crate) fn traversal_report(
    root: &Path,
    scope: Option<&str>,
    graph: TraversalGraphBuild,
    query: Option<&str>,
    target: Option<&str>,
    depth: usize,
    limit: usize,
) -> Result<TraversalReport> {
    let totals = TraversalTotals {
        nodes: graph.nodes.len(),
        edges: graph.edges.len(),
    };
    let origin_node = match query {
        Some(value) => resolve_traversal_node(&graph, value),
        None => None,
    };
    let target_node = match target {
        Some(value) => resolve_traversal_node(&graph, value),
        None => None,
    };
    if let (Some(missing), None) = (query, origin_node) {
        bail!("traversal node not found: {}", missing);
    }
    if let (Some(missing), None) = (target, target_node) {
        bail!("traversal target not found: {}", missing);
    }

    let (mode, selected_nodes, selected_edges, shortest_path) = match (origin_node, target_node) {
        (Some(origin), Some(dest)) => {
            match traversal_shortest_handles(&graph.edges, &origin.handle, &dest.handle) {
                Some(handles) => {
                    let nodes: Vec<_> = handles
                        .iter()
                        .filter_map(|handle| graph.nodes.get(handle).cloned())
                        .collect();
                    let path_edges = traversal_path_edges(&handles, &graph.edges);
                    let on_path: BTreeSet<String> = handles.iter().cloned().collect();
                    // The selection carries every edge among path nodes, while
                    // the path report lists only one edge per hop.
                    let between = traversal_edges_between(&on_path, &graph.edges);
                    let path = TraversalPathReport {
                        from: origin.clone(),
                        to: dest.clone(),
                        hops: handles.len().saturating_sub(1),
                        nodes: nodes.clone(),
                        edges: path_edges,
                    };
                    ("path".to_string(), nodes, between, Some(path))
                }
                None => (
                    "path".to_string(),
                    vec![origin.clone(), dest.clone()],
                    Vec::new(),
                    None,
                ),
            }
        }
        (Some(origin), None) => {
            let handles =
                traversal_neighborhood_handles(&graph.edges, &origin.handle, depth, limit);
            let nodes =
                sorted_traversal_nodes(handles.iter().filter_map(|handle| graph.nodes.get(handle)));
            let edges = traversal_edges_between(&handles, &graph.edges);
            ("neighborhood".to_string(), nodes, edges, None)
        }
        (None, _) => {
            let mut nodes = sorted_traversal_nodes(graph.nodes.values());
            if limit > 0 {
                nodes.truncate(limit);
            }
            let handles: BTreeSet<String> = nodes.iter().map(|node| node.handle.clone()).collect();
            let mut edges = traversal_edges_between(&handles, &graph.edges);
            if limit > 0 {
                edges.truncate(limit);
            }
            ("export".to_string(), nodes, edges, None)
        }
    };

    let shortest_handles: Option<Vec<String>> = shortest_path
        .as_ref()
        .map(|path| path.nodes.iter().map(|node| node.handle.clone()).collect());
    let recommendation_cap = match limit {
        0 => 10,
        bounded => bounded.min(10),
    };
    let recommendations = traversal_recommendations(
        &graph,
        origin_node.map(|node| node.handle.as_str()),
        shortest_handles.as_deref(),
        recommendation_cap,
    );
    let exploration = build_exploration_packet(root, &totals, &selected_nodes, &selected_edges);
    let truncated = selected_nodes.len() < totals.nodes || selected_edges.len() < totals.edges;

    Ok(TraversalReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        mode,
        totals,
        query: query.map(str::to_string),
        target: target.map(str::to_string),
        nodes: selected_nodes,
        edges: selected_edges,
        shortest_path,
        recommendations,
        exploration,
        truncated,
        warnings: graph.warnings,
    })
}
/// Escapes text for safe inclusion in HTML element content or a quoted
/// attribute value.
///
/// The five significant characters are replaced with entities:
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `'` → `&#39;`.
/// Everything else is copied through unchanged. The input is scanned once, so
/// an ampersand produced by an earlier replacement is never escaped twice.
fn html_escape(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Renders a traversal report as a self-contained HTML page.
///
/// The report is serialized to JSON and embedded in a
/// `<script type="application/json">` data block; an inline script parses it
/// and draws the nodes and edges into an SVG with a filterable node list and
/// a detail pane.
///
/// Every value taken from the report is HTML-escaped before it is written
/// through `innerHTML` (labels, kinds, handles, relations, paths, details and
/// expand commands). In the embedded JSON, `</` and `<!--` are rewritten to
/// JSON-equivalent escapes so report text cannot end or derail the enclosing
/// `<script>` element.
///
/// # Errors
///
/// Fails when the report cannot be serialized to JSON.
pub(crate) fn traversal_report_html(report: &TraversalReport) -> Result<String> {
    // `<` only occurs inside JSON string values, where `\/` and `\u0021` are
    // valid escapes that decode back to `/` and `!`.
    let json = serde_json::to_string(report)?
        .replace("</", "<\\/")
        .replace("<!--", "<\\u0021--");
    let mut html = String::new();
    html.push_str(
        "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift traversal graph</title>",
    );
    html.push_str(
        r#"<style>
:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#ffffff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e;--semantic:#9a3412}
@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf;--semantic:#fb923c}}
*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.toolbar{display:flex;gap:8px;align-items:center}.toolbar input{min-width:220px;border:1px solid var(--line);border-radius:6px;background:var(--panel);color:var(--text);padding:8px 10px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 320px;gap:14px;min-height:650px}.graph-panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.graph-panel{position:relative}.legend{position:absolute;left:12px;top:12px;display:flex;flex-wrap:wrap;gap:6px;max-width:calc(100% - 24px)}.legend span{font-size:12px;background:color-mix(in srgb,var(--panel) 86%,transparent);border:1px solid var(--line);border-radius:999px;padding:4px 8px}.side{padding:14px;overflow:auto}.side h2{font-size:15px;margin:0 0 8px}.selected{border-top:1px solid var(--line);margin-top:12px;padding-top:12px}.list{display:grid;gap:8px}.row{border:1px solid 
var(--line);border-radius:6px;padding:8px;cursor:pointer}.row:hover{border-color:var(--accent)}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted)}svg{width:100%;height:650px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.edge.semantic{stroke:var(--semantic);stroke-width:1.8}.node{stroke:var(--panel);stroke-width:2;cursor:pointer}.node.semantic{stroke:var(--semantic);stroke-width:2.5}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text);pointer-events:none}.hidden{display:none}@media(max-width:900px){.top{display:block}.toolbar{margin-top:12px}.layout{grid-template-columns:1fr}.side{max-height:360px}svg{height:560px}}
</style>"#,
    );
    html.push_str("</head><body>");
    html.push_str("<div class=\"page\">");
    html.push_str(&format!(
        "<header class=\"top\"><div><h1>tsift traversal graph</h1><div class=\"meta\">mode <code>{}</code> | nodes <code>{}</code>/<code>{}</code> | edges <code>{}</code>/<code>{}</code></div></div><div class=\"toolbar\"><input id=\"filter\" type=\"search\" placeholder=\"Filter nodes\"></div></header>",
        html_escape(&report.mode),
        report.nodes.len(),
        report.totals.nodes,
        report.edges.len(),
        report.totals.edges
    ));
    html.push_str(
        r#"<main class="layout"><section class="graph-panel"><div id="legend" class="legend"></div><svg id="graph-canvas" role="img" aria-label="Traversal graph"></svg></section><aside class="side"><h2>Nodes</h2><div id="node-list" class="list"></div><div id="selected" class="selected"></div></aside></main>"#,
    );
    html.push_str("<script id=\"graph-data\" type=\"application/json\">");
    html.push_str(&json);
    html.push_str(
        r##"</script><script>
const report = JSON.parse(document.getElementById("graph-data").textContent);
const svg = document.getElementById("graph-canvas");
const list = document.getElementById("node-list");
const selected = document.getElementById("selected");
const filter = document.getElementById("filter");
const legend = document.getElementById("legend");
const nodes = report.nodes.map((node, index) => ({...node, index}));
const nodeByHandle = new Map(nodes.map(node => [node.handle, node]));
const edges = report.edges.filter(edge => nodeByHandle.has(edge.from) && nodeByHandle.has(edge.to));
const colorByKind = new Map([
["file", "#2563eb"], ["symbol", "#16a34a"], ["route", "#7c3aed"],
["session", "#0891b2"], ["backlog", "#dc2626"], ["job_packet", "#ea580c"],
["semantic_concept", "#9a3412"], ["semantic_entity", "#b45309"],
["source_handle", "#64748b"], ["worker_context", "#475569"], ["worker_result", "#15803d"]
]);
function color(kind){ return colorByKind.get(kind) || "#6b7280"; }
function isSemantic(edge){ return edge.relation.includes("concept") || edge.relation.includes("entity") || edge.relation.includes("semantic"); }
function text(value){ return value == null ? "" : String(value); }
function matches(node, query){
if (!query) return true;
const haystack = [node.kind,node.label,node.handle,node.ref_id,node.path,node.detail].map(text).join(" ").toLowerCase();
return haystack.includes(query);
}
function layout(){
const rect = svg.getBoundingClientRect();
const width = rect.width || 900;
const height = rect.height || 650;
const cx = width / 2;
const cy = height / 2;
const kinds = [...new Set(nodes.map(node => node.kind))].sort();
const counts = new Map();
for (const node of nodes) counts.set(node.kind, (counts.get(node.kind) || 0) + 1);
const offsets = new Map();
for (const node of nodes) {
const group = kinds.indexOf(node.kind);
const index = offsets.get(node.kind) || 0;
offsets.set(node.kind, index + 1);
const groupCount = counts.get(node.kind) || 1;
const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
const angle = (Math.PI * 2 * index / Math.max(groupCount, 1)) + (group * 0.47);
node.x = cx + Math.cos(angle) * ring;
node.y = cy + Math.sin(angle) * ring;
}
}
function draw(){
const query = filter.value.trim().toLowerCase();
const visible = new Set(nodes.filter(node => matches(node, query)).map(node => node.handle));
svg.innerHTML = "";
for (const edge of edges) {
if (!visible.has(edge.from) || !visible.has(edge.to)) continue;
const from = nodeByHandle.get(edge.from);
const to = nodeByHandle.get(edge.to);
const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
line.setAttribute("class", "edge" + (isSemantic(edge) ? " semantic" : ""));
line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.relation + (edge.label ? ": " + edge.label : "");
svg.appendChild(line);
}
for (const node of nodes) {
if (!visible.has(node.handle)) continue;
const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
circle.setAttribute("fill", color(node.kind));
circle.setAttribute("class", "node" + (node.kind.startsWith("semantic_") ? " semantic" : ""));
circle.addEventListener("click", () => selectNode(node));
circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
svg.appendChild(circle);
const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
label.setAttribute("class", "node-label");
label.textContent = node.label.length > 34 ? node.label.slice(0, 31) + "..." : node.label;
svg.appendChild(label);
}
renderList(query);
}
function renderLegend(){
const kinds = [...new Set(nodes.map(node => node.kind))].sort();
legend.innerHTML = kinds.map(kind => `<span><b style="color:${color(kind)}">●</b> ${escapeHtml(kind)}</span>`).join("");
}
function renderList(query){
const rows = nodes.filter(node => matches(node, query)).slice(0, 120);
list.innerHTML = rows.map(node => `<div class="row" data-handle="${escapeHtml(node.handle)}"><div class="kind">${escapeHtml(node.kind)}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${escapeHtml(node.handle)}</div></div>`).join("");
for (const row of list.querySelectorAll(".row")) {
row.addEventListener("click", () => selectNode(nodeByHandle.get(row.dataset.handle)));
}
}
function selectNode(node){
const adjacent = edges.filter(edge => edge.from === node.handle || edge.to === node.handle).slice(0, 20);
selected.innerHTML = `<h2>${escapeHtml(node.label)}</h2><div class="kind">${escapeHtml(node.kind)}</div><p class="handle">${escapeHtml(node.handle)}</p>${node.path ? `<p>${escapeHtml(node.path)}${node.line != null ? ":" + escapeHtml(node.line) : ""}</p>` : ""}${node.detail ? `<p>${escapeHtml(node.detail)}</p>` : ""}<p><code>${escapeHtml(node.expand)}</code></p><h2>Edges</h2><div class="list">${adjacent.map(edge => `<div class="row"><div class="kind">${escapeHtml(edge.relation)}</div><div>${escapeHtml(edge.from)} -> ${escapeHtml(edge.to)}</div>${edge.label ? `<div>${escapeHtml(edge.label)}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No visible edges.</div>"}</div>`;
}
function escapeHtml(value){
return text(value).replace(/[&<>"']/g, ch => ({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;","'":"&#39;"}[ch]));
}
filter.addEventListener("input", draw);
window.addEventListener("resize", () => { layout(); draw(); });
renderLegend();
layout();
draw();
if (nodes.length) selectNode(nodes[0]);
</script></div></body></html>"##,
    );
    Ok(html)
}
/// Ranks semantic graph rows in `store` by similarity to `query`.
///
/// Rows of the node kinds selected by `kind` are scored with
/// `semantic_cosine` against the query embedding; rows without a parseable
/// `embedding` property are skipped. Results are ordered by descending score,
/// then kind, label and handle, and cut to `limit` when it is non-zero. A
/// warning is added when nothing matched.
///
/// # Errors
///
/// Fails when `query` is blank or when the store lookup fails.
fn semantic_related_report_from_store(
    root: &Path,
    scope: Option<&str>,
    query: &str,
    limit: usize,
    kind: SemanticRelatedKind,
    store: &impl GraphStore,
) -> Result<SemanticRelatedReport> {
    if query.trim().is_empty() {
        bail!("semantic query cannot be empty");
    }
    let query_embedding = semantic_embedding(query);
    let node_kinds: &[&str] = match kind {
        SemanticRelatedKind::Concept => &["semantic_concept"],
        SemanticRelatedKind::Entity => &["semantic_entity"],
        SemanticRelatedKind::All => &["semantic_concept", "semantic_entity"],
    };

    let mut items = Vec::new();
    for node_kind in node_kinds {
        for node in store.nodes_by_kind(node_kind)? {
            let props = &node.properties;
            let Some(embedding) = props
                .get("embedding")
                .and_then(|value| parse_semantic_embedding_property(value))
            else {
                continue;
            };
            let score = semantic_cosine(&query_embedding, &embedding);
            // Each display field prefers a dedicated property and falls back
            // to an alternative when it is missing.
            let handle = match props.get("handle") {
                Some(handle) => handle.clone(),
                None => node.id.clone(),
            };
            let file_path = props
                .get("source_file")
                .or_else(|| props.get("path"))
                .cloned();
            let source_symbol = props.get("source_symbol").cloned();
            let detail = props
                .get("description")
                .or_else(|| props.get("detail"))
                .cloned();
            let expand = match props.get("expand") {
                Some(command) => command.clone(),
                None => traversal_expand_command(root, &node.id),
            };
            items.push(SemanticRelatedItem {
                handle,
                kind: node.kind,
                label: node.label,
                score,
                file_path,
                source_symbol,
                detail,
                expand,
            });
        }
    }

    items.sort_by(|a, b| {
        // Incomparable scores are treated as equal and fall through to the
        // textual tie-breakers.
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(Ordering::Equal)
            .then_with(|| (&a.kind, &a.label, &a.handle).cmp(&(&b.kind, &b.label, &b.handle)))
    });
    if limit > 0 {
        items.truncate(limit);
    }

    let warnings = if items.is_empty() {
        vec![
            "no semantic graph rows found; run `tsift summarize --extract <path>` first"
                .to_string(),
        ]
    } else {
        Vec::new()
    };

    Ok(SemanticRelatedReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        query: query.to_string(),
        embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
        count: items.len(),
        items,
        warnings,
    })
}
/// Counts the `semantic_concept` and `semantic_entity` nodes in `store`.
fn graph_store_semantic_node_count(store: &impl GraphStore) -> Result<usize> {
    let concepts = store.nodes_by_kind("semantic_concept")?.len();
    let entities = store.nodes_by_kind("semantic_entity")?.len();
    Ok(concepts + entities)
}
/// Per-node cap on how many edges seed expansion considers.
///
/// Returns 0 for `limit == 0`; otherwise four times `limit`, clamped between
/// the minimum and maximum scan-cap constants.
fn graph_db_semantic_edge_scan_cap(limit: usize) -> usize {
    match limit {
        0 => 0,
        bounded => bounded.saturating_mul(4).clamp(
            GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP,
            GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP,
        ),
    }
}
/// Cap on how many distinct nodes seed expansion may discover.
///
/// `limit == 0` means unbounded (`usize::MAX`). Otherwise the cap is the
/// largest of three times `limit` (saturating), `limit` itself, and
/// `seed_count`.
fn graph_db_semantic_node_discovery_cap(seed_count: usize, limit: usize) -> usize {
    match limit {
        0 => usize::MAX,
        bounded => {
            let tripled = bounded.saturating_mul(3);
            std::cmp::max(std::cmp::max(tripled, bounded), seed_count)
        }
    }
}
/// Returns the endpoint of `edge` opposite `current_id`, or `None` when the
/// edge does not touch `current_id`. When `from_id` equals `current_id` the
/// result is `to_id`.
fn graph_db_semantic_edge_other_id<'a>(
    edge: &'a SubstrateGraphEdge,
    current_id: &str,
) -> Option<&'a str> {
    match (edge.from_id == current_id, edge.to_id == current_id) {
        (true, _) => Some(edge.to_id.as_str()),
        (false, true) => Some(edge.from_id.as_str()),
        (false, false) => None,
    }
}
/// Scores an edge for seed expansion from `current_id`.
///
/// The score is ten times `resolution::edge_kind_rank_score` for the edge
/// kind, plus 8 when the edge starts at `current_id` (4 otherwise), plus a
/// bonus that depends on the edge kind.
fn graph_db_semantic_edge_score(edge: &SubstrateGraphEdge, current_id: &str) -> i64 {
    let rank_component = resolution::edge_kind_rank_score(&edge.kind).saturating_mul(10);
    let direction_bonus = if edge.from_id == current_id { 8 } else { 4 };
    let kind_bonus = match edge.kind.as_str() {
        "mentions_concept" | "mentions_entity" | "tagged_concept" | "tagged_entity"
        | "related_concept" => 30,
        "semantic_relation" => 28,
        "calls" => 24,
        "mentions" => 22,
        "requests_context" | "scopes_context" | "scopes_source" | "explains_result" => 18,
        "defines" | "contains" | "belongs_to" => 12,
        _ => 0,
    };
    rank_component + direction_bonus + kind_bonus
}
fn graph_db_semantic_seeded_neighborhood(
store: &impl GraphStore,
seed_ids: &[String],
depth: usize,
limit: usize,
) -> Result<GraphDbSemanticSeededSubgraph> {
let seed_rank = seed_ids
.iter()
.enumerate()
.map(|(idx, seed)| (seed.clone(), idx))
.collect::<BTreeMap<_, _>>();
let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
let mut edges = BTreeMap::<String, SubstrateGraphEdge>::new();
let mut node_score_by_id = BTreeMap::<String, i64>::new();
let mut queue = VecDeque::<(String, usize)>::new();
let mut seen_at_depth = BTreeMap::<String, usize>::new();
let edge_scan_cap = graph_db_semantic_edge_scan_cap(limit);
let node_discovery_cap = graph_db_semantic_node_discovery_cap(seed_ids.len(), limit);
let mut skipped_by_edge_cap = 0usize;
let mut skipped_by_node_cap = 0usize;
let mut diagnostics = vec![
"semantic-seeded retrieval uses phrase similarity to pick graph seeds".to_string(),
"seed expansion traverses both outgoing and incident edges so code, markdown, conversation, and memory adapters can link into semantic rows without reversing their edge direction".to_string(),
format!(
"seed expansion ranks incident/outgoing edges before caps; per-node edge scan cap={} node discovery cap={}",
if edge_scan_cap == 0 {
"unbounded".to_string()
} else {
edge_scan_cap.to_string()
},
if node_discovery_cap == usize::MAX {
"unbounded".to_string()
} else {
node_discovery_cap.to_string()
}
),
];
for (idx, seed_id) in seed_ids.iter().enumerate() {
if let Some(node) = store.node(seed_id)? {
nodes.entry(seed_id.clone()).or_insert(node);
node_score_by_id
.entry(seed_id.clone())
.or_insert(1_000_000i64.saturating_sub(idx as i64));
queue.push_back((seed_id.clone(), 0));
seen_at_depth.entry(seed_id.clone()).or_insert(0);
} else {
diagnostics.push(format!(
"semantic seed {seed_id} was not present in the graph store"
));
}
}
while let Some((current_id, current_depth)) = queue.pop_front() {
if current_depth >= depth {
continue;
}
let mut expansion_edges_by_key = BTreeMap::<String, SubstrateGraphEdge>::new();
for edge in store.outgoing_edges(¤t_id, None)? {
expansion_edges_by_key
.entry(graph_db_edge_key(&edge))
.or_insert(edge);
}
for edge in store.incident_edges(¤t_id, None)? {
expansion_edges_by_key
.entry(graph_db_edge_key(&edge))
.or_insert(edge);
}
let mut expansion_edges = expansion_edges_by_key.into_values().collect::<Vec<_>>();
expansion_edges.sort_by(|left, right| {
graph_db_semantic_edge_score(right, ¤t_id)
.cmp(&graph_db_semantic_edge_score(left, ¤t_id))
.then_with(|| graph_db_edge_key(left).cmp(&graph_db_edge_key(right)))
});
if edge_scan_cap > 0 && expansion_edges.len() > edge_scan_cap {
skipped_by_edge_cap += expansion_edges.len() - edge_scan_cap;
expansion_edges.truncate(edge_scan_cap);
}
for edge in expansion_edges {
let Some(other_id) = graph_db_semantic_edge_other_id(&edge, ¤t_id) else {
continue;
};
let other_known = nodes.contains_key(other_id);
if !other_known && nodes.len() >= node_discovery_cap {
skipped_by_node_cap += 1;
continue;
}
let other_id = other_id.to_string();
let edge_score = graph_db_semantic_edge_score(&edge, ¤t_id)
.saturating_add((depth.saturating_sub(current_depth) as i64).saturating_mul(5));
node_score_by_id
.entry(other_id.clone())
.and_modify(|score| *score = (*score).max(edge_score))
.or_insert(edge_score);
let edge_key = graph_db_edge_key(&edge);
edges.entry(edge_key).or_insert_with(|| edge.clone());
if let std::collections::btree_map::Entry::Vacant(entry) = nodes.entry(other_id.clone())
&& let Some(node) = store.node(&other_id)?
{
entry.insert(node);
}
if !nodes.contains_key(&other_id) {
continue;
}
let next_depth = current_depth + 1;
let should_queue = seen_at_depth
.get(&other_id)
.is_none_or(|seen_depth| next_depth < *seen_depth);
if should_queue {
seen_at_depth.insert(other_id.clone(), next_depth);
queue.push_back((other_id, next_depth));
}
}
}
if skipped_by_edge_cap > 0 {
diagnostics.push(format!(
"semantic-seeded expansion skipped {skipped_by_edge_cap} lower-scoring incident/outgoing edge(s) after per-node caps"
));
}
if skipped_by_node_cap > 0 {
diagnostics.push(format!(
"semantic-seeded expansion skipped {skipped_by_node_cap} lower-scoring node discovery edge(s) after the discovery cap"
));
}
let mut nodes = nodes.into_values().collect::<Vec<_>>();
nodes.sort_by(|left, right| {
seed_rank
.get(&left.id)
.copied()
.unwrap_or(usize::MAX)
.cmp(&seed_rank.get(&right.id).copied().unwrap_or(usize::MAX))
.then_with(|| {
node_score_by_id
.get(&right.id)
.copied()
.unwrap_or_default()
.cmp(&node_score_by_id.get(&left.id).copied().unwrap_or_default())
})
.then(left.id.cmp(&right.id))
});
let before_limit = nodes.len();
let truncated = limit > 0 && nodes.len() > limit;
if truncated {
nodes.truncate(limit);
diagnostics.push(format!(
"semantic-seeded neighborhood truncated from {before_limit} to {limit} node(s)"
));
}
let node_ids = nodes
.iter()
.map(|node| node.id.as_str())
.collect::<BTreeSet<_>>();
let mut edges = edges
.into_values()
.filter(|edge| {
node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
})
.collect::<Vec<_>>();
edges.sort_by_key(graph_db_edge_key);
Ok(GraphDbSemanticSeededSubgraph {
nodes,
edges,
truncated,
diagnostics,
})
}
/// CLI entry point: prints semantic graph rows related to `query`.
///
/// Refreshes the traversal graph store for the project, opens it read-only,
/// builds the related-rows report and prints it as JSON, as compact
/// tab-separated rows, or as an indented listing. Warnings (including any
/// read-recovery diagnostic) go to stderr in the non-JSON formats.
///
/// # Errors
///
/// Propagates failures from resolving the project root, writing or opening
/// the graph store, building the report, or serializing JSON.
#[allow(clippy::too_many_arguments)]
fn cmd_semantic_related(
    query: &str,
    path: &Path,
    scope: Option<&str>,
    limit: usize,
    kind: SemanticRelatedKind,
    json_output: bool,
    compact: bool,
    pretty: bool,
    terse: bool,
    schema: bool,
) -> Result<()> {
    let root = lint::resolve_project_root_or_canonical_path(path)?;
    write_traversal_graph_store(&root, path, scope)?;
    let graph_db = graph_substrate_db_path(&root, scope);
    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
    let mut report = semantic_related_report_from_store(&root, scope, query, limit, kind, &store)?;
    if let Some(recovery) = store.read_only_recovery() {
        let diagnostic = graph_db_read_recovery_diagnostic(recovery);
        report.warnings.push(diagnostic);
    }

    if json_output {
        // In JSON mode the warnings travel inside the serialized report.
        println!("{}", to_json_schema(&report, pretty, terse, false, schema)?);
        return Ok(());
    }

    if compact {
        for item in report.items.iter() {
            println!(
                "{:.3}\t{}\t{}\t{}",
                item.score, item.kind, item.label, item.handle
            );
        }
    } else {
        println!(
            "Related semantic graph rows for {:?} ({})",
            report.query, report.embedding_model
        );
        for item in report.items.iter() {
            println!(
                "  {:.3} [{}] {} ({})",
                item.score, item.kind, item.label, item.handle
            );
            if let Some(detail) = item.detail.as_ref() {
                println!("    {}", detail);
            }
            if let Some(file_path) = item.file_path.as_ref() {
                println!("    file: {}", file_path);
            }
            println!("    expand: {}", item.expand);
        }
    }
    for warning in report.warnings.iter() {
        eprintln!("warning: {warning}");
    }
    Ok(())
}
/// One previewed source line: a line number paired with that line's text.
// NOTE(review): whether `line` is 0- or 1-based is not visible here; confirm
// against the code that fills this struct.
#[derive(Serialize)]
struct SourceLinePreview {
line: usize,
text: String,
}
/// Serializable description of a previewed line range within a file.
// NOTE(review): presumably `start`/`end` bound the preview, `total_lines` is
// the file length, and the two flags report lines left out before and after
// the range; confirm against the producer.
#[derive(Serialize)]
pub(crate) struct SourceRangePreview {
start: usize,
end: usize,
total_lines: usize,
truncated_before: bool,
truncated_after: bool,
}
/// Follow-up command strings attached to a source read.
///
/// `file` is always serialized; `before`, `after`, `body` and `markdown_ast`
/// are left out of the output when they are `None`.
#[derive(Serialize)]
struct SourceExpandCommands {
#[serde(skip_serializing_if = "Option::is_none")]
before: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
after: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
body: Option<String>,
file: String,
#[serde(skip_serializing_if = "Option::is_none")]
markdown_ast: Option<String>,
}
/// Reference to an indexed symbol that overlaps a source window.
///
/// Optional fields are omitted from the serialized output when `None`.
#[derive(Serialize)]
struct SourceSymbolRef {
/// Stable identifier for this reference.
handle: String,
name: String,
kind: String,
language: String,
/// Display path of the file containing the symbol.
file: String,
/// Start line of the symbol.
line: usize,
#[serde(skip_serializing_if = "Option::is_none")]
end_line: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
signature: Option<String>,
/// AST span details, when available for the symbol.
#[serde(skip_serializing_if = "Option::is_none")]
span: Option<AstSpanPreview>,
/// Command string that expands this symbol.
expand: String,
}
/// Reference to a stored summary associated with a symbol, serialized inside read reports.
#[derive(Serialize)]
struct SourceSummaryRef {
handle: String,
symbol_name: String,
file_path: String,
/// Summary text (possibly truncated by the producer — confirm).
summary: String,
/// Command string that expands this summary.
expand: String,
}
/// Serialized result of a source-read request: the previewed window plus the symbol,
/// summary and (optionally) Markdown references that accompany it.
#[derive(Serialize)]
struct SourceReadReport {
handle: String,
root: String,
file: String,
range: SourceRangePreview,
preview: Vec<SourceLinePreview>,
symbols: Vec<SourceSymbolRef>,
summaries: Vec<SourceSummaryRef>,
/// Markdown outline projection; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
markdown: Option<SourceReadMarkdownProjection>,
expand: SourceExpandCommands,
/// Non-fatal diagnostics; omitted from the output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// The symbol a symbol-read report is about.
///
/// Optional fields are omitted from the serialized output when `None`.
#[derive(Serialize)]
struct SymbolReadTarget {
handle: String,
name: String,
kind: String,
language: String,
file: String,
line: usize,
#[serde(skip_serializing_if = "Option::is_none")]
end_line: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
signature: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
parent_module: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
visibility: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
span: Option<AstSpanPreview>,
}
/// Follow-up command strings attached to a symbol-read report.
///
/// `body` and `markdown_ast` are omitted from the serialized output when `None`.
#[derive(Serialize)]
struct SymbolReadExpandCommands {
source_window: String,
#[serde(skip_serializing_if = "Option::is_none")]
body: Option<String>,
file: String,
explain: String,
callers: String,
callees: String,
#[serde(skip_serializing_if = "Option::is_none")]
markdown_ast: Option<String>,
}
/// Serialized result of a symbol-read request: the resolved symbol, its body preview and
/// related references.
#[derive(Serialize)]
struct SymbolReadReport {
handle: String,
root: String,
/// The query that was resolved to `symbol` (presumably as typed by the caller — confirm).
query: String,
symbol: SymbolReadTarget,
range: SourceRangePreview,
body: Vec<SourceLinePreview>,
child_symbols: Vec<SourceSymbolRef>,
summaries: Vec<SourceSummaryRef>,
expand: SymbolReadExpandCommands,
/// Non-fatal diagnostics; omitted from the output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// A Markdown block extracted from a document, before line numbers and relationships
/// are resolved for output.
///
/// Built by `markdown_ast_extract_raw_nodes` from the symbols of the Markdown extractor.
#[derive(Clone)]
pub(crate) struct MarkdownAstRawNode {
/// Stable `mdast` handle derived from file, kind, name and byte range.
handle: String,
/// Stable `span` handle derived from the same identity via `ast_span_handle`.
span_handle: String,
name: String,
/// Extractor kind; this section matches on "heading", "list_item" and "code_block".
kind: String,
/// Presentation name of `kind` as mapped by `markdown_ast_block_kind`.
block_kind: String,
node_kind: String,
/// Byte offset where the node starts in the source.
start_byte: usize,
/// Byte offset where the node ends; treated as exclusive when converted to a line.
end_byte: usize,
body_start_byte: Option<usize>,
body_end_byte: Option<usize>,
}
/// Parsed Markdown nodes for one file revision, as returned by `markdown_ast_projection`.
#[derive(Clone)]
pub(crate) struct MarkdownAstProjection {
/// Hex-encoded BLAKE3 hash of the source bytes.
source_hash: String,
/// Nodes sorted by start byte, end byte, kind and name.
nodes: Vec<MarkdownAstRawNode>,
/// Duration of the extraction that produced `nodes`; on a cache hit this is the
/// duration recorded when the entry was first parsed.
parse_duration_micros: u128,
/// `true` when the nodes came from `MARKDOWN_AST_CACHE` rather than a fresh parse.
cache_hit: bool,
}
/// Value stored in `MARKDOWN_AST_CACHE` for one `"{file}:{source_hash}"` key.
#[derive(Clone)]
struct MarkdownAstCacheEntry {
source_hash: String,
nodes: Vec<MarkdownAstRawNode>,
/// Time the original extraction took, in microseconds.
parse_duration_micros: u128,
}
/// Process-wide memo of parsed Markdown nodes, lazily initialised on first use.
///
/// `markdown_ast_projection` keys entries by `"{file}:{source_hash}"`, so a changed file
/// gets a new entry. Nothing in this section evicts entries.
static MARKDOWN_AST_CACHE: OnceLock<Mutex<HashMap<String, MarkdownAstCacheEntry>>> =
OnceLock::new();
/// Kind-specific details of a Markdown node, built by `markdown_ast_node_metadata`.
///
/// `None` fields and empty vectors are omitted from the serialized output.
#[derive(Serialize, Clone)]
struct MarkdownAstNodeMetadata {
/// Number of leading `#` characters (1 to 6); only computed for headings.
#[serde(skip_serializing_if = "Option::is_none")]
heading_level: Option<usize>,
/// Names of the headings whose spans enclose the node, ordered by start byte.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
section_path: Vec<String>,
/// Handle of the last heading in `section_path`.
#[serde(skip_serializing_if = "Option::is_none")]
section_handle: Option<String>,
/// Indentation-derived depth; only computed for list items.
#[serde(skip_serializing_if = "Option::is_none")]
list_depth: Option<usize>,
/// List marker such as `-` or `1.`; only computed for list items.
#[serde(skip_serializing_if = "Option::is_none")]
list_marker: Option<String>,
/// Parsed number of an ordered list marker.
#[serde(skip_serializing_if = "Option::is_none")]
list_order: Option<usize>,
/// The node name of a code block, which is used as its fence language.
#[serde(skip_serializing_if = "Option::is_none")]
fence_language: Option<String>,
/// Either three backticks or three tildes, depending on how the fence line starts.
#[serde(skip_serializing_if = "Option::is_none")]
fence_marker: Option<String>,
/// Symbols extracted from the body of a code block in a recognised language.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
embedded_symbols: Vec<MarkdownEmbeddedSymbol>,
}
/// Follow-up command strings for one Markdown node, built by `markdown_ast_node_expand`.
#[derive(Serialize, Clone)]
struct MarkdownAstNodeExpand {
/// `source-read` command covering the node's full line range.
source_window: String,
/// `source-read` command covering only the node's body lines.
source_body: String,
/// `symbol-read` command for the node name within its file.
symbol_read: String,
/// `edit-intents` command for the project root.
edit_intents: String,
}
/// Cache and size statistics of a Markdown projection, built by
/// `markdown_ast_cache_report`.
#[derive(Serialize, Clone)]
struct MarkdownAstCacheReport {
source_hash: String,
cache_hit: bool,
parse_duration_micros: u128,
/// Total number of nodes in the projection.
node_count: usize,
/// Number of nodes of kind "heading".
section_count: usize,
/// Number of nodes of kind "list_item".
list_item_count: usize,
/// Number of nodes of kind "code_block".
code_block_count: usize,
}
/// Timing record for one named phase of Markdown AST processing.
#[derive(Serialize, Clone)]
struct MarkdownAstPhaseTiming {
name: String,
/// Phase duration in microseconds.
duration_micros: u128,
/// Free-form description of the phase (content defined by the producer).
detail: String,
}
/// Compact outline row for a Markdown node, built by `markdown_ast_outline_entry`.
#[derive(Serialize, Clone)]
struct MarkdownAstOutlineEntry {
handle: String,
span_handle: String,
/// Node name, truncated to the caller's byte budget.
name: String,
kind: String,
block_kind: String,
/// First line of the node (1-based, derived from its start byte).
line: usize,
/// Last line of the node; never smaller than `line`.
end_line: usize,
/// Names of the enclosing headings; omitted from the output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
section_path: Vec<String>,
/// Number of nodes whose closest enclosing node is this one.
child_count: usize,
/// `markdown-ast` command selecting this node.
expand: String,
}
/// Summary section of a Markdown AST report: how many nodes exist, how many were
/// returned, plus cache statistics, an outline and phase timings.
///
/// NOTE(review): the constructor is outside this section; `omitted_nodes` is presumably
/// `total_nodes - returned_nodes` — confirm.
#[derive(Serialize, Clone)]
struct MarkdownAstProjectionPreview {
mode: String,
total_nodes: usize,
returned_nodes: usize,
omitted_nodes: usize,
/// Serialized as `null` when absent (no skip attribute is set).
selected_node: Option<String>,
cache: MarkdownAstCacheReport,
outline: Vec<MarkdownAstOutlineEntry>,
phase_timings: Vec<MarkdownAstPhaseTiming>,
}
/// Markdown outline attached to a source-read window, built by
/// `source_read_markdown_projection`.
#[derive(Serialize)]
struct SourceReadMarkdownProjection {
/// Stable `mdproj` handle derived from file, window bounds and source hash.
handle: String,
/// Set to "window_outline" by the builder in this section.
mode: String,
/// Number of nodes in the whole document.
total_nodes: usize,
/// Number of nodes whose line range intersects the window.
visible_nodes: usize,
/// Budget-limited outline of the visible nodes, headings first.
outline: Vec<MarkdownAstOutlineEntry>,
/// `markdown-ast` command for the whole file.
expand: String,
}
/// Byte range within a source file, as reported for Markdown nodes and their bodies.
#[derive(Serialize, Clone)]
struct SourceByteRangePreview {
/// Byte offset where the range starts.
start: usize,
/// Byte offset where the range ends.
end: usize,
}
/// Fully resolved Markdown node as emitted in reports, built by `markdown_ast_node`.
#[derive(Serialize, Clone)]
struct MarkdownAstNode {
handle: String,
span_handle: String,
name: String,
kind: String,
block_kind: String,
node_kind: String,
/// First line of the node (1-based, derived from its start byte).
line: usize,
/// Last line of the node; never smaller than `line`.
end_line: usize,
byte_span: SourceByteRangePreview,
/// Present only when both body offsets are known; omitted from the output otherwise.
#[serde(skip_serializing_if = "Option::is_none")]
body_byte_span: Option<SourceByteRangePreview>,
/// Handle of the smallest enclosing node; serialized as `null` for top-level nodes.
parent_handle: Option<String>,
/// Handles of direct children, limited by the caller; omitted when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
child_handles: Vec<String>,
metadata: MarkdownAstNodeMetadata,
expand: MarkdownAstNodeExpand,
}
/// Report-level follow-up command strings for a Markdown AST report.
#[derive(Serialize)]
struct MarkdownAstExpandCommands {
file: String,
source_read: String,
edit_intents: String,
}
/// Serialized result of a Markdown AST request for one file.
#[derive(Serialize)]
struct MarkdownAstReport {
handle: String,
root: String,
file: String,
range: SourceRangePreview,
projection: MarkdownAstProjectionPreview,
nodes: Vec<MarkdownAstNode>,
expand: MarkdownAstExpandCommands,
/// Non-fatal diagnostics; omitted from the output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Resolves `file` to a canonical path and verifies it is a regular file inside `root`.
///
/// Relative paths are interpreted relative to `root`; absolute paths are used as given.
/// Both the file and the root are canonicalized before the containment check.
///
/// # Errors
/// Fails when either path cannot be canonicalized, when the target is not a regular
/// file, or when the canonical file path does not start with the canonical root.
pub(crate) fn resolve_source_file(root: &Path, file: &Path) -> Result<PathBuf> {
    let candidate = if file.is_relative() {
        root.join(file)
    } else {
        file.to_path_buf()
    };
    let resolved_file = candidate
        .canonicalize()
        .with_context(|| format!("canonicalizing source file {}", candidate.display()))?;
    if !resolved_file.is_file() {
        bail!("source file is not a regular file: {}", resolved_file.display());
    }
    let resolved_root = root
        .canonicalize()
        .with_context(|| format!("canonicalizing project root {}", root.display()))?;
    if resolved_file.starts_with(&resolved_root) {
        return Ok(resolved_file);
    }
    bail!(
        "source file {} is outside project root {}",
        resolved_file.display(),
        resolved_root.display()
    );
}
/// Builds the `tsift source-read` command line that shows `lines` lines of `file`
/// starting at line `start`, with the file and root shell-quoted.
pub(crate) fn source_read_command(root: &Path, file: &str, start: usize, lines: usize) -> String {
    let root_text = root.to_string_lossy();
    let file_arg = shell_quote(file);
    let root_arg = shell_quote(&root_text);
    format!(
        "tsift source-read {} --path {} --start {} --lines {} --budget normal",
        file_arg, root_arg, start, lines
    )
}
/// Builds the `tsift --envelope symbol-read` command line for `symbol` within `file`,
/// with all three arguments shell-quoted.
pub(crate) fn source_symbol_read_command(root: &Path, symbol: &str, file: &str) -> String {
    let root_text = root.to_string_lossy();
    let symbol_arg = shell_quote(symbol);
    let root_arg = shell_quote(&root_text);
    let file_arg = shell_quote(file);
    format!(
        "tsift --envelope symbol-read {} --path {} --file {} --budget normal",
        symbol_arg, root_arg, file_arg
    )
}
/// Builds the `tsift --envelope explain` command line for `symbol` under `root`.
fn source_symbol_expand_command(root: &Path, symbol: &str) -> String {
    let root_text = root.to_string_lossy();
    let symbol_arg = shell_quote(symbol);
    let root_arg = shell_quote(&root_text);
    format!(
        "tsift --envelope explain {} --path {} --budget normal",
        symbol_arg, root_arg
    )
}
/// Builds the `tsift graph` command line for `symbol`, where `relation` is appended
/// verbatim as a `--<relation>` flag (it is not shell-quoted).
fn source_symbol_graph_command(root: &Path, symbol: &str, relation: &str) -> String {
    let root_text = root.to_string_lossy();
    let symbol_arg = shell_quote(symbol);
    let root_arg = shell_quote(&root_text);
    format!(
        "tsift graph {} --path {} --{} --json",
        symbol_arg, root_arg, relation
    )
}
/// Builds the `tsift summarize` command line (JSON output) for `symbol` under `root`.
fn source_summary_expand_command(root: &Path, symbol: &str) -> String {
    let root_text = root.to_string_lossy();
    let symbol_arg = shell_quote(symbol);
    let root_arg = shell_quote(&root_text);
    format!("tsift summarize {} --path {} --json", symbol_arg, root_arg)
}
/// Builds the `tsift --envelope markdown-ast` command line for `file`.
///
/// When `node` is given, a shell-quoted ` --node <handle>` selector is appended.
pub(crate) fn markdown_ast_command(root: &Path, file: &str, node: Option<&str>) -> String {
    let root_text = root.to_string_lossy();
    let base = format!(
        "tsift --envelope markdown-ast {} --path {} --budget normal",
        shell_quote(file),
        shell_quote(&root_text)
    );
    match node {
        Some(selected) => format!("{base} --node {}", shell_quote(selected)),
        None => base,
    }
}
/// Builds the `tsift --envelope edit-intents` command line for the project at `root`.
fn markdown_edit_intents_command(root: &Path) -> String {
    let root_text = root.to_string_lossy();
    let root_arg = shell_quote(&root_text);
    format!(
        "tsift --envelope edit-intents --path {} --budget normal",
        root_arg
    )
}
/// Returns the stored start line of `symbol` plus one.
///
/// Falls back to `1` when the stored value does not fit in `usize` (e.g. is negative)
/// or the increment would overflow.
pub(crate) fn source_symbol_line(symbol: &index::StoredSymbol) -> usize {
    match usize::try_from(symbol.line) {
        Ok(stored) => stored.checked_add(1).unwrap_or(1),
        Err(_) => 1,
    }
}
/// Returns the stored end line of `symbol` plus one, or `None` when it is missing,
/// does not fit in `usize`, or the increment would overflow.
fn source_symbol_end_line(symbol: &index::StoredSymbol) -> Option<usize> {
    let stored = symbol.end_line?;
    let zero_based = usize::try_from(stored).ok()?;
    zero_based.checked_add(1)
}
/// Converts an optional stored byte offset to `usize`, yielding `None` for missing or
/// negative values.
fn symbol_span_byte(value: Option<i64>) -> Option<usize> {
    let raw = value?;
    usize::try_from(raw).ok()
}
/// Returns the 1-based line number containing byte offset `byte`.
///
/// The offset is clamped to the source length; the result is one plus the number of
/// newline bytes strictly before the offset.
fn source_line_for_byte(source: &[u8], byte: usize) -> usize {
    let prefix_len = byte.min(source.len());
    let mut line = 1usize;
    for &value in &source[..prefix_len] {
        if value == b'\n' {
            line = line.saturating_add(1);
        }
    }
    line
}
/// Returns the 1-based line of the last byte covered by an exclusive `end_byte`.
///
/// An `end_byte` of zero is treated like offset zero.
fn source_line_for_end_byte(source: &[u8], end_byte: usize) -> usize {
    let last_byte = match end_byte {
        0 => 0,
        exclusive_end => exclusive_end - 1,
    };
    source_line_for_byte(source, last_byte)
}
/// Derives the stable `span` handle for a symbol from its file, kind, name and byte range.
fn ast_span_handle(
    file: &str,
    name: &str,
    kind: &str,
    start_byte: usize,
    end_byte: usize,
) -> String {
    let identity = format!("{file}:{kind}:{name}:{start_byte}:{end_byte}");
    stable_handle("span", &identity)
}
/// Returns the `(start, end)` byte offsets of a stored symbol, or `None` when either
/// offset is missing or negative.
pub(crate) fn stored_symbol_span_bounds(symbol: &index::StoredSymbol) -> Option<(usize, usize)> {
    let start = symbol_span_byte(symbol.start_byte)?;
    let end = symbol_span_byte(symbol.end_byte)?;
    Some((start, end))
}
/// Returns the `(start, end)` byte offsets of a symbol hit, or `None` when either
/// offset is missing or negative.
pub(crate) fn symbol_hit_span_bounds(symbol: &index::SymbolHit) -> Option<(usize, usize)> {
    let start = symbol_span_byte(symbol.start_byte)?;
    let end = symbol_span_byte(symbol.end_byte)?;
    Some((start, end))
}
/// Returns the stable `span` handle of a stored symbol, or `None` when the symbol has
/// no usable byte range.
pub(crate) fn stored_symbol_span_handle(symbol: &index::StoredSymbol) -> Option<String> {
    stored_symbol_span_bounds(symbol).map(|(start_byte, end_byte)| {
        ast_span_handle(
            &symbol.file,
            &symbol.name,
            &symbol.kind,
            start_byte,
            end_byte,
        )
    })
}
/// Reports whether two stored symbols share file, name, kind and byte bounds — the same
/// fields that determine a span handle.
fn same_stored_symbol_span(left: &index::StoredSymbol, right: &index::StoredSymbol) -> bool {
    let same_identity =
        left.file == right.file && left.name == right.name && left.kind == right.kind;
    same_identity && stored_symbol_span_bounds(left) == stored_symbol_span_bounds(right)
}
fn stored_symbol_parent_span_handle(
symbol: &index::StoredSymbol,
symbols: &[index::StoredSymbol],
) -> Option<String> {
let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
symbols
.iter()
.filter(|candidate| {
if candidate.file != symbol.file || same_stored_symbol_span(candidate, symbol) {
return false;
}
let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
else {
return false;
};
candidate_start <= start_byte && candidate_end >= end_byte
})
.min_by_key(|candidate| {
stored_symbol_span_bounds(candidate)
.map(|(start, end)| end.saturating_sub(start))
.unwrap_or(usize::MAX)
})
.and_then(stored_symbol_span_handle)
}
/// Collects span handles of up to `limit` symbols nested inside `symbol`.
///
/// A candidate qualifies when it lives in the same file, is not the same span, and its
/// byte range lies within the symbol's range. All nested symbols qualify, not only
/// direct children. Returns an empty vector when `symbol` has no usable byte range.
fn stored_symbol_child_span_handles(
    symbol: &index::StoredSymbol,
    symbols: &[index::StoredSymbol],
    limit: usize,
) -> Vec<String> {
    let (start_byte, end_byte) = match stored_symbol_span_bounds(symbol) {
        Some(bounds) => bounds,
        None => return Vec::new(),
    };
    let is_nested = |candidate: &&index::StoredSymbol| -> bool {
        if candidate.file != symbol.file || same_stored_symbol_span(candidate, symbol) {
            return false;
        }
        match stored_symbol_span_bounds(candidate) {
            Some((candidate_start, candidate_end)) => {
                candidate_start >= start_byte && candidate_end <= end_byte
            }
            None => false,
        }
    };
    symbols
        .iter()
        .filter(is_nested)
        .take(limit)
        .filter_map(stored_symbol_span_handle)
        .collect()
}
/// Returns the ATX heading level (1 to 6) of the line starting at `start_byte`.
///
/// Counts the `#` characters that follow any leading whitespace, looking only at the
/// text from `start_byte` up to the next newline. Returns `None` when the count is
/// outside 1..=6 or that text is not valid UTF-8.
fn markdown_heading_level(source: &[u8], start_byte: usize) -> Option<usize> {
    let tail = &source[start_byte.min(source.len())..];
    let line_bytes = tail.split(|byte| *byte == b'\n').next().unwrap_or(tail);
    let line = std::str::from_utf8(line_bytes).unwrap_or("");
    let hashes = line
        .trim_start()
        .bytes()
        .take_while(|byte| *byte == b'#')
        .count();
    match hashes {
        1..=6 => Some(hashes),
        _ => None,
    }
}
/// Estimates the nesting depth of a list item from the indentation before `start_byte`.
///
/// Every space between the start of the line and `start_byte` counts as one column and
/// every tab as four; other bytes count as zero. The depth is the column total divided
/// by two (integer division).
fn markdown_list_depth(source: &[u8], start_byte: usize) -> usize {
    let cursor = start_byte.min(source.len());
    let prefix = &source[..cursor];
    let line_start = prefix
        .iter()
        .rposition(|byte| *byte == b'\n')
        .map_or(0, |newline| newline + 1);
    let mut columns = 0usize;
    for byte in &prefix[line_start..] {
        columns += match *byte {
            b'\t' => 4,
            b' ' => 1,
            _ => 0,
        };
    }
    columns / 2
}
/// Returns the heading symbols of `file` whose byte range encloses `start_byte..end_byte`.
///
/// The result is ordered by start byte and then by name, so outer headings come before
/// the headings nested inside them.
fn markdown_enclosing_heading_symbols<'a>(
    file: &str,
    start_byte: usize,
    end_byte: usize,
    symbols: &'a [index::StoredSymbol],
) -> Vec<&'a index::StoredSymbol> {
    let mut enclosing: Vec<&'a index::StoredSymbol> = Vec::new();
    for candidate in symbols {
        if candidate.file != file || candidate.kind != "heading" {
            continue;
        }
        match stored_symbol_span_bounds(candidate) {
            Some((candidate_start, candidate_end))
                if candidate_start <= start_byte && candidate_end >= end_byte =>
            {
                enclosing.push(candidate);
            }
            _ => {}
        }
    }
    // Symbols without bounds cannot reach this point; the fallback only keeps the key total.
    let start_of = |symbol: &index::StoredSymbol| {
        stored_symbol_span_bounds(symbol).map_or(usize::MAX, |(start, _)| start)
    };
    enclosing.sort_by(|left, right| {
        start_of(left)
            .cmp(&start_of(right))
            .then_with(|| left.name.cmp(&right.name))
    });
    enclosing
}
/// Builds Markdown-specific span metadata for a stored symbol.
///
/// Returns `None` for non-Markdown symbols, for symbols without a usable byte range,
/// and when none of the metadata fields would carry information. Heading level, list
/// depth and fence language are only computed for the matching symbol kind; the
/// section path lists the names of all enclosing headings.
fn markdown_stored_symbol_metadata(
    symbol: &index::StoredSymbol,
    source: &[u8],
    symbols: &[index::StoredSymbol],
) -> Option<MarkdownSpanMetadata> {
    if symbol.language != "markdown" {
        return None;
    }
    let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;

    let enclosing = markdown_enclosing_heading_symbols(&symbol.file, start_byte, end_byte, symbols);
    let section_path: Vec<String> = enclosing
        .iter()
        .map(|heading| heading.name.clone())
        .collect();
    let section_handle = match enclosing.last() {
        Some(innermost) => stored_symbol_span_handle(innermost),
        None => None,
    };

    let heading_level = if symbol.kind == "heading" {
        markdown_heading_level(source, start_byte)
    } else {
        None
    };
    let list_depth = if symbol.kind == "list_item" {
        Some(markdown_list_depth(source, start_byte))
    } else {
        None
    };
    let is_code_block = symbol.kind == "code_block";
    // The name of a code block symbol is used as its fence language.
    let fence_language = if is_code_block {
        Some(symbol.name.clone())
    } else {
        None
    };
    let embedded_symbols = if is_code_block {
        markdown_embedded_symbols(
            &symbol.file,
            source,
            symbol_span_byte(symbol.body_start_byte),
            symbol_span_byte(symbol.body_end_byte),
            fence_language.as_deref(),
        )
    } else {
        Vec::new()
    };

    let has_metadata = heading_level.is_some()
        || !section_path.is_empty()
        || section_handle.is_some()
        || list_depth.is_some()
        || fence_language.is_some()
        || !embedded_symbols.is_empty();
    if !has_metadata {
        return None;
    }
    Some(MarkdownSpanMetadata {
        heading_level,
        section_path,
        section_handle,
        list_depth,
        fence_language,
        embedded_symbols,
    })
}
/// Builds Markdown-specific span metadata for a symbol hit.
///
/// Unlike the stored-symbol variant, no section path or section handle is resolved
/// here. Returns `None` for non-Markdown hits and when no field would carry information.
fn markdown_symbol_hit_metadata(
    symbol: &index::SymbolHit,
    source: &[u8],
    start_byte: usize,
) -> Option<MarkdownSpanMetadata> {
    if symbol.language != "markdown" {
        return None;
    }

    let heading_level = if symbol.kind == "heading" {
        markdown_heading_level(source, start_byte)
    } else {
        None
    };
    let list_depth = if symbol.kind == "list_item" {
        Some(markdown_list_depth(source, start_byte))
    } else {
        None
    };
    let is_code_block = symbol.kind == "code_block";
    // The name of a code block symbol is used as its fence language.
    let fence_language = if is_code_block {
        Some(symbol.name.clone())
    } else {
        None
    };
    let embedded_symbols = if is_code_block {
        markdown_embedded_symbols(
            &symbol.file,
            source,
            symbol_span_byte(symbol.body_start_byte),
            symbol_span_byte(symbol.body_end_byte),
            fence_language.as_deref(),
        )
    } else {
        Vec::new()
    };

    let has_metadata = heading_level.is_some()
        || list_depth.is_some()
        || fence_language.is_some()
        || !embedded_symbols.is_empty();
    if !has_metadata {
        return None;
    }
    Some(MarkdownSpanMetadata {
        heading_level,
        section_path: Vec::new(),
        section_handle: None,
        list_depth,
        fence_language,
        embedded_symbols,
    })
}
/// Reports whether `path` has an `md` or `mdx` extension, compared ASCII
/// case-insensitively. Paths without a UTF-8 extension are not Markdown.
fn is_markdown_path(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    extension.eq_ignore_ascii_case("md") || extension.eq_ignore_ascii_case("mdx")
}
/// Maps an extractor kind to the block kind used in Markdown AST output.
///
/// `heading` becomes `section` and `code_block` becomes `fenced_code_block`; every
/// other kind (including `list_item`) is passed through unchanged.
fn markdown_ast_block_kind(kind: &str) -> String {
    let mapped = if kind == "heading" {
        "section"
    } else if kind == "code_block" {
        "fenced_code_block"
    } else {
        kind
    };
    String::from(mapped)
}
/// Normalizes a fence info string to a lowercase language key.
///
/// Only the first whitespace-separated token is considered. Leading `language-` and
/// `lang-` prefixes (matched case-sensitively) are removed, surrounding backticks and
/// quotes are trimmed, and the remainder is ASCII-lowercased. Returns `None` when
/// nothing is left.
fn markdown_embedded_language_key(language: &str) -> Option<String> {
    // Tokens from `split_whitespace` are never empty and carry no surrounding whitespace.
    let token = language.split_whitespace().next()?;
    let without_prefix = token
        .trim_start_matches("language-")
        .trim_start_matches("lang-");
    let bare = without_prefix.trim_matches(|ch| matches!(ch, '`' | '"' | '\''));
    if bare.is_empty() {
        None
    } else {
        Some(bare.to_ascii_lowercase())
    }
}
/// Resolves a fence info string to a supported language for embedded symbol extraction.
///
/// Common language names are translated to the file extension understood by
/// `graph::Lang::from_extension`; any other key is tried as an extension directly.
/// Markdown itself is rejected, as are unknown or empty keys.
fn markdown_embedded_lang(language: &str) -> Option<graph::Lang> {
    let key = markdown_embedded_language_key(language)?;
    let extension = match key.as_str() {
        "rust" => "rs",
        "python" => "py",
        "typescript" => "ts",
        "javascript" => "js",
        "kotlin" => "kt",
        "shell" | "sh" | "zsh" => "bash",
        other => other,
    };
    graph::Lang::from_extension(extension).filter(|lang| lang.name() != "markdown")
}
/// Derives the stable `span` handle of a symbol embedded in a Markdown code block.
///
/// The identity includes an `embedded:<language>` segment, which keeps it distinct from
/// the identity string used by `ast_span_handle`.
fn markdown_embedded_ast_span_handle(
    file: &str,
    language: &str,
    name: &str,
    kind: &str,
    start_byte: usize,
    end_byte: usize,
) -> String {
    let identity = format!("{file}:embedded:{language}:{kind}:{name}:{start_byte}:{end_byte}");
    stable_handle("span", &identity)
}
fn markdown_embedded_symbols(
file: &str,
source: &[u8],
body_start_byte: Option<usize>,
body_end_byte: Option<usize>,
fence_language: Option<&str>,
) -> Vec<MarkdownEmbeddedSymbol> {
let Some(fence_language) = fence_language else {
return Vec::new();
};
let Some(lang) = markdown_embedded_lang(fence_language) else {
return Vec::new();
};
let Some((body_start_byte, body_end_byte)) = body_start_byte.zip(body_end_byte) else {
return Vec::new();
};
let Some(body) = source.get(body_start_byte.min(source.len())..body_end_byte.min(source.len()))
else {
return Vec::new();
};
if body.is_empty() {
return Vec::new();
}
let Ok(symbols) = lang.extract_symbols(body) else {
return Vec::new();
};
let language = lang.name().to_string();
symbols
.into_iter()
.map(|symbol| {
let start_byte = body_start_byte.saturating_add(symbol.start_byte);
let end_byte = body_start_byte.saturating_add(symbol.end_byte);
let body_start = symbol
.body_start_byte
.map(|byte| body_start_byte.saturating_add(byte));
let body_end = symbol
.body_end_byte
.map(|byte| body_start_byte.saturating_add(byte));
let start_line = source_line_for_byte(source, start_byte);
let end_line = source_line_for_end_byte(source, end_byte).max(start_line);
MarkdownEmbeddedSymbol {
handle: markdown_embedded_ast_span_handle(
file,
&language,
&symbol.name,
&symbol.kind,
start_byte,
end_byte,
),
name: symbol.name,
kind: symbol.kind,
language: language.clone(),
node_kind: symbol.node_kind,
start_byte,
end_byte,
start_line,
end_line,
body_start_byte: body_start,
body_end_byte: body_end,
body_start_line: body_start.map(|byte| source_line_for_byte(source, byte)),
body_end_line: body_end.map(|byte| source_line_for_end_byte(source, byte)),
}
})
.collect()
}
/// Returns the full text of the line containing `start_byte`, without its newline.
///
/// The offset is clamped to the source length. An empty string is returned when the
/// line is not valid UTF-8.
fn markdown_source_line(source: &[u8], start_byte: usize) -> &str {
    let cursor = start_byte.min(source.len());
    let (before, after) = source.split_at(cursor);
    let line_start = before
        .iter()
        .rposition(|byte| *byte == b'\n')
        .map_or(0, |newline| newline + 1);
    let line_end = after
        .iter()
        .position(|byte| *byte == b'\n')
        .map_or(source.len(), |newline| cursor + newline);
    std::str::from_utf8(&source[line_start..line_end]).unwrap_or("")
}
/// Detects the list marker on the line containing `start_byte`.
///
/// Returns `(marker, order)`:
/// * bullet items (`-`, `*`, `+` followed by a space) yield the bullet and no order;
/// * ordered items (ASCII digits, then `.` or `)`, then a space) yield the marker text
///   such as `3.` and the parsed number (`None` when it does not fit in `usize`);
/// * anything else yields `(None, None)`.
fn markdown_list_attributes(source: &[u8], start_byte: usize) -> (Option<String>, Option<usize>) {
    let trimmed = markdown_source_line(source, start_byte).trim_start();

    let bullet = ["-", "*", "+"].into_iter().find(|marker| {
        trimmed
            .strip_prefix(*marker)
            .is_some_and(|rest| rest.starts_with(' '))
    });
    if let Some(marker) = bullet {
        return (Some(marker.to_string()), None);
    }

    let digit_len = trimmed
        .bytes()
        .take_while(|byte| byte.is_ascii_digit())
        .count();
    if digit_len == 0 {
        return (None, None);
    }
    let (digits, rest) = trimmed.split_at(digit_len);
    let delimiter = match rest.as_bytes() {
        [b'.', b' ', ..] => ".",
        [b')', b' ', ..] => ")",
        _ => return (None, None),
    };
    (
        Some(format!("{digits}{delimiter}")),
        digits.parse::<usize>().ok(),
    )
}
/// Returns the fence marker opening the line that contains `start_byte`.
///
/// Only the three-character markers are reported: three backticks or three tildes,
/// regardless of how long the actual fence is. Returns `None` when the line, after
/// leading whitespace, starts with neither.
fn markdown_fence_marker(source: &[u8], start_byte: usize) -> Option<String> {
    let trimmed = markdown_source_line(source, start_byte).trim_start();
    if trimmed.starts_with("```") {
        Some("```".to_string())
    } else if trimmed.starts_with("~~~") {
        Some("~~~".to_string())
    } else {
        None
    }
}
/// Extracts the Markdown symbols of `source` and converts them to raw AST nodes.
///
/// Each node gets an `mdast` handle and a `span` handle derived from file, kind, name
/// and byte range. The result is sorted by start byte, end byte, kind and name.
///
/// # Errors
/// Fails when the Markdown symbol extraction fails.
fn markdown_ast_extract_raw_nodes(file: &str, source: &[u8]) -> Result<Vec<MarkdownAstRawNode>> {
    let symbols = graph::Lang::Markdown
        .extract_symbols(source)
        .context("extracting Markdown AST nodes")?;
    let mut nodes = Vec::new();
    for symbol in symbols {
        let span_handle = ast_span_handle(
            file,
            &symbol.name,
            &symbol.kind,
            symbol.start_byte,
            symbol.end_byte,
        );
        let identity = format!(
            "{}:{}:{}:{}:{}",
            file, symbol.kind, symbol.name, symbol.start_byte, symbol.end_byte
        );
        let handle = stable_handle("mdast", &identity);
        // Derived before `symbol.kind` is moved into the node below.
        let block_kind = markdown_ast_block_kind(&symbol.kind);
        nodes.push(MarkdownAstRawNode {
            handle,
            span_handle,
            name: symbol.name,
            kind: symbol.kind,
            block_kind,
            node_kind: symbol.node_kind,
            start_byte: symbol.start_byte,
            end_byte: symbol.end_byte,
            body_start_byte: symbol.body_start_byte,
            body_end_byte: symbol.body_end_byte,
        });
    }
    nodes.sort_by(|left, right| {
        (left.start_byte, left.end_byte, &left.kind, &left.name).cmp(&(
            right.start_byte,
            right.end_byte,
            &right.kind,
            &right.name,
        ))
    });
    Ok(nodes)
}
/// Returns the parsed Markdown nodes for `source`, reusing `MARKDOWN_AST_CACHE` when the
/// same file content was parsed before.
///
/// The cache key combines `file` with the BLAKE3 hash of `source`. On a hit the cached
/// nodes and the originally measured parse duration are returned with `cache_hit` set;
/// on a miss the source is parsed, timed and stored. The lock is not held while parsing.
///
/// # Errors
/// Fails when node extraction fails.
///
/// # Panics
/// Panics when the cache mutex is poisoned.
pub(crate) fn markdown_ast_projection(file: &str, source: &[u8]) -> Result<MarkdownAstProjection> {
    let source_hash = blake3::hash(source).to_hex().to_string();
    let cache_key = format!("{file}:{source_hash}");
    let cache = MARKDOWN_AST_CACHE.get_or_init(|| Mutex::new(HashMap::new()));

    // The guard is a statement temporary, so the lock is released before parsing.
    let cached = cache
        .lock()
        .expect("markdown ast cache poisoned")
        .get(&cache_key)
        .cloned();
    if let Some(entry) = cached {
        return Ok(MarkdownAstProjection {
            source_hash: entry.source_hash,
            nodes: entry.nodes,
            parse_duration_micros: entry.parse_duration_micros,
            cache_hit: true,
        });
    }

    let started = Instant::now();
    let nodes = markdown_ast_extract_raw_nodes(file, source)?;
    let parse_duration_micros = started.elapsed().as_micros();
    let entry = MarkdownAstCacheEntry {
        source_hash: source_hash.clone(),
        nodes: nodes.clone(),
        parse_duration_micros,
    };
    cache
        .lock()
        .expect("markdown ast cache poisoned")
        .insert(cache_key, entry);
    Ok(MarkdownAstProjection {
        source_hash,
        nodes,
        parse_duration_micros,
        cache_hit: false,
    })
}
/// Summarizes a projection: its hash, cache status, parse time, and node counts in
/// total and per kind (`heading`, `list_item`, `code_block`).
fn markdown_ast_cache_report(projection: &MarkdownAstProjection) -> MarkdownAstCacheReport {
    let mut section_count = 0usize;
    let mut list_item_count = 0usize;
    let mut code_block_count = 0usize;
    for node in &projection.nodes {
        match node.kind.as_str() {
            "heading" => section_count += 1,
            "list_item" => list_item_count += 1,
            "code_block" => code_block_count += 1,
            _ => {}
        }
    }
    MarkdownAstCacheReport {
        source_hash: projection.source_hash.clone(),
        cache_hit: projection.cache_hit,
        parse_duration_micros: projection.parse_duration_micros,
        node_count: projection.nodes.len(),
        section_count,
        list_item_count,
        code_block_count,
    }
}
/// Counts the nodes whose closest enclosing node, per `markdown_ast_parent_handle`,
/// is `node`.
fn markdown_ast_node_direct_child_count(
    node: &MarkdownAstRawNode,
    nodes: &[MarkdownAstRawNode],
) -> usize {
    let mut children = 0usize;
    for candidate in nodes {
        let is_child = markdown_ast_parent_handle(candidate, nodes)
            .is_some_and(|parent| parent == node.handle);
        if is_child {
            children += 1;
        }
    }
    children
}
/// Builds the outline row for `node`.
///
/// The name is truncated to `max_bytes`; the line range is derived from the node's byte
/// range, with the end line never smaller than the start line. The section path comes
/// from the node metadata and the expand command selects the node by handle.
fn markdown_ast_outline_entry(
    root: &Path,
    file: &str,
    source: &[u8],
    nodes: &[MarkdownAstRawNode],
    node: &MarkdownAstRawNode,
    max_bytes: usize,
) -> MarkdownAstOutlineEntry {
    let first_line = source_line_for_byte(source, node.start_byte);
    let last_line = source_line_for_end_byte(source, node.end_byte).max(first_line);
    let metadata = markdown_ast_node_metadata(file, node, source, nodes);
    let child_count = markdown_ast_node_direct_child_count(node, nodes);
    let expand = markdown_ast_command(root, file, Some(&node.handle));
    MarkdownAstOutlineEntry {
        handle: node.handle.clone(),
        span_handle: node.span_handle.clone(),
        name: truncate_for_budget(&node.name, max_bytes),
        kind: node.kind.clone(),
        block_kind: node.block_kind.clone(),
        line: first_line,
        end_line: last_line,
        section_path: metadata.section_path,
        child_count,
        expand,
    }
}
/// Builds at most `limit` outline rows for a document: all headings first, then the
/// remaining blocks, each group ordered by start byte and then end byte.
fn markdown_ast_outline_entries(
    root: &Path,
    file: &str,
    source: &[u8],
    nodes: &[MarkdownAstRawNode],
    limit: usize,
    max_bytes: usize,
) -> Vec<MarkdownAstOutlineEntry> {
    let mut ordered: Vec<&MarkdownAstRawNode> = nodes.iter().collect();
    // `false` sorts before `true`, so headings lead; the sort is stable, so nodes with
    // equal keys keep their input order.
    ordered.sort_by_key(|node| (node.kind != "heading", node.start_byte, node.end_byte));
    let mut entries = Vec::new();
    for node in ordered.into_iter().take(limit) {
        entries.push(markdown_ast_outline_entry(
            root, file, source, nodes, node, max_bytes,
        ));
    }
    entries
}
/// Reports whether the line range of `node` overlaps the inclusive window `start..=end`.
fn markdown_ast_node_intersects_lines(
    source: &[u8],
    node: &MarkdownAstRawNode,
    start: usize,
    end: usize,
) -> bool {
    let first_line = source_line_for_byte(source, node.start_byte);
    let last_line = source_line_for_end_byte(source, node.end_byte).max(first_line);
    // The ranges overlap unless the node lies entirely after or entirely before the window.
    !(first_line > end || last_line < start)
}
fn source_read_markdown_projection(
root: &Path,
file: &str,
source: &[u8],
start: usize,
end: usize,
budget: ResponseBudget,
) -> Result<SourceReadMarkdownProjection> {
let projection = markdown_ast_projection(file, source)?;
let visible_nodes = projection
.nodes
.iter()
.filter(|node| markdown_ast_node_intersects_lines(source, node, start, end))
.collect::<Vec<_>>();
let mut outline_nodes = visible_nodes.clone();
outline_nodes.sort_by_key(|node| {
(
node.kind != "heading",
node.start_byte,
node.end_byte,
node.name.as_str(),
)
});
let outline = outline_nodes
.into_iter()
.take(budget.preview_items())
.map(|node| {
markdown_ast_outline_entry(
root,
file,
source,
&projection.nodes,
node,
budget.preview_bytes(),
)
})
.collect::<Vec<_>>();
Ok(SourceReadMarkdownProjection {
handle: stable_handle(
"mdproj",
&format!("{file}:{start}:{end}:{}", projection.source_hash),
),
mode: "window_outline".to_string(),
total_nodes: projection.nodes.len(),
visible_nodes: visible_nodes.len(),
outline,
expand: markdown_ast_command(root, file, None),
})
}
/// Reports whether the byte range of `parent` covers the byte range of `child`.
///
/// A node never contains itself (nodes are identified by handle). Two different nodes
/// with identical ranges contain each other.
fn markdown_ast_contains(parent: &MarkdownAstRawNode, child: &MarkdownAstRawNode) -> bool {
    parent.handle != child.handle
        && parent.start_byte <= child.start_byte
        && parent.end_byte >= child.end_byte
}
/// Returns the handle of the closest node that contains `node`, if any.
///
/// The closest container is the one with the smallest byte width; equal widths are
/// resolved by the smaller start byte, and remaining ties by the earliest position in
/// `nodes`.
fn markdown_ast_parent_handle(
    node: &MarkdownAstRawNode,
    nodes: &[MarkdownAstRawNode],
) -> Option<String> {
    let mut closest: Option<((usize, usize), &MarkdownAstRawNode)> = None;
    for candidate in nodes {
        if !markdown_ast_contains(candidate, node) {
            continue;
        }
        let rank = (
            candidate.end_byte.saturating_sub(candidate.start_byte),
            candidate.start_byte,
        );
        // Strict comparison keeps the first candidate when ranks tie.
        let improves = match closest {
            Some((best_rank, _)) => rank < best_rank,
            None => true,
        };
        if improves {
            closest = Some((rank, candidate));
        }
    }
    closest.map(|(_, parent)| parent.handle.clone())
}
/// Returns the handles of up to `limit` direct children of `node`, in `nodes` order.
///
/// A direct child is a node whose closest container, per `markdown_ast_parent_handle`,
/// is `node`.
fn markdown_ast_child_handles(
    node: &MarkdownAstRawNode,
    nodes: &[MarkdownAstRawNode],
    limit: usize,
) -> Vec<String> {
    let mut children = Vec::new();
    for candidate in nodes {
        if children.len() >= limit {
            break;
        }
        let is_child = markdown_ast_parent_handle(candidate, nodes)
            .is_some_and(|parent| parent == node.handle);
        if is_child {
            children.push(candidate.handle.clone());
        }
    }
    children
}
/// Returns the heading nodes whose byte range covers `node`, ordered by start byte,
/// end byte and name.
///
/// The range test does not exclude `node` itself, so a heading is part of its own
/// section list.
fn markdown_ast_section_nodes<'a>(
    node: &MarkdownAstRawNode,
    nodes: &'a [MarkdownAstRawNode],
) -> Vec<&'a MarkdownAstRawNode> {
    let mut enclosing: Vec<&'a MarkdownAstRawNode> = nodes
        .iter()
        .filter(|candidate| {
            candidate.kind == "heading"
                && candidate.start_byte <= node.start_byte
                && candidate.end_byte >= node.end_byte
        })
        .collect();
    enclosing.sort_by(|left, right| {
        (left.start_byte, left.end_byte, &left.name).cmp(&(
            right.start_byte,
            right.end_byte,
            &right.name,
        ))
    });
    enclosing
}
/// Builds the kind-specific metadata of a Markdown node.
///
/// The section path and handle come from the enclosing headings. Heading level is only
/// computed for headings; list depth, marker and order only for list items; fence
/// language, fence marker and embedded symbols only for code blocks.
fn markdown_ast_node_metadata(
    file: &str,
    node: &MarkdownAstRawNode,
    source: &[u8],
    nodes: &[MarkdownAstRawNode],
) -> MarkdownAstNodeMetadata {
    let sections = markdown_ast_section_nodes(node, nodes);
    let section_path: Vec<String> = sections
        .iter()
        .map(|heading| heading.name.clone())
        .collect();
    let section_handle = sections.last().map(|heading| heading.handle.clone());

    let is_list_item = node.kind == "list_item";
    let is_code_block = node.kind == "code_block";

    let heading_level = if node.kind == "heading" {
        markdown_heading_level(source, node.start_byte)
    } else {
        None
    };
    let (list_marker, list_order) = match is_list_item {
        true => markdown_list_attributes(source, node.start_byte),
        false => (None, None),
    };
    let list_depth = if is_list_item {
        Some(markdown_list_depth(source, node.start_byte))
    } else {
        None
    };
    // The name of a code block node is used as its fence language.
    let fence_language = if is_code_block {
        Some(node.name.clone())
    } else {
        None
    };
    let embedded_symbols = if is_code_block {
        markdown_embedded_symbols(
            file,
            source,
            node.body_start_byte,
            node.body_end_byte,
            fence_language.as_deref(),
        )
    } else {
        Vec::new()
    };
    let fence_marker = if is_code_block {
        markdown_fence_marker(source, node.start_byte)
    } else {
        None
    };

    MarkdownAstNodeMetadata {
        heading_level,
        section_path,
        section_handle,
        list_depth,
        list_marker,
        list_order,
        fence_language,
        fence_marker,
        embedded_symbols,
    }
}
/// Builds the follow-up commands for a Markdown node.
///
/// The source window covers the node's full line range and the source body covers its
/// body lines; a missing body offset falls back to the matching node line. Both ranges
/// span at least one line.
fn markdown_ast_node_expand(
    root: &Path,
    file: &str,
    node: &MarkdownAstRawNode,
    source: &[u8],
) -> MarkdownAstNodeExpand {
    // Number of lines in an inclusive line range, never less than one.
    let line_span = |first: usize, last: usize| last.saturating_sub(first).saturating_add(1).max(1);

    let start_line = source_line_for_byte(source, node.start_byte);
    let end_line = source_line_for_end_byte(source, node.end_byte).max(start_line);
    let body_start_line = node
        .body_start_byte
        .map_or(start_line, |byte| source_line_for_byte(source, byte));
    let body_end_line = node
        .body_end_byte
        .map_or(end_line, |byte| source_line_for_end_byte(source, byte))
        .max(body_start_line);

    let window_lines = line_span(start_line, end_line);
    let body_lines = line_span(body_start_line, body_end_line);
    MarkdownAstNodeExpand {
        source_window: source_read_command(root, file, start_line, window_lines),
        source_body: source_read_command(root, file, body_start_line, body_lines),
        symbol_read: source_symbol_read_command(root, &node.name, file),
        edit_intents: markdown_edit_intents_command(root),
    }
}
/// Converts a raw Markdown node into its report form.
///
/// Resolves line numbers from the byte range (the end line is never smaller than the
/// start line), the parent and up to `child_limit` child handles, the kind-specific
/// metadata and the expand commands. The body byte span is only set when both body
/// offsets are known.
fn markdown_ast_node(
    root: &Path,
    file: &str,
    node: &MarkdownAstRawNode,
    source: &[u8],
    nodes: &[MarkdownAstRawNode],
    child_limit: usize,
) -> MarkdownAstNode {
    let first_line = source_line_for_byte(source, node.start_byte);
    let last_line = source_line_for_end_byte(source, node.end_byte).max(first_line);
    let body_byte_span = match (node.body_start_byte, node.body_end_byte) {
        (Some(start), Some(end)) => Some(SourceByteRangePreview { start, end }),
        _ => None,
    };
    let byte_span = SourceByteRangePreview {
        start: node.start_byte,
        end: node.end_byte,
    };
    MarkdownAstNode {
        handle: node.handle.clone(),
        span_handle: node.span_handle.clone(),
        name: node.name.clone(),
        kind: node.kind.clone(),
        block_kind: node.block_kind.clone(),
        node_kind: node.node_kind.clone(),
        line: first_line,
        end_line: last_line,
        byte_span,
        body_byte_span,
        parent_handle: markdown_ast_parent_handle(node, nodes),
        child_handles: markdown_ast_child_handles(node, nodes, child_limit),
        metadata: markdown_ast_node_metadata(file, node, source, nodes),
        expand: markdown_ast_node_expand(root, file, node, source),
    }
}
/// Builds the AST span preview of a stored symbol.
///
/// Returns `None` when the symbol has no usable byte range or no node kind. Line
/// numbers are derived from the byte offsets in `source`. Parent and child handles are
/// resolved against `symbols` (children limited to `child_limit`), and Markdown
/// metadata is attached for Markdown symbols.
///
/// End lines are clamped so they are never smaller than the matching start line. A
/// zero-width span at the beginning of a line would otherwise report an end line one
/// above its start line, because the exclusive end offset is mapped through the
/// preceding byte. The Markdown node builders in this file apply the same clamp.
pub(crate) fn stored_symbol_ast_span(
    symbol: &index::StoredSymbol,
    source: &[u8],
    symbols: &[index::StoredSymbol],
    child_limit: usize,
) -> Option<AstSpanPreview> {
    let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
    let node_kind = symbol.node_kind.clone()?;
    let body_start_byte = symbol_span_byte(symbol.body_start_byte);
    let body_end_byte = symbol_span_byte(symbol.body_end_byte);
    let start_line = source_line_for_byte(source, start_byte);
    let end_line = source_line_for_end_byte(source, end_byte).max(start_line);
    let body_start_line = body_start_byte.map(|byte| source_line_for_byte(source, byte));
    let body_end_line = body_end_byte.map(|byte| {
        let line = source_line_for_end_byte(source, byte);
        // Only clamp when a body start is known; otherwise report the raw end line.
        body_start_line.map_or(line, |first| line.max(first))
    });
    Some(AstSpanPreview {
        handle: ast_span_handle(
            &symbol.file,
            &symbol.name,
            &symbol.kind,
            start_byte,
            end_byte,
        ),
        node_kind,
        start_byte,
        end_byte,
        start_line,
        end_line,
        body_start_byte,
        body_end_byte,
        body_start_line,
        body_end_line,
        parent_handle: stored_symbol_parent_span_handle(symbol, symbols),
        child_handles: stored_symbol_child_span_handles(symbol, symbols, child_limit),
        markdown: markdown_stored_symbol_metadata(symbol, source, symbols),
    })
}
/// Builds the AST span preview of a symbol hit.
///
/// Returns `None` when the hit has no usable byte range or no node kind. Line numbers
/// are derived from the byte offsets in `source`. Parent and child handles are not
/// resolved for hits; Markdown metadata is attached for Markdown symbols.
///
/// End lines are clamped so they are never smaller than the matching start line. A
/// zero-width span at the beginning of a line would otherwise report an end line one
/// above its start line, because the exclusive end offset is mapped through the
/// preceding byte. The Markdown node builders in this file apply the same clamp.
pub(crate) fn symbol_hit_ast_span(symbol: &index::SymbolHit, source: &[u8]) -> Option<AstSpanPreview> {
    let (start_byte, end_byte) = symbol_hit_span_bounds(symbol)?;
    let node_kind = symbol.node_kind.clone()?;
    let body_start_byte = symbol_span_byte(symbol.body_start_byte);
    let body_end_byte = symbol_span_byte(symbol.body_end_byte);
    let start_line = source_line_for_byte(source, start_byte);
    let end_line = source_line_for_end_byte(source, end_byte).max(start_line);
    let body_start_line = body_start_byte.map(|byte| source_line_for_byte(source, byte));
    let body_end_line = body_end_byte.map(|byte| {
        let line = source_line_for_end_byte(source, byte);
        // Only clamp when a body start is known; otherwise report the raw end line.
        body_start_line.map_or(line, |first| line.max(first))
    });
    Some(AstSpanPreview {
        handle: ast_span_handle(
            &symbol.file,
            &symbol.name,
            &symbol.kind,
            start_byte,
            end_byte,
        ),
        node_kind,
        start_byte,
        end_byte,
        start_line,
        end_line,
        body_start_byte,
        body_end_byte,
        body_start_line,
        body_end_line,
        parent_handle: None,
        child_handles: Vec::new(),
        markdown: markdown_symbol_hit_metadata(symbol, source, start_byte),
    })
}
/// Returns the start line of a symbol hit plus one.
///
/// Falls back to `1` when the value does not fit in `usize` (e.g. is negative) or the
/// increment would overflow.
pub(crate) fn symbol_hit_line(symbol: &index::SymbolHit) -> usize {
    match usize::try_from(symbol.line) {
        Ok(stored) => stored.checked_add(1).unwrap_or(1),
        Err(_) => 1,
    }
}
/// Returns the end line of a symbol hit plus one, or `None` when it is missing, does
/// not fit in `usize`, or the increment would overflow.
pub(crate) fn symbol_hit_end_line(symbol: &index::SymbolHit) -> Option<usize> {
    let stored = symbol.end_line?;
    let zero_based = usize::try_from(stored).ok()?;
    zero_based.checked_add(1)
}
/// Reports whether a stored symbol's line range overlaps the inclusive window `start..=end`.
///
/// A window with `end == 0` never matches. A symbol without an end line is
/// treated as occupying only its start line.
fn source_symbol_intersects(symbol: &index::StoredSymbol, start: usize, end: usize) -> bool {
    match end {
        0 => false,
        _ => {
            let first_line = source_symbol_line(symbol);
            let last_line = source_symbol_end_line(symbol).unwrap_or(first_line);
            // Overlap holds unless the symbol lies entirely after or before the window.
            !(first_line > end || last_line < start)
        }
    }
}
/// Collects index symbol references that overlap the line window `start..=end` of one file.
///
/// Lookup problems never fail the caller: each one appends a message to
/// `warnings` and produces an empty list. At most `limit` symbols are
/// returned, and names and signatures are shortened with
/// `truncate_for_budget(_, max_bytes)`.
#[allow(clippy::too_many_arguments)]
fn load_source_symbols(
    root: &Path,
    file_abs: &Path,
    file_display: &str,
    source: &[u8],
    scope: Option<&str>,
    start: usize,
    end: usize,
    limit: usize,
    max_bytes: usize,
    warnings: &mut Vec<String>,
) -> Vec<SourceSymbolRef> {
    // Records why symbol refs are missing and yields the empty result.
    let mut unavailable = |message: String| {
        warnings.push(message);
        Vec::new()
    };

    let db_path = match resolve_query_db_path(root, file_abs, scope) {
        Ok(resolved) => resolved,
        Err(err) => return unavailable(format!("index refs unavailable: {err:#}")),
    };
    if !db_path.exists() {
        return unavailable(format!(
            "index refs unavailable: no index found at {}",
            db_path.display()
        ));
    }
    let db = match index::IndexDb::open_read_only_resilient(&db_path) {
        Ok(opened) => opened,
        Err(err) => return unavailable(format!("index refs unavailable: {err:#}")),
    };
    // The index is queried by the absolute path, independent of how the file is displayed.
    let file_key = file_abs.to_string_lossy().to_string();
    let symbols = match db.symbols_for_file(&file_key) {
        Ok(found) => found,
        Err(err) => return unavailable(format!("symbol refs unavailable: {err:#}")),
    };

    let mut refs = Vec::new();
    let in_window = symbols
        .iter()
        .filter(|candidate| source_symbol_intersects(candidate, start, end))
        .take(limit);
    for symbol in in_window {
        let line = source_symbol_line(symbol);
        let end_line = source_symbol_end_line(symbol);
        let handle = stable_handle(
            "ssym",
            &format!("{}:{}:{}", file_display, symbol.name, line),
        );
        let signature = symbol
            .signature
            .clone()
            .map(|text| truncate_for_budget(&text, max_bytes));
        refs.push(SourceSymbolRef {
            handle,
            name: truncate_for_budget(&symbol.name, max_bytes),
            kind: symbol.kind.clone(),
            language: symbol.language.clone(),
            file: file_display.to_string(),
            line,
            end_line,
            signature,
            span: stored_symbol_ast_span(symbol, source, &symbols, limit),
            expand: source_symbol_read_command(root, &symbol.name, file_display),
        });
    }
    refs
}
/// Loads up to `limit` stored summaries for `file_display` from `.tsift/summaries.db`.
///
/// A missing database silently yields an empty list. Failing to open or query
/// it appends a message to `warnings` and also yields an empty list. Symbol
/// names and summary text are shortened with `truncate_for_budget(_, max_bytes)`.
fn load_source_summaries(
    root: &Path,
    file_display: &str,
    limit: usize,
    max_bytes: usize,
    warnings: &mut Vec<String>,
) -> Vec<SourceSummaryRef> {
    let db_path = root.join(".tsift/summaries.db");
    if !db_path.exists() {
        return Vec::new();
    }

    // Records why summary refs are missing and yields the empty result.
    let mut unavailable = |message: String| {
        warnings.push(message);
        Vec::new()
    };

    let db = match summarize::SummaryDb::open_read_only_resilient(&db_path) {
        Ok(opened) => opened,
        Err(err) => return unavailable(format!("summary refs unavailable: {err:#}")),
    };
    let summaries = match db.get_by_file(file_display) {
        Ok(rows) => rows,
        Err(err) => return unavailable(format!("summary refs unavailable: {err:#}")),
    };

    let mut refs = Vec::new();
    for row in summaries.into_iter().take(limit) {
        let handle = stable_handle(
            "sum",
            &format!("{}:{}:{}", row.file_path, row.symbol_name, row.id),
        );
        let symbol_name = truncate_for_budget(&row.symbol_name, max_bytes);
        let summary = truncate_for_budget(&row.summary, max_bytes);
        let expand = source_summary_expand_command(root, &row.symbol_name);
        refs.push(SourceSummaryRef {
            handle,
            symbol_name,
            file_path: row.file_path,
            summary,
            expand,
        });
    }
    refs
}
/// Prints a Markdown AST report for a single Markdown file.
///
/// `file` is resolved against the project root derived from `path`. When
/// `node` is given, only nodes whose `handle` or `span_handle` equals it are
/// returned; otherwise the first `budget.preview_items()` nodes are returned.
/// `absolute` selects an absolute versus root-relative display path.
///
/// Output follows `format`: JSON/envelope when `json_output` is set, a terse
/// listing when `compact` is set, and a human-readable listing otherwise.
///
/// # Errors
/// Fails when the root or file cannot be resolved, the file is not a Markdown
/// path, the file cannot be read, the projection fails, `node` matches no
/// node, or the JSON output step fails.
fn cmd_markdown_ast(
file: &Path,
path: &Path,
node: Option<&str>,
format: OutputFormat,
absolute: bool,
budget: ResponseBudget,
) -> Result<()> {
let root = lint::resolve_project_root_or_canonical_path(path)?;
let file_abs = resolve_source_file(&root, file)?;
if !is_markdown_path(&file_abs) {
bail!(
"markdown-ast only supports Markdown files (.md/.mdx): {}",
file_abs.display()
);
}
let file_display = if absolute {
file_abs.to_string_lossy().to_string()
} else {
relativize_pathbuf(&file_abs, &root)
.to_string_lossy()
.to_string()
};
let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
// Lossy decoding is only used to count lines; the raw bytes feed the projection.
let text = String::from_utf8_lossy(&source);
let total_lines = text.lines().count();
let projection = markdown_ast_projection(&file_display, &source)?;
let raw_nodes = &projection.nodes;
let max_items = budget.preview_items();
let max_bytes = budget.preview_bytes();
// An explicit handle returns every match (no item cap); otherwise the
// listing is capped at `max_items`.
let selected_nodes = if let Some(handle) = node {
let matches = raw_nodes
.iter()
.filter(|candidate| candidate.handle == handle || candidate.span_handle == handle)
.collect::<Vec<_>>();
if matches.is_empty() {
bail!("Markdown AST node handle {handle:?} was not found in {file_display}");
}
matches
} else {
raw_nodes.iter().take(max_items).collect::<Vec<_>>()
};
let nodes = selected_nodes
.into_iter()
.map(|raw| {
// NOTE: this local `node` shadows the `node` parameter inside the closure only.
let mut node =
markdown_ast_node(&root, &file_display, raw, &source, raw_nodes, max_items);
node.name = truncate_for_budget(&node.name, max_bytes);
node
})
.collect::<Vec<_>>();
// Outline construction is timed separately so it can be reported as its own phase.
let outline_started = Instant::now();
let outline = markdown_ast_outline_entries(
&root,
&file_display,
&source,
raw_nodes,
max_items,
max_bytes,
);
let outline_duration_micros = outline_started.elapsed().as_micros();
let projection_preview = MarkdownAstProjectionPreview {
mode: if node.is_some() {
"selected_node".to_string()
} else {
"outline_first".to_string()
},
total_nodes: raw_nodes.len(),
returned_nodes: nodes.len(),
// Counts every node not returned, including in selected-node mode.
omitted_nodes: raw_nodes.len().saturating_sub(nodes.len()),
selected_node: node.map(str::to_string),
cache: markdown_ast_cache_report(&projection),
outline,
phase_timings: vec![
MarkdownAstPhaseTiming {
name: "parse_extract".to_string(),
duration_micros: projection.parse_duration_micros,
detail: if projection.cache_hit {
"reused cached tree-sitter Markdown symbol extraction".to_string()
} else {
"tree-sitter Markdown symbol extraction".to_string()
},
},
MarkdownAstPhaseTiming {
name: "outline_projection".to_string(),
duration_micros: outline_duration_micros,
detail: "outline-first section/block preview construction".to_string(),
},
],
};
let report = MarkdownAstReport {
handle: stable_handle("mdastrep", &file_display),
root: root.to_string_lossy().to_string(),
file: file_display.clone(),
// The report always describes the whole file, so nothing is marked truncated here.
range: SourceRangePreview {
start: 1,
end: total_lines,
total_lines,
truncated_before: false,
truncated_after: false,
},
projection: projection_preview,
nodes,
expand: MarkdownAstExpandCommands {
file: markdown_ast_command(&root, &file_display, None),
// `.max(1)` keeps the follow-up command's line count non-zero for an empty file.
source_read: source_read_command(&root, &file_display, 1, total_lines.max(1)),
edit_intents: markdown_edit_intents_command(&root),
},
warnings: Vec::new(),
};
if format.json_output {
// Only the capped listing mode can be truncated; an explicit node selection never is.
let truncated = node.is_none() && raw_nodes.len() > report.nodes.len();
let mut follow_up = vec![
report.expand.file.clone(),
report.expand.source_read.clone(),
report.expand.edit_intents.clone(),
];
follow_up.extend(
report
.nodes
.iter()
.map(|node| node.expand.source_window.clone()),
);
print_json_or_envelope(
&report,
&format,
"markdown-ast",
"ast",
ToolEnvelopeSummary {
text: format!("markdown ast {} nodes:{}", report.file, report.nodes.len()),
metrics: vec![
envelope_metric("nodes", report.nodes.len()),
envelope_metric("total_nodes", report.projection.total_nodes),
envelope_metric(
"parse_duration_micros",
report.projection.cache.parse_duration_micros,
),
envelope_metric("total_lines", report.range.total_lines),
],
},
truncated,
follow_up,
)?;
} else if format.compact {
println!(
"markdown-ast {} nodes:{} handle:{}",
report.file,
report.nodes.len(),
report.handle
);
for node in &report.nodes {
println!(
" {} {} {}:{}-{}",
node.handle, node.kind, node.name, node.line, node.end_line
);
}
// The expand hint is printed only when the capped listing left nodes out.
if node.is_none() && raw_nodes.len() > report.nodes.len() {
println!("expand: {}", report.expand.file);
}
} else {
println!(
"Markdown AST `{}` nodes {} of {} ({})",
report.file,
report.nodes.len(),
raw_nodes.len(),
report.handle
);
for node in &report.nodes {
println!(
" {} `{}` {}:{}-{} — {}",
node.handle,
node.name,
node.kind,
node.line,
node.end_line,
node.expand.source_window
);
}
if node.is_none() && raw_nodes.len() > report.nodes.len() {
println!();
println!("Expand:");
println!(" file: {}", report.expand.file);
}
}
Ok(())
}
/// Prints a bounded window of source lines from `file`, together with index
/// symbol refs, stored summary refs and (for Markdown files) a Markdown
/// projection for that window.
///
/// The window starts at the 1-based line `start` and ends at `end` when
/// given, otherwise at `start + lines - 1`; it is clamped to the file length
/// and may be shortened further by the token-capped preview. `scope` is
/// forwarded to the index lookup and `absolute` selects an absolute versus
/// root-relative display path.
///
/// Output follows `format`: JSON/envelope when `json_output` is set, a terse
/// listing when `compact` is set, and a human-readable listing otherwise.
/// Warnings are written to stderr only in the human-readable branch.
///
/// # Errors
/// Fails when `start` or `lines` is zero, `end < start`, the root or file
/// cannot be resolved or read, `start` lies beyond the last line of a
/// non-empty file, or the JSON output step fails.
#[allow(clippy::too_many_arguments)]
fn cmd_source_read(
file: &Path,
path: &Path,
start: usize,
lines: usize,
end: Option<usize>,
scope: Option<&str>,
format: OutputFormat,
absolute: bool,
budget: ResponseBudget,
) -> Result<()> {
if start == 0 {
bail!("--start is 1-based and must be greater than zero");
}
if lines == 0 {
bail!("--lines must be greater than zero");
}
if let Some(end) = end
&& end < start
{
bail!("--end must be greater than or equal to --start");
}
let root = lint::resolve_project_root_or_canonical_path(path)?;
let file_abs = resolve_source_file(&root, file)?;
let file_display = if absolute {
file_abs.to_string_lossy().to_string()
} else {
relativize_pathbuf(&file_abs, &root)
.to_string_lossy()
.to_string()
};
let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
let text = String::from_utf8_lossy(&source);
let all_lines: Vec<&str> = text.lines().collect();
let total_lines = all_lines.len();
// An empty file is allowed through and yields an empty preview instead of an error.
if total_lines > 0 && start > total_lines {
bail!(
"--start {} is beyond end of {} ({} lines)",
start,
file_display,
total_lines
);
}
// An explicit `--end` wins over `--lines`; either way the end is clamped to the file.
let requested_end = end.unwrap_or_else(|| start.saturating_add(lines).saturating_sub(1));
let end_line = requested_end.min(total_lines);
let max_bytes = budget.preview_bytes();
let token_cap = budget.body_token_cap();
let (preview, preview_end, body_truncated) = if total_lines == 0 {
(Vec::new(), end_line, false)
} else {
let capped = build_token_capped_preview(&all_lines, start, end_line, max_bytes, token_cap);
(capped.preview, capped.capped_end, capped.was_capped)
};
// `effective_end` is the last line actually reported once the token cap is applied.
let effective_end = if body_truncated { preview_end } else { end_line };
let mut warnings = Vec::new();
if body_truncated {
warnings.push(format!(
"body preview capped at ~{token_cap} tokens at line {preview_end} of {end_line}"
));
}
let max_items = budget.preview_items();
let symbols = load_source_symbols(
&root,
&file_abs,
&file_display,
&source,
scope,
start,
effective_end,
max_items,
max_bytes,
&mut warnings,
);
let summaries =
load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
// A failed Markdown projection is downgraded to a warning rather than failing the read.
let markdown = if is_markdown_path(&file_abs) {
match source_read_markdown_projection(
&root,
&file_display,
&source,
start,
effective_end,
budget,
) {
Ok(markdown) => Some(markdown),
Err(err) => {
warnings.push(format!("markdown projection unavailable: {err:#}"));
None
}
}
} else {
None
};
let effective_lines = effective_end.saturating_sub(start).saturating_add(1).max(1);
let expand = SourceExpandCommands {
// Previous window: up to `lines` lines ending just before `start`.
before: (start > 1).then(|| {
let before_start = start.saturating_sub(lines).max(1);
source_read_command(&root, &file_display, before_start, start - before_start)
}),
// Next window: `lines` lines starting right after the reported range.
after: (effective_end < total_lines)
.then(|| source_read_command(&root, &file_display, effective_end + 1, lines)),
// Remainder of the requested range that the token cap cut off.
// NOTE(review): if the capped end could equal `end_line`, `remaining` would be 0 — confirm
// against `build_token_capped_preview`.
body: body_truncated.then(|| {
let remaining = end_line.saturating_sub(effective_end);
source_read_command(&root, &file_display, effective_end + 1, remaining)
}),
file: source_read_command(&root, &file_display, 1, total_lines.max(effective_lines)),
markdown_ast: is_markdown_path(&file_abs)
.then(|| markdown_ast_command(&root, &file_display, None)),
};
let report = SourceReadReport {
handle: stable_handle("swin", &format!("{file_display}:{start}:{effective_end}")),
root: root.to_string_lossy().to_string(),
file: file_display,
range: SourceRangePreview {
start,
end: effective_end,
total_lines,
truncated_before: start > 1,
truncated_after: effective_end < total_lines,
},
preview,
symbols,
summaries,
markdown,
expand,
warnings,
};
if format.json_output {
let truncated = report.range.truncated_before || report.range.truncated_after;
// Optional follow-ups that are `None` are dropped by `flatten`.
let follow_up = [
report.expand.before.clone(),
report.expand.after.clone(),
report.expand.body.clone(),
Some(report.expand.file.clone()),
report.expand.markdown_ast.clone(),
]
.into_iter()
.flatten()
.collect::<Vec<_>>();
print_json_or_envelope(
&report,
&format,
"source-read",
"window",
ToolEnvelopeSummary {
text: format!(
"source window {}:{}-{}",
report.file, report.range.start, report.range.end
),
metrics: vec![
envelope_metric("lines", report.preview.len()),
envelope_metric("symbols", report.symbols.len()),
envelope_metric("summaries", report.summaries.len()),
envelope_metric(
"markdown_nodes",
report
.markdown
.as_ref()
.map_or(0, |markdown| markdown.visible_nodes),
),
],
},
truncated,
follow_up,
)?;
} else if format.compact {
println!(
"source {}:{}-{} / {} handle:{}",
report.file,
report.range.start,
report.range.end,
report.range.total_lines,
report.handle
);
for line in &report.preview {
println!("{:>5} {}", line.line, line.text);
}
if !report.symbols.is_empty() {
println!("syms[{}]:", report.symbols.len());
for symbol in &report.symbols {
println!(" {} {}:{}", symbol.name, symbol.file, symbol.line);
}
}
if report.range.truncated_before || report.range.truncated_after {
println!("expand: {}", report.expand.file);
}
} else {
println!(
"Source window `{}` lines {}-{} of {} ({})",
report.file,
report.range.start,
report.range.end,
report.range.total_lines,
report.handle
);
for line in &report.preview {
println!("{:>5} | {}", line.line, line.text);
}
if !report.symbols.is_empty() {
println!();
println!("Symbol refs:");
for symbol in &report.symbols {
println!(
" {} `{}` {}:{} — {}",
symbol.handle, symbol.name, symbol.file, symbol.line, symbol.expand
);
}
}
if !report.summaries.is_empty() {
println!();
println!("Summary refs:");
for summary in &report.summaries {
println!(
" {} `{}` — {}",
summary.handle, summary.symbol_name, summary.expand
);
}
}
if report.range.truncated_before || report.range.truncated_after {
println!();
println!("Expand:");
if let Some(before) = &report.expand.before {
println!(" before: {}", before);
}
if let Some(after) = &report.expand.after {
println!(" after: {}", after);
}
println!(" file: {}", report.expand.file);
}
// Warnings go to stderr so they do not mix with the listing on stdout.
for warning in &report.warnings {
eprintln!("warning: {warning}");
}
}
Ok(())
}
/// Looks up `symbol` in the symbol index and prints a bounded preview of its
/// body, its child symbols, stored summaries and follow-up commands.
///
/// `file_hint`, when given, restricts the match to hits that resolve to that
/// file. `scope` is forwarded to the index path lookup and `absolute` selects
/// an absolute versus root-relative display path. The body preview is limited
/// both by a line budget derived from `budget.preview_items()` and by the
/// token-capped preview.
///
/// Output follows `format`: JSON/envelope when `json_output` is set, a terse
/// listing when `compact` is set, and a human-readable listing otherwise.
/// Warnings are written to stderr only in the human-readable branch.
///
/// # Errors
/// Fails when the root, hinted file or index path cannot be resolved, no
/// index exists, the index cannot be opened or searched, no hit matches, the
/// selected file cannot be resolved or read, its symbols cannot be loaded, or
/// the JSON output step fails.
#[allow(clippy::too_many_arguments)]
fn cmd_symbol_read(
symbol: &str,
file_hint: Option<&Path>,
path: &Path,
scope: Option<&str>,
format: OutputFormat,
absolute: bool,
budget: ResponseBudget,
) -> Result<()> {
let root = lint::resolve_project_root_or_canonical_path(path)?;
let hinted_file_abs = file_hint
.map(|file| resolve_source_file(&root, file))
.transpose()?;
// The index path is resolved relative to the hinted file when there is one, else the root.
let path_hint = hinted_file_abs.as_deref().unwrap_or(root.as_path());
let db_path = resolve_query_db_path(&root, path_hint, scope)?;
if !db_path.exists() {
bail!(
"index refs unavailable: no index found at {}",
db_path.display()
);
}
let db = index::IndexDb::open_read_only_resilient(&db_path)
.with_context(|| format!("opening symbol index {}", db_path.display()))?;
// Always consider at least 10 candidates so a file hint has something to filter.
let search_limit = budget.follow_up_items().max(10);
let hits = db
.symbol_search(symbol, search_limit)
.with_context(|| format!("searching symbols for {symbol:?}"))?;
// Take the first hit; with a file hint, the first hit that resolves to the hinted file.
let selected = hits
.into_iter()
.find(|hit| {
let Some(hinted_file_abs) = &hinted_file_abs else {
return true;
};
resolve_source_file(&root, Path::new(&hit.file))
.map(|hit_file| hit_file == *hinted_file_abs)
.unwrap_or(false)
})
.with_context(|| {
let hint = file_hint
.map(|file| format!(" in {}", file.display()))
.unwrap_or_default();
format!("no indexed symbol matched {symbol:?}{hint}")
})?;
let file_abs = resolve_source_file(&root, Path::new(&selected.file))?;
let file_display = if absolute {
file_abs.to_string_lossy().to_string()
} else {
relativize_pathbuf(&file_abs, &root)
.to_string_lossy()
.to_string()
};
let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
// NOTE(review): the hash is only printed by the compact branch below but is computed for
// every output format.
let content_hash = blake3::hash(&source).to_hex().to_string();
let text = String::from_utf8_lossy(&source);
let all_lines: Vec<&str> = text.lines().collect();
let total_lines = all_lines.len();
let file_symbols = db
.symbols_for_file(&file_abs.to_string_lossy())
.with_context(|| format!("loading symbols for {}", file_abs.display()))?;
let max_items = budget.preview_items();
let max_bytes = budget.preview_bytes();
let selected_start = symbol_hit_line(&selected);
let selected_end = symbol_hit_end_line(&selected)
.unwrap_or(selected_start)
.max(selected_start);
// Match the search hit to its stored record by name, kind and start line.
let stored_target = file_symbols.iter().find(|candidate| {
candidate.name == selected.name
&& candidate.kind == selected.kind
&& source_symbol_line(candidate) == selected_start
});
// Prefer the span built from the stored record; fall back to the span recorded on the hit.
let target_span = stored_target
.and_then(|stored| stored_symbol_ast_span(stored, &source, &file_symbols, max_items))
.or_else(|| symbol_hit_ast_span(&selected, &source));
let target_start = target_span
.as_ref()
.map(|span| span.start_line)
.unwrap_or(selected_start);
let target_end = target_span
.as_ref()
.map(|span| span.end_line)
.or_else(|| stored_target.and_then(source_symbol_end_line))
.unwrap_or(selected_end)
.max(target_start);
let target_bounds = stored_target
.and_then(stored_symbol_span_bounds)
.or_else(|| symbol_hit_span_bounds(&selected));
// This shadows the `target_end` above: the stored record's end line, when present, overrides
// the span-derived value; otherwise the earlier value is kept.
let target_end = stored_target
.and_then(source_symbol_end_line)
.unwrap_or(target_end)
.max(target_start);
// Line budget for the body: 16 lines per preview item, with at least one item.
let body_line_budget = budget.preview_items().max(1).saturating_mul(16);
let line_capped_end = target_start
.saturating_add(body_line_budget)
.saturating_sub(1)
.min(target_end)
.min(total_lines.max(target_start));
let token_cap = budget.body_token_cap();
let (body, effective_preview_end, body_truncated) = if total_lines == 0 || target_start > total_lines {
(Vec::new(), line_capped_end, false)
} else {
let capped = build_token_capped_preview(&all_lines, target_start, line_capped_end, max_bytes, token_cap);
(capped.preview, capped.capped_end, capped.was_capped)
};
let preview_end = if body_truncated { effective_preview_end } else { line_capped_end };
let child_symbols = file_symbols
.iter()
.filter(|candidate| {
// With byte bounds available, a child is any symbol whose span lies inside the target's
// span but is not the identical span; symbols without bounds are excluded.
if let Some((target_start_byte, target_end_byte)) = target_bounds {
let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
else {
return false;
};
return candidate_start >= target_start_byte
&& candidate_end <= target_end_byte
&& (candidate_start, candidate_end) != (target_start_byte, target_end_byte);
}
// Without byte bounds, fall back to start lines strictly after the target's first line.
let line = source_symbol_line(candidate);
line > target_start && line <= target_end
})
.take(max_items)
.map(|symbol| {
let line = source_symbol_line(symbol);
let end_line = source_symbol_end_line(symbol);
SourceSymbolRef {
handle: stable_handle(
"ssym",
&format!("{}:{}:{}", file_display, symbol.name, line),
),
name: truncate_for_budget(&symbol.name, max_bytes),
kind: symbol.kind.clone(),
language: symbol.language.clone(),
file: file_display.clone(),
line,
end_line,
signature: symbol
.signature
.clone()
.map(|signature| truncate_for_budget(&signature, max_bytes)),
span: stored_symbol_ast_span(symbol, &source, &file_symbols, max_items),
expand: source_symbol_read_command(&root, &symbol.name, &file_display),
}
})
.collect::<Vec<_>>();
let mut warnings = Vec::new();
if body_truncated {
warnings.push(format!(
"body preview capped at ~{token_cap} tokens at line {preview_end} of {target_end}"
));
}
let summaries =
load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
let symbol_handle = stable_handle(
"sread",
&format!("{}:{}:{}", file_display, selected.name, target_start),
);
let source_lines = preview_end
.saturating_sub(target_start)
.saturating_add(1)
.max(1);
let expand = SymbolReadExpandCommands {
source_window: source_read_command(&root, &file_display, target_start, source_lines),
// Remainder of the symbol body that the token cap cut off.
body: body_truncated.then(|| {
let remaining = target_end.saturating_sub(preview_end);
source_read_command(&root, &file_display, preview_end + 1, remaining)
}),
file: source_read_command(&root, &file_display, 1, total_lines.max(source_lines)),
explain: source_symbol_expand_command(&root, &selected.name),
callers: source_symbol_graph_command(&root, &selected.name, "callers"),
callees: source_symbol_graph_command(&root, &selected.name, "callees"),
// Only Markdown symbols get a markdown-ast follow-up, targeting the span handle if known.
markdown_ast: (selected.language == "markdown").then(|| {
markdown_ast_command(
&root,
&file_display,
target_span.as_ref().map(|span| span.handle.as_str()),
)
}),
};
let report = SymbolReadReport {
handle: symbol_handle.clone(),
root: root.to_string_lossy().to_string(),
query: symbol.to_string(),
symbol: SymbolReadTarget {
handle: symbol_handle,
name: selected.name.clone(),
kind: selected.kind.clone(),
language: selected.language.clone(),
file: file_display.clone(),
line: target_start,
end_line: Some(target_end),
signature: stored_target
.and_then(|stored| stored.signature.clone())
.map(|signature| truncate_for_budget(&signature, max_bytes)),
parent_module: stored_target.and_then(|stored| stored.parent_module.clone()),
visibility: stored_target.and_then(|stored| stored.visibility.clone()),
span: target_span,
},
range: SourceRangePreview {
start: target_start,
end: preview_end,
total_lines,
truncated_before: false,
truncated_after: preview_end < target_end,
},
body,
child_symbols,
summaries,
expand,
warnings,
};
if format.json_output {
// Truncation is also assumed when any body line's text length reaches `max_bytes` or the
// child list reached `max_items`.
let truncated = report.range.truncated_after
|| report.body.iter().any(|line| line.text.len() >= max_bytes)
|| report.child_symbols.len() >= max_items;
let follow_up = [
Some(report.expand.source_window.clone()),
report.expand.body.clone(),
Some(report.expand.file.clone()),
Some(report.expand.explain.clone()),
Some(report.expand.callers.clone()),
Some(report.expand.callees.clone()),
]
.into_iter()
.flatten()
.chain(report.expand.markdown_ast.clone())
.collect::<Vec<_>>();
print_json_or_envelope(
&report,
&format,
"symbol-read",
"symbol",
ToolEnvelopeSummary {
text: format!(
"symbol {} {}:{}-{}",
report.symbol.name, report.symbol.file, report.range.start, report.range.end
),
metrics: vec![
envelope_metric("body_lines", report.body.len()),
envelope_metric("child_symbols", report.child_symbols.len()),
envelope_metric("summaries", report.summaries.len()),
],
},
truncated,
follow_up,
)?;
} else if format.compact {
println!(
"symbol {} {}:{}-{} handle:{} hash:{}",
report.symbol.name,
report.symbol.file,
report.range.start,
report.range.end,
report.handle,
content_hash
);
for line in &report.body {
println!("{:>5} {}", line.line, line.text);
}
if !report.child_symbols.is_empty() {
println!("children[{}]:", report.child_symbols.len());
for child in &report.child_symbols {
println!(" {} {}:{}", child.name, child.file, child.line);
}
}
} else {
println!(
"Symbol `{}` in `{}` lines {}-{} ({})",
report.symbol.name,
report.symbol.file,
report.range.start,
report.range.end,
report.handle
);
for line in &report.body {
println!("{:>5} | {}", line.line, line.text);
}
if !report.child_symbols.is_empty() {
println!();
println!("Child symbols:");
for child in &report.child_symbols {
println!(
" {} `{}` {}:{} — {}",
child.handle, child.name, child.file, child.line, child.expand
);
}
}
println!();
println!("Expand:");
println!(" source: {}", report.expand.source_window);
println!(" file: {}", report.expand.file);
println!(" explain: {}", report.expand.explain);
println!(" callers: {}", report.expand.callers);
println!(" callees: {}", report.expand.callees);
// Warnings go to stderr so they do not mix with the listing on stdout.
for warning in &report.warnings {
eprintln!("warning: {warning}");
}
}
Ok(())
}
/// One definition entry in an explain-budget report.
///
/// `tag_alias` is omitted from the serialized output when it is `None`.
#[derive(Serialize)]
struct ExplainBudgetDefinitionPreview {
    /// Compact handle identifying this definition.
    handle: String,
    /// Optional tag-derived alias for the symbol.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Symbol kind as stored in the index.
    kind: String,
    /// Symbol name, possibly shortened to fit the byte budget.
    name: String,
    /// File containing the definition, possibly shortened to fit the byte budget.
    file: String,
    /// Line of the definition as stored in the index.
    line: i64,
    /// Follow-up command that expands this entry.
    expand: String,
}
/// One caller or callee entry in an explain-budget report.
///
/// `tag_alias` is omitted from the serialized output when it is `None`.
#[derive(Serialize)]
struct ExplainBudgetEdgePreview {
/// Compact handle identifying this edge entry.
handle: String,
/// Optional tag-derived alias for the symbol.
#[serde(skip_serializing_if = "Option::is_none")]
tag_alias: Option<String>,
/// Name of the caller or callee symbol.
name: String,
/// File recorded for the edge.
file: String,
/// Line of the call site.
line: i64,
/// Follow-up command that expands this entry.
expand: String,
}
/// Budgeted preview of the community a symbol belongs to.
#[derive(Serialize)]
struct ExplainBudgetCommunityPreview {
/// Total number of members in the community.
size: usize,
/// Names of the previewed members; may hold fewer entries than `size`.
members: Vec<String>,
}
/// Budget-limited explain report for one symbol: previews of its definitions,
/// callers, callees and optional community, plus the totals they were cut from.
///
/// `community` is omitted from the serialized output when it is `None`.
#[derive(Serialize)]
struct ExplainBudgetReport {
/// The symbol the report was built for.
symbol: String,
/// Item cap applied to each preview list.
max_items: usize,
/// Byte cap applied to previewed strings.
max_bytes: usize,
/// Total number of definitions before the item cap.
definition_total: usize,
/// Total number of callers, as reported by the caller of the builder.
callers_total: usize,
/// Whether the caller list was already cut by a query limit.
callers_truncated_by_limit: bool,
/// Total number of callees, as reported by the caller of the builder.
callees_total: usize,
/// Whether the callee list was already cut by a query limit.
callees_truncated_by_limit: bool,
/// Whether any preview list shows fewer entries than its total.
truncated: bool,
/// Previewed definitions.
definitions: Vec<ExplainBudgetDefinitionPreview>,
/// Previewed callers.
callers: Vec<ExplainBudgetEdgePreview>,
/// Previewed callees.
callees: Vec<ExplainBudgetEdgePreview>,
/// Previewed community, when one was supplied.
#[serde(skip_serializing_if = "Option::is_none")]
community: Option<ExplainBudgetCommunityPreview>,
}
/// Builds a budget-limited explain report for `symbol`.
///
/// Each of `symbols`, `callers`, `callees` and the community member list is
/// previewed up to `budget.preview_items()` entries, with strings shortened to
/// `budget.preview_bytes()`. The `*_total` and `*_truncated_by_limit` values
/// are copied into the report unchanged. `truncated` is set when any preview
/// shows fewer entries than its total. `_root` is currently unused.
#[allow(clippy::too_many_arguments)]
pub(crate) fn build_explain_budget_report(
    symbol: &str,
    _root: &Path,
    symbols: &[index::StoredSymbol],
    callers: &[index::StoredEdge],
    callers_total: usize,
    callers_truncated_by_limit: bool,
    callees: &[index::StoredEdge],
    callees_total: usize,
    callees_truncated_by_limit: bool,
    community: Option<&graph::Community>,
    budget: ResponseBudget,
) -> ExplainBudgetReport {
    let max_items = budget.preview_items();
    let max_bytes = budget.preview_bytes();

    // Caller and callee previews differ only in handle prefix and in which
    // edge name they display; file and line always come from the call site.
    let edge_preview = |prefix: &'static str, name: &str, edge: &index::StoredEdge| {
        let symbol_ref = build_compact_symbol_ref(
            prefix,
            &format!(
                "{}:{}:{}:{}",
                name, edge.caller_file, edge.call_site_line, symbol
            ),
            name,
            None,
            max_bytes,
        );
        ExplainBudgetEdgePreview {
            handle: symbol_ref.handle,
            tag_alias: symbol_ref.tag_alias,
            name: symbol_ref.name,
            file: truncate_for_budget(&edge.caller_file, max_bytes),
            line: edge.call_site_line,
            expand: format!(
                "tsift explain {} --path {} --limit 0",
                shell_quote(name),
                shell_quote(&edge.caller_file)
            ),
        }
    };

    let mut definitions = Vec::new();
    for definition in symbols.iter().take(max_items) {
        let identity = format!(
            "{}:{}:{}:{}",
            definition.kind, definition.name, definition.file, definition.line
        );
        let symbol_ref = build_compact_symbol_ref(
            "edef",
            &identity,
            &definition.name,
            definition.tags.as_deref(),
            max_bytes,
        );
        definitions.push(ExplainBudgetDefinitionPreview {
            handle: symbol_ref.handle,
            tag_alias: symbol_ref.tag_alias,
            kind: definition.kind.clone(),
            name: symbol_ref.name,
            file: truncate_for_budget(&definition.file, max_bytes),
            line: definition.line,
            expand: format!(
                "tsift search {} --exact --path {} --limit 20",
                shell_quote(&definition.name),
                shell_quote(&definition.file)
            ),
        });
    }

    let callers_preview: Vec<ExplainBudgetEdgePreview> = callers
        .iter()
        .take(max_items)
        .map(|edge| edge_preview("ecall", &edge.caller_name, edge))
        .collect();
    let callees_preview: Vec<ExplainBudgetEdgePreview> = callees
        .iter()
        .take(max_items)
        .map(|edge| edge_preview("eces", &edge.callee_name, edge))
        .collect();

    let community_preview = community.map(|group| {
        let members = group
            .members
            .iter()
            .take(max_items)
            .map(|member| truncate_for_budget(&member.name, max_bytes))
            .collect();
        ExplainBudgetCommunityPreview {
            size: group.members.len(),
            members,
        }
    });

    let community_truncated = community.is_some_and(|group| group.members.len() > max_items);
    let truncated = symbols.len() > max_items
        || callers_total > callers_preview.len()
        || callees_total > callees_preview.len()
        || community_truncated;

    ExplainBudgetReport {
        symbol: symbol.to_string(),
        max_items,
        max_bytes,
        definition_total: symbols.len(),
        callers_total,
        callers_truncated_by_limit,
        callees_total,
        callees_truncated_by_limit,
        truncated,
        definitions,
        callers: callers_preview,
        callees: callees_preview,
        community: community_preview,
    }
}
pub(crate) fn print_explain_budget_human(report: &ExplainBudgetReport) {
println!(
"explain-budget sym:{} defs:{}/{} crs:{}/{} ces:{}/{}",
shell_quote(&report.symbol),
report.definitions.len(),
report.definition_total,
report.callers.len(),
report.callers_total,
report.callees.len(),
report.callees_total
);
for entry in &report.definitions {
println!(
"def {} {} {}:{} expand:{}",
format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
entry.kind,
entry.file,
entry.line,
entry.expand
);
}
for entry in &report.callers {
println!(
"caller {} {}:{} expand:{}",
format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
entry.file,
entry.line,
entry.expand
);
}
for entry in &report.callees {
println!(
"callee {} {}:{} expand:{}",
format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
entry.file,
entry.line,
entry.expand
);
}
if let Some(community) = &report.community {
println!(
"community size:{} members:{}",
community.size,
community.members.join(", ")
);
}
if report.truncated {
println!(
"budget truncated items:{} bytes:{}",
report.max_items, report.max_bytes
);
}
}
/// Directory names that `tagpath_audit_policy_hints` reports as
/// `skip_dir:<name>` hints when they appear among a path's parent components.
///
/// Background for the tagpath audit these hints support: the audit reconciles
/// the tsift symbol index against the tagpath `.naming/index.json` source set
/// and reports files covered by one but not the other.
///
/// Today silent recall loss happens when tagpath's `[exclude]` / `extends`
/// chain or its hard-coded `SKIP_DIRS` skip files or languages that tsift
/// still indexes — the tsift symbols in those files cannot resolve a
/// `tagpath_handle` even with a fresh tagpath index. The audit surfaces
/// the diff so operators can decide whether to broaden the tagpath walk,
/// add an `[exclude]` to tsift, or accept the gap.
///
/// NOTE(review): presumably mirrors tagpath's own `SKIP_DIRS` list — confirm
/// the two stay in sync.
const TAGPATH_AUDIT_SKIP_DIRS: &[&str] = &[
".git",
"node_modules",
"target",
"__pycache__",
".venv",
"vendor",
];
/// Built-in file extensions (lowercase, without the leading dot) that seed the
/// supported-extension set in `tagpath_audit_supported_extensions`; extensions
/// from `.naming.toml` grammars are added on top of these.
///
/// NOTE(review): presumably reflects the languages tagpath handles out of the
/// box — confirm against tagpath when adding or removing entries.
const TAGPATH_AUDIT_SOURCE_EXTENSIONS: &[&str] = &[
"rs", "py", "ts", "js", "go", "java", "rb", "c", "cpp", "h", "hpp", "cs", "swift", "kt",
"scala", "zig", "nim", "ex", "exs", "erl", "hs", "ml", "clj", "r", "lua", "php", "pl", "d",
"cr", "dart", "jl", "v", "odin", "gleam", "rkt", "scm", "lisp", "lsp", "f", "fs", "fsi", "fsx",
"sh", "bash", "zsh", "sql", "css", "tsx",
];
/// Returns the set of file extensions the tagpath audit treats as supported.
///
/// Starts from `TAGPATH_AUDIT_SOURCE_EXTENSIONS` and, when `<root>/.naming.toml`
/// exists and resolves, adds the normalized extensions of every configured
/// grammar. An unreadable config is reported on stderr and the built-in set is
/// returned as is.
pub(crate) fn tagpath_audit_supported_extensions(root: &Path) -> BTreeSet<String> {
    let mut extensions: BTreeSet<String> = TAGPATH_AUDIT_SOURCE_EXTENSIONS
        .iter()
        .copied()
        .map(String::from)
        .collect();

    let config_path = root.join(".naming.toml");
    if !config_path.exists() {
        return extensions;
    }

    let config = match tagpath::config::resolve(&config_path) {
        Ok(resolved) => resolved,
        Err(err) => {
            eprintln!("tagpath_policy_hint_config_unreadable: {err}");
            return extensions;
        }
    };

    if let Some(grammars) = config.grammars {
        extensions.extend(
            grammars
                .languages
                .values()
                .flat_map(|grammar| &grammar.extensions)
                .filter_map(|ext| normalize_extension(ext)),
        );
    }
    extensions
}
/// Produces sorted, de-duplicated policy hints for one relative path.
///
/// Adds `skip_dir:<name>` for every parent directory component listed in
/// `TAGPATH_AUDIT_SKIP_DIRS`, and `extension_unsupported` when the path has an
/// extension whose normalized form is not in `supported_extensions`. The file
/// name itself is never checked against the skip list, and a path without a
/// usable extension gets no extension hint.
pub(crate) fn tagpath_audit_policy_hints(
    rel_path: &str,
    supported_extensions: &BTreeSet<String>,
) -> Vec<String> {
    let path = Path::new(rel_path);
    let mut hints = BTreeSet::new();

    let parent_dirs = path
        .parent()
        .into_iter()
        .flat_map(|parent| parent.components())
        .filter_map(|component| match component {
            std::path::Component::Normal(name) => Some(name.to_string_lossy()),
            _ => None,
        });
    for dir in parent_dirs {
        if TAGPATH_AUDIT_SKIP_DIRS.contains(&dir.as_ref()) {
            hints.insert(format!("skip_dir:{dir}"));
        }
    }

    let normalized = path
        .extension()
        .and_then(|ext| ext.to_str())
        .and_then(normalize_extension);
    let unsupported = match normalized {
        Some(ext) => !supported_extensions.contains(&ext),
        None => false,
    };
    if unsupported {
        hints.insert("extension_unsupported".to_string());
    }

    hints.into_iter().collect()
}
/// Normalizes a file extension: trims surrounding whitespace, strips leading
/// dots and lowercases ASCII letters.
///
/// Returns `None` when nothing is left after trimming and stripping.
fn normalize_extension(ext: &str) -> Option<String> {
    let bare = ext.trim().trim_start_matches('.');
    (!bare.is_empty()).then(|| bare.to_ascii_lowercase())
}
/// Maps a diff-digest file status to its lowercase output label.
pub(crate) fn diff_digest_status_label(status: diff_digest::DiffDigestFileStatus) -> &'static str {
    type Status = diff_digest::DiffDigestFileStatus;
    match status {
        Status::Deleted => "deleted",
        Status::Modified => "modified",
        Status::Added => "added",
    }
}
/// Maps a diff-digest summary state to its lowercase output label.
pub(crate) fn diff_digest_summary_label(
    state: diff_digest::DiffDigestSummaryState,
) -> &'static str {
    type State = diff_digest::DiffDigestSummaryState;
    match state {
        State::Unavailable => "unavailable",
        State::Missing => "missing",
        State::Stale => "stale",
        State::Current => "current",
    }
}
/// Maps a test-digest summary state to its lowercase output label.
fn test_digest_summary_label(state: test_digest::TestDigestSummaryState) -> &'static str {
    type State = test_digest::TestDigestSummaryState;
    match state {
        State::Unavailable => "unavailable",
        State::Missing => "missing",
        State::Stale => "stale",
        State::Current => "current",
    }
}
fn log_digest_summary_label(state: log_digest::LogDigestSummaryState) -> &'static str {
match state {
log_digest::LogDigestSummaryState::Current => "current",
log_digest::LogDigestSummaryState::Stale => "stale",
log_digest::LogDigestSummaryState::Missing => "missing",
log_digest::LogDigestSummaryState::Unavailable => "unavailable",
}
}
pub(crate) fn diff_digest_mode_label(mode: diff_digest::DiffDigestMode) -> &'static str {
match mode {
diff_digest::DiffDigestMode::WorkingTree => "worktree",
diff_digest::DiffDigestMode::Cached => "cached",
diff_digest::DiffDigestMode::Revision => "revision",
}
}
pub(crate) fn diff_digest_mode_display(report: &diff_digest::DiffDigestReport) -> String {
match (&report.mode, &report.revision) {
(diff_digest::DiffDigestMode::WorkingTree, _) => "working tree".to_string(),
(diff_digest::DiffDigestMode::Cached, _) => "staged index".to_string(),
(diff_digest::DiffDigestMode::Revision, Some(revision)) => {
format!("revision {revision}")
}
(diff_digest::DiffDigestMode::Revision, None) => "revision".to_string(),
}
}
pub(crate) fn diff_digest_empty_message(report: &diff_digest::DiffDigestReport) -> String {
match (&report.mode, &report.revision) {
(diff_digest::DiffDigestMode::WorkingTree, _) => "No git changes found.".to_string(),
(diff_digest::DiffDigestMode::Cached, _) => "No staged git changes found.".to_string(),
(diff_digest::DiffDigestMode::Revision, Some(revision)) => {
format!("No diff found for revision {revision}.")
}
(diff_digest::DiffDigestMode::Revision, None) => "No revision diff found.".to_string(),
}
}
/// Runs the impact analysis for `path` and prints the result to stdout.
///
/// Output shape depends on `format`: schema-aware JSON when `json_output` is set, a
/// one-line-per-target summary when `compact` is set, and an indented human-readable
/// listing otherwise.
///
/// # Errors
/// Propagates failures from `impact::compute` and from JSON rendering.
fn cmd_impact(
    path: &Path,
    cached: bool,
    revision: Option<&str>,
    scope: Option<&str>,
    limit: usize,
    format: OutputFormat,
) -> Result<()> {
    let options = impact::ImpactOptions {
        cached,
        revision,
        scope,
        limit,
    };
    let report = impact::compute(path, options)?;

    if format.json_output {
        let rendered = to_json_schema(
            &report,
            format.pretty,
            format.terse,
            format.ultra_terse,
            format.schema,
        )?;
        println!("{rendered}");
        return Ok(());
    }

    let mode = diff_digest_mode_label(report.mode);
    let changed = report.changed_files.len();
    let symbols = report.changed_symbols.len();
    let shown = report.affected_tests.len();
    let total = &report.affected_tests_total;

    if format.compact {
        println!("impact mode:{mode} changed:{changed} symbols:{symbols} tests:{shown}/{total}");
        for target in &report.affected_tests {
            let reasons = target.reasons.len();
            let command = target.commands.join(" && ");
            println!("{} reasons:{reasons} command:{command}", target.path);
        }
        // Compact mode deliberately uses the colon-less `warning ` prefix.
        for warning in &report.warnings {
            println!("warning {warning}");
        }
    } else {
        println!("Impact ({mode})");
        println!(" changed files: {changed}");
        println!(" changed symbols: {symbols}");
        println!(" affected tests: {shown}/{total}");
        for target in &report.affected_tests {
            println!();
            println!("{}", target.path);
            for reason in &target.reasons {
                println!(" - {reason}");
            }
            if !target.symbols.is_empty() {
                println!(" symbols: {}", target.symbols.join(", "));
            }
            for command in &target.commands {
                println!(" run: {command}");
            }
        }
        for warning in &report.warnings {
            println!("warning: {warning}");
        }
    }
    Ok(())
}
/// Digests raw test-runner output and prints the result to stdout.
///
/// JSON is emitted when `format.json_output` is set. Otherwise the function prints a
/// "no failures" line, a compact one-line-per-group listing, or a detailed listing,
/// followed in every text mode by the report's warnings.
///
/// # Errors
/// Propagates failures from `test_digest::compute` and from JSON rendering.
pub(crate) fn render_test_digest_from_input(
    path: &Path,
    input: &str,
    runner: Option<&str>,
    format: OutputFormat,
) -> Result<()> {
    let report = test_digest::compute(path, input, runner)?;

    if format.json_output {
        let rendered = to_json_schema(
            &report,
            format.pretty,
            format.terse,
            format.ultra_terse,
            format.schema,
        )?;
        println!("{rendered}");
        return Ok(());
    }

    if report.failure_groups.is_empty() {
        println!("No failures detected (runner: {}).", report.runner);
    } else if format.compact {
        println!(
            "test runner:{} failures:{} groups:{} passed:{} failed:{} skipped:{}",
            report.runner,
            report.failures,
            report.grouped_failures,
            report.counts.passed.unwrap_or(0),
            report.counts.failed.unwrap_or(report.grouped_failures),
            report.counts.skipped.unwrap_or(0),
        );
        for failure in &report.failure_groups {
            let tests = truncate_for_compact(&failure.tests.join(","), 60);
            // A line number is only meaningful together with a file path.
            let location = match failure.path.as_ref() {
                Some(file) => match failure.line {
                    Some(line) => format!("{file}:{line}"),
                    None => file.clone(),
                },
                None => "-".to_string(),
            };
            let summaries = test_digest_summary_label(failure.summary_state);
            let message = truncate_for_compact(&failure.message, 80);
            println!(
                "{location} tests:{tests} count:{} summaries:{summaries} msg:{message}",
                failure.occurrences
            );
        }
    } else {
        println!("Test digest ({})", report.runner);
        println!(" failures: {}", report.failures);
        println!(" failure groups: {}", report.grouped_failures);
        if let Some(passed) = report.counts.passed {
            println!(" passed: {passed}");
        }
        if let Some(failed) = report.counts.failed {
            println!(" failed: {failed}");
        }
        if let Some(skipped) = report.counts.skipped {
            println!(" skipped: {skipped}");
        }
        for failure in &report.failure_groups {
            println!();
            let anchor = match &failure.path {
                None => "(no file anchor)".to_string(),
                Some(file) => match (failure.line, failure.column) {
                    (Some(line), Some(column)) => format!("{file}:{line}:{column}"),
                    (Some(line), None) => format!("{file}:{line}"),
                    (None, _) => file.to_string(),
                },
            };
            println!("{anchor}");
            println!(" tests: {}", failure.tests.join(", "));
            println!(" occurrences: {}", failure.occurrences);
            println!(" message: {}", failure.message);
            println!(
                " cached summaries: {}",
                test_digest_summary_label(failure.summary_state)
            );
            for summary in &failure.current_summaries {
                let excerpt = truncate_for_compact(&summary.summary, 160);
                println!(" - {}: {excerpt}", summary.symbol);
            }
        }
    }

    // Every text mode ends with the same warning listing.
    for warning in &report.warnings {
        println!("warning: {warning}");
    }
    Ok(())
}
/// Per-kind node counts for a dispatch trace, as computed by `dispatch_trace_summary`.
#[derive(Clone, Serialize, Deserialize)]
struct DispatchTraceSummary {
/// Number of nodes whose kind is `backlog`.
backlog: usize,
/// Number of nodes whose kind is `job_packet`.
job_packet: usize,
/// Number of nodes whose kind is `worker_result`.
worker_result: usize,
/// Number of nodes whose kind is `worker_context`.
worker_context: usize,
/// Number of nodes whose kind is `source_handle`.
source_handle: usize,
/// Number of nodes whose kind is `semantic_concept` or `semantic_entity`.
semantic_rows: usize,
}
/// Serializable dispatch-trace report: a conflict-matrix report combined with the
/// selected sub-graph of nodes and edges. It is both written to and read back from the
/// cycle-packet cache, hence `Deserialize`.
#[derive(Clone, Serialize, Deserialize)]
struct DispatchTraceReport {
/// Set from `DISPATCH_TRACE_CONTRACT_VERSION` when the report is built.
contract_version: String,
/// Root copied from the conflict-matrix report.
root: String,
/// Scope copied from the conflict-matrix report; omitted from output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
/// Targets copied from the conflict-matrix report.
targets: Vec<String>,
/// Projection freshness taken from the conflict report's orchestration section.
projection_freshness: GraphDbFreshnessReport,
/// Projection hashes taken from the conflict report's orchestration section.
projection_hashes: Vec<String>,
/// Evidence packet ids taken from the conflict report's orchestration section.
evidence_packet_ids: Vec<String>,
/// Counts produced by `dispatch_trace_shared_preparation_summary`.
shared_preparation: ConflictMatrixSharedPreparationSummary,
/// Worker prompt packets copied from the conflict-matrix report.
worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
/// One feedback entry per conflict-matrix candidate, in candidate order.
worker_feedback: Vec<ConflictMatrixWorkerFeedback>,
/// Per-kind counts over `nodes`.
summary: DispatchTraceSummary,
/// Selected graph nodes, sorted by kind rank and then id.
nodes: Vec<SubstrateTerseGraphNode>,
/// Edges whose both endpoints are in `nodes`, sorted by from-id, kind, to-id.
edges: Vec<SubstrateTerseGraphEdge>,
/// Decisions taken from the conflict report's orchestration section.
conflict_matrix_decisions: Vec<String>,
/// The conflict report's `next_commands`.
replay_commands: Vec<String>,
/// Output of `graph_db_repair_commands` for the root and scope.
repair_commands: Vec<String>,
/// True when node collection stopped because the node cap was reached.
truncated: bool,
/// Conflict-report warnings followed by any extra warnings; omitted when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Returns true when `kind` is one of the node kinds a dispatch trace may contain.
fn dispatch_trace_allowed_node_kind(kind: &str) -> bool {
    const ALLOWED_KINDS: [&str; 11] = [
        "session",
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "semantic_concept",
        "semantic_entity",
        "file",
        "symbol",
        "route",
    ];
    ALLOWED_KINDS.contains(&kind)
}
/// Returns the sort rank of a node kind in dispatch-trace output; unknown kinds rank 99.
fn dispatch_trace_kind_rank(kind: &str) -> usize {
    // Position in this table is the rank.
    const RANKED_KINDS: [&str; 11] = [
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "file",
        "symbol",
        "route",
        "semantic_concept",
        "semantic_entity",
        "session",
    ];
    RANKED_KINDS
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(99)
}
/// Counts `nodes` by kind in a single pass. `semantic_concept` and `semantic_entity`
/// are tallied together as `semantic_rows`; kinds without a counter are ignored.
fn dispatch_trace_summary(nodes: &[SubstrateGraphNode]) -> DispatchTraceSummary {
    let mut summary = DispatchTraceSummary {
        backlog: 0,
        job_packet: 0,
        worker_result: 0,
        worker_context: 0,
        source_handle: 0,
        semantic_rows: 0,
    };
    for node in nodes {
        match node.kind.as_str() {
            "backlog" => summary.backlog += 1,
            "job_packet" => summary.job_packet += 1,
            "worker_result" => summary.worker_result += 1,
            "worker_context" => summary.worker_context += 1,
            "source_handle" => summary.source_handle += 1,
            "semantic_concept" | "semantic_entity" => summary.semantic_rows += 1,
            _ => {}
        }
    }
    summary
}
/// Builds the shared-preparation summary for a dispatch trace.
///
/// Graph sizes come from the supplied snapshot (and are reported again as the
/// `dispatch_trace_snapshot_*` fields); the remaining counters are totals across all
/// conflict-matrix candidates. The evidence-cache status is copied from the conflict
/// report's own shared-preparation section.
fn dispatch_trace_shared_preparation_summary(
    graph_nodes: &[SubstrateGraphNode],
    graph_edges: &[SubstrateGraphEdge],
    conflict: &ConflictMatrixReport,
) -> ConflictMatrixSharedPreparationSummary {
    let node_count = graph_nodes.len();
    let edge_count = graph_edges.len();

    let mut source_handles = 0;
    let mut worker_context = 0;
    let mut worker_results = 0;
    let mut semantic_rows = 0;
    for candidate in &conflict.candidates {
        source_handles += candidate.source_handles.len();
        worker_context += candidate.worker_context_handles.len();
        worker_results += candidate.worker_feedback.total;
        semantic_rows += candidate.semantic_related.len();
    }

    ConflictMatrixSharedPreparationSummary {
        evidence_cache_status: conflict
            .inputs
            .shared_preparation
            .evidence_cache_status
            .clone(),
        graph_nodes: node_count,
        graph_edges: edge_count,
        evidence_packets: conflict.orchestration.evidence_packet_ids.len(),
        source_handles,
        worker_context,
        worker_results,
        semantic_rows,
        dispatch_trace_snapshot_nodes: node_count,
        dispatch_trace_snapshot_edges: edge_count,
    }
}
/// Selects the graph node ids that belong in a dispatch trace.
///
/// Seeds the set with each candidate's target node, source handles, worker-context
/// handles and semantic handles, plus every allowed-kind node whose `ref_id` property
/// matches a target (leading `#` ignored). The set is then grown along edges for up to
/// `max(depth, 1)` rounds, admitting only endpoints of an allowed kind.
///
/// Returns the id set and a flag that is true when growth stopped because the node cap
/// was reached. A `limit` of 0 means no cap; otherwise the cap is
/// `max(limit * max(targets, 1) * 12, 64)`.
fn dispatch_trace_collect_ids(
    targets: &[String],
    candidates: &[ConflictMatrixCandidate],
    graph_nodes: &[SubstrateGraphNode],
    graph_edges: &[SubstrateGraphEdge],
    depth: usize,
    limit: usize,
) -> (BTreeSet<String>, bool) {
    let target_refs: BTreeSet<String> = targets
        .iter()
        .map(|target| target.trim_start_matches('#').to_string())
        .collect();

    let mut ids = BTreeSet::new();
    for candidate in candidates {
        ids.insert(candidate.target_node_id.clone());
        ids.extend(candidate.source_handles.iter().map(|source| source.handle.clone()));
        ids.extend(candidate.worker_context_handles.iter().cloned());
        ids.extend(
            candidate
                .semantic_related
                .iter()
                .map(|semantic| semantic.handle.clone()),
        );
    }
    ids.extend(
        graph_nodes
            .iter()
            .filter(|node| dispatch_trace_allowed_node_kind(&node.kind))
            .filter(|node| {
                node.properties
                    .get("ref_id")
                    .is_some_and(|ref_id| target_refs.contains(ref_id))
            })
            .map(|node| node.id.clone()),
    );

    let node_by_id: BTreeMap<&str, &SubstrateGraphNode> = graph_nodes
        .iter()
        .map(|node| (node.id.as_str(), node))
        .collect();
    let max_nodes = match limit {
        0 => usize::MAX,
        bounded => bounded
            .saturating_mul(targets.len().max(1))
            .saturating_mul(12)
            .max(64),
    };

    let mut truncated = false;
    for _ in 0..depth.max(1) {
        // Each round expands only from the ids known at its start.
        let frontier = ids.clone();
        let size_before = frontier.len();
        for edge in graph_edges {
            if ids.len() >= max_nodes {
                truncated = true;
                break;
            }
            if !(frontier.contains(&edge.from_id) || frontier.contains(&edge.to_id)) {
                continue;
            }
            for endpoint in [&edge.from_id, &edge.to_id] {
                let admissible = node_by_id
                    .get(endpoint.as_str())
                    .is_some_and(|node| dispatch_trace_allowed_node_kind(&node.kind));
                if admissible {
                    ids.insert(endpoint.clone());
                }
            }
        }
        // Stop on truncation or once a round adds nothing new.
        if truncated || ids.len() == size_before {
            break;
        }
    }
    (ids, truncated)
}
/// Assembles a `DispatchTraceReport` from a finished conflict-matrix report and a graph
/// snapshot.
///
/// Nodes are narrowed to the ids chosen by `dispatch_trace_collect_ids` and sorted by
/// kind rank then id. Edges are kept only when both endpoints survive and are sorted by
/// from-id, kind, to-id. Warnings are the conflict report's warnings followed by
/// `extra_warnings`.
#[allow(clippy::too_many_arguments)]
fn build_dispatch_trace_report_from_conflict_snapshot(
    root: &Path,
    scope: Option<&str>,
    conflict: ConflictMatrixReport,
    graph_nodes: Vec<SubstrateGraphNode>,
    graph_edges: Vec<SubstrateGraphEdge>,
    depth: usize,
    limit: usize,
    extra_warnings: Vec<String>,
) -> Result<DispatchTraceReport> {
    // Computed first: the summary reports the size of the full snapshot, not the subset.
    let shared_preparation =
        dispatch_trace_shared_preparation_summary(&graph_nodes, &graph_edges, &conflict);
    let (selected_ids, truncated) = dispatch_trace_collect_ids(
        &conflict.targets,
        &conflict.candidates,
        &graph_nodes,
        &graph_edges,
        depth,
        limit,
    );

    let mut nodes: Vec<_> = graph_nodes
        .into_iter()
        .filter(|node| selected_ids.contains(&node.id))
        .collect();
    nodes.sort_by(|left, right| {
        let left_key = (dispatch_trace_kind_rank(&left.kind), &left.id);
        let right_key = (dispatch_trace_kind_rank(&right.kind), &right.id);
        left_key.cmp(&right_key)
    });

    let kept_ids: BTreeSet<&str> = nodes.iter().map(|node| node.id.as_str()).collect();
    let mut edges: Vec<_> = graph_edges
        .into_iter()
        .filter(|edge| {
            kept_ids.contains(edge.from_id.as_str()) && kept_ids.contains(edge.to_id.as_str())
        })
        .collect();
    edges.sort_by(|left, right| {
        (&left.from_id, &left.kind, &left.to_id).cmp(&(&right.from_id, &right.kind, &right.to_id))
    });

    let summary = dispatch_trace_summary(&nodes);
    let worker_feedback: Vec<_> = conflict
        .candidates
        .iter()
        .map(|candidate| candidate.worker_feedback.clone())
        .collect();
    let mut warnings = conflict.warnings;
    warnings.extend(extra_warnings);

    Ok(DispatchTraceReport {
        contract_version: DISPATCH_TRACE_CONTRACT_VERSION.to_string(),
        root: conflict.root,
        scope: conflict.scope,
        targets: conflict.targets,
        projection_freshness: conflict.orchestration.projection_freshness,
        projection_hashes: conflict.orchestration.projection_hashes,
        evidence_packet_ids: conflict.orchestration.evidence_packet_ids,
        shared_preparation,
        worker_prompt_packets: conflict.worker_prompt_packets,
        worker_feedback,
        summary,
        nodes: nodes.into_iter().map(Into::into).collect(),
        edges: edges.into_iter().map(Into::into).collect(),
        conflict_matrix_decisions: conflict.orchestration.conflict_matrix_decisions,
        replay_commands: conflict.next_commands,
        repair_commands: graph_db_repair_commands(root, scope),
        truncated,
        warnings,
    })
}
/// Builds (or loads from cache) the dispatch-trace report for `raw_targets`.
///
/// Steps:
/// 1. Resolve the project root and refresh the graph-db projection unless the cached
///    backend evaluation is still valid for the current source watermark.
/// 2. Open the projection read-only and record any recovery diagnostic as a warning.
/// 3. Prepare conflict-matrix inputs and graph orchestration.
/// 4. Return a cached report when one exists for the watermark key; otherwise build the
///    conflict-matrix report, derive the dispatch trace from it, and cache the result.
///
/// The cache key covers every argument that shapes the report: targets, depth, limit,
/// impact limit and scope. Without the last two, a run with a different scope or impact
/// limit could be answered with a report computed for other settings.
///
/// # Errors
/// Propagates failures from root resolution, projection refresh/opening, freshness
/// lookup, input preparation and report construction.
fn build_dispatch_trace_report(
    path: &Path,
    scope: Option<&str>,
    raw_targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
) -> Result<DispatchTraceReport> {
    let root = lint::resolve_project_root_or_canonical_path(path)?;
    let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
    if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
        write_traversal_graph_store(&root, path, scope)
            .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
    }
    let graph_db = graph_substrate_db_path(&root, scope);
    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
        .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
    let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
    let extra_warnings = store
        .read_only_recovery()
        .map(graph_db_read_recovery_diagnostic)
        .into_iter()
        .collect::<Vec<_>>();
    let prepared = prepare_conflict_matrix_inputs(&root, path, scope, impact_limit)?;
    let graph_prepared = prepare_conflict_matrix_graph_orchestration(
        &root,
        scope,
        "sqlite",
        raw_targets,
        &prepared,
        depth,
        limit,
        &store,
        freshness.clone(),
    )?;
    // NOTE(review): the report is stored under `CyclePacketKind::ConflictMatrix`; confirm
    // no other report type uses the same kind with an identical key.
    let dt_cache_key = cycle_packet_cache::cycle_packet_watermark_key(
        &prepared.preparation_cache.source_watermark,
        &prepared.preparation_cache.document_watermark,
        &prepared.preparation_cache.staged_diff_watermark,
        &[
            &format!("targets:{}", raw_targets.join(",")),
            &format!("depth:{depth}"),
            &format!("limit:{limit}"),
            &format!("impact_limit:{impact_limit}"),
            &format!("scope:{}", scope.unwrap_or("root")),
        ],
    );
    if let Some(cached_report) = cycle_packet_cache::cycle_packet_read_cache::<DispatchTraceReport>(
        &root,
        cycle_packet_cache::CyclePacketKind::ConflictMatrix,
        &dt_cache_key,
    ) {
        return Ok(cached_report);
    }
    let conflict = build_conflict_matrix_report_from_prepared_graph(
        &root,
        path,
        scope,
        depth,
        limit,
        impact_limit,
        freshness,
        extra_warnings.clone(),
        &prepared,
        &graph_prepared,
    )?;
    let report = build_dispatch_trace_report_from_conflict_snapshot(
        &root,
        scope,
        conflict,
        graph_prepared.graph.nodes,
        graph_prepared.graph.edges,
        depth,
        limit,
        extra_warnings,
    )?;
    cycle_packet_cache::cycle_packet_write_cache(
        &root,
        cycle_packet_cache::CyclePacketKind::ConflictMatrix,
        &dt_cache_key,
        &report,
    );
    Ok(report)
}
/// Renders a self-contained HTML page for a dispatch-trace report.
///
/// The report is embedded as JSON in a `<script type="application/json">` element, with
/// every `</` rewritten to `<\/` so the payload cannot close the script element early.
/// Inline JavaScript then lays out the nodes on rings grouped by kind, draws the graph as
/// SVG, and fills the side panels for worker prompt packets, worker feedback and nodes.
///
/// # Errors
/// Fails only when the report cannot be serialized to JSON.
fn dispatch_trace_html(report: &DispatchTraceReport) -> Result<String> {
    let json = serde_json::to_string(report)?.replace("</", "<\\/");
    let mut html = String::new();
    html.push_str(
        "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift dispatch trace</title>",
    );
    html.push_str(
        r#"<style>
:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#fff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e}
@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf}}
*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 360px;gap:14px}.panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.side{padding:14px;overflow:auto;max-height:720px}.side h2{font-size:15px;margin:12px 0 8px}.side h2:first-child{margin-top:0}.list{display:grid;gap:8px}.row{border:1px solid var(--line);border-radius:6px;padding:8px}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted);overflow-wrap:anywhere}svg{width:100%;height:680px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.node{stroke:var(--panel);stroke-width:2}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text)}@media(max-width:900px){.top{display:block}.layout{grid-template-columns:1fr}.side{max-height:none}svg{height:560px}}
</style>"#,
    );
    html.push_str("</head><body><div class=\"page\">");
    html.push_str(&format!(
        "<header class=\"top\"><div><h1>tsift dispatch trace</h1><div class=\"meta\">targets <code>{}</code> | evidence <code>{}</code> | nodes <code>{}</code> | worker_prompt_packets <code>{}</code></div></div><div class=\"meta\"><code>{}</code></div></header>",
        html_escape(&report.targets.join(", ")),
        report.evidence_packet_ids.len(),
        report.nodes.len(),
        report.worker_prompt_packets.len(),
        html_escape(&report.contract_version)
    ));
    html.push_str(
        r#"<main class="layout"><section class="panel"><svg id="graph-canvas" role="img" aria-label="Dispatch trace graph"></svg></section><aside class="side"><h2>Worker Prompt Packets</h2><div id="packets" class="list"></div><h2>Worker Feedback</h2><div id="feedback" class="list"></div><h2>Nodes</h2><div id="nodes" class="list"></div></aside></main>"#,
    );
    html.push_str("<script id=\"trace-data\" type=\"application/json\">");
    html.push_str(&json);
    // The client-side `escapeHtml` emits numeric character references (`&` becomes the
    // reference for code point 38, and so on). The previous lookup table mapped each
    // special character to itself and contained an unterminated string literal, which
    // both disabled escaping for the `innerHTML` templates below and broke the script.
    html.push_str(
        r##"</script><script>
const report = JSON.parse(document.getElementById("trace-data").textContent);
const svg = document.getElementById("graph-canvas");
const nodeList = document.getElementById("nodes");
const packets = document.getElementById("packets");
const feedback = document.getElementById("feedback");
const nodes = report.nodes.map((node, index) => ({...node, index}));
const nodeById = new Map(nodes.map(node => [node.id, node]));
const edges = report.edges.filter(edge => nodeById.has(edge.from_id) && nodeById.has(edge.to_id));
const colorByKind = new Map([["backlog","#dc2626"],["job_packet","#ea580c"],["worker_result","#15803d"],["worker_context","#475569"],["source_handle","#64748b"],["semantic_concept","#9a3412"],["semantic_entity","#b45309"],["file","#2563eb"],["symbol","#16a34a"],["route","#7c3aed"],["session","#0891b2"]]);
function color(kind){return colorByKind.get(kind)||"#6b7280";}
function text(value){return value == null ? "" : String(value);}
function escapeHtml(value){return text(value).replace(/[&<>"']/g, ch => "&#" + ch.charCodeAt(0) + ";");}
function layout(){
const rect = svg.getBoundingClientRect();
const width = rect.width || 900, height = rect.height || 680, cx = width / 2, cy = height / 2;
const kinds = [...new Set(nodes.map(node => node.kind))].sort();
const counts = new Map();
for (const node of nodes) counts.set(node.kind, (counts.get(node.kind)||0)+1);
const offsets = new Map();
for (const node of nodes) {
const group = kinds.indexOf(node.kind);
const index = offsets.get(node.kind) || 0;
offsets.set(node.kind, index + 1);
const total = counts.get(node.kind) || 1;
const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
const angle = Math.PI * 2 * index / Math.max(total, 1) + group * 0.53;
node.x = cx + Math.cos(angle) * ring;
node.y = cy + Math.sin(angle) * ring;
}
}
function draw(){
svg.innerHTML = "";
for (const edge of edges) {
const from = nodeById.get(edge.from_id), to = nodeById.get(edge.to_id);
const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
line.setAttribute("class", "edge");
line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.kind;
svg.appendChild(line);
}
for (const node of nodes) {
const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
circle.setAttribute("fill", color(node.kind));
circle.setAttribute("class", "node");
circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
svg.appendChild(circle);
const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
label.setAttribute("class", "node-label");
label.textContent = node.label.length > 34 ? node.label.slice(0,31) + "..." : node.label;
svg.appendChild(label);
}
}
packets.innerHTML = report.worker_prompt_packets.map(packet => `<div class="row"><div class="kind">${escapeHtml(packet.contract_version)} - ${escapeHtml(packet.risk)} - parallel_safe ${packet.parallel_safe ? "true" : "false"} - closure ${packet.worker_feedback ? packet.worker_feedback.closure_rank_score : 0}</div><div class="label">${escapeHtml(packet.title)}</div><div class="handle">${escapeHtml(packet.packet_id)}</div><div class="handle">blocks ${escapeHtml((packet.blocks||[]).join(", ") || "none")} | blocked_by ${escapeHtml((packet.blocked_by||[]).join(", ") || "none")}</div></div>`).join("") || "<div class=\"meta\">No packets.</div>";
feedback.innerHTML = report.worker_feedback.map(item => `<div class="row"><div class="kind">completed ${item.completed} - blocked ${item.blocked} - closure ${item.closure_rank_score}</div><div>files ${escapeHtml((item.touched_files||[]).join(", ") || "none")}</div><div>tests ${escapeHtml((item.expected_tests||[]).join(" && ") || "none")}</div>${item.repeated_blockage ? "<div class=\"label\">Repeated blockage</div>" : ""}${(item.stale_expected_tests||[]).length ? `<div class="label">Stale tests: ${escapeHtml(item.stale_expected_tests.join(", "))}</div>` : ""}${(item.follow_up_debt||[]).length ? `<div class="label">Follow-up debt: ${escapeHtml(item.follow_up_debt.join(", "))}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No worker results.</div>";
nodeList.innerHTML = nodes.map(node => `<div class="row"><div class="kind">${escapeHtml(node.kind)}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${escapeHtml(node.id)}</div></div>`).join("");
window.addEventListener("resize", () => { layout(); draw(); });
layout(); draw();
</script></div></body></html>"##,
    );
    Ok(html)
}
/// Borrowed arguments for `cmd_dispatch_trace`. Everything except `trace_format` is
/// forwarded unchanged to `build_dispatch_trace_report`.
struct DispatchTraceOptions<'a> {
/// Path used to resolve the project root.
path: &'a Path,
/// Optional scope; `None` is treated as `"root"` when querying graph freshness.
scope: Option<&'a str>,
/// Targets as supplied by the caller, before normalization.
raw_targets: &'a [String],
/// Graph expansion depth.
depth: usize,
/// Row/node limit; 0 disables the node cap during id collection.
limit: usize,
/// Limit passed to conflict-matrix input preparation.
impact_limit: usize,
/// Selects JSON or HTML output.
trace_format: DispatchTraceFormat,
}
fn cmd_dispatch_trace(
options: DispatchTraceOptions<'_>,
output_format: OutputFormat,
) -> Result<()> {
let report = build_dispatch_trace_report(
options.path,
options.scope,
options.raw_targets,
options.depth,
options.limit,
options.impact_limit,
)?;
match options.trace_format {
DispatchTraceFormat::Json => {
if output_format.envelope {
print_json_or_envelope(
&report,
&output_format,
"dispatch-trace",
"operator-review",
ToolEnvelopeSummary {
text: format!(
"Dispatch trace for {} target(s): {} graph node(s), {} worker prompt packet(s)",
report.targets.len(),
report.nodes.len(),
report.worker_prompt_packets.len()
),
metrics: vec![
envelope_metric("targets", report.targets.len()),
envelope_metric("nodes", report.nodes.len()),
envelope_metric("edges", report.edges.len()),
envelope_metric(
"worker_prompt_packets",
report.worker_prompt_packets.len(),
),
],
},
report.truncated,
report.replay_commands.clone(),
)
} else {
println!(
"{}",
to_json_schema(
&report,
output_format.pretty,
output_format.terse,
output_format.ultra_terse,
output_format.schema
)?
);
Ok(())
}
}
DispatchTraceFormat::Html => {
println!("{}", dispatch_trace_html(&report)?);
Ok(())
}
}
}
/// Everything gathered about one backlog node by `dependency_dag_node_profile`.
#[derive(Clone, Debug)]
struct DependencyDagProfile {
/// The node's `ref_id` property, or its label with leading `#` removed.
id: String,
/// Id of the backlog node in the graph store.
graph_node_id: String,
/// The backlog node's label.
label: String,
/// The node's `path` property, when present.
path: Option<String>,
/// The node's `line` property parsed as an integer, when present and parseable.
line: Option<i64>,
/// The node's `detail` property, when present.
detail: Option<String>,
/// Paths of mentioned files, routes and symbols, plus files of reachable source handles.
source_files: BTreeSet<String>,
/// Labels of symbols the node mentions.
source_symbols: BTreeSet<String>,
/// The subset of `source_files` accepted by `is_planner_config_path`.
config_files: BTreeSet<String>,
/// Expected tests taken from the aggregated worker feedback.
expected_tests: BTreeSet<String>,
/// Reachable semantic concepts/entities keyed by `"<kind>:<label>"`; first seen wins.
semantic_refs: BTreeMap<String, ConflictMatrixSemanticRef>,
/// Feedback aggregated from `worker_result` nodes whose `ref_id` equals `id`.
worker_feedback: ConflictMatrixWorkerFeedback,
}
/// Serializable node of the dependency DAG report. The fields mirror
/// `DependencyDagProfile`, with the set and map fields stored as vectors.
/// NOTE(review): the conversion from profile to node is outside this view — confirm the
/// vectors keep the sorted order of the profile's sets.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagNode {
id: String,
graph_node_id: String,
label: String,
// Optional fields are omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
line: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
detail: Option<String>,
source_files: Vec<String>,
source_symbols: Vec<String>,
config_files: Vec<String>,
expected_tests: Vec<String>,
semantic_refs: Vec<ConflictMatrixSemanticRef>,
worker_feedback: ConflictMatrixWorkerFeedback,
}
/// Directed edge of the dependency DAG: `from` is expected to be handled before `to`.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagEdge {
/// Id of the upstream node.
from: String,
/// Id of the downstream node.
to: String,
/// Edge category, e.g. `explicit_depends_on` or `explicit_before`.
kind: String,
/// Edge weight; explicit edges use 1000 (`explicit_depends_on`) and 900 (`explicit_before`).
weight: usize,
/// Human-readable reasons for the edge.
reasons: Vec<String>,
// The `shared_*` lists are left empty for explicit edges and are omitted from the
// serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_files: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_symbols: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_tests: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_config_files: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_semantic_refs: Vec<String>,
}
/// One batch of the topological ordering.
/// NOTE(review): presumably targets within a batch have no edges between them — the
/// code that builds batches is outside this view; confirm there.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagTopoBatch {
/// Batch number (whether it starts at 0 or 1 is decided where batches are built).
batch: usize,
/// Ids of the targets assigned to this batch.
targets: Vec<String>,
}
/// Cycle information attached to a dependency DAG report.
/// NOTE(review): populated outside this view; field meanings below are inferred from
/// their names and should be confirmed against the builder.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagCycleDiagnostics {
/// Whether any cycle was detected.
has_cycles: bool,
/// Presumably the nodes that could not be placed in a topological batch.
blocked_nodes: Vec<String>,
/// Presumably the edges that participate in cycles.
cycle_edges: Vec<DependencyDagEdge>,
}
/// Headline counters for a dependency DAG report.
/// NOTE(review): presumably these mirror the lengths of the report's `nodes`, `edges`
/// and `topo_batches` and its cycle flag — confirm where the summary is built.
#[derive(Serialize)]
struct DependencyDagSummary {
nodes: usize,
edges: usize,
topo_batches: usize,
has_cycles: bool,
}
/// Serializable dependency DAG report. It is serialize-only (no `Deserialize`), which
/// is why `contract_version` can be a `&'static str`.
/// NOTE(review): the report is assembled outside this view; field descriptions are
/// limited to what the types and serde attributes show.
#[derive(Serialize)]
struct DependencyDagReport {
contract_version: &'static str,
root: String,
// Omitted from the serialized output when no scope is set.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
path: String,
targets: Vec<String>,
projection_freshness: GraphDbFreshnessReport,
projection_hashes: Vec<String>,
nodes: Vec<DependencyDagNode>,
edges: Vec<DependencyDagEdge>,
topo_batches: Vec<DependencyDagTopoBatch>,
cycle_diagnostics: DependencyDagCycleDiagnostics,
summary: DependencyDagSummary,
replay_commands: Vec<String>,
repair_commands: Vec<String>,
// Omitted from the serialized output when there are no warnings.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Resolves `target` to a backlog node.
///
/// A target that resolves directly to a `backlog` node is returned as is. Otherwise the
/// resolved node must carry a `ref_id` property; among backlog nodes with the same
/// `ref_id`, the one with the smallest parsed `line` wins (a missing or unparseable line
/// sorts first), with the node id as tie-breaker.
///
/// # Errors
/// Fails when the target cannot be resolved, when the resolved node has no `ref_id`, or
/// when no backlog node carries that `ref_id`.
fn dependency_dag_backlog_node_for_target(
    store: &impl GraphStore,
    target: &str,
) -> Result<SubstrateGraphNode> {
    let resolved = graph_db_resolve_evidence_target(store, target)?
        .with_context(|| format!("dependency-dag target not found: {target}"))?;
    if resolved.kind == "backlog" {
        return Ok(resolved);
    }

    let ref_id = match resolved.properties.get("ref_id") {
        Some(ref_id) => ref_id.clone(),
        None => bail!(
            "dependency-dag target {} resolved to {} without a backlog ref_id",
            target,
            resolved.kind
        ),
    };

    let line_of = |node: &SubstrateGraphNode| -> Option<i64> {
        node.properties
            .get("line")
            .and_then(|value| value.parse::<i64>().ok())
    };
    let earliest = store
        .nodes_by_kind("backlog")?
        .into_iter()
        .filter(|node| {
            node.properties
                .get("ref_id")
                .is_some_and(|candidate| *candidate == ref_id)
        })
        .min_by(|left, right| (line_of(left), &left.id).cmp(&(line_of(right), &right.id)));
    earliest.with_context(|| format!("dependency-dag backlog node not found for #{ref_id}"))
}
/// Determines which backlog nodes take part in the dependency DAG.
///
/// With explicit `raw_targets`, each target is normalized and resolved individually.
/// Without targets, all backlog nodes are used; when `path` points at a Markdown file,
/// only nodes whose `path` property equals that file (relative to `root`, forward
/// slashes) are taken, falling back to all backlog nodes if none match. The result has
/// no duplicate node ids and is sorted by parsed `line`, then id.
///
/// # Errors
/// Fails on store or resolution errors and when no backlog node is found.
fn dependency_dag_resolve_backlog_nodes(
    root: &Path,
    path: &Path,
    store: &impl GraphStore,
    raw_targets: &[String],
) -> Result<Vec<SubstrateGraphNode>> {
    let mut nodes = Vec::new();
    let mut seen = BTreeSet::new();

    if raw_targets.is_empty() {
        let hinted_path = if path.is_absolute() {
            path.to_path_buf()
        } else {
            root.join(path)
        };
        let is_markdown = hinted_path
            .extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext.eq_ignore_ascii_case("md"));
        let hinted_rel = if is_markdown {
            Some(
                relativize_pathbuf(&hinted_path, root)
                    .to_string_lossy()
                    .replace('\\', "/"),
            )
        } else {
            None
        };

        for node in store.nodes_by_kind("backlog")? {
            let in_hinted_file = match &hinted_rel {
                Some(expected_path) => node.properties.get("path") == Some(expected_path),
                None => true,
            };
            if in_hinted_file && seen.insert(node.id.clone()) {
                nodes.push(node);
            }
        }
        // The hinted Markdown file produced nothing: fall back to every backlog node.
        if hinted_rel.is_some() && nodes.is_empty() {
            for node in store.nodes_by_kind("backlog")? {
                if seen.insert(node.id.clone()) {
                    nodes.push(node);
                }
            }
        }
    } else {
        for target in raw_targets {
            let normalized = match normalize_conflict_target(target) {
                Some(normalized) => normalized,
                None => target.clone(),
            };
            let node = dependency_dag_backlog_node_for_target(store, &normalized)?;
            if seen.insert(node.id.clone()) {
                nodes.push(node);
            }
        }
    }

    if nodes.is_empty() {
        bail!("dependency-dag needs at least one resolvable backlog id");
    }

    let line_of = |node: &SubstrateGraphNode| -> Option<i64> {
        node.properties
            .get("line")
            .and_then(|value| value.parse::<i64>().ok())
    };
    nodes.sort_by(|left, right| (line_of(left), &left.id).cmp(&(line_of(right), &right.id)));
    Ok(nodes)
}
/// Returns the DAG id of a backlog node: its `ref_id` property when present, otherwise
/// its label with any leading `#` characters removed.
fn dependency_dag_node_id(node: &SubstrateGraphNode) -> String {
    match node.properties.get("ref_id") {
        Some(ref_id) => ref_id.clone(),
        None => node.label.trim_start_matches('#').to_string(),
    }
}
/// Gathers the files, symbols, tests, semantic references and worker feedback
/// associated with one backlog node.
///
/// * Files and symbols come from the node's outgoing `mentions` edges and from
///   reachable `source_handle` nodes.
/// * Worker feedback is aggregated from `worker_result` nodes whose `ref_id` equals the
///   node's DAG id; the expected tests are taken from that feedback.
/// * Config files are the source files accepted by `is_planner_config_path`.
/// * Semantic references are reachable `semantic_concept` and `semantic_entity` nodes,
///   de-duplicated by `"<kind>:<label>"` with the first occurrence kept.
///
/// A `limit` of 0 removes the row cap on reachability queries.
///
/// # Errors
/// Propagates failures from the reachability queries.
fn dependency_dag_node_profile(
    root: &Path,
    store: &impl GraphStore,
    node: &SubstrateGraphNode,
    graph_nodes_by_id: &BTreeMap<String, SubstrateGraphNode>,
    graph_edges: &[SubstrateGraphEdge],
    depth: usize,
    limit: usize,
) -> Result<DependencyDagProfile> {
    let id = dependency_dag_node_id(node);
    let mut source_files = BTreeSet::new();
    let mut source_symbols = BTreeSet::new();

    let mentioned = graph_edges
        .iter()
        .filter(|edge| edge.from_id == node.id && edge.kind == "mentions")
        .filter_map(|edge| graph_nodes_by_id.get(&edge.to_id));
    for target in mentioned {
        let target_path = target.properties.get("path");
        match target.kind.as_str() {
            "file" | "route" => {
                if let Some(path) = target_path {
                    source_files.insert(path.clone());
                }
            }
            "symbol" => {
                source_symbols.insert(target.label.clone());
                if let Some(path) = target_path {
                    source_files.insert(path.clone());
                }
            }
            _ => {}
        }
    }

    let max_rows = match limit {
        0 => usize::MAX,
        bounded => bounded,
    };
    for (source, _) in
        graph_db_reachable_nodes_by_kind(store, &node.id, "source_handle", depth, max_rows)?
    {
        let terse = SubstrateTerseGraphNode::from(&source);
        if let Some(handle) = conflict_matrix_source_handle(&terse) {
            source_files.insert(handle.file);
        }
    }

    let worker_results: Vec<SubstrateTerseGraphNode> = graph_nodes_by_id
        .values()
        .filter(|candidate| candidate.kind == "worker_result")
        .filter(|candidate| {
            candidate.properties.get("ref_id").map(String::as_str) == Some(id.as_str())
        })
        .map(SubstrateTerseGraphNode::from)
        .collect();
    let worker_feedback = conflict_matrix_worker_feedback(&worker_results);
    let expected_tests = worker_feedback.expected_tests.iter().cloned().collect();
    let config_files = source_files
        .iter()
        .filter(|file| is_planner_config_path(file))
        .cloned()
        .collect();

    let mut semantic_refs = BTreeMap::new();
    for semantic_kind in ["semantic_concept", "semantic_entity"] {
        let reachable =
            graph_db_reachable_nodes_by_kind(store, &node.id, semantic_kind, depth, max_rows)?;
        for (semantic, _) in reachable {
            let terse = SubstrateTerseGraphNode::from(&semantic);
            let semantic_ref = conflict_matrix_semantic_ref(root, &terse);
            let key = format!("{}:{}", semantic_ref.kind, semantic_ref.label);
            semantic_refs.entry(key).or_insert(semantic_ref);
        }
    }

    let line = node
        .properties
        .get("line")
        .and_then(|value| value.parse::<i64>().ok());
    Ok(DependencyDagProfile {
        id,
        graph_node_id: node.id.clone(),
        label: node.label.clone(),
        path: node.properties.get("path").cloned(),
        line,
        detail: node.properties.get("detail").cloned(),
        source_files,
        source_symbols,
        config_files,
        expected_tests,
        semantic_refs,
        worker_feedback,
    })
}
/// Extracts the target references that follow any of `markers` in `text`.
///
/// Matching is ASCII case-insensitive. For every non-overlapping occurrence of a marker,
/// the text after it up to the next newline or `.` is passed to
/// `extract_conflict_target_refs`. Results are returned in discovery order (marker by
/// marker, then by position) with duplicates removed.
///
/// Markers are expected to be lowercase, because they are matched against a lowercased
/// copy of `text`. Empty markers are skipped: an empty pattern matches at every
/// position, which previously made the search loop spin forever.
fn dependency_dag_marker_refs(text: &str, markers: &[&str]) -> Vec<String> {
    // ASCII lowercasing never changes byte lengths, so offsets found in `lower` are
    // valid (and on char boundaries) in `text` as well.
    let lower = text.to_ascii_lowercase();
    let mut refs = Vec::new();
    for marker in markers {
        if marker.is_empty() {
            continue;
        }
        for (pos, matched) in lower.match_indices(*marker) {
            let rest = &text[pos + matched.len()..];
            let segment = rest.split(['\n', '.']).next().unwrap_or(rest);
            refs.extend(extract_conflict_target_refs(segment));
        }
    }
    dedupe_preserve_order(refs)
}
/// Appends `edge` to `edges` unless it is a self-loop or a duplicate.
///
/// An edge whose `from` equals its `to` is dropped. Otherwise its
/// `(from, to, kind)` triple is recorded in `seen`, and the edge is pushed
/// only the first time that triple is observed.
fn dependency_dag_push_edge(
    edges: &mut Vec<DependencyDagEdge>,
    seen: &mut BTreeSet<(String, String, String)>,
    edge: DependencyDagEdge,
) {
    // Short-circuit keeps self-loops out of `seen` entirely.
    let accepted = edge.from != edge.to
        && seen.insert((edge.from.clone(), edge.to.clone(), edge.kind.clone()));
    if accepted {
        edges.push(edge);
    }
}
/// Adds edges for dependencies that profiles declare in their `detail` text.
///
/// Two marker families are scanned with `dependency_dag_marker_refs`:
///
/// - upstream markers (`depends on`, `depends-on`, `deps:`, `after`,
///   `blocked by`, `requires`) yield an `explicit_depends_on` edge from the
///   referenced target to the declaring profile, with weight 1000;
/// - downstream markers (`before`, `unblocks`) yield an `explicit_before`
///   edge from the declaring profile to the referenced target, with weight 900.
///
/// References that are not members of `target_ids` are ignored. Every edge is
/// inserted through `dependency_dag_push_edge`.
fn dependency_dag_explicit_edges(
    profiles: &[DependencyDagProfile],
    target_ids: &BTreeSet<String>,
    edges: &mut Vec<DependencyDagEdge>,
    seen: &mut BTreeSet<(String, String, String)>,
) {
    const UPSTREAM_MARKERS: [&str; 6] = [
        "depends on",
        "depends-on",
        "deps:",
        "after",
        "blocked by",
        "requires",
    ];
    const DOWNSTREAM_MARKERS: [&str; 2] = ["before", "unblocks"];

    for profile in profiles {
        let detail = profile.detail.as_deref().unwrap_or("");

        let upstream = dependency_dag_marker_refs(detail, &UPSTREAM_MARKERS);
        for dep in upstream.into_iter().filter(|dep| target_ids.contains(dep)) {
            let reason = format!("{} declares dependency on #{dep}", profile.id);
            let edge = DependencyDagEdge {
                from: dep,
                to: profile.id.clone(),
                kind: "explicit_depends_on".to_string(),
                weight: 1000,
                reasons: vec![reason],
                shared_files: Vec::new(),
                shared_symbols: Vec::new(),
                shared_tests: Vec::new(),
                shared_config_files: Vec::new(),
                shared_semantic_refs: Vec::new(),
            };
            dependency_dag_push_edge(edges, seen, edge);
        }

        let later = dependency_dag_marker_refs(detail, &DOWNSTREAM_MARKERS);
        for downstream in later.into_iter().filter(|id| target_ids.contains(id)) {
            let reason = format!(
                "{} declares it should run before #{downstream}",
                profile.id
            );
            let edge = DependencyDagEdge {
                from: profile.id.clone(),
                to: downstream,
                kind: "explicit_before".to_string(),
                weight: 900,
                reasons: vec![reason],
                shared_files: Vec::new(),
                shared_symbols: Vec::new(),
                shared_tests: Vec::new(),
                shared_config_files: Vec::new(),
                shared_semantic_refs: Vec::new(),
            };
            dependency_dag_push_edge(edges, seen, edge);
        }
    }
}
/// Adds a `worker_result_follow_up` edge (weight 700) from each profile to
/// every follow-up id listed in its worker feedback.
///
/// Follow-up ids that are not members of `target_ids` are skipped. Edges are
/// inserted through `dependency_dag_push_edge`.
fn dependency_dag_worker_follow_up_edges(
    profiles: &[DependencyDagProfile],
    target_ids: &BTreeSet<String>,
    edges: &mut Vec<DependencyDagEdge>,
    seen: &mut BTreeSet<(String, String, String)>,
) {
    for profile in profiles {
        for follow_up in &profile.worker_feedback.follow_up_ids {
            if !target_ids.contains(follow_up) {
                continue;
            }
            let reason = format!(
                "worker_result for #{} references follow-up #{}",
                profile.id, follow_up
            );
            let edge = DependencyDagEdge {
                from: profile.id.clone(),
                to: follow_up.clone(),
                kind: "worker_result_follow_up".to_string(),
                weight: 700,
                reasons: vec![reason],
                shared_files: Vec::new(),
                shared_symbols: Vec::new(),
                shared_tests: Vec::new(),
                shared_config_files: Vec::new(),
                shared_semantic_refs: Vec::new(),
            };
            dependency_dag_push_edge(edges, seen, edge);
        }
    }
}
/// Adds an edge between every pair of profiles that share at least one file,
/// symbol, expected test, config file, or semantic reference.
///
/// Pairs are visited in slice order and the edge always points from the
/// earlier profile to the later one. The edge kind is `semantic_relation`
/// when semantic references are the only overlap and `shared_resource`
/// otherwise. The weight sums 100 per shared file and per shared config
/// file, 40 per shared symbol, 10 per shared test, and 5 per shared semantic
/// reference.
fn dependency_dag_overlap_edges(
    profiles: &[DependencyDagProfile],
    edges: &mut Vec<DependencyDagEdge>,
    seen: &mut BTreeSet<(String, String, String)>,
) {
    for (left_idx, left) in profiles.iter().enumerate() {
        for right in &profiles[left_idx + 1..] {
            let shared_files = sorted_intersection(&left.source_files, &right.source_files);
            let shared_symbols = sorted_intersection(&left.source_symbols, &right.source_symbols);
            let shared_tests = sorted_intersection(&left.expected_tests, &right.expected_tests);
            let shared_config_files = sorted_intersection(&left.config_files, &right.config_files);
            let left_semantic = left.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
            let right_semantic = right.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
            let shared_semantic_refs = sorted_intersection(&left_semantic, &right_semantic);

            // True when nothing concrete (file/symbol/test/config) overlaps.
            let no_resource_overlap = shared_files.is_empty()
                && shared_symbols.is_empty()
                && shared_tests.is_empty()
                && shared_config_files.is_empty();
            if no_resource_overlap && shared_semantic_refs.is_empty() {
                continue;
            }
            let kind = if no_resource_overlap {
                "semantic_relation"
            } else {
                "shared_resource"
            };

            // One optional reason per overlap category, in a fixed order.
            let reasons: Vec<String> = vec![
                (!shared_files.is_empty())
                    .then(|| format!("shared files: {}", shared_files.join(", "))),
                (!shared_symbols.is_empty())
                    .then(|| format!("shared symbols: {}", shared_symbols.join(", "))),
                (!shared_tests.is_empty())
                    .then(|| format!("shared tests: {}", shared_tests.join(" && "))),
                (!shared_config_files.is_empty()).then(|| {
                    format!(
                        "shared config files: {}",
                        shared_config_files.join(", ")
                    )
                }),
                (!shared_semantic_refs.is_empty()).then(|| {
                    format!(
                        "shared semantic refs: {}",
                        shared_semantic_refs.join(", ")
                    )
                }),
            ]
            .into_iter()
            .flatten()
            .collect();

            let weight = shared_files.len() * 100
                + shared_config_files.len() * 100
                + shared_symbols.len() * 40
                + shared_tests.len() * 10
                + shared_semantic_refs.len() * 5;

            let edge = DependencyDagEdge {
                from: left.id.clone(),
                to: right.id.clone(),
                kind: kind.to_string(),
                weight,
                reasons,
                shared_files,
                shared_symbols,
                shared_tests,
                shared_config_files,
                shared_semantic_refs,
            };
            dependency_dag_push_edge(edges, seen, edge);
        }
    }
}
/// Groups `targets` into ordered batches so that a target is only released
/// after every target with an edge into it has been released.
///
/// Only edges whose endpoints are both in `targets` are considered, and each
/// `(from, to)` pair is counted once regardless of edge kind. Within a batch,
/// targets keep their position from `targets`.
///
/// Targets that are never released are returned as `blocked_nodes`;
/// `has_cycles` is set when any exist, and `cycle_edges` lists every input
/// edge whose endpoints are both blocked.
fn dependency_dag_topo_batches(
    targets: &[String],
    edges: &[DependencyDagEdge],
) -> (Vec<DependencyDagTopoBatch>, DependencyDagCycleDiagnostics) {
    let known: BTreeSet<String> = targets.iter().cloned().collect();

    // Position of each target in the input, plus its count of unreleased
    // predecessors (starts at zero for everyone).
    let mut position: BTreeMap<String, usize> = BTreeMap::new();
    let mut pending_inputs: BTreeMap<String, usize> = BTreeMap::new();
    for (idx, id) in targets.iter().enumerate() {
        position.insert(id.clone(), idx);
        pending_inputs.insert(id.clone(), 0usize);
    }

    let mut successors: BTreeMap<String, Vec<String>> = BTreeMap::new();
    let mut counted_pairs: BTreeSet<(String, String)> = BTreeSet::new();
    for edge in edges {
        let inside = known.contains(&edge.from) && known.contains(&edge.to);
        if !inside || !counted_pairs.insert((edge.from.clone(), edge.to.clone())) {
            continue;
        }
        *pending_inputs.entry(edge.to.clone()).or_default() += 1;
        successors
            .entry(edge.from.clone())
            .or_default()
            .push(edge.to.clone());
    }
    for list in successors.values_mut() {
        list.sort_by_key(|id| position.get(id).copied().unwrap_or(usize::MAX));
        list.dedup();
    }

    let mut released: BTreeSet<String> = BTreeSet::new();
    let mut batches = Vec::new();
    loop {
        let mut ready: Vec<String> = targets
            .iter()
            .filter(|id| {
                !released.contains(*id) && pending_inputs.get(*id).copied().unwrap_or(0) == 0
            })
            .cloned()
            .collect();
        ready.sort_by_key(|id| position.get(id).copied().unwrap_or(usize::MAX));
        if ready.is_empty() {
            break;
        }
        for id in &ready {
            released.insert(id.clone());
            if let Some(nexts) = successors.get(id) {
                for next in nexts {
                    if let Some(count) = pending_inputs.get_mut(next) {
                        *count = count.saturating_sub(1);
                    }
                }
            }
        }
        let batch = batches.len() + 1;
        batches.push(DependencyDagTopoBatch {
            batch,
            targets: ready,
        });
    }

    // Whatever was never released is stuck behind unreleased predecessors.
    let blocked_nodes: Vec<String> = targets
        .iter()
        .filter(|id| !released.contains(*id))
        .cloned()
        .collect();
    let blocked_lookup: BTreeSet<String> = blocked_nodes.iter().cloned().collect();
    let cycle_edges: Vec<_> = edges
        .iter()
        .filter(|edge| blocked_lookup.contains(&edge.from) && blocked_lookup.contains(&edge.to))
        .cloned()
        .collect();
    let diagnostics = DependencyDagCycleDiagnostics {
        has_cycles: !blocked_nodes.is_empty(),
        blocked_nodes,
        cycle_edges,
    };
    (batches, diagnostics)
}
/// Builds the `tsift dependency-dag` command line that reproduces this run.
///
/// The path, the optional scope, and every target are passed through
/// `shell_quote`. The result is a single-element vector.
fn dependency_dag_replay_commands(
    path: &Path,
    scope: Option<&str>,
    targets: &[String],
    depth: usize,
    limit: usize,
) -> Vec<String> {
    let quoted_targets: Vec<_> = targets.iter().map(|target| shell_quote(target)).collect();
    let target_args = quoted_targets.join(" ");
    let scope_arg = match scope {
        Some(scope) => format!(" --scope {}", shell_quote(scope)),
        None => String::new(),
    };
    let mut command = format!(
        "tsift dependency-dag --path {}{} --depth {} --limit {} --json",
        shell_quote(path.to_string_lossy().as_ref()),
        scope_arg,
        depth,
        limit
    );
    if !target_args.is_empty() {
        command.push(' ');
        command.push_str(&target_args);
    }
    vec![command]
}
/// Builds the dependency-DAG report for `raw_targets` under `path`.
///
/// The steps run in this order: resolve the project root, refresh the graph-db
/// projection, open it read-only, check projection freshness, resolve the
/// backlog nodes for `raw_targets`, build one profile per node, derive the
/// explicit / worker-follow-up / overlap edges, sort them, compute the
/// topological batches, and assemble the report.
///
/// # Errors
///
/// Propagates the failure of any step above, and bails when the projection
/// freshness check reports `fail_closed`.
fn build_dependency_dag_report(
path: &Path,
scope: Option<&str>,
raw_targets: &[String],
depth: usize,
limit: usize,
) -> Result<DependencyDagReport> {
let root = lint::resolve_project_root_or_canonical_path(path)?;
// Refresh the projection before opening it for reading.
write_traversal_graph_store(&root, path, scope)
.with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
let graph_db = graph_substrate_db_path(&root, scope);
let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
.with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
let mut warnings = Vec::new();
// A read-only recovery is surfaced as a warning rather than an error.
if let Some(recovery) = store.read_only_recovery() {
warnings.push(graph_db_read_recovery_diagnostic(recovery));
}
// "root" is the scope name used for the freshness check when no scope is given.
let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
if freshness.fail_closed {
bail!(
"dependency-dag graph projection failed closed: {}; repair: {}",
freshness.diagnostics.join("; "),
graph_db_repair_commands(&root, scope).join("; ")
);
}
let target_nodes = dependency_dag_resolve_backlog_nodes(&root, path, &store, raw_targets)?;
let graph_nodes = store.all_nodes()?;
let graph_edges = store.all_edges()?;
// Index every graph node by id for the per-target profile builder.
let graph_nodes_by_id = graph_nodes
.into_iter()
.map(|node| (node.id.clone(), node))
.collect::<BTreeMap<_, _>>();
// Collecting into `Result` stops at the first profile that fails to build.
let profiles = target_nodes
.iter()
.map(|node| {
dependency_dag_node_profile(
&root,
&store,
node,
&graph_nodes_by_id,
&graph_edges,
depth,
limit,
)
})
.collect::<Result<Vec<_>>>()?;
let targets = profiles
.iter()
.map(|profile| profile.id.clone())
.collect::<Vec<_>>();
let target_ids = targets.iter().cloned().collect::<BTreeSet<_>>();
let mut edges = Vec::new();
// `seen_edges` is shared by all three passes, so a (from, to, kind) triple is added once.
let mut seen_edges = BTreeSet::new();
dependency_dag_explicit_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
dependency_dag_worker_follow_up_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
dependency_dag_overlap_edges(&profiles, &mut edges, &mut seen_edges);
// Order edges by (from, to, kind).
edges.sort_by(|left, right| {
left.from
.cmp(&right.from)
.then(left.to.cmp(&right.to))
.then(left.kind.cmp(&right.kind))
});
let (topo_batches, cycle_diagnostics) = dependency_dag_topo_batches(&targets, &edges);
// Convert the working profiles into report nodes; set-valued fields go through `sorted_set`.
let nodes = profiles
.into_iter()
.map(|profile| DependencyDagNode {
id: profile.id,
graph_node_id: profile.graph_node_id,
label: profile.label,
path: profile.path,
line: profile.line,
detail: profile.detail,
source_files: sorted_set(&profile.source_files),
source_symbols: sorted_set(&profile.source_symbols),
config_files: sorted_set(&profile.config_files),
expected_tests: sorted_set(&profile.expected_tests),
semantic_refs: profile.semantic_refs.into_values().collect(),
worker_feedback: profile.worker_feedback,
})
.collect::<Vec<_>>();
// NOTE(review): `content_hash` is presumably an `Option`, giving a zero-or-one element list — confirm.
let projection_hashes = freshness
.content_hash
.clone()
.into_iter()
.collect::<Vec<_>>();
let replay_commands = dependency_dag_replay_commands(path, scope, &targets, depth, limit);
let repair_commands = graph_db_repair_commands(&root, scope);
let summary = DependencyDagSummary {
nodes: nodes.len(),
edges: edges.len(),
topo_batches: topo_batches.len(),
has_cycles: cycle_diagnostics.has_cycles,
};
Ok(DependencyDagReport {
contract_version: DEPENDENCY_DAG_CONTRACT_VERSION,
root: root.to_string_lossy().to_string(),
scope: scope.map(str::to_string),
path: path.to_string_lossy().to_string(),
targets,
projection_freshness: freshness,
projection_hashes,
nodes,
edges,
topo_batches,
cycle_diagnostics,
summary,
replay_commands,
repair_commands,
warnings,
})
}
/// Prints `report` to stdout as plain text.
///
/// `compact` selects a one-line header; otherwise a multi-line header is
/// printed. Both modes then list the batches, the edges with their reasons,
/// the blocked nodes (only when cycles were detected), and finally the
/// replay commands, repair commands, and warnings.
fn print_dependency_dag_human(report: &DependencyDagReport, compact: bool) {
    let cycles = &report.cycle_diagnostics;

    if !compact {
        println!("Dependency DAG");
        println!(" targets: {}", report.targets.join(", "));
        println!(" edges: {}", report.edges.len());
        println!(" cycles: {}", cycles.has_cycles);
    } else {
        println!(
            "dependency-dag targets:{} edges:{} batches:{} cycles:{}",
            report.targets.len(),
            report.edges.len(),
            report.topo_batches.len(),
            cycles.has_cycles
        );
    }

    for batch in report.topo_batches.iter() {
        println!("batch #{}: {}", batch.batch, batch.targets.join(", "));
    }

    for edge in report.edges.iter() {
        println!(
            "edge {} -> {} kind:{} weight:{}",
            edge.from, edge.to, edge.kind, edge.weight
        );
        for reason in edge.reasons.iter() {
            println!(" reason: {reason}");
        }
    }

    if cycles.has_cycles {
        println!("cycle blocked nodes: {}", cycles.blocked_nodes.join(", "));
    }

    for command in report.replay_commands.iter() {
        println!("replay: {command}");
    }
    for command in report.repair_commands.iter() {
        println!("repair: {command}");
    }
    for warning in report.warnings.iter() {
        println!("warning: {warning}");
    }
}
/// Entry point for the `dependency-dag` command.
///
/// Builds the report and prints it either through `print_json_or_envelope`
/// (when `format.json_output` is set) or as plain text.
///
/// # Errors
///
/// Propagates failures from `build_dependency_dag_report` and
/// `print_json_or_envelope`.
fn cmd_dependency_dag(
    path: &Path,
    scope: Option<&str>,
    raw_targets: &[String],
    depth: usize,
    limit: usize,
    format: OutputFormat,
) -> Result<()> {
    let report = build_dependency_dag_report(path, scope, raw_targets, depth, limit)?;

    if !format.json_output {
        print_dependency_dag_human(&report, format.compact);
        return Ok(());
    }

    let target_count = report.targets.len();
    let edge_count = report.edges.len();
    let batch_count = report.topo_batches.len();
    let has_cycles = report.cycle_diagnostics.has_cycles;

    let summary = ToolEnvelopeSummary {
        text: format!(
            "Dependency DAG for {} target(s): edges={} batches={} cycles={}",
            target_count, edge_count, batch_count, has_cycles
        ),
        metrics: vec![
            envelope_metric("targets", target_count),
            envelope_metric("edges", edge_count),
            envelope_metric("topo_batches", batch_count),
            envelope_metric("has_cycles", has_cycles),
        ],
    };
    print_json_or_envelope(
        &report,
        &format,
        "dependency-dag",
        "topological-planning",
        summary,
        has_cycles,
        report.replay_commands.clone(),
    )
}
/// Computes a log digest for `input` and prints it to stdout in the requested
/// `format`.
///
/// The renderings are checked in this order:
///
/// 1. `format.json_output` — the report serialized through `to_json_schema`;
/// 2. `format.compact` — one header line followed by one line per signal,
///    repeated line, symbol reference, and warning;
/// 3. otherwise — a sectioned plain-text report.
///
/// # Errors
///
/// Propagates failures from `log_digest::compute` and `to_json_schema`.
pub(crate) fn render_log_digest_from_input(
    path: &Path,
    input: &str,
    format: OutputFormat,
) -> Result<()> {
    let report = log_digest::compute(path, input)?;

    // --- JSON ---------------------------------------------------------
    if format.json_output {
        let rendered = to_json_schema(
            &report,
            format.pretty,
            format.terse,
            format.ultra_terse,
            format.schema,
        )?;
        println!("{}", rendered);
        return Ok(());
    }

    // --- Compact ------------------------------------------------------
    if format.compact {
        println!(
            "log lines:{} signals:{} repeats:{} files:{} syms:{} stacks:{}",
            report.non_empty_lines,
            report.signal_groups,
            report.repeated_line_groups,
            report.file_ref_groups,
            report.symbol_ref_groups,
            report.stack_groups
        );
        for signal in report.signals.iter() {
            let location = match &signal.path {
                Some(path) => match signal.line {
                    Some(line) => format!("{path}:{line}"),
                    None => path.clone(),
                },
                None => "-".to_string(),
            };
            println!(
                "{} sev:{} count:{} sums:{} msg:{}",
                location,
                signal.severity,
                signal.occurrences,
                log_digest_summary_label(signal.summary_state),
                truncate_for_compact(&signal.message, 80)
            );
        }
        for repeated in report.repeated_lines.iter() {
            println!(
                "repeat count:{} line:{}",
                repeated.occurrences,
                truncate_for_compact(&repeated.line, 80)
            );
        }
        for symbol in report.symbol_refs.iter() {
            println!(
                "sym:{} count:{} sums:{}",
                symbol.symbol,
                symbol.occurrences,
                log_digest_summary_label(symbol.summary_state)
            );
        }
        for warning in report.warnings.iter() {
            println!("warning: {warning}");
        }
        return Ok(());
    }

    // --- Full plain-text report --------------------------------------
    println!("Log digest");
    println!(" lines: {}", report.total_lines);
    println!(" non-empty lines: {}", report.non_empty_lines);
    println!(" signal groups: {}", report.signal_groups);
    println!(" repeated lines: {}", report.repeated_line_groups);
    println!(
        " repeated line instances: {}",
        report.repeated_line_occurrences
    );
    println!(" file refs: {}", report.file_ref_groups);
    println!(" symbol refs: {}", report.symbol_ref_groups);
    println!(" stack groups: {}", report.stack_groups);

    if !report.signals.is_empty() {
        println!();
        println!("Signals:");
        for signal in report.signals.iter() {
            // Print the most specific anchor available.
            match (&signal.path, signal.line, signal.column) {
                (None, _, _) => println!("(no file anchor)"),
                (Some(path), None, _) => println!("{path}"),
                (Some(path), Some(line), None) => println!("{path}:{line}"),
                (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
            }
            println!(" severity: {}", signal.severity);
            println!(" occurrences: {}", signal.occurrences);
            println!(" message: {}", signal.message);
            println!(
                " cached summaries: {}",
                log_digest_summary_label(signal.summary_state)
            );
            for summary in signal.current_summaries.iter() {
                println!(
                    " - {}: {}",
                    summary.symbol,
                    truncate_for_compact(&summary.summary, 160)
                );
            }
        }
    }

    if !report.repeated_lines.is_empty() {
        println!();
        println!("Repeated lines:");
        for repeated in report.repeated_lines.iter() {
            println!(
                " {}x {}",
                repeated.occurrences,
                truncate_for_compact(&repeated.line, 180)
            );
        }
    }

    if !report.file_refs.is_empty() {
        println!();
        println!("Anchored files:");
        for file_ref in report.file_refs.iter() {
            match (file_ref.line, file_ref.column) {
                (None, _) => println!("{}", file_ref.path),
                (Some(line), None) => println!("{}:{}", file_ref.path, line),
                (Some(line), Some(column)) => println!("{}:{}:{}", file_ref.path, line, column),
            }
            println!(" occurrences: {}", file_ref.occurrences);
            println!(
                " cached summaries: {}",
                log_digest_summary_label(file_ref.summary_state)
            );
            for summary in file_ref.current_summaries.iter() {
                println!(
                    " - {}: {}",
                    summary.symbol,
                    truncate_for_compact(&summary.summary, 160)
                );
            }
        }
    }

    if !report.symbol_refs.is_empty() {
        println!();
        println!("Symbol candidates:");
        for symbol in report.symbol_refs.iter() {
            println!("{}", symbol.symbol);
            println!(" occurrences: {}", symbol.occurrences);
            println!(
                " cached summaries: {}",
                log_digest_summary_label(symbol.summary_state)
            );
            for summary in symbol.current_summaries.iter() {
                println!(
                    " - {}: {}",
                    summary.symbol,
                    truncate_for_compact(&summary.summary, 160)
                );
            }
        }
    }

    if !report.stack_traces.is_empty() {
        println!();
        println!("Stack groups:");
        for stack in report.stack_traces.iter() {
            println!(" occurrences: {}", stack.occurrences);
            for frame in stack.frames.iter() {
                println!(" - {}", frame);
            }
        }
    }

    for warning in report.warnings.iter() {
        println!("warning: {warning}");
    }
    Ok(())
}
/// Maps a metric trend to its lowercase display label.
///
/// Note that `Unknown` is rendered as `"changed"`.
pub(crate) fn metric_digest_trend_label(trend: metric_digest::MetricDigestTrend) -> &'static str {
    let label = match trend {
        metric_digest::MetricDigestTrend::Flat => "flat",
        metric_digest::MetricDigestTrend::Improved => "improved",
        metric_digest::MetricDigestTrend::Regressed => "regressed",
        metric_digest::MetricDigestTrend::Unknown => "changed",
    };
    label
}
/// Maps a community-search gate decision to its lowercase display label
/// (`"pass"` or `"block"`).
pub(crate) fn metric_digest_gate_label(
    decision: metric_digest::CommunitySearchGateDecision,
) -> &'static str {
    let label = match decision {
        metric_digest::CommunitySearchGateDecision::Block => "block",
        metric_digest::CommunitySearchGateDecision::Pass => "pass",
    };
    label
}
/// Entry point for the `dci-benchmark` command.
///
/// Reads the fixture at `fixture_path`, computes the benchmark report, and
/// prints it to stdout as JSON (`format.json_output`), as compact one-line
/// rows (`format.compact`), or as a sectioned plain-text report.
///
/// # Errors
///
/// Fails when the fixture cannot be read, when `dci_benchmark::compute`
/// fails, or when JSON serialization fails.
fn cmd_dci_benchmark(fixture_path: &Path, format: OutputFormat) -> Result<()> {
    let fixture = fs::read_to_string(fixture_path)
        .with_context(|| format!("reading dci-benchmark fixture: {}", fixture_path.display()))?;
    let report = dci_benchmark::compute(&fixture)?;

    // --- JSON ---------------------------------------------------------
    if format.json_output {
        let rendered = to_json_schema(
            &report,
            format.pretty,
            format.terse,
            format.ultra_terse,
            format.schema,
        )?;
        println!("{}", rendered);
        return Ok(());
    }

    // --- Compact ------------------------------------------------------
    if format.compact {
        println!(
            "dci tasks:{} strategies:{} warnings:{}",
            report.tasks_loaded,
            report.strategies_compared,
            report.warnings.len()
        );
        for summary in report.strategy_summaries.iter() {
            println!(
                "{} rank:{} loc:{}/{} rate:{} useful_hits:{} zero_output:{} calls:{} latency_ms:{} tokens:{} output_tokens:{}",
                summary.strategy,
                summary.rank,
                summary.localized,
                summary.task_runs,
                dci_benchmark::format_number(summary.localization_rate * 100.0),
                dci_benchmark::format_number(summary.avg_useful_hits),
                dci_benchmark::format_number(summary.zero_output_rate * 100.0),
                dci_benchmark::format_number(summary.avg_tool_calls),
                dci_benchmark::format_number(summary.avg_latency_ms),
                dci_benchmark::format_number(summary.avg_estimated_tokens),
                dci_benchmark::format_number(summary.avg_output_tokens)
            );
        }
        match &report.memory_retrieval_gate {
            Some(gate) => println!(
                "memory_retrieval_gate decision:{} baseline:{} min_avg_useful_hits:{} max_zero_output_failures:{} diagnostics:{}",
                gate.decision,
                gate.baseline_strategy,
                dci_benchmark::format_number(gate.min_avg_useful_hits),
                gate.max_zero_output_failures,
                gate.diagnostics.len()
            ),
            None => {}
        }
        for warning in report.warnings.iter() {
            println!("warning: {warning}");
        }
        return Ok(());
    }

    // --- Full plain-text report --------------------------------------
    println!("DCI benchmark");
    match &report.description {
        Some(description) => println!(" description: {}", description),
        None => {}
    }
    println!(" tasks loaded: {}", report.tasks_loaded);
    println!(" strategies compared: {}", report.strategies_compared);

    println!();
    println!("Strategy summary:");
    for summary in report.strategy_summaries.iter() {
        println!(
            " #{} {}: localization {}/{} ({:.1}%), avg useful hits {}, zero output {:.1}%, avg calls {}, avg latency {}ms, avg tokens {}, avg output tokens {}",
            summary.rank,
            summary.strategy,
            summary.localized,
            summary.task_runs,
            summary.localization_rate * 100.0,
            dci_benchmark::format_number(summary.avg_useful_hits),
            summary.zero_output_rate * 100.0,
            dci_benchmark::format_number(summary.avg_tool_calls),
            dci_benchmark::format_number(summary.avg_latency_ms),
            dci_benchmark::format_number(summary.avg_estimated_tokens),
            dci_benchmark::format_number(summary.avg_output_tokens)
        );
    }

    if let Some(gate) = &report.memory_retrieval_gate {
        println!();
        println!("Memory retrieval gate:");
        println!(" decision: {}", gate.decision);
        println!(
            " baseline: {}, min avg useful hits {}, max zero-output failures {}",
            gate.baseline_strategy,
            dci_benchmark::format_number(gate.min_avg_useful_hits),
            gate.max_zero_output_failures
        );
        for row in gate.rows.iter() {
            println!(
                " {}: status {}, avg useful hits {}, zero-output failures {}",
                row.strategy,
                row.status,
                dci_benchmark::format_number(row.avg_useful_hits),
                row.zero_output_failures
            );
        }
        for diagnostic in gate.diagnostics.iter() {
            println!(" diagnostic: {diagnostic}");
        }
    }

    println!();
    println!("Task winners:");
    for row in report.task_rows.iter() {
        // Optional label is shown in parentheses after the task id.
        let label = match row.label.as_ref() {
            Some(value) => format!(" ({value})"),
            None => String::new(),
        };
        println!(" {}{}", row.task_id, label);
        println!(" localized: {}", row.best_localization.join(", "));
        println!(" most useful hits: {}", row.most_useful_hits.join(", "));
        println!(
            " lowest calls: {}, lowest latency: {}, lowest tokens: {}, lowest output tokens: {}",
            row.lowest_tool_calls.as_deref().unwrap_or("-"),
            row.lowest_latency.as_deref().unwrap_or("-"),
            row.lowest_token_budget.as_deref().unwrap_or("-"),
            row.lowest_output_tokens.as_deref().unwrap_or("-")
        );
        if !row.zero_output_failures.is_empty() {
            println!(" zero output: {}", row.zero_output_failures.join(", "));
        }
    }

    for warning in report.warnings.iter() {
        println!("warning: {warning}");
    }
    Ok(())
}
/// Formats `value` as a short human-readable count.
///
/// Values below 1,000 are printed verbatim. Larger values are scaled to
/// thousands (`K`) or millions (`M`) and shown with one decimal place.
///
/// Counts just under one million whose thousands figure would round up to
/// `1000.0` are promoted to `1.0M`, so the output never reads `1000.0K`.
pub(crate) fn format_compact_count(value: u64) -> String {
    if value >= 1_000_000 {
        return format!("{:.1}M", value as f64 / 1_000_000.0);
    }
    if value < 1_000 {
        return value.to_string();
    }
    let thousands = format!("{:.1}", value as f64 / 1_000.0);
    // One-decimal rounding can carry into the next unit (e.g. 999_999 ->
    // "1000.0"); switch suffix instead of printing a four-digit K figure.
    if thousands == "1000.0" {
        "1.0M".to_string()
    } else {
        format!("{thousands}K")
    }
}
/// Runs `shell_command` through the digest runner and reports a test or log
/// digest of what it printed.
///
/// `kind` is parsed with `DigestRunnerKind::parse` and selects test or log
/// handling. The command is executed by `run_digest_runner_command`, and its
/// captured stdout is decoded as lossy UTF-8.
///
/// When both `format.json_output` and `format.envelope` are set, non-blank
/// output is persisted with `persist_transcript_artifact` and a JSON envelope
/// is printed through `print_json_or_envelope`. In every other mode the digest
/// is rendered by `render_test_digest_from_input` /
/// `render_log_digest_from_input`, or a "No … output captured." line is
/// printed when the output is blank.
///
/// After reporting, the wrapped command's status is propagated: `Ok(())` on
/// success, `std::process::exit` with the command's exit code on failure, and
/// an error when the status carries no exit code.
///
/// # Errors
///
/// Fails when `kind` does not parse, the artifact root cannot be resolved, the
/// command cannot be run, a digest or artifact step fails, or the command
/// ended without an exit code.
fn cmd_digest_runner(
kind: &str,
path: &Path,
runner: Option<&str>,
shell_command: &str,
format: OutputFormat,
) -> Result<()> {
let digest_kind = DigestRunnerKind::parse(kind)?;
let root = transcript_artifact_root(path)?;
let execution = run_digest_runner_command(shell_command)?;
let output = &execution.output;
let captured = String::from_utf8_lossy(&output.stdout).into_owned();
// -1 stands in when the status reports no exit code.
let exit_code = output.status.code().unwrap_or(-1);
if format.json_output && format.envelope {
// Key built from the kind, both command strings, and the captured output;
// it is passed to `stable_handle` and `persist_transcript_artifact` below.
let artifact_key = format!(
"{}:{}:{}:{}",
digest_kind.as_str(),
shell_command,
execution.executed_command,
captured
);
// Blank output is not persisted.
let artifact = if captured.trim().is_empty() {
None
} else {
// `expand` is the follow-up command that re-digests the stored output.
// NOTE(review): the `.tsift/artifacts/<handle>.<suffix>` path is presumably the
// one `persist_transcript_artifact` writes — confirm against that helper.
let (suffix, expand) = match digest_kind {
DigestRunnerKind::Test => (
"test.log",
format!(
"tsift test-digest --path {} --input {}{} --json",
shell_quote(root.to_string_lossy().as_ref()),
shell_quote(
root.join(".tsift/artifacts")
.join(format!("{}.test.log", stable_handle("tart", &artifact_key)))
.to_string_lossy()
.as_ref()
),
runner
.map(|value| format!(" --runner {}", shell_quote(value)))
.unwrap_or_default()
),
),
DigestRunnerKind::Log => (
"log",
format!(
"tsift log-digest --path {} --input {} --json",
shell_quote(root.to_string_lossy().as_ref()),
shell_quote(
root.join(".tsift/artifacts")
.join(format!("{}.log", stable_handle("tart", &artifact_key)))
.to_string_lossy()
.as_ref()
)
),
),
};
Some(persist_transcript_artifact(
&root,
"tart",
suffix,
&artifact_key,
&captured,
expand,
)?)
};
let filter_report = execution.filter.as_ref().map(DigestRunnerFilter::to_json);
match digest_kind {
DigestRunnerKind::Test => {
let digest_report = test_digest::compute(path, &captured, runner)?;
let report = serde_json::json!({
"kind": digest_kind.as_str(),
"command": shell_command,
"executed_command": execution.executed_command,
"exit_code": exit_code,
"success": output.status.success(),
"filter": filter_report,
"artifact": artifact,
"digest": digest_report,
});
// Follow-ups: the artifact's expand command (if any), then a rewrite hint.
let mut follow_up = artifact
.as_ref()
.map(|entry| vec![entry.expand.clone()])
.unwrap_or_default();
follow_up.push(format!(
"tsift rewrite --run {}",
shell_quote(shell_command)
));
let summary_text = if output.status.success() && digest_report.failures == 0 {
format!("test run passed for {}", runner.unwrap_or("auto"))
} else {
format!("test run captured {} failure(s)", digest_report.failures)
};
print_json_or_envelope(
&report,
&format,
"digest-runner",
"test-run",
ToolEnvelopeSummary {
text: summary_text,
metrics: vec![
envelope_metric("runner", &digest_report.runner),
envelope_metric("exit_code", exit_code),
envelope_metric("filter", execution.filter_label()),
envelope_metric("failures", digest_report.failures),
envelope_metric("groups", digest_report.grouped_failures),
envelope_metric(
"artifact",
artifact
.as_ref()
.map(|entry| entry.handle.as_str())
.unwrap_or("-"),
),
],
},
false,
follow_up,
)?;
}
DigestRunnerKind::Log => {
let digest_report = log_digest::compute(path, &captured)?;
let report = serde_json::json!({
"kind": digest_kind.as_str(),
"command": shell_command,
"executed_command": execution.executed_command,
"exit_code": exit_code,
"success": output.status.success(),
"filter": filter_report,
"artifact": artifact,
"digest": digest_report,
});
// Follow-ups: the artifact's expand command (if any), then a rewrite hint.
let mut follow_up = artifact
.as_ref()
.map(|entry| vec![entry.expand.clone()])
.unwrap_or_default();
follow_up.push(format!(
"tsift rewrite --run {}",
shell_quote(shell_command)
));
let summary_text = if output.status.success() && digest_report.signal_groups == 0 {
"command finished without log signals".to_string()
} else {
format!(
"command emitted {} log signal group(s)",
digest_report.signal_groups
)
};
print_json_or_envelope(
&report,
&format,
"digest-runner",
"command-run",
ToolEnvelopeSummary {
text: summary_text,
metrics: vec![
envelope_metric("exit_code", exit_code),
envelope_metric("filter", execution.filter_label()),
envelope_metric("signals", digest_report.signal_groups),
envelope_metric("file_refs", digest_report.file_ref_groups),
envelope_metric(
"artifact",
artifact
.as_ref()
.map(|entry| entry.handle.as_str())
.unwrap_or("-"),
),
],
},
false,
follow_up,
)?;
}
}
// Propagate the wrapped command's status once the envelope has been printed.
if output.status.success() {
return Ok(());
}
if let Some(code) = output.status.code() {
std::process::exit(code);
}
bail!("digest-wrapped command terminated by signal: {shell_command}");
}
// Non-envelope modes: render the digest directly (or note that nothing was captured).
if captured.trim().is_empty() {
let label = match digest_kind {
DigestRunnerKind::Test => "test",
DigestRunnerKind::Log => "log",
};
println!("No {label} output captured.");
} else {
match digest_kind {
DigestRunnerKind::Test => {
render_test_digest_from_input(path, &captured, runner, format)?
}
DigestRunnerKind::Log => render_log_digest_from_input(path, &captured, format)?,
}
}
// Same status propagation as the envelope branch above.
if output.status.success() {
return Ok(());
}
if let Some(code) = output.status.code() {
std::process::exit(code);
}
bail!("digest-wrapped command terminated by signal: {shell_command}");
}
/// Outcome of running a digest-wrapped shell command.
struct DigestRunnerExecution {
    /// Process output collected from the executed command.
    output: std::process::Output,
    /// The command string that was handed to the shell.
    executed_command: String,
    /// The rewrite applied to the requested command, if any.
    filter: Option<DigestRunnerFilter>,
}

impl DigestRunnerExecution {
    /// Returns the filter's tool name, or `"none"` when no filter is attached.
    fn filter_label(&self) -> &'static str {
        match &self.filter {
            Some(filter) => filter.tool,
            None => "none",
        }
    }
}
/// A command rewrite produced by an external filter tool.
struct DigestRunnerFilter {
    /// Name of the tool that produced the rewrite.
    tool: &'static str,
    /// The rewritten command string.
    command: String,
}

impl DigestRunnerFilter {
    /// Serializes the filter as a JSON object with `tool` and `command` keys.
    fn to_json(&self) -> serde_json::Value {
        let Self { tool, command } = self;
        serde_json::json!({
            "tool": tool,
            "command": command,
        })
    }
}
/// Runs `shell_command` (or its `rtk` rewrite, when one is available) under
/// `sh -lc` and collects its output.
///
/// The command is wrapped as `(<command>) 2>&1`, so the shell redirects the
/// command's stderr into its stdout.
///
/// # Errors
///
/// Fails when the shell process cannot be run.
fn run_digest_runner_command(shell_command: &str) -> Result<DigestRunnerExecution> {
    let filter = rtk_rewrite_for_digest_runner(shell_command);
    let executed_command = match &filter {
        Some(rewrite) => rewrite.command.clone(),
        None => shell_command.to_string(),
    };
    let script = format!("({executed_command}) 2>&1");
    let output = Command::new("sh")
        .arg("-lc")
        .arg(script)
        .stdout(Stdio::piped())
        .output()
        .with_context(|| format!("running digest-wrapped command: {executed_command}"))?;
    Ok(DigestRunnerExecution {
        output,
        executed_command,
        filter,
    })
}
/// Asks the external `rtk` tool to rewrite `shell_command`.
///
/// Returns `None` when the command already starts with `rtk `, when no `rtk`
/// file is found on `PATH`, when `rtk rewrite` cannot be run or exits
/// unsuccessfully, or when its trimmed stdout is empty or identical to
/// `shell_command`.
fn rtk_rewrite_for_digest_runner(shell_command: &str) -> Option<DigestRunnerFilter> {
    let already_wrapped = shell_command.trim_start().starts_with("rtk ");
    if already_wrapped || find_command_on_path("rtk").is_none() {
        return None;
    }

    let rewrite = Command::new("rtk")
        .arg("rewrite")
        .arg(shell_command)
        .output()
        .ok()?;
    if !rewrite.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&rewrite.stdout);
    let rewritten = stdout.trim();
    // An empty or unchanged rewrite means there is nothing to substitute.
    let unchanged = rewritten.is_empty() || rewritten == shell_command;
    if unchanged {
        None
    } else {
        Some(DigestRunnerFilter {
            tool: "rtk",
            command: rewritten.to_string(),
        })
    }
}
/// Looks for a file named `command` in the directories listed in the `PATH`
/// environment variable and returns the first candidate for which
/// `Path::is_file` is true.
///
/// Returns `None` when `PATH` is unset or no directory contains such a file.
fn find_command_on_path(command: &str) -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    for dir in std::env::split_paths(&search_path) {
        let candidate = dir.join(command);
        if candidate.is_file() {
            return Some(candidate);
        }
    }
    None
}
/// Opens the summary database at `db_path` through
/// `SummaryDb::open_read_only_resilient`.
///
/// # Errors
///
/// Bails with a hint to run `tsift summarize --extract` when `db_path` does
/// not exist, and propagates any failure from opening the database.
pub(crate) fn open_existing_summary_db_read_only(db_path: &Path) -> Result<summarize::SummaryDb> {
    if db_path.exists() {
        summarize::SummaryDb::open_read_only_resilient(db_path)
    } else {
        bail!("no summaries.db found — run `tsift summarize --extract <path>` first")
    }
}
/// Returns `true` unless the report's index status is `Fresh`.
fn status_index_needs_fix(report: &status::StatusReport) -> bool {
    match &report.index {
        status::IndexStatus::Fresh { .. } => false,
        _ => true,
    }
}
/// Returns `true` unless the report's instruction status is `Current`.
fn status_instructions_need_fix(report: &status::StatusReport) -> bool {
    match &report.instructions {
        init::InstructionStatus::Current { .. } => false,
        _ => true,
    }
}
/// Applies the repairs behind `status --fix`, based on `report`.
///
/// In order:
///
/// 1. re-runs `init::init` when the instructions are not current;
/// 2. evicts cycle packet cache entries using the default TTL and size limit;
/// 3. when the index is not fresh, refreshes the root index, or — when
///    submodule scopes are configured — each submodule index whose source
///    root exists.
///
/// Progress is written to stderr.
///
/// # Errors
///
/// Propagates failures from `init::init`, configuration loading, and
/// `run_index_update`.
pub(crate) fn apply_status_fixes(root: &Path, report: &status::StatusReport) -> Result<()> {
    if status_instructions_need_fix(report) {
        eprintln!("status fix: refreshing tsift instructions");
        init::init(root, false, false)?;
    }

    let eviction = cycle_packet_cache::cycle_packet_cache_evict(
        root,
        cycle_packet_cache::CYCLE_PACKET_CACHE_DEFAULT_TTL_SECS,
        cycle_packet_cache::CYCLE_PACKET_CACHE_DEFAULT_MAX_BYTES,
    );
    if eviction.evicted_entries > 0 {
        eprintln!(
            "status fix: evicted {} cycle packet cache entry/entries ({} bytes, {} remaining)",
            eviction.evicted_entries,
            eviction.evicted_bytes,
            eviction.remaining_entries
        );
    }

    if !status_index_needs_fix(report) {
        return Ok(());
    }

    let scopes = config::Config::submodule_dirs(root)?;
    if scopes.is_empty() {
        // No submodule scopes: there is a single root index to refresh.
        eprintln!("status fix: refreshing index");
        let root_db = root.join(".tsift/index.db");
        run_index_update(
            &root_db,
            root,
            "status --fix refreshing index".to_string(),
            root,
            None,
            false,
            false,
        )?;
        return Ok(());
    }

    let cfg = config::Config::load(root)?;
    for scope in scopes {
        if scope.source_root.exists() {
            eprintln!("status fix: refreshing submodule `{}` index", scope.id);
            let scope_db = cfg.db_path_for(root, &scope.id);
            run_index_update(
                &scope_db,
                &scope.source_root,
                format!("status --fix refreshing submodule `{}` index", scope.id),
                root,
                Some(scope.id.as_str()),
                false,
                false,
            )?;
        } else {
            eprintln!(
                "status fix: skipping missing submodule `{}` ({})",
                scope.id,
                scope.source_root.display()
            );
        }
    }
    Ok(())
}
/// Returns `true` when the report's index status lists at least one missing
/// scope, whichever variant the status is.
pub(crate) fn status_missing_workspace_scopes(report: &status::StatusReport) -> bool {
    let missing_scopes = match &report.index {
        status::IndexStatus::Fresh { missing_scopes, .. } => missing_scopes,
        status::IndexStatus::Stale { missing_scopes, .. } => missing_scopes,
        status::IndexStatus::Missing { missing_scopes } => missing_scopes,
    };
    !missing_scopes.is_empty()
}
/// Indexes every submodule scope that `report` lists as missing and whose
/// source root exists on disk.
///
/// Returns immediately when the report lists no missing scopes.
///
/// # Errors
///
/// Propagates failures from configuration loading and `run_index_update`.
pub(crate) fn autoindex_missing_workspace_scopes(
    root: &Path,
    report: &status::StatusReport,
) -> Result<()> {
    let missing_scopes = match &report.index {
        status::IndexStatus::Fresh { missing_scopes, .. } => missing_scopes,
        status::IndexStatus::Stale { missing_scopes, .. } => missing_scopes,
        status::IndexStatus::Missing { missing_scopes } => missing_scopes,
    };
    if missing_scopes.is_empty() {
        return Ok(());
    }

    let wanted_ids = missing_scopes
        .iter()
        .map(|scope| scope.scope.as_str())
        .collect::<std::collections::HashSet<_>>();
    let cfg = config::Config::load(root)?;
    for scope in config::Config::submodule_dirs(root)? {
        // Only scopes that are both reported missing and present on disk.
        let should_index = wanted_ids.contains(scope.id.as_str()) && scope.source_root.exists();
        if should_index {
            let db_path = cfg.db_path_for(root, &scope.id);
            let action = format!(
                "autoindexing missing submodule `{}` during status",
                scope.id
            );
            run_index_update(
                &db_path,
                &scope.source_root,
                action,
                root,
                Some(scope.id.as_str()),
                false,
                false,
            )?;
        }
    }
    Ok(())
}
/// Writes every warning in `stats` to stderr, with its path passed through
/// `relativize_pathbuf` against `root`.
pub(crate) fn emit_summary_stats_warnings(stats: &summarize::SummaryStats, root: &Path) {
    stats.warnings.iter().for_each(|warning| {
        let shown_path = relativize_pathbuf(&warning.path, root);
        eprintln!(
            "warning: summarize stats {}: {}",
            shown_path.display(),
            warning.message
        );
    });
}
/// Wraps `err` in an additional layer of `context` and returns the new error.
///
/// Uses `anyhow::Error::context` directly rather than routing through a
/// temporary `Result` and `unwrap_err`, which removes the panic path from the
/// implementation without changing the resulting error chain.
fn contextualize_error(err: anyhow::Error, context: String) -> anyhow::Error {
    err.context(context)
}
/// Decides whether lock diagnostics should be attached to `err`.
///
/// True when the error's top-level message mentions an already-active index
/// writer, or when `substrate::error_mentions_locked_db` says so.
fn should_attach_lock_diagnostics(err: &anyhow::Error) -> bool {
    if err
        .to_string()
        .contains("another tsift index writer is already active")
    {
        return true;
    }
    substrate::error_mentions_locked_db(err)
}
/// Adds `action` as context to `err`, appending lock diagnostics when they are relevant.
///
/// Diagnostics are gathered only for lock-related errors. If gathering them fails, the error
/// is contextualized with `action` alone.
fn add_write_lock_context(
    err: anyhow::Error,
    action: String,
    root: &std::path::Path,
    scope: Option<&str>,
) -> anyhow::Error {
    let lock_report = if should_attach_lock_diagnostics(&err) {
        status::check_locks(root, None, scope).ok()
    } else {
        None
    };
    let message = match lock_report {
        Some(report) => format!(
            "{}\n\nlock diagnostics:\n{}",
            action,
            status::format_locks_human(&report, false).trim_end()
        ),
        None => action,
    };
    contextualize_error(err, message)
}
/// Opens the index database at `db_path` and updates it from `source_root`.
///
/// `rebuild` takes precedence over `prune`; with neither set a plain incremental update runs.
/// Warnings from a successful update are printed to stderr before the summary is returned.
///
/// # Errors
/// Any failure is wrapped with `action` as context (plus lock diagnostics when applicable).
pub(crate) fn run_index_update(
    db_path: &std::path::Path,
    source_root: &std::path::Path,
    action: String,
    root: &std::path::Path,
    scope: Option<&str>,
    rebuild: bool,
    prune: bool,
) -> Result<index::IndexSummary> {
    // Run the fallible steps in a closure so every failure funnels through one error path.
    let outcome = (|| {
        let db = index::IndexDb::open(db_path)?;
        match (rebuild, prune) {
            (true, _) => db.rebuild(source_root),
            (false, true) => db.apply_changes_pruned(source_root),
            (false, false) => db.apply_changes(source_root),
        }
    })();
    match outcome {
        Ok(summary) => {
            emit_index_warnings(&summary, source_root, scope);
            Ok(summary)
        }
        Err(err) => Err(add_write_lock_context(err, action, root, scope)),
    }
}
/// Rewrites every change and warning path in `summary` to be relative to `root`, in place.
pub(crate) fn relativize_index_summary(summary: &mut index::IndexSummary, root: &Path) {
    summary
        .changes
        .iter_mut()
        .for_each(|change| change.path = relativize_pathbuf(&change.path, root));
    summary
        .warnings
        .iter_mut()
        .for_each(|warning| warning.path = relativize_pathbuf(&warning.path, root));
}
/// Prints each indexing warning in `summary` to stderr.
///
/// Line shape: `warning: [scope] path [language]: stage: message`, where the scope and
/// language parts appear only when known and the path is shown relative to `root`.
fn emit_index_warnings(summary: &index::IndexSummary, root: &Path, scope: Option<&str>) {
    // The scope prefix is the same for every warning, so build it once.
    let scope_prefix = match scope {
        Some(name) => format!("[{}] ", name),
        None => String::new(),
    };
    for warning in &summary.warnings {
        let shown_path = relativize_pathbuf(&warning.path, root);
        let stage = match warning.stage {
            index::IndexWarningStage::ReadSource => "read failed",
            index::IndexWarningStage::ExtractSymbols => "symbol extraction failed",
            index::IndexWarningStage::ExtractCallSites => "call extraction failed",
            index::IndexWarningStage::ExtractRoutes => "route extraction failed",
        };
        let lang_suffix = match warning.language.as_deref() {
            Some(lang) => format!(" [{}]", lang),
            None => String::new(),
        };
        eprintln!(
            "warning: {}{}{}: {}: {}",
            scope_prefix,
            shown_path.display(),
            lang_suffix,
            stage,
            warning.message
        );
    }
}
/// Loads the `[summarize]` section of `<root>/.tsift/config.toml`.
///
/// Keys that are absent fall back to `SummarizeConfig::default()`. A missing config file
/// yields the defaults silently. A file that cannot be read or parsed also yields the
/// defaults, but a warning is printed to stderr so a broken config is not mistaken for a
/// working one.
pub(crate) fn load_summarize_config(root: &std::path::Path) -> summarize::SummarizeConfig {
    #[derive(serde::Deserialize, Default)]
    struct RawConfig {
        #[serde(default)]
        summarize: Option<RawSummarize>,
    }
    #[derive(serde::Deserialize)]
    struct RawSummarize {
        model: Option<String>,
        max_file_tokens: Option<usize>,
        api_key_env: Option<String>,
    }

    let defaults = summarize::SummarizeConfig::default();
    let config_path = root.join(".tsift/config.toml");
    let content = match std::fs::read_to_string(&config_path) {
        Ok(content) => content,
        // No config file is the normal case; stay quiet.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return defaults,
        Err(err) => {
            eprintln!(
                "warning: could not read {}: {}; using default summarize settings",
                config_path.display(),
                err
            );
            return defaults;
        }
    };
    let raw: RawConfig = match toml::from_str(&content) {
        Ok(raw) => raw,
        Err(err) => {
            eprintln!(
                "warning: could not parse {}: {}; using default summarize settings",
                config_path.display(),
                err
            );
            return defaults;
        }
    };
    match raw.summarize {
        Some(section) => summarize::SummarizeConfig {
            model: section.model.unwrap_or(defaults.model),
            max_file_tokens: section.max_file_tokens.unwrap_or(defaults.max_file_tokens),
            api_key_env: section.api_key_env.unwrap_or(defaults.api_key_env),
        },
        None => defaults,
    }
}
/// Index database and source root selected for extracting symbols from one file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExtractSymbolContext {
/// Path of the index database to use.
db_path: PathBuf,
/// Root the database was selected for: a submodule source root or the workspace root.
source_root: PathBuf,
}
/// Finds the index database that covers `file_path`.
///
/// Submodules are tried first, deepest source root first, and only a submodule whose
/// database file exists is accepted. Otherwise the shared `<root>/.tsift/index.db` is used
/// when it exists and the file lies under `root`. Returns `Ok(None)` when nothing matches.
///
/// # Errors
/// Propagates configuration loading and submodule discovery failures.
pub(crate) fn find_symbols_db_for_file(
    root: &Path,
    file_path: &Path,
) -> Result<Option<ExtractSymbolContext>> {
    let cfg = config::Config::load(root)?;
    let mut scopes = config::Config::submodule_dirs(root)?;
    // Most path components first, so a nested submodule wins over its ancestor.
    scopes.sort_by_key(|scope| std::cmp::Reverse(scope.source_root.components().count()));
    for scope in scopes {
        if file_path.starts_with(&scope.source_root) {
            let db_path = cfg.db_path_for(root, &scope.id);
            if db_path.exists() {
                return Ok(Some(ExtractSymbolContext {
                    db_path,
                    source_root: scope.source_root,
                }));
            }
        }
    }

    let root_db = root.join(".tsift/index.db");
    let usable = root_db.exists() && file_path.starts_with(root);
    Ok(usable.then(|| ExtractSymbolContext {
        db_path: root_db,
        source_root: root.to_path_buf(),
    }))
}
/// Resolves the directory that an extract request is anchored at.
///
/// The path is canonicalized. A directory is returned as is; for anything else the parent
/// directory is returned, falling back to the canonical path itself when there is no parent.
///
/// # Errors
/// Fails when `path` cannot be canonicalized.
pub(crate) fn resolve_extract_base(path: &Path) -> Result<PathBuf> {
    let resolved = path
        .canonicalize()
        .with_context(|| format!("canonicalizing {}", path.display()))?;
    if resolved.is_dir() {
        return Ok(resolved);
    }
    let base = match resolved.parent() {
        Some(parent) => parent.to_path_buf(),
        None => resolved,
    };
    Ok(base)
}
/// Normalizes an extract scope path.
///
/// Paths that exist are canonicalized through the filesystem; paths that do not exist are
/// normalized lexically via `summarize::normalize_lexical_path`.
///
/// # Errors
/// Fails when an existing path cannot be canonicalized.
fn normalize_extract_scope_path(path: &Path) -> Result<PathBuf> {
    if !path.exists() {
        return Ok(summarize::normalize_lexical_path(path));
    }
    path.canonicalize()
        .with_context(|| format!("canonicalizing extract scope {}", path.display()))
}
/// Resolves `extract_path` to a normalized scope path.
///
/// Absolute paths are used as given; relative paths are interpreted against `root`.
///
/// # Errors
/// Propagates normalization failures.
pub(crate) fn resolve_extract_scope(root: &Path, extract_path: &Path) -> Result<PathBuf> {
    let joined;
    let candidate: &Path = if extract_path.is_absolute() {
        extract_path
    } else {
        joined = root.join(extract_path);
        &joined
    };
    normalize_extract_scope_path(candidate)
}
/// Tells whether `changed_path` lies inside `extract_scope`.
///
/// The changed path is normalized first; if that fails, a purely lexical normalization is
/// used instead. `extract_scope` is compared as given.
pub(crate) fn summarize_diff_matches_scope(changed_path: &Path, extract_scope: &Path) -> bool {
    let normalized = match normalize_extract_scope_path(changed_path) {
        Ok(path) => path,
        Err(_) => summarize::normalize_lexical_path(changed_path),
    };
    normalized.starts_with(extract_scope)
}
/// Produces the summary-database key for `file_path`.
///
/// The path is made relative to `root` when it lies under it and is otherwise used
/// unchanged, then passed through `summarize::normalize_summary_file_key`.
pub(crate) fn summarize_relative_file_path(root: &Path, file_path: &Path) -> String {
    let relative = match file_path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => file_path,
    };
    summarize::normalize_summary_file_key(relative)
}
/// Lists cached summary paths inside `extract_scope` that no longer have a live file.
///
/// A cached path is reported when it falls within the scope and its key is not among the
/// keys derived from `files_to_extract`.
///
/// # Errors
/// Propagates failures from reading the cached file paths.
pub(crate) fn summarize_full_extract_deleted_summary_paths(
    summary_db: &summarize::SummaryDb,
    root: &Path,
    extract_scope: &Path,
    files_to_extract: &[PathBuf],
) -> Result<BTreeSet<String>> {
    let mut live = BTreeSet::new();
    for file_path in files_to_extract {
        live.insert(summarize_relative_file_path(root, file_path));
    }

    let mut stale = BTreeSet::new();
    for cached in summary_db.cached_file_paths()? {
        let in_scope = summarize_diff_matches_scope(&root.join(&cached), extract_scope);
        if in_scope && !live.contains(&cached) {
            stale.insert(cached);
        }
    }
    Ok(stale)
}
/// One index that a search may read from, plus the information needed to refresh it.
#[derive(Debug, Clone)]
struct SearchIndexTarget {
/// Human-readable name used in messages (for example "index" or "submodule `x` index").
label: String,
/// Location of the index database file.
db_path: PathBuf,
/// Directory whose sources the index covers.
source_root: PathBuf,
/// Scope identifier for submodule and cargo-package targets; `None` for the root index.
scope_name: Option<String>,
/// Command line suggested to the user for rebuilding this index.
reindex_cmd: String,
}
/// Builds the search target for a cargo package scope.
///
/// The suggested reindex command is `tsift index --submodule <scope_id> <root>`.
fn cargo_package_index_target(
    root: &Path,
    package: multiplicity::CargoPackageInfo,
) -> SearchIndexTarget {
    let label = format!("cargo package `{}` index", package.scope_id);
    let db_path = multiplicity::cargo_package_db_path(root, &package.scope_id);
    let reindex_cmd = format!(
        "tsift index --submodule {} {}",
        package.scope_id,
        root.display()
    );
    SearchIndexTarget {
        label,
        db_path,
        source_root: package.package_root.clone(),
        scope_name: Some(package.scope_id.clone()),
        reindex_cmd,
    }
}
/// Freshness of a search index as determined by `inspect_search_index`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchIndexState {
/// The source root or the database file does not exist.
Missing,
/// The index has no new, modified, or deleted files pending.
Fresh,
/// The index is out of date; `stale_files` is new + modified + deleted.
Stale { stale_files: usize },
}
/// Chooses which index (or indexes) a search should use.
///
/// Resolution order:
/// 1. An explicit `scope`: a matching submodule, then a matching cargo package.
/// 2. `federated`: every submodule for which `federation_for_scope` is true.
/// 3. Inference from `path_hint`: submodule, then cargo package, then an agent-doc task
///    file named after a submodule.
/// 4. The shared root index at `<root>/.tsift/index.db`.
///
/// # Errors
/// Propagates configuration/discovery failures, and fails when the workspace has scoped
/// submodules but no shared root index to fall back to.
fn resolve_search_index_targets(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
federated: bool,
) -> Result<Vec<SearchIndexTarget>> {
// 1. Explicit scope name.
if let Some(scope_name) = scope {
if let Some(scope) = config::Config::find_submodule(root, scope_name)? {
let cfg = config::Config::load(root)?;
return Ok(vec![SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
}]);
}
if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
return Ok(vec![cargo_package_index_target(root, package)]);
}
// Only the error is used here; the success value is discarded and resolution falls
// through. NOTE(review): presumably this exists to report an unknown scope — confirm.
config::Config::resolve_submodule(root, scope_name)?;
}
// 2. Federated search: one target per submodule that opts into federation.
if federated {
let cfg = config::Config::load(root)?;
let mut targets = Vec::new();
for scope in config::Config::submodule_dirs(root)? {
if !cfg.federation_for_scope(&scope) {
continue;
}
targets.push(SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
// Federated targets share the workspace-wide reindex command.
reindex_cmd: format!("tsift index --workspace {}", root.display()),
});
}
return Ok(targets);
}
// 3. Infer the scope from the path hint.
if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
let cfg = config::Config::load(root)?;
return Ok(vec![SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
}]);
}
if let Some(package) = multiplicity::infer_cargo_package_from_path(root, path_hint)? {
return Ok(vec![cargo_package_index_target(root, package)]);
}
if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
let cfg = config::Config::load(root)?;
return Ok(vec![SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
}]);
}
// 4. Fall back to the shared root index. A workspace that has scopes but no root index
// cannot be searched by default, so explain the available options instead.
let scopes = config::Config::submodule_dirs(root)?;
if !scopes.is_empty() {
let root_db = root.join(".tsift/index.db");
if !root_db.exists() {
let available_scopes = scopes
.iter()
.map(|scope| scope.id.as_str())
.collect::<Vec<_>>()
.join(", ");
let cfg = config::Config::load(root)?;
// Scopes whose database file already exists on disk.
let indexed_scopes = scopes
.iter()
.filter(|scope| cfg.db_path_for(root, &scope.id).exists())
.map(|scope| scope.id.as_str())
.collect::<Vec<_>>();
let indexed_label = if indexed_scopes.is_empty() {
"none".to_string()
} else {
indexed_scopes.join(", ")
};
bail!(
"workspace root {} has no shared root index at {}. Default search requires `--scope <scope>` or `--federated` when the workspace uses scoped `.tsift/indexes/*/index.db` files. Available scopes: {}. Indexed scopes: {}.",
root.display(),
root_db.display(),
available_scopes,
indexed_label,
);
}
}
Ok(vec![SearchIndexTarget {
label: "index".to_string(),
db_path: root.join(".tsift/index.db"),
source_root: root.to_path_buf(),
scope_name: None,
reindex_cmd: format!("tsift index {}", root.display()),
}])
}
/// Classifies `target` as missing, fresh, or stale without modifying the index.
///
/// A target is missing when its source root or database file does not exist. Otherwise the
/// number of stale files is the sum of new, modified, and deleted files reported by a
/// read-only inspection.
///
/// # Errors
/// Propagates failures from the read-only inspection.
fn inspect_search_index(target: &SearchIndexTarget) -> Result<SearchIndexState> {
    let present = target.source_root.exists() && target.db_path.exists();
    if !present {
        return Ok(SearchIndexState::Missing);
    }
    let inspection =
        index::IndexDb::inspect_read_only(&target.db_path, &target.source_root, false)?;
    let counts = &inspection.summary;
    let state = match counts.new + counts.modified + counts.deleted {
        0 => SearchIndexState::Fresh,
        stale_files => SearchIndexState::Stale { stale_files },
    };
    Ok(state)
}
/// An index that needs rebuilding, as reported to the user in error messages.
#[derive(Debug, Clone, PartialEq, Eq)]
struct RebuildSearchTarget {
/// Human-readable index name, copied from the originating `SearchIndexTarget`.
label: String,
/// Why the index needs a rebuild.
reason: RebuildSearchReason,
/// Command suggested for rebuilding the index.
reindex_cmd: String,
}
/// Reason an index is not usable as is.
#[derive(Debug, Clone, PartialEq, Eq)]
enum RebuildSearchReason {
/// The index (or its source root) does not exist.
Missing,
/// The index exists but `stale_files` files are out of date.
Stale { stale_files: usize },
}
/// An index whose automatic refresh was skipped because another index writer was active.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DegradedSearchTarget {
/// Human-readable index name, copied from the originating `SearchIndexTarget`.
label: String,
/// State the index was in when the refresh was skipped.
reason: RebuildSearchReason,
/// Command suggested for refreshing the index later.
reindex_cmd: String,
}
/// How a search proceeds when some indexes could not be refreshed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DegradedSearchMode {
/// Selected when at least one degraded index is stale; the user-facing note describes
/// this as read-only search against the current index snapshot.
ReadOnly,
/// Selected when every degraded index is missing; the user-facing note describes this
/// as exact live-file search.
Exact,
}
/// Result of `precheck_search_indexes`.
#[derive(Debug)]
struct SearchPrecheck {
/// Every index target resolved for the search.
targets: Vec<SearchIndexTarget>,
/// Targets whose automatic refresh was skipped because another writer held the lock.
degraded_targets: Vec<DegradedSearchTarget>,
}
/// Tells whether any cause in the error chain reports an active tsift index writer.
fn is_active_writer_lock_error(err: &anyhow::Error) -> bool {
    const MARKER: &str = "another tsift index writer is already active";
    for cause in err.chain() {
        if cause.to_string().contains(MARKER) {
            return true;
        }
    }
    false
}
/// Maps a path under `<root>/tasks/` to the submodule named after the file stem.
///
/// Returns `Ok(None)` when the hinted path is outside `root`, is not under a top-level
/// `tasks` directory, or has no UTF-8 file stem. Otherwise the file stem is looked up as a
/// submodule name.
///
/// # Errors
/// Propagates failures from the submodule lookup.
fn infer_agent_doc_task_submodule(
    root: &Path,
    path_hint: &Path,
) -> Result<Option<config::WorkspaceScope>> {
    let hinted = if path_hint.is_absolute() {
        path_hint.to_path_buf()
    } else {
        root.join(path_hint)
    };
    let relative = match hinted.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => return Ok(None),
    };
    let under_tasks = matches!(
        relative.components().next(),
        Some(std::path::Component::Normal(first)) if first == "tasks"
    );
    if !under_tasks {
        return Ok(None);
    }
    match relative.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => config::Config::find_submodule(root, stem),
        None => Ok(None),
    }
}
fn degraded_search_target(
target: &SearchIndexTarget,
reason: RebuildSearchReason,
) -> DegradedSearchTarget {
DegradedSearchTarget {
label: target.label.clone(),
reason,
reindex_cmd: target.reindex_cmd.clone(),
}
}
/// Runs an incremental index update for `target` on behalf of a search.
///
/// # Errors
/// Propagates failures from `run_index_update`.
fn apply_search_index_update(
    root: &Path,
    target: &SearchIndexTarget,
) -> Result<index::IndexSummary> {
    let action = format!("autoindexing {}", target.label);
    let scope = target.scope_name.as_deref();
    // Incremental update only: neither a rebuild nor pruning is requested.
    let (rebuild, prune) = (false, false);
    run_index_update(
        &target.db_path,
        &target.source_root,
        action,
        root,
        scope,
        rebuild,
        prune,
    )
}
/// Inspects each target and returns the ones that are missing or stale, in input order.
///
/// # Errors
/// Stops at, and returns, the first inspection failure.
fn collect_rebuild_search_targets(
    targets: &[SearchIndexTarget],
) -> Result<Vec<RebuildSearchTarget>> {
    targets
        .iter()
        .filter_map(|target| {
            let reason = match inspect_search_index(target) {
                Err(err) => return Some(Err(err)),
                Ok(SearchIndexState::Fresh) => return None,
                Ok(SearchIndexState::Missing) => RebuildSearchReason::Missing,
                Ok(SearchIndexState::Stale { stale_files }) => {
                    RebuildSearchReason::Stale { stale_files }
                }
            };
            Some(Ok(RebuildSearchTarget {
                label: target.label.clone(),
                reason,
                reindex_cmd: target.reindex_cmd.clone(),
            }))
        })
        .collect()
}
/// Describes one rebuild target, e.g. "index is missing" or "index is stale (3 files)".
fn rebuild_search_target_detail(target: &RebuildSearchTarget) -> String {
    let label = &target.label;
    match target.reason {
        RebuildSearchReason::Stale { stale_files } => {
            let plural = if stale_files == 1 { "" } else { "s" };
            format!("{} is stale ({} file{})", label, stale_files, plural)
        }
        RebuildSearchReason::Missing => format!("{} is missing", label),
    }
}
/// Builds the user-facing "needs rebuild" sentence for one or more targets.
///
/// A single target is described on its own. For several targets at most three are listed,
/// the rest are summarized as "+N more", and the first target's reindex command is suggested.
///
/// # Panics
/// Panics when `rebuild_targets` is empty; callers are expected to check first.
fn rebuild_search_targets_message(rebuild_targets: &[RebuildSearchTarget]) -> String {
    if let [only] = rebuild_targets {
        return format!(
            "{}. Run `{}` to rebuild before retrying.",
            rebuild_search_target_detail(only),
            only.reindex_cmd
        );
    }

    const MAX_LISTED: usize = 3;
    let listed = rebuild_targets.len().min(MAX_LISTED);
    let mut details = rebuild_targets[..listed]
        .iter()
        .map(rebuild_search_target_detail)
        .collect::<Vec<String>>()
        .join(", ");
    let overflow = rebuild_targets.len() - listed;
    if overflow > 0 {
        details.push_str(&format!(", +{} more", overflow));
    }
    format!(
        "{} indexes need rebuild: {}. Run `{}` to rebuild before retrying.",
        rebuild_targets.len(),
        details,
        rebuild_targets[0].reindex_cmd
    )
}
/// Resolves the indexes for a search and makes sure they are usable.
///
/// With `autoindex` on, missing and stale indexes are refreshed. A refresh that fails because
/// another writer is active marks the target as degraded; any other failure is returned.
/// With `autoindex` off, missing indexes are left alone and stale indexes abort the search.
///
/// # Errors
/// Propagates resolution, inspection, and non-lock refresh failures, and fails when stale
/// indexes are found while autoindexing is disabled.
pub(crate) fn precheck_search_indexes(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    federated: bool,
    autoindex: bool,
) -> Result<SearchPrecheck> {
    let targets = resolve_search_index_targets(root, path_hint, scope, federated)?;
    let mut stale_targets = Vec::new();
    let mut degraded_targets = Vec::new();
    for target in &targets {
        let reason = match inspect_search_index(target)? {
            SearchIndexState::Fresh => continue,
            SearchIndexState::Missing => RebuildSearchReason::Missing,
            SearchIndexState::Stale { stale_files } => RebuildSearchReason::Stale { stale_files },
        };
        if autoindex {
            if let Err(err) = apply_search_index_update(root, target) {
                if !is_active_writer_lock_error(&err) {
                    return Err(err);
                }
                // Another writer holds the lock: carry on with whatever is on disk.
                degraded_targets.push(degraded_search_target(target, reason));
            }
        } else if matches!(reason, RebuildSearchReason::Stale { .. }) {
            stale_targets.push(RebuildSearchTarget {
                label: target.label.clone(),
                reason,
                reindex_cmd: target.reindex_cmd.clone(),
            });
        }
    }

    if !stale_targets.is_empty() {
        bail!(
            "tsift search aborted: {} \
             or re-run without `--no-autoindex`.",
            rebuild_search_targets_message(&stale_targets),
        );
    }
    Ok(SearchPrecheck {
        targets,
        degraded_targets,
    })
}
/// Picks the degraded search mode for the given targets.
///
/// Returns `None` when nothing is degraded, `Exact` when every degraded target is missing,
/// and `ReadOnly` as soon as one of them is stale.
pub(crate) fn degraded_search_mode(targets: &[DegradedSearchTarget]) -> Option<DegradedSearchMode> {
    if targets.is_empty() {
        return None;
    }
    let any_stale = targets
        .iter()
        .any(|target| !matches!(target.reason, RebuildSearchReason::Missing));
    Some(if any_stale {
        DegradedSearchMode::ReadOnly
    } else {
        DegradedSearchMode::Exact
    })
}
/// Summarizes degraded targets for the user-facing note.
///
/// A single target is described by name. Several targets are reduced to counts, stale first,
/// e.g. "2 stale indexes, 1 missing index".
fn degraded_search_targets_summary(targets: &[DegradedSearchTarget]) -> String {
    if let [only] = targets {
        return match only.reason {
            RebuildSearchReason::Stale { stale_files } => {
                let file_suffix = if stale_files == 1 { "" } else { "s" };
                format!(
                    "{} is stale ({} file{})",
                    only.label, stale_files, file_suffix
                )
            }
            RebuildSearchReason::Missing => format!("{} is missing", only.label),
        };
    }

    let (mut stale, mut missing) = (0usize, 0usize);
    for target in targets {
        match target.reason {
            RebuildSearchReason::Missing => missing += 1,
            RebuildSearchReason::Stale { .. } => stale += 1,
        }
    }
    let plural = |count: usize| if count == 1 { "" } else { "es" };
    let mut parts = Vec::new();
    if stale > 0 {
        let suffix = plural(stale);
        parts.push(format!("{stale} stale index{suffix}"));
    }
    if missing > 0 {
        let suffix = plural(missing);
        parts.push(format!("{missing} missing index{suffix}"));
    }
    parts.join(", ")
}
/// Prints a note to stderr explaining that autoindexing was skipped and how search continues.
///
/// The suggested retry command is the reindex command of the first degraded target. Nothing
/// is printed when `targets` is empty.
pub(crate) fn emit_degraded_search_note(
    targets: &[DegradedSearchTarget],
    mode: DegradedSearchMode,
) {
    // With no degraded targets there is nothing to report and no command to suggest;
    // returning here also avoids indexing into an empty slice.
    let Some(first) = targets.first() else {
        return;
    };
    let summary = degraded_search_targets_summary(targets);
    let reindex_cmd = &first.reindex_cmd;
    match mode {
        DegradedSearchMode::ReadOnly => eprintln!(
            "note: active tsift writer detected; skipping autoindex because {}. \
             Continuing with read-only search and the current index snapshot; symbol hits may lag. \
             Retry `{}` after the active writer finishes for fresh index results.",
            summary, reindex_cmd
        ),
        DegradedSearchMode::Exact => eprintln!(
            "note: active tsift writer detected; skipping autoindex because {}. \
             Continuing with exact live-file search. Retry `{}` after the active writer finishes \
             for indexed symbol hits.",
            summary, reindex_cmd
        ),
    }
}
/// Builds the message shown when a search times out.
///
/// When every target is fresh the message says reindexing is unlikely to help; otherwise it
/// appends the rebuild advice for the missing or stale targets.
///
/// # Errors
/// Propagates failures from inspecting the targets.
fn search_timeout_message(
    timeout_secs: u64,
    strategy: &str,
    targets: &[SearchIndexTarget],
) -> Result<String> {
    let needing_rebuild = collect_rebuild_search_targets(targets)?;
    let message = if needing_rebuild.is_empty() {
        format!(
            "tsift search timed out after {}s (strategy: {}). \
             The search root looks fresh, so reindexing is unlikely to help. \
             Re-run with `--timeout 0` to disable the timeout, narrow `--path` / `--scope`, \
             or try a different strategy.",
            timeout_secs, strategy,
        )
    } else {
        format!(
            "tsift search timed out after {}s (strategy: {}). {}",
            timeout_secs,
            strategy,
            rebuild_search_targets_message(&needing_rebuild),
        )
    };
    Ok(message)
}
/// Tells whether `ch` is one of the punctuation characters that mark an identifier-like or
/// path-like query: `-`, `_`, `/`, `\`, `.`, `:`, `#`, `@`.
fn is_exact_preferring_query_char(ch: char) -> bool {
    ['-', '_', '/', '\\', '.', ':', '#', '@'].contains(&ch)
}

/// Tells whether `query` looks like a single identifier or path that should be searched
/// exactly.
///
/// After trimming, the query must consist only of alphanumerics and marker characters and
/// must contain at least one of each. Empty queries and queries with inner whitespace or any
/// other character do not qualify.
fn query_prefers_exact_search(query: &str) -> bool {
    let mut saw_alphanumeric = false;
    let mut saw_marker = false;
    for ch in query.trim().chars() {
        if ch.is_alphanumeric() {
            saw_alphanumeric = true;
        } else if is_exact_preferring_query_char(ch) {
            saw_marker = true;
        } else {
            // Whitespace or any other character disqualifies the query outright.
            return false;
        }
    }
    saw_alphanumeric && saw_marker
}
/// Returns the search strategy to use.
///
/// An explicitly supplied `strategy` always wins. Otherwise the default is "exact" for
/// identifier-like or path-like queries and "lexical" for everything else.
pub(crate) fn resolve_search_strategy(query: &str, strategy: Option<String>) -> String {
    match strategy {
        Some(explicit) => explicit,
        None if query_prefers_exact_search(query) => "exact".to_string(),
        None => "lexical".to_string(),
    }
}
/// Collects the source files to process under `path`.
///
/// A path that is a file is returned as the only entry, whatever its extension. A directory
/// is walked with the `ignore` walker (`hidden(true)`, `git_ignore(true)`), keeping regular
/// files whose extension is one of the recognised source extensions.
///
/// # Errors
/// Propagates errors reported by the directory walker.
pub(crate) fn collect_source_files(path: &std::path::Path) -> Result<Vec<PathBuf>> {
    const SOURCE_EXTENSIONS: [&str; 12] = [
        "rs", "py", "ts", "tsx", "js", "jsx", "kt", "kts", "zig", "sh", "bash", "zsh",
    ];

    if path.is_file() {
        return Ok(vec![path.to_path_buf()]);
    }

    let mut files = Vec::new();
    let walker = ignore::WalkBuilder::new(path)
        .hidden(true)
        .git_ignore(true)
        .build();
    for entry in walker {
        let entry = entry?;
        let is_regular_file = entry.file_type().is_some_and(|ft| ft.is_file());
        if !is_regular_file {
            continue;
        }
        let candidate = entry.path();
        let Some(ext) = candidate.extension() else {
            continue;
        };
        let ext = ext.to_string_lossy();
        if SOURCE_EXTENSIONS.contains(&&*ext) {
            files.push(candidate.to_path_buf());
        }
    }
    Ok(files)
}
#[cfg(test)]
mod tests {
use super::*;
use super::semantic_edit::{
EditOp,
apply_edit_op, apply_edit_plan_atomically_inner, markdown_block_spans,
markdown_section_spans,
};
use tsift_memory::{MemoryEventKind, MemoryStore};
use std::cell::RefCell;
use substrate::{ConvexEdgeRow, ConvexGraphClient, ConvexGraphStore, ConvexNodeRow};
/// Parses CLI arguments with `Cli::parse_from` on a dedicated thread with a 16 MiB stack.
///
/// Panics if the thread cannot be spawned or if the parsing thread itself panics.
fn parse_cli<I, T>(itr: I) -> Cli
where
    I: IntoIterator<Item = T> + Send + 'static,
    T: Into<std::ffi::OsString> + Clone + Send + 'static,
{
    const PARSER_STACK_BYTES: usize = 16 * 1024 * 1024;
    let worker = std::thread::Builder::new()
        .name("cli-parse".to_string())
        .stack_size(PARSER_STACK_BYTES)
        .spawn(move || Cli::parse_from(itr))
        .unwrap();
    worker.join().unwrap()
}
/// Like `parse_cli`, but uses `Cli::try_parse_from` and hands back the clap error instead
/// of letting the parser exit.
///
/// Panics if the thread cannot be spawned or if the parsing thread itself panics.
fn try_parse_cli<I, T>(itr: I) -> std::result::Result<Cli, clap::Error>
where
    I: IntoIterator<Item = T> + Send + 'static,
    T: Into<std::ffi::OsString> + Clone + Send + 'static,
{
    const PARSER_STACK_BYTES: usize = 16 * 1024 * 1024;
    let worker = std::thread::Builder::new()
        .name("cli-try-parse".to_string())
        .stack_size(PARSER_STACK_BYTES)
        .spawn(move || Cli::try_parse_from(itr))
        .unwrap();
    worker.join().unwrap()
}
/// Test convenience wrapper around `build_search_budget_report` that always sets
/// `absolute: false`.
fn build_relative_search_budget_report(
    query: &str,
    strategy: &str,
    root: &Path,
    response: &sift::SearchResponse,
    symbol_hits: &[index::SymbolHit],
    budget: ResponseBudget,
    filters: &SearchFacetFilters,
) -> SearchBudgetReport {
    let input = SearchBudgetReportInput {
        query,
        strategy,
        root,
        response,
        symbol_hits,
        absolute: false,
        budget,
        filters,
    };
    build_search_budget_report(input)
}
/// In-memory `ConvexGraphClient` used by tests.
#[derive(Default)]
struct MemoryConvexGraphClient {
/// Node rows keyed by `external_id`.
nodes: RefCell<BTreeMap<String, ConvexNodeRow>>,
/// Edge rows keyed by `edge_key`.
edges: RefCell<BTreeMap<String, ConvexEdgeRow>>,
}
/// Map-backed implementation: upserts replace by key, deletes report 0 or 1 removed rows,
/// and listings come back in key order because the storage is a `BTreeMap`.
impl ConvexGraphClient for MemoryConvexGraphClient {
    fn upsert_node_row(&self, row: &ConvexNodeRow) -> Result<()> {
        let mut nodes = self.nodes.borrow_mut();
        nodes.insert(row.external_id.clone(), row.clone());
        Ok(())
    }

    fn upsert_edge_row(&self, row: &ConvexEdgeRow) -> Result<()> {
        let mut edges = self.edges.borrow_mut();
        edges.insert(row.edge_key.clone(), row.clone());
        Ok(())
    }

    fn delete_node_row(&self, external_id: &str) -> Result<usize> {
        let removed = self.nodes.borrow_mut().remove(external_id);
        Ok(if removed.is_some() { 1 } else { 0 })
    }

    fn delete_edge_row(&self, edge_key: &str) -> Result<usize> {
        let removed = self.edges.borrow_mut().remove(edge_key);
        Ok(if removed.is_some() { 1 } else { 0 })
    }

    fn node_row(&self, external_id: &str) -> Result<Option<ConvexNodeRow>> {
        let nodes = self.nodes.borrow();
        Ok(nodes.get(external_id).cloned())
    }

    fn node_rows(&self) -> Result<Vec<ConvexNodeRow>> {
        let nodes = self.nodes.borrow();
        Ok(nodes.values().cloned().collect())
    }

    fn edge_rows(&self) -> Result<Vec<ConvexEdgeRow>> {
        let edges = self.edges.borrow();
        Ok(edges.values().cloned().collect())
    }

    fn node_rows_by_kind(&self, kind: &str) -> Result<Vec<ConvexNodeRow>> {
        let mut matching = Vec::new();
        for row in self.nodes.borrow().values() {
            if row.kind == kind {
                matching.push(row.clone());
            }
        }
        Ok(matching)
    }

    fn outgoing_edge_rows(
        &self,
        from_external_id: &str,
        kind: Option<&str>,
    ) -> Result<Vec<ConvexEdgeRow>> {
        let edges = self.edges.borrow();
        let matching = edges
            .values()
            .filter(|row| {
                // `kind == None` means "any kind".
                row.from_external_id == from_external_id
                    && kind.is_none_or(|kind| row.kind == kind)
            })
            .cloned()
            .collect();
        Ok(matching)
    }
}
/// Turns `path` into a git repository with a single "init" commit containing everything
/// currently in the directory.
///
/// Panics when `git` cannot be run or any of the three commands fails.
fn init_git_repo(path: &Path) {
    let run_git = |args: &[&str], failure: &str| {
        let status = std::process::Command::new("git")
            .args(args)
            .current_dir(path)
            .status()
            .unwrap();
        assert!(status.success(), "{}", failure);
    };

    run_git(&["init"], "git init failed");
    run_git(&["add", "."], "git add failed");
    // Identity is supplied inline so the commit works without any global git configuration.
    run_git(
        &[
            "-c",
            "user.name=tsift-tests",
            "-c",
            "user.email=tsift-tests@example.com",
            "commit",
            "--quiet",
            "-m",
            "init",
        ],
        "git commit failed",
    );
}
/// Creates an empty `<root>/.tsift/index.db` file, creating the directory when needed.
fn write_empty_root_index(root: &Path) {
    let tsift_dir = root.join(".tsift");
    fs::create_dir_all(&tsift_dir).unwrap();
    let db_file = tsift_dir.join("index.db");
    fs::write(db_file, "").unwrap();
}
/// Writes `line` to `path` `lines` times, newline-separated and newline-terminated, creating
/// parent directories first. Returns `path` as an owned `PathBuf`.
fn write_repeated_lines(path: &Path, line: &str, lines: usize) -> PathBuf {
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent).unwrap();
    }
    let mut body = vec![line; lines].join("\n");
    body.push('\n');
    fs::write(path, body).unwrap();
    path.to_path_buf()
}
// --- build_token_capped_preview ---
// Three short lines with a final argument of 1000 (presumably the token cap): the preview
// is not capped, contains all three lines, and ends at line 3.
#[test]
fn token_capped_preview_returns_all_lines_when_under_cap() {
let lines: Vec<&str> = vec!["fn foo() {", " 1 + 1", "}"];
let result = build_token_capped_preview(&lines, 1, 3, 160, 1000);
assert!(!result.was_capped);
assert_eq!(result.preview.len(), 3);
assert_eq!(result.capped_end, 3);
}
// 200 identical long-ish lines with a final argument of 100: the preview is capped and
// stops before line 200.
#[test]
fn token_capped_preview_truncates_when_over_cap() {
let lines: Vec<&str> = (0..200).map(|_| " let x = some_very_long_expression_here();").collect();
let result = build_token_capped_preview(&lines, 1, 200, 160, 100);
assert!(result.was_capped);
assert!(result.preview.len() < 200);
assert!(result.capped_end < 200);
}
// A single 8000-character line with a final argument of only 10: the line is still
// returned and the result is not flagged as capped.
#[test]
fn token_capped_preview_keeps_at_least_one_line() {
let long_line: String = "x".repeat(8000);
let lines: Vec<&str> = vec![&long_line];
let result = build_token_capped_preview(&lines, 1, 1, 160, 10);
assert!(!result.was_capped);
assert_eq!(result.preview.len(), 1);
}
// Four 4-character lines with a final argument of 4: everything fits exactly, so nothing
// is capped.
#[test]
fn token_capped_preview_cap_at_boundary() {
let lines: Vec<&str> = vec!["aaaa", "bbbb", "cccc", "dddd"];
let result = build_token_capped_preview(&lines, 1, 4, 160, 4);
assert!(!result.was_capped);
assert_eq!(result.preview.len(), 4);
}
// Same four lines with a final argument of 3: the preview is capped to the first three
// lines and ends at line 3.
#[test]
fn token_capped_preview_cap_just_over_boundary() {
let lines: Vec<&str> = vec!["aaaa", "bbbb", "cccc", "dddd"];
let result = build_token_capped_preview(&lines, 1, 4, 160, 3);
assert!(result.was_capped);
assert_eq!(result.preview.len(), 3);
assert_eq!(result.capped_end, 3);
}
// No input lines at all: the preview is empty and not flagged as capped.
#[test]
fn token_capped_preview_empty_lines() {
let lines: Vec<&str> = vec![];
let result = build_token_capped_preview(&lines, 1, 0, 160, 100);
assert!(!result.was_capped);
assert!(result.preview.is_empty());
}
// With a fourth argument of 20 (presumably the per-line width), a 500-character line is
// shortened to at most 23 bytes and ends with "..."; the overall preview is not capped.
#[test]
fn token_capped_preview_per_line_truncation_applied() {
let long_line = "x".repeat(500);
let lines: Vec<&str> = vec![&long_line, "short"];
let result = build_token_capped_preview(&lines, 1, 2, 20, 10000);
assert!(!result.was_capped);
assert_eq!(result.preview.len(), 2);
assert!(result.preview[0].text.len() <= 23);
assert!(result.preview[0].text.ends_with("..."));
}
// --- classify_task ---
// A plain search-style task is classified into the "haiku" tier, and the returned model
// name contains "haiku".
#[test]
fn route_search_defaults_to_haiku() {
let (tier, model) = classify_task("find all uses of authenticate");
assert_eq!(tier, "haiku");
assert!(
model.contains("haiku"),
"expected haiku model, got {}",
model
);
}
// Each of these edit-flavoured task descriptions is classified into the "sonnet" tier.
#[test]
fn route_edit_keywords_to_sonnet() {
for kw in &[
"edit the file",
"fix the bug",
"update the config",
"remove dead code",
"create a new module",
] {
let (tier, _) = classify_task(kw);
assert_eq!(tier, "sonnet", "expected sonnet for {:?}", kw);
}
}
// Each of these design/analysis-flavoured task descriptions is classified into the "opus"
// tier.
#[test]
fn route_architecture_keywords_to_opus() {
for kw in &[
"design the API",
"architecture review",
"plan the migration",
"analyze the system",
"evaluate trade-offs",
] {
let (tier, _) = classify_task(kw);
assert_eq!(tier, "opus", "expected opus for {:?}", kw);
}
}
// A task that mixes architecture and edit wording must land in the "opus" tier.
#[test]
fn route_architecture_beats_edit() {
// "design and implement" — architecture signal wins (checked first)
let (tier, _) = classify_task("design and implement the new auth service");
assert_eq!(tier, "opus");
}
// `--compact` placed before the subcommand is accepted, sets `cli.compact`, and still
// parses `status` as the command.
#[test]
fn cli_accepts_global_compact_flag() {
let cli = parse_cli(["tsift", "--compact", "status"]);
assert!(cli.compact);
assert!(matches!(cli.command, Some(Commands::Status { .. })));
}
// A relative directory scope resolved against `/repo` matches a changed file inside that
// directory and rejects a file in a sibling directory.
#[test]
fn summarize_diff_scope_matches_relative_directory() {
let root = Path::new("/repo");
let extract_scope = resolve_extract_scope(root, Path::new("src/feature")).unwrap();
assert!(summarize_diff_matches_scope(
Path::new("/repo/src/feature/main.rs"),
&extract_scope
));
assert!(!summarize_diff_matches_scope(
Path::new("/repo/src/other/main.rs"),
&extract_scope
));
}
// A relative single-file scope matches exactly that file and rejects a sibling file in
// the same directory.
#[test]
fn summarize_diff_scope_matches_relative_file() {
let root = Path::new("/repo");
let extract_scope = resolve_extract_scope(root, Path::new("src/feature/main.rs")).unwrap();
assert!(summarize_diff_matches_scope(
Path::new("/repo/src/feature/main.rs"),
&extract_scope
));
assert!(!summarize_diff_matches_scope(
Path::new("/repo/src/feature/lib.rs"),
&extract_scope
));
}
// Resolving the relative scope `src` against a temp root and walking it yields exactly
// the one source file created there.
#[test]
fn summarize_extract_scope_walks_relative_paths_from_root() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let main_rs = source_dir.join("main.rs");
std::fs::write(&main_rs, "fn alpha() {}\n").unwrap();
let extract_scope = resolve_extract_scope(dir.path(), Path::new("src")).unwrap();
let files = collect_source_files(&extract_scope).unwrap();
assert_eq!(files, vec![main_rs]);
}
// When the extract base is a nested directory, resolving `.` against it stays inside that
// directory: the root-level file is not collected, only the nested one.
#[test]
fn summarize_extract_base_uses_nested_path_instead_of_project_root() {
let dir = tempfile::tempdir().unwrap();
let nested = dir.path().join("src/nested");
std::fs::create_dir_all(&nested).unwrap();
std::fs::write(dir.path().join("root.rs"), "fn root_level() {}\n").unwrap();
let nested_file = nested.join("main.rs");
std::fs::write(&nested_file, "fn nested_only() {}\n").unwrap();
let extract_base = resolve_extract_base(&nested).unwrap();
let extract_scope = resolve_extract_scope(&extract_base, Path::new(".")).unwrap();
let files = collect_source_files(&extract_scope).unwrap();
assert_eq!(extract_scope, nested);
assert_eq!(files, vec![nested_file]);
}
// Given a file path, the extract base is the file's parent directory.
#[test]
fn summarize_extract_base_uses_parent_of_file_path() {
let dir = tempfile::tempdir().unwrap();
let nested = dir.path().join("src/nested");
std::fs::create_dir_all(&nested).unwrap();
let file_path = nested.join("main.rs");
std::fs::write(&file_path, "fn nested_only() {}\n").unwrap();
let extract_base = resolve_extract_base(&file_path).unwrap();
assert_eq!(extract_base, nested);
}
// `src/../src` resolves to the canonical `src` directory, and a (not yet existing) file
// under `src` is still recognised as inside the scope.
#[test]
fn summarize_extract_scope_normalizes_dotdot_segments() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let extract_scope = resolve_extract_scope(dir.path(), Path::new("src/../src")).unwrap();
assert_eq!(extract_scope, source_dir.canonicalize().unwrap());
assert!(summarize_diff_matches_scope(
&source_dir.join("main.rs"),
&extract_scope
));
}
// Unix only: an absolute scope given through a symlink resolves to the symlink's real
// target, so files under the real directory match the scope.
#[cfg(unix)]
#[test]
fn summarize_extract_scope_canonicalizes_absolute_symlink_paths() {
use std::os::unix::fs::symlink;
let dir = tempfile::tempdir().unwrap();
let real_root = dir.path().join("real");
let source_dir = real_root.join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let symlink_scope = dir.path().join("scope-link");
symlink(&source_dir, &symlink_scope).unwrap();
let extract_scope = resolve_extract_scope(&real_root, &symlink_scope).unwrap();
assert_eq!(extract_scope, source_dir.canonicalize().unwrap());
assert!(summarize_diff_matches_scope(
&source_dir.join("lib.rs"),
&extract_scope
));
}
// A file created after the initial commit (and never added) is reported as an existing
// changed file; nothing is reported as deleted.
#[test]
fn summarize_diff_extract_includes_untracked_files() {
let dir = tempfile::tempdir().unwrap();
std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
init_git_repo(dir.path());
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let new_file = source_dir.join("new.rs");
std::fs::write(&new_file, "fn alpha_helper() {}\n").unwrap();
let files = summarize::git_changed_files(dir.path()).unwrap();
assert_eq!(files.existing, vec![new_file]);
assert!(files.deleted.is_empty());
}
// In a repository that has been initialised but has no commit yet, the untracked file is
// still reported as existing and nothing is reported as deleted.
#[test]
fn summarize_diff_extract_treats_unborn_head_as_untracked_only() {
let dir = tempfile::tempdir().unwrap();
// Only `git init` is run here (no commit), so HEAD does not point at a commit.
let status = std::process::Command::new("git")
.args(["init"])
.current_dir(dir.path())
.status()
.unwrap();
assert!(status.success(), "git init failed");
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let new_file = source_dir.join("new.rs");
std::fs::write(&new_file, "fn alpha_helper() {}\n").unwrap();
let files = summarize::git_changed_files(dir.path()).unwrap();
assert_eq!(files.existing, vec![new_file]);
assert!(files.deleted.is_empty());
}
// A committed file removed from the working tree is reported as deleted, with no
// existing changed files.
#[test]
fn summarize_diff_extract_tracks_deleted_files() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let deleted_file = source_dir.join("gone.rs");
std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
init_git_repo(dir.path());
std::fs::remove_file(&deleted_file).unwrap();
let files = summarize::git_changed_files(dir.path()).unwrap();
assert!(files.existing.is_empty());
assert_eq!(files.deleted, vec![deleted_file]);
}
// After `git mv`, the new path is reported as existing and the old path as deleted.
#[test]
fn summarize_diff_extract_tracks_git_renames() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let old_file = source_dir.join("old.rs");
let new_file = source_dir.join("new.rs");
std::fs::write(&old_file, "fn stale() {}\n").unwrap();
init_git_repo(dir.path());
let status = std::process::Command::new("git")
.args(["mv", "src/old.rs", "src/new.rs"])
.current_dir(dir.path())
.status()
.unwrap();
assert!(status.success(), "git mv failed");
let files = summarize::git_changed_files(dir.path()).unwrap();
assert_eq!(files.existing, vec![new_file]);
assert_eq!(files.deleted, vec![old_file]);
}
// End-to-end: a cached summary row for a committed file must disappear from the summary
// database once the file is deleted and `cmd_summarize` is run over `src`.
#[test]
fn summarize_diff_extract_deletes_removed_summary_rows() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let deleted_file = source_dir.join("gone.rs");
std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
init_git_repo(dir.path());
let summary_db =
summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
// Seed a summary row for the file that is about to be deleted.
summary_db
.insert(&summarize::Summary {
id: 0,
symbol_name: "stale".to_string(),
file_path: "src/gone.rs".to_string(),
content_hash: "hash1".to_string(),
summary: "stale summary".to_string(),
entities: None,
relationships: None,
concept_labels: None,
extracted_at: "1700000000".to_string(),
model: "test".to_string(),
tokens_input: Some(100),
tokens_output: Some(50),
})
.unwrap();
std::fs::remove_file(&deleted_file).unwrap();
// NOTE(review): the meaning of the positional flags is defined by `cmd_summarize`,
// which is not visible here; presumably they select a diff-based extract over `src`.
cmd_summarize(
None,
None,
Some(PathBuf::from("src")),
true,
false,
dir.path(),
false,
true,
false,
false,
false,
)
.unwrap();
assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
}
#[test]
fn summarize_diff_extract_deletes_renamed_summary_rows() {
    let repo = tempfile::tempdir().unwrap();
    let src = repo.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let rename_from = src.join("old.rs");
    std::fs::write(&rename_from, "fn stale() {}\n").unwrap();
    std::fs::write(repo.path().join("README.md"), "# repo\n").unwrap();
    init_git_repo(repo.path());

    // Cache a summary row keyed by the pre-rename path.
    let db_path = repo.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let stale_row = summarize::Summary {
        id: 0,
        symbol_name: "stale".to_string(),
        file_path: "src/old.rs".to_string(),
        content_hash: "hash1".to_string(),
        summary: "stale summary".to_string(),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: "1700000000".to_string(),
        model: "test".to_string(),
        tokens_input: Some(100),
        tokens_output: Some(50),
    };
    summary_db.insert(&stale_row).unwrap();

    let mv_status = std::process::Command::new("git")
        .args(["mv", "src/old.rs", "src/new.rs"])
        .current_dir(repo.path())
        .status()
        .unwrap();
    assert!(mv_status.success(), "git mv failed");

    cmd_summarize(
        None,
        None,
        Some(PathBuf::from("src")),
        true,
        false,
        repo.path(),
        false,
        true,
        false,
        false,
        false,
    )
    .unwrap();

    // Rows stored under the old path must be gone once the rename is processed.
    assert!(summary_db.get_by_file("src/old.rs").unwrap().is_empty());
}
#[test]
fn summarize_full_extract_deletes_removed_summary_rows_when_scope_is_empty() {
    // No git repository is initialised here; the only source file is removed
    // before the command runs, leaving `src` empty.
    let project = tempfile::tempdir().unwrap();
    let src = project.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let removed = src.join("gone.rs");
    std::fs::write(&removed, "fn stale() {}\n").unwrap();

    let db_path = project.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let stale_row = summarize::Summary {
        id: 0,
        symbol_name: "stale".to_string(),
        file_path: "src/gone.rs".to_string(),
        content_hash: "hash1".to_string(),
        summary: "stale summary".to_string(),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: "1700000000".to_string(),
        model: "test".to_string(),
        tokens_input: Some(100),
        tokens_output: Some(50),
    };
    summary_db.insert(&stale_row).unwrap();

    std::fs::remove_file(&removed).unwrap();
    cmd_summarize(
        None,
        None,
        Some(PathBuf::from("src")),
        false,
        false,
        project.path(),
        false,
        true,
        false,
        false,
        false,
    )
    .unwrap();

    // The cached row for the missing file must have been purged.
    assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
}
#[test]
fn summarize_extract_fails_fast_when_summary_writer_lock_is_live() {
    let project = tempfile::tempdir().unwrap();
    let src = project.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let lib_rs = src.join("lib.rs");
    std::fs::write(&lib_rs, "fn helper() {}\n").unwrap();
    let lib_bytes = std::fs::read(&lib_rs).unwrap();

    // Cache a row whose hash is computed from the file's current bytes.
    let db_path = project.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let cached_row = summarize::Summary {
        id: 0,
        symbol_name: "lib.rs".to_string(),
        file_path: "src/lib.rs".to_string(),
        content_hash: summarize::content_hash(&lib_bytes),
        summary: "cached summary".to_string(),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: "1700000000".to_string(),
        model: "test".to_string(),
        tokens_input: Some(100),
        tokens_output: Some(50),
    };
    summary_db.insert(&cached_row).unwrap();
    drop(summary_db);

    // Keep the writer lock held (via the named guard) for the rest of the test.
    let lock_path = summarize::writer_lock_path(&db_path);
    let _lock = hold_writer_lock(&lock_path);

    let failure = cmd_summarize(
        None,
        None,
        Some(PathBuf::from("src")),
        false,
        false,
        project.path(),
        false,
        true,
        false,
        false,
        false,
    )
    .unwrap_err();

    // The error must name the conflict and mention the extract command.
    let message = failure.to_string();
    assert!(message.contains("another tsift summarize extractor is already active"));
    assert!(message.contains("tsift summarize --extract"));
}
#[test]
fn summarize_stats_fails_closed_when_cache_missing() {
    // An empty directory has no `.tsift/summaries.db`.
    let project = tempfile::tempdir().unwrap();
    let failure = cmd_summarize(
        None,
        None,
        None,
        false,
        true,
        project.path(),
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    assert!(
        failure.to_string().contains("no summaries.db found"),
        "got: {failure}"
    );
    // The failed call must not have created the database as a side effect.
    let db_path = project.path().join(".tsift/summaries.db");
    assert!(!db_path.exists());
}
#[test]
fn summarize_stats_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = tempfile::tempdir().unwrap();
    let db_path = project.path().join(".tsift/summaries.db");

    // Populate the cache with one row, then close the handle.
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let cached_row = summarize::Summary {
        id: 0,
        symbol_name: "alpha_helper".to_string(),
        file_path: "src/lib.rs".to_string(),
        content_hash: "hash1".to_string(),
        summary: "cached summary".to_string(),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: "1700000000".to_string(),
        model: "claude-haiku-4-5-20251001".to_string(),
        tokens_input: Some(100),
        tokens_output: Some(40),
    };
    summary_db.insert(&cached_row).unwrap();
    drop(summary_db);

    // The guard from `hold_rollback_journal_lock` stays alive until the test ends.
    let _lock = hold_rollback_journal_lock(&db_path);
    let outcome = cmd_summarize(
        None,
        None,
        None,
        false,
        true,
        project.path(),
        false,
        false,
        false,
        false,
        false,
    );
    assert!(outcome.is_ok());
}
#[test]
fn summarize_symbol_query_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = tempfile::tempdir().unwrap();
    let db_path = project.path().join(".tsift/summaries.db");

    // Populate the cache with the symbol that will be queried, then close the handle.
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let cached_row = summarize::Summary {
        id: 0,
        symbol_name: "alpha_helper".to_string(),
        file_path: "src/lib.rs".to_string(),
        content_hash: "hash1".to_string(),
        summary: "cached summary".to_string(),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: "1700000000".to_string(),
        model: "claude-haiku-4-5-20251001".to_string(),
        tokens_input: Some(100),
        tokens_output: Some(40),
    };
    summary_db.insert(&cached_row).unwrap();
    drop(summary_db);

    // The guard from `hold_rollback_journal_lock` stays alive until the test ends.
    let _lock = hold_rollback_journal_lock(&db_path);
    let outcome = cmd_summarize(
        Some("alpha_helper".to_string()),
        None,
        None,
        false,
        false,
        project.path(),
        false,
        true,
        false,
        false,
        false,
    );
    assert!(outcome.is_ok());
}
#[test]
fn summarize_cmd_uses_ancestor_project_root_for_nested_paths() {
    let project = tempfile::tempdir().unwrap();
    let nested = project.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();

    // The summary cache lives at the top-level directory, not under `src/nested`.
    let root_db_path = project.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&root_db_path).unwrap();
    let cached_row = summarize::Summary {
        id: 0,
        symbol_name: "alpha_helper".to_string(),
        file_path: "src/lib.rs".to_string(),
        content_hash: "hash1".to_string(),
        summary: "cached summary".to_string(),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: "1700000000".to_string(),
        model: "claude-haiku-4-5-20251001".to_string(),
        tokens_input: Some(100),
        tokens_output: Some(40),
    };
    summary_db.insert(&cached_row).unwrap();

    // Invoke the command with the nested directory as its path argument.
    let outcome = cmd_summarize(
        Some("alpha_helper".to_string()),
        None,
        None,
        false,
        false,
        &nested,
        false,
        true,
        false,
        false,
        false,
    );
    assert!(outcome.is_ok());
    // No second database may have been created under the nested directory.
    assert!(!nested.join(".tsift/summaries.db").exists());
}
#[test]
fn summarize_extract_uses_matching_scoped_index_for_workspace_file() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    // Declare two submodules, `src/alpha` and `src/beta`.
    let gitmodules = r#"[submodule "src/alpha"]
path = src/alpha
url = https://example.com/alpha
[submodule "src/beta"]
path = src/beta
url = https://example.com/beta
"#;
    std::fs::write(root.join(".gitmodules"), gitmodules).unwrap();

    let alpha_root = root.join("src/alpha");
    let beta_root = root.join("src/beta");
    let indexes = root.join(".tsift/indexes");
    std::fs::create_dir_all(alpha_root.join("src")).unwrap();
    std::fs::create_dir_all(beta_root.join("src")).unwrap();
    std::fs::create_dir_all(indexes.join("alpha")).unwrap();
    std::fs::create_dir_all(indexes.join("beta")).unwrap();

    std::fs::write(alpha_root.join("src/lib.rs"), "fn alpha_helper() {}\n").unwrap();
    let beta_file = beta_root.join("src/lib.rs");
    std::fs::write(&beta_file, "fn beta_helper() {}\n").unwrap();

    // Empty placeholder index files: only their location matters to the lookup.
    std::fs::write(indexes.join("alpha/index.db"), "").unwrap();
    std::fs::write(indexes.join("beta/index.db"), "").unwrap();

    // A file under `src/beta` must resolve to the beta index and source root.
    let resolved = find_symbols_db_for_file(root, &beta_file)
        .unwrap()
        .expect("expected matching scoped index");
    assert_eq!(resolved.db_path, root.join(".tsift/indexes/beta/index.db"));
    assert_eq!(resolved.source_root, beta_root);
}
// --- apply_edit_op ---
/// Builds an `EditOp` against the placeholder file `dummy.txt` from the given
/// search text, replacement text, and `replace_all` flag.
fn make_op(old: &str, new: &str, replace_all: bool) -> EditOp {
    let file = PathBuf::from("dummy.txt");
    let (old, new) = (old.to_string(), new.to_string());
    EditOp {
        file,
        old,
        new,
        replace_all,
    }
}
#[test]
fn edit_replaces_single_occurrence() {
    // One match, no `replace_all`: the text is swapped and a count of 1 reported.
    let op = make_op("world", "rust", false);
    let (updated, replacements) = apply_edit_op("hello world", &op).unwrap();
    assert_eq!(updated, "hello rust");
    assert_eq!(replacements, 1);
}
#[test]
fn edit_replace_all_replaces_every_occurrence() {
    // With `replace_all` set, all three matches are rewritten and counted.
    let op = make_op("foo", "bar", true);
    let (updated, replacements) = apply_edit_op("foo foo foo", &op).unwrap();
    assert_eq!(updated, "bar bar bar");
    assert_eq!(replacements, 3);
}
#[test]
fn edit_fails_when_old_not_found() {
    // The search text does not occur in the content, so the edit must error.
    let op = make_op("missing", "x", false);
    let outcome = apply_edit_op("hello world", &op);
    assert!(outcome.is_err());
}
#[test]
fn edit_fails_when_ambiguous_without_replace_all() {
    // Two matches without `replace_all` must be rejected, and the error text
    // must report the match count.
    let op = make_op("foo", "bar", false);
    let failure = apply_edit_op("foo foo", &op).unwrap_err();
    assert!(failure.to_string().contains("2 times"), "got: {failure}");
}
#[test]
fn edit_fails_when_old_equals_new() {
    // Identical search and replacement text must be rejected as an error.
    let op = make_op("hello", "hello", false);
    let outcome = apply_edit_op("hello", &op);
    assert!(outcome.is_err());
}
#[test]
fn edit_batch_rolls_back_when_later_swap_fails() {
    let workdir = tempfile::tempdir().unwrap();
    let alpha = workdir.path().join("alpha.txt");
    let beta = workdir.path().join("beta.txt");
    fs::write(&alpha, "alpha old\n").unwrap();
    fs::write(&beta, "beta old\n").unwrap();

    // Two single-replacement edits, one per file.
    let edit_alpha = EditOp {
        file: alpha.clone(),
        old: "old".to_string(),
        new: "new".to_string(),
        replace_all: false,
    };
    let edit_beta = EditOp {
        file: beta.clone(),
        old: "old".to_string(),
        new: "new".to_string(),
        replace_all: false,
    };
    let batch = EditBatch {
        edits: vec![edit_alpha, edit_beta],
    };
    let plan = build_edit_plan(&batch).unwrap();

    // The injected hook succeeds for commit index 0 and fails for index 1.
    let Err(failure) = apply_edit_plan_atomically_inner(plan, |commit_index, _| {
        if commit_index == 1 {
            bail!("simulated swap failure");
        }
        Ok(())
    }) else {
        panic!("expected simulated swap failure")
    };
    assert!(failure.to_string().contains("simulated swap failure"));

    // Both files must be back to their original contents after the failure.
    assert_eq!(fs::read_to_string(&alpha).unwrap(), "alpha old\n");
    assert_eq!(fs::read_to_string(&beta).unwrap(), "beta old\n");
}
// --- SQL introspection ---
/// Creates a temp-file-backed SQLite database with a `users` table (2 rows)
/// and a `posts` table (3 rows, foreign key to `users`).
///
/// Returns the temp file together with the open connection; the caller must
/// keep the file handle alive for as long as the database is needed.
fn setup_test_db() -> (tempfile::NamedTempFile, Connection) {
    let db_file = tempfile::NamedTempFile::new().unwrap();
    let connection = Connection::open(db_file.path()).unwrap();
    let schema_and_rows = "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT);
INSERT INTO users VALUES (1, 'Alice', 'alice@example.com');
INSERT INTO users VALUES (2, 'Bob', NULL);
CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER NOT NULL, title TEXT NOT NULL, body TEXT,
FOREIGN KEY(user_id) REFERENCES users(id));
INSERT INTO posts VALUES (1, 1, 'Hello World', 'First post');
INSERT INTO posts VALUES (2, 1, 'Second', NULL);
INSERT INTO posts VALUES (3, 2, 'Bob post', 'Content here');";
    connection.execute_batch(schema_and_rows).unwrap();
    (db_file, connection)
}
// --- rewrite_command ---
#[test]
fn rewrite_rg_simple_pattern() {
    // A bare `rg <pattern>` maps to an exact envelope search with the normal budget.
    let rewritten = rewrite_command("rg authenticate");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal")
    );
}
#[test]
fn rewrite_rg_with_path() {
    // A trailing positional after the pattern is forwarded as `--path`.
    let rewritten = rewrite_command("rg authenticate src/");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"")
    );
}
#[test]
fn rewrite_rg_with_flags_ignored() {
    // `-i` leaves no trace in the rewritten command.
    let rewritten = rewrite_command("rg -i authenticate src/");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"")
    );
}
#[test]
fn rewrite_rg_with_type_flag() {
    // `-t` consumes the following `rs` as its value, so the pattern is the
    // next positional argument after it.
    let rewritten = rewrite_command("rg -t rs authenticate");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal")
    );
}
#[test]
fn rewrite_rg_pipe_passthrough() {
    // A pipeline cannot be translated, so no rewrite is produced.
    assert_eq!(rewrite_command("rg authenticate | head -5"), None);
}
#[test]
fn rewrite_rg_files_passthrough() {
    // `rg --files` is left untouched.
    assert_eq!(rewrite_command("rg --files src/tsift .agent-doc logs"), None);
}
#[test]
fn rewrite_find_passthrough() {
    // `find` invocations are left untouched.
    assert_eq!(
        rewrite_command("find src/tsift .agent-doc -type f -name '*.rs'"),
        None
    );
}
#[test]
fn rewrite_grep_recursive() {
    // `grep -r` gets the same translation as `rg` with a path.
    let rewritten = rewrite_command("grep -r authenticate src/");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"")
    );
}
#[test]
fn rewrite_grep_non_recursive_passthrough() {
    // Without `-r`, a `grep` on a single file is not rewritten.
    assert_eq!(rewrite_command("grep authenticate file.txt"), None);
}
#[test]
fn rewrite_tsift_passthrough() {
    // A command that already invokes tsift comes back unchanged, wrapped in `Some`.
    let rewritten = rewrite_command("tsift search \"foo\"");
    assert_eq!(rewritten.as_deref(), Some("tsift search \"foo\""));
}
#[test]
fn rewrite_run_tsift_search_disables_timeout_by_default() {
    // A search without an explicit timeout gets `--timeout 0` appended.
    let input = "tsift search hookcaps --exact --path /tmp/x";
    assert_eq!(
        effective_rewrite_run_command(input),
        "tsift search hookcaps --exact --path /tmp/x --timeout 0"
    );
}
#[test]
fn rewrite_run_preserves_explicit_search_timeout() {
    // A timeout supplied by the caller is left exactly as given.
    let input = "tsift search hookcaps --exact --path /tmp/x --timeout 5";
    assert_eq!(
        effective_rewrite_run_command(input),
        "tsift search hookcaps --exact --path /tmp/x --timeout 5"
    );
}
#[test]
fn rewrite_unrelated_passthrough() {
    // `echo` is not rewritten even though its arguments spell `cargo build`.
    assert_eq!(rewrite_command("echo cargo build"), None);
}
#[test]
fn rewrite_rg_quoted_pattern() {
    // A double-quoted pattern containing a space survives as a single pattern.
    let rewritten = rewrite_command("rg \"fn main\"");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"fn main\" --exact --budget normal")
    );
}
#[test]
fn rewrite_git_diff_to_diff_digest() {
    // A plain `git diff` becomes a diff digest of the current directory.
    let rewritten = rewrite_command("git diff");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest ."));
}
#[test]
fn rewrite_git_diff_cached_to_diff_digest() {
    // `--cached` is carried over to the digest command.
    let rewritten = rewrite_command("git diff --cached");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest --cached ."));
}
#[test]
fn rewrite_git_diff_with_path_to_diff_digest() {
    // A pathspec after `--` replaces the default `.` target.
    let rewritten = rewrite_command("git diff -- src/");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest \"src/\""));
}
#[test]
fn rewrite_git_diff_with_revision_passthrough() {
    // `git diff` with a revision argument is not rewritten.
    assert_eq!(rewrite_command("git diff HEAD~1"), None);
}
#[test]
fn rewrite_git_show_to_revision_diff_digest() {
    // `git show <rev>` maps to a revision-scoped diff digest.
    let rewritten = rewrite_command("git show HEAD~1");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift diff-digest --revision \"HEAD~1\" .")
    );
}
#[test]
fn rewrite_git_log_patch_history_to_revision_diff_digest() {
    // `git log -p -1 <rev>` is treated as a request for that revision's diff.
    let rewritten = rewrite_command("git log -p -1 HEAD~2");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift diff-digest --revision \"HEAD~2\" .")
    );
}
#[test]
fn rewrite_cat_long_agent_doc_session_to_session_digest() {
    let workdir = tempfile::tempdir().unwrap();
    let session = workdir.path().join("tsift.md");

    // Agent-doc front matter followed by 90 prompt lines.
    let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
    body.extend((0..90).map(|index| format!("❯ prompt {index}?\n")));
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {session_arg}"));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source markdown",
        shell_quote(&resolve_digest_context_path(&session)),
        shell_quote(session.to_str().unwrap())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_head_long_claude_jsonl_to_session_digest() {
    let workdir = tempfile::tempdir().unwrap();
    let session = workdir.path().join("session.jsonl");

    // 120 identical assistant-message records, newline-terminated.
    let record =
        r#"{"message":{"role":"assistant","content":[{"type":"text","text":"❯ do [#yyhd]"}]}}"#;
    let body = vec![record; 120].join("\n");
    fs::write(&session, format!("{body}\n")).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("head -n 120 {session_arg}"));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source claude-jsonl",
        shell_quote(&resolve_digest_context_path(&session)),
        shell_quote(session.to_str().unwrap())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_head_long_codex_jsonl_to_session_digest() {
    let workdir = tempfile::tempdir().unwrap();
    let session = workdir.path().join("codex.jsonl");

    // 120 identical `event_msg` records, newline-terminated.
    let record = r#"{"type":"event_msg","payload":{"type":"user_message","message":"do [#cdxlog]. spec-test-build-install-commit-push"}}"#;
    let body = vec![record; 120].join("\n");
    fs::write(&session, format!("{body}\n")).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("head -n 120 {session_arg}"));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source codex-jsonl",
        shell_quote(&resolve_digest_context_path(&session)),
        shell_quote(session.to_str().unwrap())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_small_transcript_window_passthrough() {
    let workdir = tempfile::tempdir().unwrap();
    let session = workdir.path().join("session.jsonl");
    let record = r#"{"message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}"#;
    let body = vec![record; 120].join("\n");
    fs::write(&session, format!("{body}\n")).unwrap();

    // A 20-line `tail` of the 120-line file is expected to pass through untouched.
    let session_arg = shell_quote(session.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("tail -n 20 {session_arg}")), None);
}
#[test]
fn rewrite_sed_large_agent_doc_range_to_session_digest() {
    let workdir = tempfile::tempdir().unwrap();
    let session = workdir.path().join("tsift.md");

    // Agent-doc front matter followed by 120 `### Re:` headings.
    let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
    body.extend((0..120).map(|index| format!("### Re: topic {index}\n")));
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("sed -n '1,120p' {session_arg}"));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source markdown",
        shell_quote(&resolve_digest_context_path(&session)),
        shell_quote(session.to_str().unwrap())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_cat_large_agent_doc_log_to_session_digest() {
    let workdir = tempfile::tempdir().unwrap();
    let session = workdir.path().join("tsift.log");

    // 120 identical timestamped log lines, newline-terminated.
    let entry = "[1776528398] claude_start mode=fresh_restart restart_count=1";
    let body = vec![entry; 120].join("\n");
    fs::write(&session, format!("{body}\n")).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {session_arg}"));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source agent-doc-log",
        shell_quote(&resolve_digest_context_path(&session)),
        shell_quote(session.to_str().unwrap())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_session_reads_prefer_submodule_root_for_digest_path() {
    let workspace = tempfile::tempdir().unwrap();

    // The outer directory declares `src/tsift` as a submodule.
    let gitmodules = r#"[submodule "src/tsift"]
path = src/tsift
url = https://example.com/tsift
"#;
    fs::write(workspace.path().join(".gitmodules"), gitmodules).unwrap();

    // The submodule carries a `.git` file pointing at the parent's module store.
    let submodule = workspace.path().join("src/tsift");
    fs::create_dir_all(submodule.join("tasks")).unwrap();
    fs::write(
        submodule.join(".git"),
        "gitdir: ../../.git/modules/src/tsift\n",
    )
    .unwrap();

    let session = submodule.join("tasks/plan.md");
    let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
    body.extend((0..90).map(|index| format!("❯ prompt {index}?\n")));
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {session_arg}"));

    // `--path` must name the submodule directory, not the outer workspace.
    let expected = format!(
        "tsift session-digest --path {} --input {} --source markdown",
        shell_quote(submodule.to_str().unwrap()),
        shell_quote(session.to_str().unwrap())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_regular_markdown_read_passthrough() {
    let workdir = tempfile::tempdir().unwrap();
    let readme = workdir.path().join("README.md");
    let body = vec!["plain markdown"; 120].join("\n");
    fs::write(&readme, format!("{body}\n")).unwrap();

    // A markdown file without agent-doc front matter is not rewritten.
    let readme_arg = shell_quote(readme.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("cat {readme_arg}")), None);
}
#[test]
fn rewrite_cat_large_source_to_source_read_in_indexed_repo() {
    let repo = tempfile::tempdir().unwrap();
    write_empty_root_index(repo.path());
    let source = write_repeated_lines(&repo.path().join("src/lib.rs"), "fn demo() {}", 120);

    let source_arg = shell_quote(source.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {source_arg}"));

    // `cat` of the 120-line file becomes a source-read starting at line 1 for 80 lines.
    let expected = format!(
        "tsift --envelope source-read \"src/lib.rs\" --path {} --start 1 --lines 80 --budget normal",
        shell_quote(&repo.path().to_string_lossy())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_head_small_source_window_passthrough() {
    let repo = tempfile::tempdir().unwrap();
    write_empty_root_index(repo.path());
    let source = write_repeated_lines(&repo.path().join("src/lib.rs"), "fn demo() {}", 120);

    // A 20-line `head` is expected to pass through even though the repo is indexed.
    let source_arg = shell_quote(source.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("head -n 20 {source_arg}")), None);
}
#[test]
fn rewrite_sed_large_source_range_to_source_read() {
    let repo = tempfile::tempdir().unwrap();
    write_empty_root_index(repo.path());
    let source = write_repeated_lines(&repo.path().join("src/lib.rs"), "fn demo() {}", 200);

    let source_arg = shell_quote(source.to_str().unwrap());
    let rewritten = rewrite_command(&format!("sed -n '40,160p' {source_arg}"));

    // The inclusive range 40..=160 becomes start 40 with 121 lines.
    let expected = format!(
        "tsift --envelope source-read \"src/lib.rs\" --path {} --start 40 --lines 121 --budget normal",
        shell_quote(&repo.path().to_string_lossy())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_tail_large_source_window_preserves_tail_anchor() {
    let repo = tempfile::tempdir().unwrap();
    write_empty_root_index(repo.path());
    let source = write_repeated_lines(&repo.path().join("src/lib.rs"), "fn demo() {}", 200);

    let source_arg = shell_quote(source.to_str().unwrap());
    let rewritten = rewrite_command(&format!("tail -n 120 {source_arg}"));

    // The last 120 of 200 lines begin at line 81.
    let expected = format!(
        "tsift --envelope source-read \"src/lib.rs\" --path {} --start 81 --lines 120 --budget normal",
        shell_quote(&repo.path().to_string_lossy())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
#[test]
fn rewrite_large_non_source_read_passthrough_even_when_indexed() {
    let repo = tempfile::tempdir().unwrap();
    write_empty_root_index(repo.path());
    let notes = write_repeated_lines(&repo.path().join("notes.txt"), "plain text", 120);

    // A large `.txt` file is not rewritten, index or not.
    let notes_arg = shell_quote(notes.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("cat {notes_arg}")), None);
}
#[test]
fn rewrite_large_source_read_passthrough_without_index() {
    // No `write_empty_root_index` call here: the directory has no index.
    let repo = tempfile::tempdir().unwrap();
    let source = write_repeated_lines(&repo.path().join("src/lib.rs"), "fn demo() {}", 120);

    let source_arg = shell_quote(source.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("cat {source_arg}")), None);
}
#[test]
fn rewrite_cargo_test_to_digest_runner() {
    // `cargo test` is wrapped in a `test`-kind digest runner tagged with the cargo runner.
    let rewritten = rewrite_command("cargo test --lib");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"cargo test --lib\" --runner \"cargo\"")
    );
}
#[test]
fn rewrite_pytest_to_digest_runner() {
    // A direct `pytest` call is wrapped with the pytest runner tag.
    let rewritten = rewrite_command("pytest -q tests/test_cli.py");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"pytest -q tests/test_cli.py\" --runner \"pytest\"")
    );
}
#[test]
fn rewrite_python_m_pytest_to_digest_runner() {
    // `python -m pytest` is recognised as pytest too.
    let rewritten = rewrite_command("python -m pytest tests/test_cli.py");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"python -m pytest tests/test_cli.py\" --runner \"pytest\"")
    );
}
#[test]
fn rewrite_cargo_build_to_log_digest_runner() {
    // Builds use the `log` kind and carry no `--runner` flag.
    let rewritten = rewrite_command("cargo build --release");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\"")
    );
}
#[test]
fn rewrite_cargo_install_to_log_digest_runner() {
    // `cargo install` is handled the same way as `cargo build`.
    let rewritten = rewrite_command("cargo install --path . --force");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo install --path . --force\"")
    );
}
#[test]
fn rewrite_metacharacter_command_passthrough() {
    // A pipe in the command line suppresses the digest-runner rewrite.
    assert_eq!(rewrite_command("cargo test | head"), None);
}
#[test]
fn rewrite_output_cap_detects_search_even_with_global_flag() {
    // A global flag placed before the `search` subcommand must not hide it.
    let search_cap = rewrite_output_cap("tsift --compact search foo").expect("cap");
    assert_eq!(search_cap.max_lines, 50);
    assert_eq!(search_cap.strip_prefix, Some("Strategy:"));
}
#[test]
fn rewrite_output_cap_skips_structured_output() {
    // `--json`, `--schema` and `--envelope` invocations must never be capped.
    let json_cap = rewrite_output_cap("tsift search foo --json");
    assert!(json_cap.is_none());
    let schema_cap = rewrite_output_cap("tsift --schema graph foo");
    assert!(schema_cap.is_none());
    let envelope_cap = rewrite_output_cap("tsift --envelope search foo");
    assert!(envelope_cap.is_none());
}
#[test]
fn rewrite_output_format_forwards_envelope_to_digest_runner() {
    let rewritten = rewrite_command("cargo test --lib").expect("rewrite");

    // JSON plus envelope requested; every other format flag is off.
    let requested = OutputFormat {
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        ultra_terse: false,
        schema: false,
        envelope: true,
    };
    let forwarded = apply_rewrite_output_format(&rewritten, requested);

    // The command still carries exactly one `--envelope` and no other format flags.
    assert_eq!(
        forwarded,
        "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"cargo test --lib\" --runner \"cargo\""
    );
}
#[test]
fn rewrite_output_format_forwards_json_when_requested() {
    let rewritten = rewrite_command("cargo build --release").expect("rewrite");

    // JSON plus pretty requested; the envelope flag itself is off.
    let requested = OutputFormat {
        json_output: true,
        compact: false,
        pretty: true,
        terse: false,
        ultra_terse: false,
        schema: false,
        envelope: false,
    };
    let forwarded = apply_rewrite_output_format(&rewritten, requested);

    // `--pretty` is prepended ahead of the `--envelope` already in the rewrite.
    assert_eq!(
        forwarded,
        "tsift --pretty --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\""
    );
}
#[test]
fn output_cap_strips_search_header_and_truncates() {
    // Input: a `Strategy:` header, a blank line, then three payload lines.
    let raw_output = b"Strategy: exact | Indexed: 0 | Skipped: 0\n\nline1\nline2\nline3\n";
    let two_line_cap = OutputCap {
        max_lines: 2,
        strip_prefix: Some("Strategy:"),
    };
    let capped = apply_output_cap(raw_output, two_line_cap);

    // Header and blank line are gone, two lines kept, the third summarised.
    assert_eq!(
        capped,
        "line1\nline2\n... (+1 more lines; rerun the underlying tsift command directly for the full output)\n"
    );
}
#[test]
fn sql_schema_overview_lists_tables() {
    let (_db_file, connection) = setup_test_db();
    let overview = schema_overview(&connection).unwrap();

    // Both fixture tables are listed, `posts` before `users`.
    let names: Vec<&str> = overview.iter().map(|table| table.name.as_str()).collect();
    assert_eq!(names, &["posts", "users"]);
}
#[test]
fn sql_schema_overview_row_counts() {
    let (_db_file, connection) = setup_test_db();
    let overview = schema_overview(&connection).unwrap();

    // Row counts must match the INSERT statements in the fixture.
    let users_table = overview.iter().find(|table| table.name == "users").unwrap();
    assert_eq!(users_table.row_count, 2);
    let posts_table = overview.iter().find(|table| table.name == "posts").unwrap();
    assert_eq!(posts_table.row_count, 3);
}
#[test]
fn sql_table_columns_metadata() {
    let (_db_file, connection) = setup_test_db();
    let columns = table_columns(&connection, "users").unwrap();
    assert_eq!(columns.len(), 3);

    // `id` is the primary key.
    let id_col = &columns[0];
    assert_eq!(id_col.name, "id");
    assert!(id_col.pk);

    // `name` is declared NOT NULL.
    let name_col = &columns[1];
    assert_eq!(name_col.name, "name");
    assert!(name_col.notnull);

    // `email` is nullable.
    let email_col = &columns[2];
    assert_eq!(email_col.name, "email");
    assert!(!email_col.notnull);
}
#[test]
fn sql_execute_query_returns_rows() {
    let (_db_file, connection) = setup_test_db();
    let query = "SELECT name, email FROM users ORDER BY id";
    let (header, records) = execute_query(&connection, query).unwrap();

    assert_eq!(header, &["name", "email"]);
    assert_eq!(records.len(), 2);
    // Alice has an email; Bob's NULL email must come back as JSON null.
    assert_eq!(records[0][0], serde_json::json!("Alice"));
    assert_eq!(records[0][1], serde_json::json!("alice@example.com"));
    assert_eq!(records[1][1], serde_json::Value::Null);
}
#[test]
fn sql_execute_query_aggregate() {
    let (_db_file, connection) = setup_test_db();
    let query = "SELECT COUNT(*) as cnt FROM posts";
    let (header, records) = execute_query(&connection, query).unwrap();

    // The alias names the column and the count matches the three fixture posts.
    assert_eq!(header, &["cnt"]);
    assert_eq!(records[0][0], serde_json::json!(3));
}
#[test]
fn sql_execute_query_join() {
    let (_db_file, connection) = setup_test_db();
    let query =
        "SELECT u.name, p.title FROM users u JOIN posts p ON u.id = p.user_id ORDER BY p.id";
    let (_header, records) = execute_query(&connection, query).unwrap();

    // Three posts in id order: the first is Alice's, the last is Bob's.
    assert_eq!(records.len(), 3);
    assert_eq!(records[0][0], serde_json::json!("Alice"));
    assert_eq!(records[2][0], serde_json::json!("Bob"));
}
#[test]
fn sql_open_db_read_only() {
    // Close the fixture's own connection before reopening through `open_db`.
    let (db_file, writer) = setup_test_db();
    drop(writer);

    let reader = open_db(db_file.path()).unwrap();
    let insert_attempt = reader.execute("INSERT INTO users VALUES (99, 'Fail', NULL)", []);
    assert!(
        insert_attempt.is_err(),
        "read-only connection should reject writes"
    );
}
#[test]
fn sql_empty_table_schema() {
    let db_file = tempfile::NamedTempFile::new().unwrap();
    let connection = Connection::open(db_file.path()).unwrap();
    connection
        .execute_batch("CREATE TABLE empty_tbl (id INTEGER PRIMARY KEY, data BLOB)")
        .unwrap();

    // A table with no rows still reports its two columns and a zero row count.
    let overview = schema_overview(&connection).unwrap();
    let only_table = &overview[0];
    assert_eq!(only_table.row_count, 0);
    assert_eq!(only_table.columns.len(), 2);
}
// --- graph command ---
/// Creates a temp project containing a single `main.rs` (a `helper` function
/// plus a `main` that calls it) and applies it to an index at `.tsift/index.db`.
///
/// Returns the temp directory; dropping it removes the project.
fn setup_graph_index() -> tempfile::TempDir {
    let project = tempfile::tempdir().unwrap();
    let main_source = "fn helper() { println!(\"hi\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(project.path().join("main.rs"), main_source).unwrap();

    let index_path = project.path().join(".tsift/index.db");
    let index_db = index::IndexDb::open(&index_path).unwrap();
    index_db.apply_changes(project.path()).unwrap();
    project
}
/// Extends the project from `setup_graph_index` with an agent-doc session file
/// at `tasks/software/tsift.md` that references backlog item `#kgnv` and the
/// file `main.rs`.
fn setup_traversal_project() -> tempfile::TempDir {
    let project = setup_graph_index();
    let tasks = project.path().join("tasks/software");
    std::fs::create_dir_all(&tasks).unwrap();

    let session_doc = r#"---
agent_doc_session: tsift-v0.1
agent_doc_format: template
---
## Exchange
<!-- agent:exchange patch=append -->
❯ do [#kgnv]
Completed `#kgnv`; touched files `main.rs`; tests `cargo test traversal_graph`; follow-up `#gfix`.
<!-- /agent:exchange -->
<!-- agent:queue -->
dispatch #spec-test-build-install-commit-push
- do [#kgnv]
<!-- /agent:queue -->
## Backlog
<!-- agent:backlog -->
- [ ] [#kgnv] Fix helper traversal handles while preserving graph navigation.
<!-- /agent:backlog -->
"#;
    std::fs::write(tasks.join("tsift.md"), session_doc).unwrap();
    project
}
/// Finds the first `ast_span` node in `graph` whose label equals `label` and
/// whose `symbol_kind` property equals `symbol_kind`.
///
/// # Panics
///
/// Panics with `missing ast_span {symbol_kind} {label}` when no node matches.
fn resolve_ast_span_node<'a>(
    graph: &'a TraversalGraphBuild,
    label: &str,
    symbol_kind: &str,
) -> &'a TraversalNode {
    graph
        .nodes
        .values()
        .find(|node| {
            // Compare the property as `&str` so no `String` is allocated for
            // every candidate node.
            node.kind == "ast_span"
                && node.label == label
                && node.properties.get("symbol_kind").map(String::as_str) == Some(symbol_kind)
        })
        .unwrap_or_else(|| panic!("missing ast_span {symbol_kind} {label}"))
}
/// Creates a temp project with one small fixture file per language (Rust,
/// Python, TypeScript, JavaScript, Kotlin, Zig, Bash, Markdown), then applies
/// it to an index at `.tsift/index.db`.
///
/// Each code fixture defines an "entry" function that calls a "helper"; the
/// Markdown fixture has headings, a list, and an embedded Python code fence.
fn setup_multilingual_ast_navigation_project() -> tempfile::TempDir {
    let project = tempfile::tempdir().unwrap();

    // (file name, contents) pairs, written in this order.
    let fixtures: [(&str, &str); 8] = [
        (
            "rust.rs",
            r#"mod fixture_nav_rust_mod {
pub fn fixture_nav_rust_helper() {}
pub fn fixture_nav_rust_entry() {
fixture_nav_rust_helper();
}
}
"#,
        ),
        (
            "python.py",
            r#"def fixture_nav_python_helper():
return 1
def fixture_nav_python_entry():
return fixture_nav_python_helper()
"#,
        ),
        (
            "typescript.ts",
            r#"export function fixture_nav_typescript_entry(): number {
return fixtureNavTsHelper();
}
function fixtureNavTsHelper(): number {
return 1;
}
"#,
        ),
        (
            "javascript.js",
            r#"function fixture_nav_javascript_entry() {
return fixtureNavJsHelper();
}
function fixtureNavJsHelper() {
return 1;
}
"#,
        ),
        (
            "kotlin.kt",
            r#"fun fixture_nav_kotlin_entry(): Int {
return fixtureNavKotlinHelper()
}
fun fixtureNavKotlinHelper(): Int = 1
"#,
        ),
        (
            "zig.zig",
            r#"pub fn fixture_nav_zig_entry() i32 {
return fixtureNavZigHelper();
}
fn fixtureNavZigHelper() i32 {
return 1;
}
"#,
        ),
        (
            "bash.sh",
            r#"#!/usr/bin/env bash
fixture_nav_bash_entry() {
fixture_nav_bash_helper
}
fixture_nav_bash_helper() {
echo ok
}
alias fixture_nav_bash_alias='echo alias'
"#,
        ),
        (
            "README.md",
            r#"# Fixture Guide
## Fixture Section
- Fixture step
- Nested fixture step
```python
def fixture_nav_markdown_embedded():
return 1
```
"#,
        ),
    ];
    for (file_name, contents) in fixtures {
        std::fs::write(project.path().join(file_name), contents).unwrap();
    }

    let index_path = project.path().join(".tsift/index.db");
    let index_db = index::IndexDb::open(&index_path).unwrap();
    index_db.apply_changes(project.path()).unwrap();
    project
}
/// Asserts that `command`, once split with `shell_split`, is accepted by
/// `try_parse_cli`.
///
/// # Panics
///
/// Panics (naming the offending command) when parsing fails.
fn assert_cli_expand_command_parses(command: &str) {
    let mut args = Vec::new();
    for token in shell_split(command) {
        args.push(token.to_string());
    }
    let parsed = try_parse_cli(args);
    assert!(
        parsed.is_ok(),
        "expand command should parse as a tsift CLI command: {command}"
    );
}
/// Builds a temporary Cargo workspace with two member crates (`core-lib` and
/// `cli-app`, the latter depending on the former), indexes it, adds a task
/// document with a single `#corepkg` backlog item, and initialises a git
/// repository in the directory.
fn setup_multiplicity_project() -> tempfile::TempDir {
    let workspace_manifest = r#"[workspace]
members = ["crates/core-lib", "crates/cli-app"]
"#;
    let core_manifest = r#"[package]
name = "core-lib"
[lib]
name = "core_lib"
[features]
default = []
"#;
    let cli_manifest = r#"[package]
name = "cli-app"
[[bin]]
name = "cli-app"
[dependencies]
core-lib = { path = "../core-lib" }
"#;
    let task_doc = r#"---
agent_doc_session: tsift-multiplicity
agent_doc_format: template
---
## Backlog
<!-- agent:backlog -->
- [ ] [#corepkg] Update the core-lib Cargo package ownership model.
<!-- /agent:backlog -->
"#;

    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();

    // Workspace root manifest.
    std::fs::write(root.join("Cargo.toml"), workspace_manifest).unwrap();

    // Library crate.
    std::fs::create_dir_all(root.join("crates/core-lib/src")).unwrap();
    std::fs::write(root.join("crates/core-lib/Cargo.toml"), core_manifest).unwrap();
    std::fs::write(
        root.join("crates/core-lib/src/lib.rs"),
        "pub fn run() {}\n",
    )
    .unwrap();

    // Binary crate that uses the library.
    std::fs::create_dir_all(root.join("crates/cli-app/src")).unwrap();
    std::fs::write(root.join("crates/cli-app/Cargo.toml"), cli_manifest).unwrap();
    std::fs::write(
        root.join("crates/cli-app/src/main.rs"),
        "use core_lib::run;\nfn main() { run(); }\n",
    )
    .unwrap();

    // The index is built before the task document exists.
    let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
    db.apply_changes(root).unwrap();

    let task_dir = root.join("tasks/software");
    std::fs::create_dir_all(&task_dir).unwrap();
    std::fs::write(task_dir.join("tsift.md"), task_doc).unwrap();

    init_git_repo(root);
    dir
}
/// Builds a temporary single-file Rust project (`main.rs` plus a minimal
/// `Cargo.toml`), indexes it, and then writes a task document whose exchange
/// and backlog sections reference `#prep`, `#alpha`, `#beta` and `#gamma`.
fn setup_dependency_dag_project() -> tempfile::TempDir {
    let task_doc = r#"---
agent_doc_session: tsift-dag
agent_doc_format: template
---
## Exchange
<!-- agent:exchange patch=append -->
Completed `#alpha`; touched files `main.rs`; tests `cargo test dependency_dag`; follow-up `#gamma`.
<!-- /agent:exchange -->
## Backlog
<!-- agent:backlog -->
- [ ] [#prep] Prepare Cargo.toml configuration before shared helper work.
- [ ] [#alpha] Update shared_helper in main.rs after #prep.
- [ ] [#beta] Refactor shared_helper tests in main.rs.
- [ ] [#gamma] Follow-up review for graph navigation.
<!-- /agent:backlog -->
"#;

    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();

    std::fs::write(
        root.join("main.rs"),
        "fn shared_helper() {}\nfn main() { shared_helper(); }\n",
    )
    .unwrap();
    std::fs::write(
        root.join("Cargo.toml"),
        "[package]\nname = \"dag-fixture\"\n",
    )
    .unwrap();

    // Index the sources before the task document is added.
    let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
    db.apply_changes(root).unwrap();

    let task_dir = root.join("tasks/software");
    std::fs::create_dir_all(&task_dir).unwrap();
    std::fs::write(task_dir.join("tsift.md"), task_doc).unwrap();
    dir
}
/// Extends the `setup_graph_index` fixture with a task document whose two
/// backlog items (`#left` and `#right`) each mention the other.
fn setup_dependency_dag_cycle_project() -> tempfile::TempDir {
    let cyclic_task_doc = r#"---
agent_doc_session: tsift-dag-cycle
agent_doc_format: template
---
## Backlog
<!-- agent:backlog -->
- [ ] [#left] Left side depends on #right.
- [ ] [#right] Right side depends on #left.
<!-- /agent:backlog -->
"#;

    let dir = setup_graph_index();
    let task_dir = dir.path().join("tasks/software");
    std::fs::create_dir_all(&task_dir).unwrap();
    std::fs::write(task_dir.join("tsift.md"), cyclic_task_doc).unwrap();
    dir
}
/// Inserts one summary row for the `helper` symbol of `main.rs` into
/// `<dir>/.tsift/summaries.db`, carrying two entities, one `uses`
/// relationship and two concept labels.
fn seed_traversal_semantic_summaries(dir: &Path) {
    let summary_db = summarize::SummaryDb::open(&dir.join(".tsift/summaries.db")).unwrap();

    let entities = vec![
        summarize::Entity {
            name: "helper".to_string(),
            kind: "function".to_string(),
            description: "Builds graph navigation handles.".to_string(),
        },
        summarize::Entity {
            name: "TraversalGraph".to_string(),
            kind: "type".to_string(),
            description: "Carries GraphStore-backed traversal rows.".to_string(),
        },
    ];
    let relationships = vec![summarize::Relationship {
        from: "helper".to_string(),
        to: "TraversalGraph".to_string(),
        kind: "uses".to_string(),
    }];
    let concept_labels = vec![
        "graph navigation".to_string(),
        "semantic extraction".to_string(),
    ];

    let helper_summary = summarize::Summary {
        id: 0,
        symbol_name: "helper".to_string(),
        file_path: "main.rs".to_string(),
        content_hash: "hash-main".to_string(),
        summary: "helper builds graph navigation handles for traversal.".to_string(),
        entities: Some(entities),
        relationships: Some(relationships),
        concept_labels: Some(concept_labels),
        extracted_at: "1700000000".to_string(),
        model: "test-model".to_string(),
        tokens_input: Some(10),
        tokens_output: Some(5),
    };
    summary_db.insert(&helper_summary).unwrap();
}
/// Seeds `<dir>/.tsift/memory.db` with three imported events that share the
/// session id `claude-session-a`: an observation, a session summary and a
/// user prompt, inserted in that order.
fn seed_tsift_memory_graph_db(dir: &Path) {
    let db = dir.join(".tsift").join("memory.db");
    let store = MemoryStore::open_or_create(&db).unwrap();
    let project = dir.to_string_lossy().to_string();

    // 1. Imported observation.
    let observation_text = [
        "Graph memory adapter",
        "read-only projection",
        "graph-db should retrieve tsift memory observations",
        "Project memory is queried from .tsift/memory.db",
        "graph memory, tsift memory, semantic query",
    ]
    .join("\n\n");
    let observation = MemoryEvent::new(
        MemoryEventKind::ImportedObservation,
        "claude-mem:observations:1",
        observation_text,
    )
    .with_session_id("claude-session-a")
    .with_observed_at_unix(1_700_000_000)
    .with_import("claude-mem", "observations:1")
    .with_metadata("project", project.clone())
    .with_metadata("observation_type", "fact")
    .with_metadata("prompt_number", "7")
    .with_metadata("discovery_tokens", "42")
    .with_metadata("content_hash", "hash-observation-1");
    store.insert_event(&observation).unwrap();

    // 2. Imported session summary.
    let summary_text = [
        "Query old memory from graph-db",
        "Read-only tsift memory SQLite projection",
        "Semantic graph rows can point at existing memory",
        "Projected source and session nodes",
        "Keep capture ownership inside tsift-memory",
        "summary note",
    ]
    .join("\n\n");
    let summary = MemoryEvent::new(
        MemoryEventKind::ImportedSessionSummary,
        "claude-mem:session_summaries:2",
        summary_text,
    )
    .with_session_id("claude-session-a")
    .with_observed_at_unix(1_700_000_010)
    .with_import("claude-mem", "session_summaries:2")
    .with_metadata("project", project)
    .with_metadata("prompt_number", "8")
    .with_metadata("discovery_tokens", "36");
    store.insert_event(&summary).unwrap();

    // 3. Imported user prompt (carries no project metadata).
    let prompt = MemoryEvent::new(
        MemoryEventKind::ImportedUserPrompt,
        "claude-mem:user_prompts:3",
        "How can graph-db query tsift memory semantic history?",
    )
    .with_session_id("claude-session-a")
    .with_observed_at_unix(1_700_000_020)
    .with_import("claude-mem", "user_prompts:3")
    .with_metadata("prompt_number", "9");
    store.insert_event(&prompt).unwrap();
}
/// `helper` in the graph-index fixture has exactly one caller, `main`.
#[test]
fn graph_callers_query() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&index_path).unwrap();

    let callers = db.callers_of("helper").unwrap();

    assert_eq!(callers.len(), 1);
    assert_eq!(callers[0].caller_name, "main");
}
/// `main` in the graph-index fixture calls both `helper` and `new`.
#[test]
fn graph_callees_query() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&index_path).unwrap();

    let callees = db.callees_of("main").unwrap();
    let calls = |name: &str| callees.iter().any(|edge| edge.callee_name.as_str() == name);

    assert!(calls("helper"));
    assert!(calls("new"));
}
/// Looking up callers of an unknown symbol succeeds with an empty result.
#[test]
fn graph_no_callers_returns_empty() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&index_path).unwrap();

    let callers = db.callers_of("nonexistent").unwrap();

    assert!(callers.is_empty());
}
/// Running `cmd_graph` on a directory with no index succeeds, and afterwards
/// the index at `.tsift/index.db` reports no pending new, modified or deleted
/// files.
#[test]
fn graph_cmd_autoindexes_missing_index_by_default() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(
        root.join("main.rs"),
        "fn helper() {}\nfn main() { helper(); }\n",
    )
    .unwrap();

    // No index has been created at this point.
    let outcome = cmd_graph(
        "helper",
        root,
        true,
        false,
        None,
        20,
        false,
        true,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());

    let index_path = root.join(".tsift/index.db");
    let db = index::IndexDb::open_read_only(&index_path).unwrap();
    let pending = db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// Handles carry a kind-specific prefix (`gfil-`, `gsym-`, `gbak-`, `gses-`)
/// and are identical across two builds of the same project.
#[test]
fn traversal_graph_has_stable_typed_handles() {
    let dir = setup_traversal_project();
    let root = dir.path();
    let first_build = build_traversal_graph(root, root, None).unwrap();
    let second_build = build_traversal_graph(root, root, None).unwrap();

    let file = resolve_traversal_node(&first_build, "main.rs").unwrap();
    let symbol = resolve_traversal_node(&first_build, "helper").unwrap();
    let backlog = resolve_traversal_node(&first_build, "#kgnv").unwrap();
    let session = resolve_traversal_node(&first_build, "tsift-v0.1").unwrap();

    // Each node kind gets its own handle prefix.
    let prefix_expectations = [
        (&file.handle, "gfil-"),
        (&symbol.handle, "gsym-"),
        (&backlog.handle, "gbak-"),
        (&session.handle, "gses-"),
    ];
    for (handle, prefix) in prefix_expectations {
        assert!(handle.starts_with(prefix));
    }

    // Rebuilding must reproduce the same handles.
    let stability_expectations = [(&symbol.handle, "helper"), (&backlog.handle, "#kgnv")];
    for (handle, query) in stability_expectations {
        let rebuilt = resolve_traversal_node(&second_build, query).unwrap();
        assert_eq!(handle, &rebuilt.handle);
    }
}
/// The `#kgnv` backlog item has a `mentions` edge to the `helper` symbol.
#[test]
fn traversal_graph_links_backlog_items_to_code_tokens() {
    let dir = setup_traversal_project();
    let root = dir.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let helper = resolve_traversal_node(&graph, "helper").unwrap();

    let backlog_mentions_helper = graph.edges.iter().any(|edge| {
        edge.relation == "mentions" && edge.from == backlog.handle && edge.to == helper.handle
    });
    assert!(backlog_mentions_helper);
}
/// A graph built with the session document as its path keeps the
/// backlog-to-symbol `mentions` edge but has no `calls` edges, while a graph
/// built with the project root as its path does have `calls` edges.
#[test]
fn session_hinted_traversal_skips_global_call_edges() {
    let dir = setup_traversal_project();
    let root = dir.path();
    let session = root.join("tasks/software/tsift.md");

    let bounded = build_traversal_graph_source(root, &session, None).unwrap();
    let backlog = resolve_traversal_node(&bounded, "#kgnv").unwrap();
    let helper = resolve_traversal_node(&bounded, "helper").unwrap();

    let backlog_mentions_helper = bounded.edges.iter().any(|edge| {
        edge.relation == "mentions" && edge.from == backlog.handle && edge.to == helper.handle
    });
    assert!(backlog_mentions_helper);

    let bounded_has_calls = bounded.edges.iter().any(|edge| edge.relation == "calls");
    assert!(
        !bounded_has_calls,
        "session-hinted graph-db projections should not materialize unrelated global call edges"
    );

    let full = build_traversal_graph_source(root, root, None).unwrap();
    let full_has_calls = full.edges.iter().any(|edge| edge.relation == "calls");
    assert!(
        full_has_calls,
        "root/full projections still carry the complete indexed call graph"
    );
}
/// A task document named `tsift.md` resolves to the `tsift` scope declared as
/// a submodule at `src/tsift`, both for index targets and for the query
/// database path.
#[test]
fn agent_doc_task_path_infers_matching_workspace_scope() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    for sub_dir in ["src/tsift", "tasks/software"] {
        std::fs::create_dir_all(root.join(sub_dir)).unwrap();
    }
    std::fs::write(
        root.join(".gitmodules"),
        "[submodule \"src/tsift\"]\n\tpath = src/tsift\n\turl = https://example.invalid/tsift.git\n",
    )
    .unwrap();
    let task = root.join("tasks/software/tsift.md");
    std::fs::write(&task, "# tsift\n").unwrap();

    let targets = resolve_search_index_targets(root, &task, None, false).unwrap();
    let query_db_path = resolve_query_db_path(root, &task, None).unwrap();
    let cfg = config::Config::load(root).unwrap();

    assert_eq!(targets.len(), 1);
    let target = &targets[0];
    assert_eq!(target.scope_name.as_deref(), Some("tsift"));
    assert_eq!(target.source_root, root.join("src/tsift"));
    assert!(target.db_path.ends_with(".tsift/indexes/tsift/index.db"));
    assert_eq!(query_db_path, cfg.db_path_for(root, "tsift"));
}
/// Selecting the scope `core_lib` resolves to the `core-lib` package rooted
/// at `crates/core-lib`, and running `cmd_index` with that selector creates
/// the package database on disk.
#[test]
fn cargo_package_scope_selector_indexes_package_db() {
    let dir = setup_multiplicity_project();
    let root = dir.path();

    let targets = resolve_search_index_targets(root, root, Some("core_lib"), false).unwrap();
    assert_eq!(targets.len(), 1);
    let target = &targets[0];
    assert_eq!(target.scope_name.as_deref(), Some("core-lib"));
    assert_eq!(target.source_root, root.join("crates/core-lib"));
    assert!(
        target
            .db_path
            .ends_with(".tsift/indexes/cargo/core-lib/index.db")
    );

    let index_outcome = cmd_index(
        root,
        false,
        false,
        false,
        false,
        true,
        false,
        Some("core_lib"),
        false,
        true,
        false,
        false,
        false,
        false,
    );
    index_outcome.unwrap();

    assert!(target.db_path.exists());
}
/// A source path inside `crates/cli-app` resolves to the `cli-app` package
/// scope when no scope selector is given.
#[test]
fn path_inference_prefers_nested_cargo_package_without_submodule() {
    let dir = setup_multiplicity_project();
    let root = dir.path();
    let source = root.join("crates/cli-app/src/main.rs");

    let targets = resolve_search_index_targets(root, &source, None, false).unwrap();

    assert_eq!(targets.len(), 1);
    let target = &targets[0];
    assert_eq!(target.scope_name.as_deref(), Some("cli-app"));
    assert_eq!(target.source_root, root.join("crates/cli-app"));
}
/// The traversal graph of the multiplicity fixture contains workspace and
/// package nodes, and edges for package containment, file ownership and the
/// `cli-app` -> `core-lib` dependency.
#[test]
fn traversal_graph_projects_cargo_multiplicity_nodes_and_edges() {
    let dir = setup_multiplicity_project();
    let root = dir.path();
    let graph = build_traversal_graph(root, root, None).unwrap();

    let workspace = resolve_traversal_node(&graph, "root cargo workspace").unwrap();
    let core = resolve_traversal_node(&graph, "core-lib").unwrap();
    let cli = resolve_traversal_node(&graph, "cli-app").unwrap();
    let core_file = resolve_traversal_node(&graph, "crates/core-lib/src/lib.rs").unwrap();

    assert_eq!(workspace.kind, "cargo_workspace");
    assert_eq!(core.kind, "cargo_package");
    assert_eq!(
        core.properties.get("features").map(String::as_str),
        Some("default")
    );

    let has_edge = |from: &str, to: &str, relation: &str| {
        graph
            .edges
            .iter()
            .any(|edge| edge.from == from && edge.to == to && edge.relation == relation)
    };
    assert!(has_edge(
        workspace.handle.as_str(),
        core.handle.as_str(),
        "contains_package"
    ));
    assert!(has_edge(
        core.handle.as_str(),
        core_file.handle.as_str(),
        "owns_file"
    ));
    // Either relation name is accepted for the dependency edge.
    assert!(
        has_edge(
            cli.handle.as_str(),
            core.handle.as_str(),
            "declares_dependency"
        ) || has_edge(cli.handle.as_str(), core.handle.as_str(), "uses_crate")
    );
}
/// The conflict-matrix report for `corepkg` has no fail-closed targets, and
/// its candidate lists `crates/core-lib/Cargo.toml` among the owned files.
#[test]
fn conflict_matrix_uses_cargo_package_mentions_as_ownership_evidence() {
    let dir = setup_multiplicity_project();
    let session = dir.path().join("tasks/software/tsift.md");
    let requested_targets = ["corepkg".to_string()];

    let report =
        build_conflict_matrix_report(&session, None, &requested_targets, 3, 8, 20).unwrap();
    assert!(report.per_target_fail_closed.is_empty());

    let candidate = report
        .candidates
        .iter()
        .find(|candidate| candidate.target == "corepkg")
        .unwrap();
    let owns_core_manifest = candidate
        .owned_files
        .iter()
        .any(|file| file == "crates/core-lib/Cargo.toml");
    assert!(owns_core_manifest, "{:?}", candidate.owned_files);
}
/// The queued `do #kgnv` entry becomes a `job_packet` node with a `gjob-`
/// handle that `targets` the `#kgnv` backlog node, and the job packet is
/// also present in the SQLite graph store.
#[test]
fn traversal_graph_links_agent_doc_queue_job_packets_to_backlog() {
    let dir = setup_traversal_project();
    let root = dir.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let job = resolve_traversal_node(&graph, "do #kgnv").unwrap();
    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();

    assert_eq!(job.kind, "job_packet");
    assert!(job.handle.starts_with("gjob-"));
    let job_targets_backlog = graph.edges.iter().any(|edge| {
        edge.relation == "targets" && edge.from == job.handle && edge.to == backlog.handle
    });
    assert!(job_targets_backlog);

    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();
    let jobs = store.nodes_by_kind("job_packet").unwrap();
    let stored_job_found = jobs
        .iter()
        .any(|node| node.properties.get("ref_id").map(String::as_str) == Some("kgnv"));
    assert!(
        stored_job_found,
        "expected queued job packet in graph store, got {jobs:?}"
    );
}
/// A decorated Python handler yields a `route` node for `/items` with a
/// `handled_by` edge to the `list_items` symbol.
#[test]
fn traversal_graph_includes_routes_and_handler_edges() {
    let api_source = r#"@router.get("/items")
def list_items():
return []
"#;
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("api.py"), api_source).unwrap();
    let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
    db.apply_changes(root).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let route = resolve_traversal_node(&graph, "/items").unwrap();
    let handler = resolve_traversal_node(&graph, "list_items").unwrap();

    assert_eq!(route.kind, "route");
    let route_is_handled = graph.edges.iter().any(|edge| {
        edge.relation == "handled_by" && edge.from == route.handle && edge.to == handler.handle
    });
    assert!(route_is_handled);
}
/// For a Rust module with two sibling functions, the graph exposes
/// containment, parent/child, sibling, enclosing-module and call edges
/// between the AST span nodes, and the span node plus its `parent` edge are
/// persisted in the SQLite graph store.
#[test]
fn traversal_graph_projects_rust_ast_navigation_edges() {
    let main_source = r#"mod api {
pub fn helper() {}
pub fn handler() { helper(); }
}
fn main() { api::handler(); }
"#;
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("main.rs"), main_source).unwrap();
    let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
    db.apply_changes(root).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let api = resolve_ast_span_node(&graph, "api", "mod");
    let helper = resolve_ast_span_node(&graph, "helper", "function");
    let handler = resolve_ast_span_node(&graph, "handler", "function");

    assert_eq!(helper.kind, "ast_span");
    assert!(helper.handle.starts_with("span-"));
    assert_eq!(
        helper.properties.get("language").map(String::as_str),
        Some("rust")
    );

    // (from, to, relation) triples that must all be present, checked in order.
    let expected_edges = [
        (api, helper, "contains"),
        (api, helper, "child"),
        (helper, api, "parent"),
        (helper, handler, "next_sibling"),
        (handler, helper, "previous_sibling"),
        (helper, api, "enclosing_module"),
        (handler, helper, "calls"),
    ];
    for (from, to, relation) in expected_edges {
        let present = graph.edges.iter().any(|edge| {
            edge.from == from.handle && edge.to == to.handle && edge.relation == relation
        });
        assert!(present);
    }

    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();
    let ast_nodes = store.nodes_by_kind("ast_span").unwrap();
    let helper_persisted = ast_nodes.iter().any(|node| {
        node.id == helper.handle
            && node.properties.get("symbol_kind").map(String::as_str) == Some("function")
    });
    assert!(
        helper_persisted,
        "expected helper AST span in graph store, got {ast_nodes:?}"
    );

    let parent_edges = store
        .outgoing_edges(&helper.handle, Some("parent"))
        .unwrap();
    assert!(
        parent_edges.iter().any(|edge| edge.to_id == api.handle),
        "expected persisted AST parent edge"
    );
}
/// For a Markdown file with a heading, a list and a fenced Rust block, the
/// graph links the heading to its blocks, the fence to its embedded symbol,
/// and records the matching properties; two of those edges are also checked
/// in the persisted SQLite graph store.
#[test]
fn traversal_graph_projects_markdown_section_block_edges() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(
        root.join("README.md"),
        "# Guide\n\n- Setup\n- Verify\n\n```rust\nfn demo() {}\n```\n",
    )
    .unwrap();
    let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
    db.apply_changes(root).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let guide = resolve_ast_span_node(&graph, "Guide", "heading");
    let code = resolve_ast_span_node(&graph, "rust", "code_block");
    let embedded = resolve_ast_span_node(&graph, "demo", "function");

    // First list item that records the Guide heading as its section.
    let list_item = graph
        .nodes
        .values()
        .find(|node| {
            let is_list_item =
                node.properties.get("symbol_kind").map(String::as_str) == Some("list_item");
            let in_guide_section = node.properties.get("section_handle") == Some(&guide.handle);
            node.kind == "ast_span" && is_list_item && in_guide_section
        })
        .expect("missing Markdown list item AST span");

    assert_eq!(
        code.properties
            .get("markdown_block_kind")
            .map(String::as_str),
        Some("fenced_code_block")
    );
    assert_eq!(
        guide.properties.get("heading_level").map(String::as_str),
        Some("1")
    );
    assert_eq!(
        embedded.properties.get("embedded").map(String::as_str),
        Some("true")
    );
    assert_eq!(
        embedded.properties.get("language").map(String::as_str),
        Some("rust")
    );
    assert_eq!(
        embedded.properties.get("markdown_block_handle"),
        Some(&code.handle)
    );

    // (from, to, relation) triples that must all be present, checked in order.
    let expected_edges = [
        (guide, code, "contains_markdown_block"),
        (code, guide, "enclosing_section"),
        (guide, list_item, "contains_markdown_block"),
        (code, embedded, "contains_embedded_symbol"),
        (embedded, code, "embedded_in_fence"),
        (guide, embedded, "contains_embedded_code"),
    ];
    for (from, to, relation) in expected_edges {
        let present = graph.edges.iter().any(|edge| {
            edge.from == from.handle && edge.to == to.handle && edge.relation == relation
        });
        assert!(present);
    }

    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();
    let section_block_edges = store
        .outgoing_edges(&guide.handle, Some("contains_markdown_block"))
        .unwrap();
    assert!(
        section_block_edges
            .iter()
            .any(|edge| edge.to_id == code.handle),
        "expected persisted Markdown section/block edge"
    );
    let fence_symbol_edges = store
        .outgoing_edges(&code.handle, Some("contains_embedded_symbol"))
        .unwrap();
    assert!(
        fence_symbol_edges
            .iter()
            .any(|edge| edge.to_id == embedded.handle),
        "expected persisted Markdown fence/embedded symbol edge"
    );
}
// End-to-end check over the multilingual fixture project. In order, it verifies:
//   1. every expected symbol is indexed and carries start/end byte offsets;
//   2. every expected AST span node has a `span-` handle that is identical
//      across two graph builds and keeps its `language` property;
//   3. the Markdown section/fence/embedded-symbol edges exist;
//   4. node and edge counts stay within fixed upper bounds;
//   5. the relative search budget report ranks the Python entry symbol first
//      with a repeatable `srnk-` handle and expand commands that parse as CLI;
//   6. the Markdown code-block symbol exposes its embedded symbol.
#[test]
fn multilingual_ast_navigation_fixture_locks_recall_handles_expands_and_budget() {
let dir = setup_multilingual_ast_navigation_project();
let db =
index::IndexDb::open_read_only_resilient(&dir.path().join(".tsift/index.db")).unwrap();
let symbols = db.all_symbols().unwrap();
// Each tuple is (language, symbol name, symbol kind, file-name suffix).
let expected_symbols = [
("rust", "fixture_nav_rust_entry", "function", "rust.rs"),
(
"python",
"fixture_nav_python_entry",
"function",
"python.py",
),
(
"typescript",
"fixture_nav_typescript_entry",
"function",
"typescript.ts",
),
(
"javascript",
"fixture_nav_javascript_entry",
"function",
"javascript.js",
),
(
"kotlin",
"fixture_nav_kotlin_entry",
"function",
"kotlin.kt",
),
("zig", "fixture_nav_zig_entry", "function", "zig.zig"),
("bash", "fixture_nav_bash_entry", "function", "bash.sh"),
("markdown", "Fixture Section", "heading", "README.md"),
("markdown", "Fixture step", "list_item", "README.md"),
("markdown", "python", "code_block", "README.md"),
];
// Step 1: every expected symbol must be indexed with both byte offsets set.
for (language, name, kind, file) in expected_symbols {
let symbol = symbols
.iter()
.find(|symbol| {
symbol.language == language
&& symbol.name == name
&& symbol.kind == kind
&& symbol.file.ends_with(file)
})
.unwrap_or_else(|| panic!("missing indexed {language} {kind} {name}"));
assert!(
symbol.start_byte.is_some() && symbol.end_byte.is_some(),
"{language} {name} should carry AST byte spans"
);
}
// Step 2: build the graph twice so handles can be compared across builds.
let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
let graph_again = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
// Each tuple is (node label, symbol kind, expected `language` property).
// The last entry is the function embedded in the README's Python fence.
let expected_ast_nodes = [
("fixture_nav_rust_entry", "function", "rust"),
("fixture_nav_python_entry", "function", "python"),
("fixture_nav_typescript_entry", "function", "typescript"),
("fixture_nav_javascript_entry", "function", "javascript"),
("fixture_nav_kotlin_entry", "function", "kotlin"),
("fixture_nav_zig_entry", "function", "zig"),
("fixture_nav_bash_entry", "function", "bash"),
("Fixture Section", "heading", "markdown"),
("Fixture step", "list_item", "markdown"),
("python", "code_block", "markdown"),
("fixture_nav_markdown_embedded", "function", "python"),
];
for (name, kind, language) in expected_ast_nodes {
let node = resolve_ast_span_node(&graph, name, kind);
let repeated = resolve_ast_span_node(&graph_again, name, kind);
assert!(
node.handle.starts_with("span-"),
"{name} handle: {}",
node.handle
);
assert_eq!(
node.handle, repeated.handle,
"{language} {name} handle drifted"
);
assert_eq!(
node.properties.get("language"),
Some(&language.to_string()),
"{name} should keep its language label"
);
}
// Step 3: Markdown heading -> fence -> embedded symbol edges.
let markdown_section = resolve_ast_span_node(&graph, "Fixture Section", "heading");
let markdown_code = resolve_ast_span_node(&graph, "python", "code_block");
let embedded = resolve_ast_span_node(&graph, "fixture_nav_markdown_embedded", "function");
assert!(graph.edges.iter().any(|edge| {
edge.from == markdown_section.handle
&& edge.to == markdown_code.handle
&& edge.relation == "contains_markdown_block"
}));
assert!(graph.edges.iter().any(|edge| {
edge.from == markdown_code.handle
&& edge.to == embedded.handle
&& edge.relation == "contains_embedded_symbol"
}));
// Step 4: upper bounds on graph size for this fixture.
assert!(
graph.nodes.len() <= 80,
"multilingual AST fixture should stay bounded, got {} nodes",
graph.nodes.len()
);
assert!(
graph.edges.len() <= 180,
"multilingual AST fixture should stay bounded, got {} edges",
graph.edges.len()
);
// Step 5: the same report is built twice from identical inputs so the
// ranked handle can be compared between the two runs.
let response = empty_search_response(dir.path(), "lexical");
let symbol_hits = db.symbol_search("fixture_nav_python_entry", 20).unwrap();
let report = build_relative_search_budget_report(
"fixture_nav_python_entry",
"lexical",
dir.path(),
&response,
&symbol_hits,
ResponseBudget::new(Some(8), Some(120)),
&SearchFacetFilters::default(),
);
let report_again = build_relative_search_budget_report(
"fixture_nav_python_entry",
"lexical",
dir.path(),
&response,
&symbol_hits,
ResponseBudget::new(Some(8), Some(120)),
&SearchFacetFilters::default(),
);
let top = report
.ranked
.first()
.expect("ranked preview should not be empty");
assert_eq!(top.source, "symbol_span");
assert_eq!(top.name.as_deref(), Some("fixture_nav_python_entry"));
assert!(top.handle.starts_with("srnk-"));
assert_eq!(top.handle, report_again.ranked[0].handle);
assert!(
top.reasons.iter().any(|reason| reason == "ast_span"),
"expected AST span ranking reason, got {:?}",
top.reasons
);
// Both preview lists must stay within 8 entries, the first value passed to
// `ResponseBudget::new` above.
assert!(report.ranked.len() <= 8);
assert!(report.symbols.len() <= 8);
let symbol = report
.symbols
.iter()
.find(|symbol| symbol.name == "fixture_nav_python_entry")
.expect("missing search preview symbol");
// Every expand command in the preview must parse as a CLI invocation.
assert_cli_expand_command_parses(&symbol.expand);
let ast = symbol
.ast
.as_ref()
.expect("search symbol should expose AST");
assert_cli_expand_command_parses(&ast.expand.source_window);
assert_cli_expand_command_parses(ast.expand.source_body.as_ref().unwrap());
assert_cli_expand_command_parses(&ast.expand.symbol_read);
// Step 6: searching for "python" should surface the Markdown code-block
// symbol together with its embedded function.
let markdown_hits = db.symbol_search("python", 20).unwrap();
let markdown_report = build_relative_search_budget_report(
"python",
"lexical",
dir.path(),
&response,
&markdown_hits,
ResponseBudget::new(Some(8), Some(120)),
&SearchFacetFilters::default(),
);
let markdown_symbol = markdown_report
.symbols
.iter()
.find(|symbol| symbol.kind == "code_block" && symbol.language == "markdown")
.expect("missing Markdown code-block symbol");
let markdown_ast = markdown_symbol
.ast
.as_ref()
.expect("Markdown code block should expose AST");
assert_cli_expand_command_parses(markdown_ast.expand.markdown_ast.as_ref().unwrap());
// The first embedded symbol of the fence is the function defined inside it.
assert_eq!(
markdown_ast
.span
.markdown
.as_ref()
.unwrap()
.embedded_symbols[0]
.name,
"fixture_nav_markdown_embedded"
);
}
/// With two outgoing edges from `origin` and the arguments `1, 2` passed to
/// `traversal_neighborhood_handles`, the `mentions` edge target is kept and
/// the `unknown` edge target is dropped, even though the latter is listed
/// first.
#[test]
fn traversal_neighborhood_handles_prioritizes_high_signal_edges_when_limited() {
    // Both edges leave "origin" with the same weight; only target and relation differ.
    let edge_from_origin = |to: &str, relation: &str| TraversalEdge {
        from: "origin".to_string(),
        to: to.to_string(),
        relation: relation.to_string(),
        label: None,
        weight: 1,
    };
    let edges = vec![
        edge_from_origin("aaa_low", "unknown"),
        edge_from_origin("zzz_high", "mentions"),
    ];

    let handles = traversal_neighborhood_handles(&edges, "origin", 1, 2);

    assert!(handles.contains("origin"));
    assert!(handles.contains("zzz_high"), "{handles:?}");
    assert!(!handles.contains("aaa_low"), "{handles:?}");
}
/// Building the traversal graph writes backlog, projection-metadata,
/// `source_handle`, `worker_context` and `worker_result` rows into the
/// SQLite graph store at `.tsift/graph.db`.
#[test]
fn traversal_materializes_provider_neutral_sqlite_graph() {
    let dir = setup_traversal_project();
    let root = dir.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    // Backlog node is stored under the same handle as in the in-memory graph.
    let backlog_nodes = store.nodes_by_kind("backlog").unwrap();
    let backlog_persisted = backlog_nodes.iter().any(|node| {
        node.id == backlog.handle
            && node.properties.get("ref_id").map(String::as_str) == Some("kgnv")
    });
    assert!(
        backlog_persisted,
        "expected materialized backlog node, got {backlog_nodes:?}"
    );

    // Projection metadata row records the current projection version.
    let projection_version = GRAPH_PROJECTION_VERSION.to_string();
    let all_nodes = store.all_nodes().unwrap();
    let has_projection_meta = all_nodes.iter().any(|node| {
        node.kind == GRAPH_PROJECTION_META_KIND
            && node.properties.get("projection_version") == Some(&projection_version)
    });
    assert!(has_projection_meta, "expected projection metadata node");

    let source_handles = store.nodes_by_kind("source_handle").unwrap();
    let has_main_source = source_handles
        .iter()
        .any(|node| node.properties.get("file").map(String::as_str) == Some("main.rs"));
    assert!(
        has_main_source,
        "expected bounded source_handle rows, got {source_handles:?}"
    );

    let worker_context = store.nodes_by_kind("worker_context").unwrap();
    let has_task_context = worker_context.iter().any(|node| {
        node.properties.get("target").map(String::as_str) == Some("tasks/software/tsift.md")
    });
    assert!(
        has_task_context,
        "expected bounded worker_context rows, got {worker_context:?}"
    );

    // Worker result must match on all four properties at once.
    let worker_results = store.nodes_by_kind("worker_result").unwrap();
    let expected_result_properties = [
        ("ref_id", "kgnv"),
        ("status", "completed"),
        ("touched_files", "main.rs"),
        ("follow_up_ids", "gfix"),
    ];
    let has_completed_result = worker_results.iter().any(|node| {
        expected_result_properties
            .iter()
            .all(|(key, value)| node.properties.get(*key).map(String::as_str) == Some(*value))
    });
    assert!(
        has_completed_result,
        "expected worker_result rows, got {worker_results:?}"
    );
}
/// With cached summaries seeded, the graph gains `semantic_concept` and
/// `semantic_entity` nodes (`gcon-` / `gent-` handles), and the SQLite store
/// holds concept embeddings, the helper -> concept edge and the `uses`
/// relationship edge between entities.
#[test]
fn traversal_projection_materializes_cached_semantic_rows() {
    let dir = setup_traversal_project();
    let root = dir.path();
    seed_traversal_semantic_summaries(root);

    let graph = build_traversal_graph(root, root, None).unwrap();
    let helper = resolve_traversal_node(&graph, "helper").unwrap();
    let concept = resolve_traversal_node(&graph, "graph navigation").unwrap();
    let entity = resolve_traversal_node(&graph, "TraversalGraph").unwrap();

    assert_eq!(concept.kind, "semantic_concept");
    assert_eq!(entity.kind, "semantic_entity");
    assert!(concept.handle.starts_with("gcon-"));
    assert!(entity.handle.starts_with("gent-"));

    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let concept_nodes = store.nodes_by_kind("semantic_concept").unwrap();
    let has_embedded_concept = concept_nodes.iter().any(|node| {
        node.label == "semantic extraction" && node.properties.contains_key("embedding")
    });
    assert!(has_embedded_concept, "expected persisted concept embeddings");

    let concept_edges = store
        .outgoing_edges(&helper.handle, Some("mentions_concept"))
        .unwrap();
    assert!(
        concept_edges
            .iter()
            .any(|edge| edge.to_id == concept.handle),
        "expected helper symbol to link to cached summary concept"
    );

    // The relationship edge starts at the entity handle derived for `helper`.
    let helper_entity_handle = semantic_entity_handle("helper", "function");
    let relation_edges = store
        .outgoing_edges(&helper_entity_handle, Some("semantic_relation"))
        .unwrap();
    let has_uses_relation = relation_edges.iter().any(|edge| {
        edge.to_id == entity.handle
            && edge.properties.get("relationship_kind").map(String::as_str) == Some("uses")
    });
    assert!(
        has_uses_relation,
        "expected LLM relationship rows projected into GraphStore"
    );
}
/// Refreshes the graph store after seeding memory data and checks that
/// tsift-memory source handles, sessions, events and semantic concepts are
/// present, linked by the expected edge kinds, and returned by both the
/// semantic-related query and the graph-db `Related` query.
#[test]
fn traversal_projection_materializes_tsift_memory_rows() {
    let dir = setup_traversal_project();
    seed_tsift_memory_graph_db(dir.path());

    // Insert two events that share one source ref and one session id.
    let memory_db = dir.path().join(".tsift").join("memory.db");
    let memory_store = MemoryStore::open_or_create(&memory_db).unwrap();
    let session_ref = "tasks/software/tsift.md";
    for summary in ["first closeout", "second closeout"] {
        let closeout = MemoryEvent::new(MemoryEventKind::ResponseSummary, session_ref, summary)
            .with_session_id(session_ref)
            .with_observed_at_unix(1_700_000_100);
        memory_store.insert_event(&closeout).unwrap();
    }

    refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
    let graph_store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();

    let native_source_count = graph_store
        .nodes_by_kind("source_handle")
        .unwrap()
        .into_iter()
        .filter(|node| {
            let provider = node.properties.get("provider").map(String::as_str);
            let source_ref = node.properties.get("source_ref").map(String::as_str);
            provider == Some("tsift-memory") && source_ref == Some("tasks/software/tsift.md")
        })
        .count();
    assert_eq!(
        native_source_count, 2,
        "same-source native memory events must get distinct source handles"
    );

    // Locate one node of each projected kind for the imported observation.
    let imported_ref = "claude-mem:observations:1";
    let source = graph_store
        .nodes_by_kind("source_handle")
        .unwrap()
        .into_iter()
        .find(|node| node.properties.get("source_ref").map(String::as_str) == Some(imported_ref))
        .expect("expected tsift-memory source handle");
    let session = graph_store
        .nodes_by_kind("memory_session")
        .unwrap()
        .into_iter()
        .find(|node| {
            node.properties.get("provider").map(String::as_str) == Some("tsift-memory")
                && node.properties.get("session_id").map(String::as_str)
                    == Some("claude-session-a")
        })
        .expect("expected tsift-memory session node");
    let event = graph_store
        .nodes_by_kind("memory_event")
        .unwrap()
        .into_iter()
        .find(|node| {
            node.properties.get("source_ref").map(String::as_str) == Some(imported_ref)
                && node.properties.get("provider").map(String::as_str) == Some("tsift-memory")
                && node.properties.get("imported_from").map(String::as_str) == Some("claude-mem")
        })
        .expect("expected tsift-memory event node");
    let concept = graph_store
        .nodes_by_kind("semantic_concept")
        .unwrap()
        .into_iter()
        .find(|node| {
            node.properties.get("provider").map(String::as_str) == Some("tsift-memory")
                && node.label.contains("Graph memory adapter")
                && node.properties.contains_key("embedding")
        })
        .expect("expected tsift-memory semantic concept");

    // Edge wiring between the four nodes.
    let session_to_sources = graph_store
        .outgoing_edges(&session.id, Some("records_memory_source"))
        .unwrap();
    assert!(
        session_to_sources.iter().any(|edge| edge.to_id == source.id),
        "expected session to link to source handle"
    );
    let session_to_events = graph_store
        .outgoing_edges(&session.id, Some("records_memory_event"))
        .unwrap();
    assert!(
        session_to_events.iter().any(|edge| edge.to_id == event.id),
        "expected session to link to memory event"
    );
    let event_to_sources = graph_store
        .outgoing_edges(&event.id, Some("projects_source"))
        .unwrap();
    assert!(
        event_to_sources.iter().any(|edge| edge.to_id == source.id),
        "expected memory event to project source handle"
    );
    let source_to_concepts = graph_store
        .outgoing_edges(&source.id, Some("mentions_concept"))
        .unwrap();
    assert!(
        source_to_concepts.iter().any(|edge| edge.to_id == concept.id),
        "expected source handle to seed semantic concept"
    );

    // The concept must be retrievable through the semantic-related query.
    let query_text = "tsift memory graph adapter";
    let related = semantic_related_report_from_store(
        dir.path(),
        None,
        query_text,
        5,
        SemanticRelatedKind::Concept,
        &graph_store,
    )
    .unwrap();
    let concept_retrieved = related
        .items
        .iter()
        .any(|item| item.handle == concept.id && item.score > 0.0);
    assert!(
        concept_retrieved,
        "expected semantic query to retrieve tsift-memory concept, got {:?}",
        related.items
    );

    // ...and through the graph-db related query, which must report readiness.
    let freshness = sqlite_graph_freshness(&graph_store, "root").unwrap();
    let related_query = GraphDbQuery::Related {
        query: query_text.to_string(),
        kind: SemanticRelatedKind::Concept,
        depth: 1,
        seed_limit: 5,
        limit: 20,
    };
    let graph_related = graph_db_report_from_store(
        dir.path(),
        None,
        "sqlite",
        related_query,
        &graph_store,
        freshness,
        Vec::new(),
    )
    .unwrap();
    let readiness_status = graph_related
        .readiness
        .as_ref()
        .map(|readiness| readiness.status.as_str());
    assert_eq!(
        readiness_status,
        Some("ready"),
        "tsift-memory semantic rows should satisfy graph-db related readiness"
    );
    let has_memory_concept = graph_related.nodes.iter().any(|node| {
        node.kind == "semantic_concept"
            && node.properties.get("provider").map(String::as_str) == Some("tsift-memory")
    });
    assert!(
        has_memory_concept,
        "expected related graph output to include tsift-memory semantic rows"
    );
}
/// After seeding semantic summaries and refreshing the graph store, a concept
/// query for "graph navigation" must report the expected embedding model and
/// return the matching concept with a score above 0.9.
#[test]
fn semantic_related_query_uses_persisted_graph_embeddings() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let related = semantic_related_report_from_store(
        root,
        None,
        "graph navigation",
        5,
        SemanticRelatedKind::Concept,
        &graph_store,
    )
    .unwrap();

    assert_eq!(related.embedding_model, SEMANTIC_EMBEDDING_MODEL);
    let nearest_concept_found = related.items.iter().any(|item| {
        item.label == "graph navigation" && item.kind == "semantic_concept" && item.score > 0.9
    });
    assert!(
        nearest_concept_found,
        "expected nearest concept match from graph embeddings, got {:?}",
        related.items
    );
}
/// Runs a graph-db `Related` query over all semantic kinds and checks the
/// retrieval mode, readiness, seed concept, expanded symbol nodes,
/// `mentions_concept` edges and output-budget diagnostics in the report.
#[test]
fn graph_db_related_query_uses_semantic_seeds_and_incident_neighborhoods() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let freshness = sqlite_graph_freshness(&graph_store, "root").unwrap();
    let related_query = GraphDbQuery::Related {
        query: "graph navigation".to_string(),
        kind: SemanticRelatedKind::All,
        depth: 1,
        seed_limit: 2,
        limit: 20,
    };
    let report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        related_query,
        &graph_store,
        freshness,
        Vec::new(),
    )
    .unwrap();

    // Retrieval metadata.
    let knowledge = report.knowledge_retrieval.as_ref().unwrap();
    assert_eq!(knowledge.mode, "semantic_seeded_neighborhood");
    assert_eq!(knowledge.seed_kind, "all");
    assert_eq!(knowledge.depth, 1);
    let readiness_status = report
        .readiness
        .as_ref()
        .map(|readiness| readiness.status.as_str());
    assert_eq!(readiness_status, Some("ready"));
    assert!(
        knowledge
            .diagnostics
            .iter()
            .any(|diagnostic| diagnostic.contains("incident"))
    );

    // Semantic seed rows.
    let seeded_concept = report.semantic_related.iter().any(|item| {
        item.label == "graph navigation" && item.kind == "semantic_concept" && item.score > 0.9
    });
    assert!(
        seeded_concept,
        "expected natural-language query to seed the graph navigation concept, got {:?}",
        report.semantic_related
    );

    // Expanded neighborhood.
    assert!(
        report
            .nodes
            .iter()
            .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation")
    );
    let recovered_helper_symbol = report
        .nodes
        .iter()
        .any(|node| node.kind == "symbol" && node.label == "helper");
    assert!(
        recovered_helper_symbol,
        "incident expansion from semantic seed should recover source symbols, got {:?}",
        report
            .nodes
            .iter()
            .map(|node| (&node.kind, &node.label))
            .collect::<Vec<_>>()
    );
    assert!(
        report
            .edges
            .iter()
            .any(|edge| edge.kind == "mentions_concept")
    );

    // Output budget diagnostics.
    let budget_mentions_ranking = report.output_budget.as_ref().is_some_and(|budget| {
        budget
            .diagnostics
            .iter()
            .any(|diagnostic| diagnostic.contains("budget ranking signals"))
    });
    assert!(
        budget_mentions_ranking,
        "expected related output budget diagnostics, got {:?}",
        report.output_budget
    );
}
/// Without seeded semantic summaries, a `Related` query must come back with a
/// blocked, fail-closed readiness gate (`summary_cache_empty`), the follow-up
/// commands, and a matching diagnostic on the retrieval report.
#[test]
fn graph_db_related_reports_summary_extract_gate_when_summary_cache_empty() {
    let project = setup_graph_index();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let freshness = sqlite_graph_freshness(&graph_store, "root").unwrap();
    let related_query = GraphDbQuery::Related {
        query: "graph navigation".to_string(),
        kind: SemanticRelatedKind::All,
        depth: 1,
        seed_limit: 2,
        limit: 20,
    };
    let report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        related_query,
        &graph_store,
        freshness,
        Vec::new(),
    )
    .unwrap();

    let readiness = report.readiness.as_ref().unwrap();
    assert_eq!(readiness.status, "blocked");
    assert_eq!(readiness.reason, "summary_cache_empty");
    assert!(readiness.fail_closed);
    let expected_commands = vec![
        "tsift summarize --extract .".to_string(),
        graph_db_refresh_command(root, None),
    ];
    assert_eq!(readiness.next_commands, expected_commands);

    let gate_in_diagnostics = report
        .knowledge_retrieval
        .as_ref()
        .unwrap()
        .diagnostics
        .iter()
        .any(|diagnostic| {
            diagnostic.contains("summary cache empty")
                && diagnostic.contains("graph-db materialized code/session rows")
        });
    assert!(
        gate_in_diagnostics,
        "expected related diagnostics to carry readiness gate, got {:?}",
        report.knowledge_retrieval.as_ref().unwrap().diagnostics
    );
}
/// Builds a seed concept with one `mentions_concept` neighbor (`zzz_high`) and
/// 24 `weak_link` neighbors (`aaa_low_*`), then checks that with a cap of three
/// nodes the `zzz_high` node is kept ahead of the lexicographically earlier
/// ones and that truncation diagnostics are reported.
#[test]
fn graph_db_semantic_seeded_neighborhood_scores_before_caps() {
    let low_signal_ids: Vec<String> = (0..24).map(|idx| format!("aaa_low_{idx:02}")).collect();

    let mut nodes = vec![
        SubstrateGraphNode::new("seed", "semantic_concept", "graph budget"),
        SubstrateGraphNode::new("zzz_high", "symbol", "high_signal"),
    ];
    nodes.extend(
        low_signal_ids
            .iter()
            .enumerate()
            .map(|(idx, id)| SubstrateGraphNode::new(id.clone(), "note", format!("low {idx}"))),
    );

    let mut edges = vec![SubstrateGraphEdge::new(
        "zzz_high",
        "seed",
        "mentions_concept",
    )];
    edges.extend(
        low_signal_ids
            .into_iter()
            .map(|id| SubstrateGraphEdge::new(id, "seed", "weak_link")),
    );

    let mut store = SqliteGraphStore::in_memory().unwrap();
    store
        .replace_projection(&GraphProjection { nodes, edges })
        .unwrap();

    let subgraph =
        graph_db_semantic_seeded_neighborhood(&store, &["seed".to_string()], 1, 3).unwrap();

    assert_eq!(subgraph.nodes.len(), 3);
    assert_eq!(subgraph.nodes[0].id, "seed");
    assert_eq!(
        subgraph.nodes[1].id, "zzz_high",
        "expected semantic mention edge to survive caps before lexicographic low-signal nodes: {:?}",
        subgraph.nodes
    );
    assert!(subgraph.truncated);

    let mentions_scan_cap = subgraph
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("per-node edge scan cap"));
    assert!(mentions_scan_cap, "{:?}", subgraph.diagnostics);
    let mentions_skipped = subgraph
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("skipped"));
    assert!(mentions_skipped, "{:?}", subgraph.diagnostics);
}
/// Builds a conflict-matrix candidate from graph-db evidence for target `kgnv`
/// and checks that semantic rows produce a positive dispatch score with
/// explanations, and that the score breaks ties in favor of the semantic
/// candidate when risk and risk score are equal.
#[test]
fn conflict_matrix_uses_semantic_rows_as_dispatch_ranking_signal() {
    let dir = setup_traversal_project();
    seed_traversal_semantic_summaries(dir.path());
    init_git_repo(dir.path());
    let session = dir.path().join("tasks/software/tsift.md");
    refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
    let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();

    // Evidence report for the target.
    let freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let evidence_input = GraphDbEvidenceInput {
        root: dir.path(),
        scope: None,
        backend: "sqlite",
        target: "kgnv",
        depth: 4,
        limit: 8,
        cursor: None,
        store: &store,
        freshness,
        warnings: Vec::new(),
    };
    let evidence = graph_db_evidence_report_from_store(evidence_input).unwrap();

    let has_semantic_rows = evidence
        .semantic_related
        .iter()
        .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation");
    assert!(
        has_semantic_rows,
        "expected semantic evidence rows, got {:?}",
        evidence
            .semantic_related
            .iter()
            .map(|node| (&node.kind, &node.label))
            .collect::<Vec<_>>()
    );
    let budget_explains_signals = evidence.output_budget.as_ref().is_some_and(|budget| {
        budget.diagnostics.iter().any(|diagnostic| {
            diagnostic.contains("semantic_match") && diagnostic.contains("source_handle_coverage")
        })
    });
    assert!(
        budget_explains_signals,
        "expected evidence output budget diagnostics, got {:?}",
        evidence.output_budget
    );

    // Remaining inputs for the candidate builder.
    let diff_options = diff_digest::DiffDigestOptions {
        cached: true,
        revision: None,
        max_parsed_files: None,
    };
    let cached_diff = diff_digest::compute(dir.path(), diff_options).unwrap();
    let impact_options = impact::ImpactOptions {
        cached: true,
        revision: None,
        scope: None,
        limit: 10,
    };
    let impact_report = impact::compute(dir.path(), impact_options).unwrap();
    let graph_nodes = store.all_nodes().unwrap();
    let graph_index = conflict_matrix_graph_index(&graph_nodes);

    let semantic_candidate = conflict_matrix_candidate_from_evidence(
        dir.path(),
        &evidence,
        &graph_index,
        &cached_diff,
        &impact_report,
    );
    assert!(semantic_candidate.semantic_dispatch_score > 0);
    let reasons_explain_ranking = semantic_candidate
        .semantic_dispatch_reasons
        .iter()
        .any(|reason| reason.contains("semantic_concept") && reason.contains("owned file"));
    assert!(
        reasons_explain_ranking,
        "expected semantic ranking explanations, got {:?}",
        semantic_candidate.semantic_dispatch_reasons
    );
    assert!(
        semantic_candidate
            .semantic_related
            .iter()
            .any(|item| item.label == "graph navigation")
    );

    // A clone stripped of semantic signal shares risk and risk score, so only
    // the dispatch score (descending) can place the semantic candidate first.
    let mut plain_candidate = semantic_candidate.clone();
    plain_candidate.target = "plain".to_string();
    plain_candidate.semantic_related.clear();
    plain_candidate.semantic_dispatch_score = 0;
    plain_candidate.semantic_dispatch_reasons.clear();

    let mut ranked = [plain_candidate, semantic_candidate];
    ranked.sort_by(|a, b| {
        a.risk
            .cmp(&b.risk)
            .then_with(|| a.risk_score.cmp(&b.risk_score))
            .then_with(|| b.semantic_dispatch_score.cmp(&a.semantic_dispatch_score))
            .then_with(|| a.target.cmp(&b.target))
    });
    assert_eq!(ranked[0].target, "kgnv");
}
/// Builds the dependency DAG report for the fixture project and checks the
/// contract version, target order, the three edge kinds, absence of cycles,
/// the first two topological batches and the replay command; finally runs the
/// CLI entry point in JSON mode for a two-target subset.
#[test]
fn dependency_dag_extracts_explicit_overlap_and_follow_up_edges() {
    let dir = setup_dependency_dag_project();
    let report = build_dependency_dag_report(dir.path(), None, &[], 4, 12).unwrap();

    assert_eq!(report.contract_version, "dependency-dag-v1");
    let expected_targets = vec![
        "prep".to_string(),
        "alpha".to_string(),
        "beta".to_string(),
        "gamma".to_string(),
    ];
    assert_eq!(report.targets, expected_targets);

    let has_edge = |from: &str, to: &str, kind: &str| {
        report
            .edges
            .iter()
            .any(|edge| edge.from == from && edge.to == to && edge.kind == kind)
    };
    assert!(has_edge("prep", "alpha", "explicit_depends_on"));
    assert!(has_edge("alpha", "gamma", "worker_result_follow_up"));
    assert!(report.edges.iter().any(|edge| {
        edge.from == "alpha"
            && edge.to == "beta"
            && edge.kind == "shared_resource"
            && edge.shared_files.contains(&"main.rs".to_string())
            && edge.shared_symbols.contains(&"shared_helper".to_string())
    }));

    assert!(
        !report.cycle_diagnostics.has_cycles,
        "{:?}",
        report.cycle_diagnostics
    );
    assert_eq!(report.topo_batches[0].targets, vec!["prep".to_string()]);
    assert_eq!(report.topo_batches[1].targets, vec!["alpha".to_string()]);
    assert!(
        report.replay_commands[0].contains("dependency-dag"),
        "{:?}",
        report.replay_commands
    );

    // The command wrapper must also succeed with JSON output.
    let session = dir.path().join("tasks/software/tsift.md");
    let selected_targets = ["alpha".to_string(), "beta".to_string()];
    let json_format = OutputFormat {
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        ultra_terse: false,
        schema: false,
        envelope: false,
    };
    cmd_dependency_dag(&session, None, &selected_targets, 4, 12, json_format).unwrap();
}
/// The cycle fixture must yield a report that flags cycles, lists `left` and
/// `right` as blocked, and includes both directed `explicit_depends_on` edges.
#[test]
fn dependency_dag_reports_cycles_from_explicit_depends_on_text() {
    let dir = setup_dependency_dag_cycle_project();
    let report = build_dependency_dag_report(dir.path(), None, &[], 4, 12).unwrap();
    let cycles = &report.cycle_diagnostics;

    assert!(cycles.has_cycles);
    assert_eq!(
        cycles.blocked_nodes,
        vec!["left".to_string(), "right".to_string()]
    );

    let has_cycle_edge = |from: &str, to: &str| {
        cycles
            .cycle_edges
            .iter()
            .any(|edge| edge.from == from && edge.to == to && edge.kind == "explicit_depends_on")
    };
    assert!(has_cycle_edge("left", "right"));
    assert!(has_cycle_edge("right", "left"));
}
/// Loads one projection into a SQLite store and an in-memory Convex store,
/// then checks that the graphs read back have equal node and edge counts and
/// that the `#kgnv` -> `helper` "mentions" edge is present in the Convex graph.
#[test]
fn traversal_projection_queries_match_sqlite_and_convex_stores() {
    let dir = setup_traversal_project();
    let root = dir.path();
    let source_graph = build_traversal_graph_source(root, root, None).unwrap();
    let projection = traversal_projection_from_graph(root, None, &source_graph).unwrap();

    let mut sqlite_store = SqliteGraphStore::in_memory().unwrap();
    sqlite_store.replace_projection(&projection).unwrap();
    let convex_store = ConvexGraphStore::new(MemoryConvexGraphClient::default());
    projection.upsert_into(&convex_store).unwrap();

    let from_sqlite = traversal_graph_from_store(root, &sqlite_store).unwrap();
    let from_convex = traversal_graph_from_store(root, &convex_store).unwrap();
    assert_eq!(from_sqlite.nodes.len(), from_convex.nodes.len());
    assert_eq!(from_sqlite.edges.len(), from_convex.edges.len());

    // The backlog node is resolved in one graph and the helper in the other.
    let backlog = resolve_traversal_node(&from_sqlite, "#kgnv").unwrap();
    let helper = resolve_traversal_node(&from_convex, "helper").unwrap();
    assert!(from_convex.edges.iter().any(|edge| {
        edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
    }));
}
/// Exercises several graph-db queries against the SQLite store of the
/// traversal fixture: `Neighborhood` (edges, ranked neighbors, ranking gate,
/// page diagnostics), paged `Edges`, single `Edge`, `Incident`, and `Schema`.
#[test]
fn graph_db_api_queries_sqlite_neighborhood_and_schema() {
    let dir = setup_traversal_project();
    let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
    let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
    let initial_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    assert_eq!(initial_freshness.status, "current");
    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let mentions_only = || Some("mentions".to_string());

    // --- Neighborhood ---
    let neighborhood_query = GraphDbQuery::Neighborhood {
        id: backlog.handle.clone(),
        depth: 1,
        edge_kind: mentions_only(),
        cursor: None,
        limit: None,
        property_filters: Vec::new(),
    };
    let report = graph_db_report_from_store(
        dir.path(),
        None,
        "sqlite",
        neighborhood_query,
        &store,
        initial_freshness,
        Vec::new(),
    )
    .unwrap();
    let has_backlog_mention = report
        .edges
        .iter()
        .any(|edge| edge.from_id == backlog.handle && edge.kind == "mentions");
    assert!(
        has_backlog_mention,
        "expected backlog mention edge, got {:?}",
        report.edges
    );
    let has_quality_ranked_neighbor = report.ranked_neighbors.iter().any(|neighbor| {
        neighbor.depth == Some(1)
            && neighbor.edge_kinds.iter().any(|kind| kind == "mentions")
            && neighbor.node_id != backlog.handle
            && neighbor.handle_coverage_pct >= 95.0
            && neighbor.duplicate_name_precision >= 0.99
    });
    assert!(
        has_quality_ranked_neighbor,
        "expected ranked neighborhood neighbors with quality scores, got {:?}",
        report.ranked_neighbors
    );
    assert!(report.ranked_neighbors.len() <= GRAPH_DB_RANKED_NEIGHBOR_CAP);

    let ranking_gate = report.neighborhood_ranking_gate.as_ref().unwrap();
    assert!(!ranking_gate.ranked_output_default);
    assert_eq!(ranking_gate.default_order, "stable_node_id");
    assert!(
        ranking_gate
            .diagnostics
            .iter()
            .any(|diagnostic| diagnostic.contains("score-capped")),
        "{ranking_gate:?}"
    );
    assert!(
        ranking_gate
            .required_metrics
            .iter()
            .any(|metric| metric == "handle_coverage_pct")
    );
    assert!(
        ranking_gate
            .required_metrics
            .iter()
            .any(|metric| metric == "duplicate_name_precision")
    );
    let plan_reported = report
        .page
        .as_ref()
        .unwrap()
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("idx_graph_edges_from_kind"));
    assert!(
        plan_reported,
        "expected SQLite neighborhood query plan diagnostics, got {:?}",
        report.page.as_ref().unwrap().diagnostics
    );

    // --- Paged edges ---
    let edges_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let edges_query = GraphDbQuery::Edges {
        edge_kind: mentions_only(),
        cursor: None,
        limit: Some(2),
        property_filters: Vec::new(),
    };
    let edges_report = graph_db_report_from_store(
        dir.path(),
        None,
        "sqlite",
        edges_query,
        &store,
        edges_freshness,
        Vec::new(),
    )
    .unwrap();
    let edge_id = edges_report
        .edges
        .first()
        .map(|edge| edge.id.clone())
        .expect("expected at least one paged mentions edge");
    assert!(edges_report.edges.iter().any(|edge| edge.id == edge_id));
    assert_eq!(
        edges_report.page.as_ref().unwrap().returned_edges,
        edges_report.edges.len()
    );

    // --- Single edge lookup ---
    let edge_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let edge_report = graph_db_report_from_store(
        dir.path(),
        None,
        "sqlite",
        GraphDbQuery::Edge {
            id: edge_id.clone(),
        },
        &store,
        edge_freshness,
        Vec::new(),
    )
    .unwrap();
    let looked_up_key = edge_report
        .edge
        .as_ref()
        .map(|e| graph_db_edge_key(&SubstrateGraphEdge::from(e)));
    assert_eq!(looked_up_key, Some(edge_id));

    // --- Incident edges ---
    let incident_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let incident_query = GraphDbQuery::Incident {
        id: backlog.handle.clone(),
        edge_kind: mentions_only(),
        cursor: None,
        limit: Some(1),
        property_filters: Vec::new(),
    };
    let incident_report = graph_db_report_from_store(
        dir.path(),
        None,
        "sqlite",
        incident_query,
        &store,
        incident_freshness,
        Vec::new(),
    )
    .unwrap();
    assert_eq!(incident_report.page.as_ref().unwrap().returned_edges, 1);
    let all_touch_backlog = incident_report
        .edges
        .iter()
        .all(|edge| edge.from_id == backlog.handle || edge.to_id == backlog.handle);
    assert!(all_touch_backlog, "{:?}", incident_report.edges);

    // --- Schema ---
    let schema_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let schema_report = graph_db_report_from_store(
        dir.path(),
        None,
        "sqlite",
        GraphDbQuery::Schema,
        &store,
        schema_freshness,
        Vec::new(),
    )
    .unwrap();
    assert!(
        schema_report
            .schema
            .unwrap()
            .operations
            .iter()
            .any(|operation| operation.command.starts_with("neighborhood"))
    );
}
/// One origin node with 32 `source_handle` neighbors carrying 600-character
/// `detail` properties must produce an output budget that selects fewer nodes
/// than it considered, records `per_kind_quota` drops, and marks the page as
/// truncated with a budget-ranking diagnostic.
#[test]
fn graph_db_neighborhood_reports_dropped_by_budget_diagnostics() {
    let source_ids: Vec<String> = (0..32).map(|idx| format!("src-{idx:02}")).collect();

    let mut nodes = vec![SubstrateGraphNode::new(
        "origin",
        "backlog",
        "#budgeted-neighborhood",
    )];
    nodes.extend(source_ids.iter().enumerate().map(|(idx, id)| {
        SubstrateGraphNode::new(id.clone(), "source_handle", format!("source {idx}"))
            .with_property("source_ref", format!("fixture:{idx}"))
            .with_property("detail", "x".repeat(600))
    }));
    let edges: Vec<_> = source_ids
        .into_iter()
        .map(|id| SubstrateGraphEdge::new("origin", id, "mentions"))
        .collect();

    let store = SqliteGraphStore::in_memory().unwrap();
    GraphProjection { nodes, edges }
        .upsert_into(&store)
        .unwrap();

    let neighborhood_query = GraphDbQuery::Neighborhood {
        id: "origin".to_string(),
        depth: 1,
        edge_kind: None,
        cursor: None,
        limit: None,
        property_filters: Vec::new(),
    };
    let report = graph_db_report_from_store(
        Path::new("."),
        None,
        "fixture",
        neighborhood_query,
        &store,
        current_graph_db_freshness(),
        Vec::new(),
    )
    .unwrap();

    let budget = report.output_budget.as_ref().unwrap();
    assert!(budget.selected_nodes < budget.candidate_nodes);
    let dropped_source_handles = budget.dropped_by_budget.iter().any(|drop| {
        drop.item == "node" && drop.kind == "source_handle" && drop.reason == "per_kind_quota"
    });
    assert!(
        dropped_source_handles,
        "expected source_handle budget drops, got {:?}",
        budget.dropped_by_budget
    );

    assert!(report.page.as_ref().unwrap().truncated);
    let page_mentions_ranking = report
        .page
        .as_ref()
        .unwrap()
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("budget ranking signals"));
    assert!(page_mentions_ranking, "{:?}", report.page);
}
/// Gives one node (`near`, label sorting last) depth 1 and eight nodes
/// (`far-*`, labels sorting first) depth 6, then checks that with a limit of 3
/// the `near` node is still selected, quota drops are recorded, and a
/// diagnostic mentions depth.
#[test]
fn graph_db_output_budget_uses_depth_overrides_for_evidence_rows() {
    let mut depth_by_id = BTreeMap::from([("near".to_string(), 1usize)]);
    let mut nodes = vec![SubstrateGraphNode::new("near", "note", "zzz shallow row")];
    for idx in 0..8 {
        let far_id = format!("far-{idx:02}");
        let far_node =
            SubstrateGraphNode::new(far_id.clone(), "note", format!("aaa deeper row {idx}"));
        nodes.push(far_node);
        depth_by_id.insert(far_id, 6);
    }

    let origin_ids = vec!["target".to_string()];
    let no_overrides = BTreeMap::new();
    let budgeted = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &no_overrides,
        nodes,
        Vec::new(),
        Some(3),
        Some(&depth_by_id),
        None,
    );

    let kept_shallow_row = budgeted.nodes.iter().any(|node| node.id == "near");
    assert!(
        kept_shallow_row,
        "expected the shallow evidence row to outrank deeper rows, got {:?}",
        budgeted
            .nodes
            .iter()
            .map(|node| (&node.id, &node.label))
            .collect::<Vec<_>>()
    );
    let recorded_quota_drop = budgeted.report.dropped_by_budget.iter().any(|drop| {
        drop.item == "node" && drop.kind == "note" && drop.reason == "per_kind_quota"
    });
    assert!(
        recorded_quota_drop,
        "expected node quota drops, got {:?}",
        budgeted.report.dropped_by_budget
    );
    let mentions_depth = budgeted
        .report
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("depth"));
    assert!(mentions_depth, "{:?}", budgeted.report.diagnostics);
}
/// With 20 evidence rows and a limit of 3, the first page must be truncated
/// and carry a non-empty `next_cursor`; a second page requested with that
/// cursor must share no node ids with the first and must report a
/// "cursor skipped" diagnostic.
#[test]
fn evidence_pagination_returns_next_cursor_when_truncated() {
    let mut nodes = vec![SubstrateGraphNode::new(
        "target",
        "backlog_item",
        "target item",
    )];
    let mut depth_by_id = BTreeMap::new();
    depth_by_id.insert("target".to_string(), 0);
    for idx in 0..20 {
        let id = format!("ev-{idx}");
        nodes.push(
            SubstrateGraphNode::new(id.clone(), "source_handle", format!("evidence row {idx}"))
                .with_property("detail", "x".repeat(400)),
        );
        depth_by_id.insert(id, 1);
    }
    let origin_ids = vec!["target".to_string()];

    // The candidate list is needed again for the second page, so only the
    // first call clones it.
    let first_page = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &BTreeMap::new(),
        nodes.clone(),
        Vec::new(),
        Some(3),
        Some(&depth_by_id),
        None,
    );
    assert!(
        first_page.truncated,
        "expected first page to be truncated with 20 candidates and low limit, got {} selected of {} candidates",
        first_page.nodes.len(),
        first_page.report.candidate_nodes
    );
    assert!(
        first_page.next_cursor.is_some(),
        "expected next_cursor when truncated"
    );
    let cursor = first_page.next_cursor.unwrap();
    assert!(
        !cursor.is_empty(),
        "cursor should be a non-empty node id"
    );
    let first_ids: BTreeSet<_> = first_page.nodes.iter().map(|n| n.id.clone()).collect();

    // Last use of `nodes`: move it instead of cloning a second time.
    let second_page = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &BTreeMap::new(),
        nodes,
        Vec::new(),
        Some(3),
        Some(&depth_by_id),
        Some(&cursor),
    );
    let second_ids: BTreeSet<_> = second_page.nodes.iter().map(|n| n.id.clone()).collect();
    let overlap: BTreeSet<_> = first_ids.intersection(&second_ids).cloned().collect();
    assert!(
        overlap.is_empty(),
        "pages should not overlap, but found shared ids: {overlap:?}"
    );
    assert!(
        second_page.report.diagnostics.iter().any(|d| d.contains("cursor skipped")),
        "expected cursor skip diagnostic, got {:?}",
        second_page.report.diagnostics
    );
}
/// With only three evidence rows, no limit and no cursor, the result must not
/// be truncated and must not carry a `next_cursor`.
#[test]
fn evidence_pagination_no_cursor_returns_all_when_within_budget() {
    let mut depth_by_id = BTreeMap::new();
    depth_by_id.insert("target".to_string(), 0);
    let mut candidates = vec![SubstrateGraphNode::new(
        "target",
        "backlog_item",
        "target item",
    )];
    for idx in 0..3 {
        let row_id = format!("ev-{idx}");
        let row =
            SubstrateGraphNode::new(row_id.clone(), "source_handle", format!("evidence row {idx}"));
        candidates.push(row);
        depth_by_id.insert(row_id, 1);
    }

    let origin_ids = vec!["target".to_string()];
    let no_overrides = BTreeMap::new();
    let page = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &no_overrides,
        candidates,
        Vec::new(),
        None,
        Some(&depth_by_id),
        None,
    );

    assert!(
        !page.truncated,
        "expected no truncation with small candidate set and default budget"
    );
    assert!(
        page.next_cursor.is_none(),
        "expected no next_cursor when not truncated"
    );
}
/// A cursor that matches no candidate id must skip nothing: the diagnostics
/// are expected to contain "cursor skipped 0".
#[test]
fn evidence_pagination_invalid_cursor_returns_first_page() {
    let mut nodes = vec![SubstrateGraphNode::new(
        "target",
        "backlog_item",
        "target item",
    )];
    let mut depth_by_id = BTreeMap::new();
    depth_by_id.insert("target".to_string(), 0);
    for idx in 0..5 {
        let id = format!("ev-{idx}");
        nodes.push(SubstrateGraphNode::new(
            id.clone(),
            "source_handle",
            format!("evidence row {idx}"),
        ));
        depth_by_id.insert(id, 1);
    }
    let origin_ids = vec!["target".to_string()];

    // `nodes` is not needed afterwards, so it is moved rather than cloned.
    let result = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &BTreeMap::new(),
        nodes,
        Vec::new(),
        None,
        Some(&depth_by_id),
        Some("nonexistent-id"),
    );
    assert!(
        result.report.diagnostics.iter().any(|d| d.contains("cursor skipped 0")),
        "invalid cursor should skip 0 candidates, got {:?}",
        result.report.diagnostics
    );
}
/// While a rollback-journal lock is held on the graph database, the operator
/// `status` report must still be "current", record `SnapshotFallback` as the
/// recovery, and warn about the rollback-journal lock.
#[test]
fn graph_db_status_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_db = root.join(".tsift/graph.db");
    // Keep the guard bound for the rest of the test so the lock stays held.
    let _lock = hold_rollback_journal_lock(&graph_db);

    let report =
        graph_db_operator_report_from_disk(root, None, &graph_db, "status", None, vec![]).unwrap();

    assert_eq!(report.status, "current");
    assert_eq!(
        report.recovery,
        Some(index::ReadOnlyRecovery::SnapshotFallback)
    );
    let warned_about_lock = report
        .warnings
        .iter()
        .any(|warning| warning.contains("rollback-journal lock"));
    assert!(
        warned_about_lock,
        "expected rollback-journal recovery warning, got {:?}",
        report.warnings
    );
}
/// While a WAL database lock is held, the operator `status` report must still
/// be "current", record `SnapshotFallbackWal` as the recovery, and warn about
/// the WAL-aware snapshot fallback.
#[test]
fn graph_db_status_copies_wal_sidecars_when_locked() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_db = root.join(".tsift/graph.db");
    // Keep the guard bound for the rest of the test so the lock stays held.
    let _lock = hold_wal_database_lock(&graph_db);

    let report =
        graph_db_operator_report_from_disk(root, None, &graph_db, "status", None, vec![]).unwrap();

    assert_eq!(report.status, "current");
    assert_eq!(
        report.recovery,
        Some(index::ReadOnlyRecovery::SnapshotFallbackWal)
    );
    let warned_about_wal = report
        .warnings
        .iter()
        .any(|warning| warning.contains("WAL-aware snapshot fallback"));
    assert!(
        warned_about_wal,
        "expected WAL recovery warning, got {:?}",
        report.warnings
    );
}
/// With a rollback-journal lock held on the graph database, the `graph-db`
/// evidence command for target `kgnv` must still succeed.
///
/// On failure the underlying error is included in the panic message, so a
/// regression in the fallback path can be diagnosed from the test output.
#[test]
fn graph_db_evidence_uses_snapshot_fallback_when_graph_db_is_locked() {
    let dir = setup_traversal_project();
    let session = dir.path().join("tasks/software/tsift.md");
    refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
    let graph_db = dir.path().join(".tsift/graph.db");
    // Keep the guard bound for the rest of the test so the lock stays held.
    let _lock = hold_rollback_journal_lock(&graph_db);
    cmd_graph_db(
        &session,
        None,
        GraphDbBackend::Sqlite,
        None,
        GraphDbQuery::Evidence {
            target: "kgnv".to_string(),
            depth: 3,
            limit: 8,
            cursor: None,
        },
        OutputFormat {
            json_output: false,
            compact: true,
            pretty: false,
            terse: false,
            ultra_terse: false,
            schema: false,
            envelope: false,
        },
    )
    .expect("graph-db evidence should succeed while the graph db is locked");
}
/// Fixture freshness report: status "current", not fail-closed, the current
/// projection version, a placeholder content hash, and no diagnostics.
fn current_graph_db_freshness() -> GraphDbFreshnessReport {
    let projection_version = Some(GRAPH_PROJECTION_VERSION.to_string());
    let content_hash = Some(String::from("fixture"));
    GraphDbFreshnessReport {
        status: String::from("current"),
        fail_closed: false,
        projection_version,
        content_hash,
        source_watermark: None,
        diagnostics: Vec::new(),
    }
}
/// Passing a stale, fail-closed freshness report to the evidence query must
/// produce an error whose message says it failed closed and contains the
/// `graph-db --path` / `refresh --json` repair command fragments.
#[test]
fn graph_db_evidence_fails_closed_with_repair_command_for_stale_freshness() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let stale_freshness = GraphDbFreshnessReport {
        status: "stale".to_string(),
        fail_closed: true,
        projection_version: Some("old-v0".to_string()),
        content_hash: None,
        source_watermark: None,
        diagnostics: vec!["projection content hash is missing".to_string()],
    };
    let evidence_input = GraphDbEvidenceInput {
        root,
        scope: None,
        backend: "sqlite",
        target: "kgnv",
        depth: 3,
        limit: 8,
        cursor: None,
        store: &store,
        freshness: stale_freshness,
        warnings: Vec::new(),
    };

    let Err(err) = graph_db_evidence_report_from_store(evidence_input) else {
        panic!("stale graph freshness should fail closed");
    };
    let message = err.to_string();
    for expected_fragment in ["failed closed", "graph-db --path", "refresh --json"] {
        assert!(message.contains(expected_fragment), "{message}");
    }
}
/// Runs a `Kind` query for "backlog" nodes filtered by `phase=open` with a
/// page limit of 2 against `store`, starting after `cursor` when given.
///
/// Returns the ids of the nodes on the page together with the page report.
/// Panics if the query fails or the report has no page section.
fn paged_graph_ids(
    store: &impl GraphStore,
    cursor: Option<&str>,
) -> (Vec<String>, GraphDbPageReport) {
    let kind_query = GraphDbQuery::Kind {
        kind: "backlog".to_string(),
        cursor: cursor.map(String::from),
        limit: Some(2),
        property_filters: vec!["phase=open".to_string()],
    };
    let report = graph_db_report_from_store(
        Path::new("."),
        None,
        "fixture",
        kind_query,
        store,
        current_graph_db_freshness(),
        Vec::new(),
    )
    .unwrap();

    let mut ids = Vec::with_capacity(report.nodes.len());
    for node in &report.nodes {
        ids.push(node.id.clone());
    }
    let page = report.page.unwrap();
    (ids, page)
}
/// Five backlog nodes (one with `phase=closed`) are loaded into both stores;
/// paging through `phase=open` nodes two at a time must give the same ids,
/// page counts and filters for SQLite and Convex, with SQLite additionally
/// reporting its query-plan diagnostic.
#[test]
fn graph_db_query_pagination_and_filters_match_sqlite_and_convex() {
    let mut nodes = Vec::new();
    for idx in 0..5 {
        let phase = if idx == 1 { "closed" } else { "open" };
        nodes.push(
            SubstrateGraphNode::new(format!("gbak-{idx:02}"), "backlog", format!("#{idx:02}"))
                .with_property("phase", phase),
        );
    }
    let projection = GraphProjection {
        nodes,
        edges: Vec::new(),
    };
    let sqlite = SqliteGraphStore::in_memory().unwrap();
    projection.upsert_into(&sqlite).unwrap();
    let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
    projection.upsert_into(&convex).unwrap();

    // First page.
    let (sqlite_ids_1, sqlite_page_1) = paged_graph_ids(&sqlite, None);
    let (convex_ids_1, convex_page_1) = paged_graph_ids(&convex, None);
    assert_eq!(sqlite_ids_1, vec!["gbak-00", "gbak-02"]);
    assert_eq!(sqlite_ids_1, convex_ids_1);
    assert_eq!(sqlite_page_1.next_cursor.as_deref(), Some("gbak-02"));
    assert!(sqlite_page_1.truncated);
    assert_eq!(sqlite_page_1.returned_nodes, convex_page_1.returned_nodes);
    assert_eq!(
        sqlite_page_1.property_filters,
        convex_page_1.property_filters
    );
    let sqlite_plan_reported = sqlite_page_1
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("idx_graph_nodes_kind"));
    assert!(
        sqlite_plan_reported,
        "expected SQLite kind query plan diagnostics, got {:?}",
        sqlite_page_1.diagnostics
    );

    // Second page, resumed from the SQLite cursor on both stores.
    let resume_from = sqlite_page_1.next_cursor.as_deref();
    let (sqlite_ids_2, sqlite_page_2) = paged_graph_ids(&sqlite, resume_from);
    let (convex_ids_2, convex_page_2) = paged_graph_ids(&convex, resume_from);
    assert_eq!(sqlite_ids_2, vec!["gbak-03", "gbak-04"]);
    assert_eq!(sqlite_ids_2, convex_ids_2);
    assert_eq!(sqlite_page_2.next_cursor, None);
    assert!(!sqlite_page_2.truncated);
    assert_eq!(sqlite_page_2.returned_nodes, convex_page_2.returned_nodes);
    assert_eq!(
        sqlite_page_2.property_filters,
        convex_page_2.property_filters
    );
}
/// The shortest path from the `#kgnv` backlog node to `main` must start and
/// end on those handles and contain at least three handles.
#[test]
fn traversal_shortest_path_crosses_artifacts_and_symbols() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let start = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let goal = resolve_traversal_node(&graph, "main").unwrap();

    let path = traversal_shortest_handles(&graph.edges, &start.handle, &goal.handle).unwrap();

    assert_eq!(path.first(), Some(&start.handle));
    assert_eq!(path.last(), Some(&goal.handle));
    let hop_count_ok = path.len() >= 3;
    assert!(
        hop_count_ok,
        "expected backlog -> symbol -> main, got {path:?}"
    );
}
/// A neighborhood report rooted at `#kgnv` must recommend `helper` with a
/// "matched" reason, include exploration source windows, and carry the
/// "avoid whole-file reads" guidance.
#[test]
fn traversal_report_recommends_next_bugfix_nodes() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();

    let report = traversal_report(root, None, graph, Some("#kgnv"), None, 1, 50).unwrap();

    assert_eq!(report.mode, "neighborhood");
    let recommends_helper = report
        .recommendations
        .iter()
        .any(|rec| rec.label == "helper" && rec.reason.contains("matched"));
    assert!(
        recommends_helper,
        "expected helper recommendation, got {:?}",
        report.recommendations
    );
    let has_source_windows = !report.exploration.source_windows.is_empty();
    assert!(has_source_windows, "expected exploration source windows");
    assert!(
        report
            .exploration
            .no_reread_guidance
            .contains("avoid whole-file reads")
    );
}
/// After `main.rs` is rewritten, building the traversal graph must emit an
/// "index refreshed" warning, resolve the new `fresh_helper` symbol, and leave
/// the on-disk index with no pending new/modified/deleted changes.
#[test]
fn traversal_graph_refreshes_stale_index_before_loading_symbols() {
    let project = setup_traversal_project();
    let root = project.path();

    // Pause before rewriting the file; presumably this lets the change be
    // detected relative to the existing index — TODO confirm.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten_source =
        "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten_source).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let refresh_reported = graph.warnings.iter().any(|warning| {
        warning.contains("index refreshed") && warning.contains("graph traversal packet")
    });
    assert!(
        refresh_reported,
        "expected refresh diagnostic, got {:?}",
        graph.warnings
    );
    assert!(resolve_traversal_node(&graph, "fresh_helper").is_some());

    let index_db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = index_db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// With the index writer lock held and `main.rs` rewritten, building the
/// traversal graph must warn about the raw-source fallback, expose `main.rs`
/// as a raw-source node with a `source-read` expand command, and not resolve
/// the old `helper` symbol.
#[test]
fn traversal_graph_falls_back_to_raw_source_when_stale_refresh_is_blocked() {
    let project = setup_traversal_project();
    let root = project.path();
    let index_db_path = root.join(".tsift/index.db");
    // Keep the guard bound for the rest of the test so the lock stays held.
    let _writer = hold_writer_lock(&index::writer_lock_path(&index_db_path));

    // Pause before rewriting the file; presumably this lets the change be
    // detected relative to the existing index — TODO confirm.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten_source =
        "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten_source).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let file = resolve_traversal_node(&graph, "main.rs").unwrap();

    let fallback_reported = graph
        .warnings
        .iter()
        .any(|warning| warning.contains("falling back to raw source file nodes"));
    assert!(
        fallback_reported,
        "expected raw-source fallback diagnostic, got {:?}",
        graph.warnings
    );
    let detail_marks_fallback = file
        .detail
        .as_deref()
        .is_some_and(|detail| detail.contains("raw source fallback"));
    assert!(
        detail_marks_fallback,
        "expected raw-source detail, got {:?}",
        file.detail
    );
    let expands_via_source_read = file.expand.contains("source-read");
    assert!(
        expands_via_source_read,
        "expected source-read fallback command, got {}",
        file.expand
    );
    let stale_symbol_hidden = resolve_traversal_node(&graph, "helper").is_none();
    assert!(
        stale_symbol_hidden,
        "stale symbol evidence should be skipped when refresh is blocked"
    );
}
/// Runs the traverse command twice and requires both to succeed: once as
/// JSON with `#kgnv` and `main` as endpoints, once as HTML with no endpoints.
#[test]
fn traversal_cmd_supports_json_and_html_outputs() {
    let dir = setup_traversal_project();
    let invocations = [
        (Some("#kgnv"), Some("main"), TraverseFormat::Json),
        (None, None, TraverseFormat::Html),
    ];
    for (from, to, format) in invocations {
        cmd_traverse(
            from,
            to,
            dir.path(),
            None,
            1,
            50,
            format,
            false,
            false,
            false,
            None,
        )
        .unwrap();
    }
}
// Renders the traversal report to HTML and checks for the markers the page is expected
// to contain.
#[test]
fn traversal_html_renders_inline_graph_visualization() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);

    let graph = build_traversal_graph(root, root, None).unwrap();
    let report = traversal_report(root, None, graph, None, None, 1, 50).unwrap();
    let page = traversal_report_html(&report).unwrap();

    assert!(page.contains("id=\"graph-canvas\""));
    assert!(page.contains("semantic_concept"));
    assert!(page.contains("graph navigation"));
    assert!(page.contains("JSON.parse"));
}
// Pins the output of `format_score` for both values of its flag and the single-line
// result of `compact_snippet`.
#[test]
fn compact_helpers_trim_scores_and_snippets() {
    let short_score = format_score(0.12345, true);
    assert_eq!(short_score, "0.12");
    let long_score = format_score(0.12345, false);
    assert_eq!(long_score, "0.1235");

    let trimmed = compact_snippet(" first line with useful context\nsecond");
    assert_eq!(trimmed.as_deref(), Some("first line with useful context"));
}
// Six members with a cap of five must render the first five names plus a "+1 more" tail.
#[test]
fn compact_members_caps_list() {
    let names = ["a", "b", "c", "d", "e", "f"];
    let members: Vec<graph::CommunityMember> = names
        .iter()
        .map(|&name| graph::CommunityMember::new(name))
        .collect();

    let rendered = compact_members(&members, 5);
    assert_eq!(rendered, "a, b, c, d, e (+1 more)");
}
// Table of symbol kinds and the abbreviation `abbreviate_kind` must return for each.
#[test]
fn abbreviate_kind_maps_common_kinds() {
    let abbreviated = [
        ("function", "fn"),
        ("method", "meth"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
    ];
    for (kind, short) in abbreviated {
        assert_eq!(abbreviate_kind(kind), short);
    }

    // short kinds pass through
    for kind in ["struct", "trait", "enum", "const", "unknown_kind"] {
        assert_eq!(abbreviate_kind(kind), kind);
    }
}
// Table of match-type names and the label `abbreviate_match_type` must return for each.
#[test]
fn abbreviate_match_type_maps_search_types() {
    let expectations = [
        ("exact_name", "exact"),
        ("partial_tags", "partial"),
        ("all_tags", "all_tags"),
        ("other_type", "other_type"),
    ];
    for (match_type, label) in expectations {
        assert_eq!(abbreviate_match_type(match_type), label);
    }
}
// Two edges from the same caller file must collapse into one grouped line listing both
// callees.
#[test]
fn explain_compact_groups_edges_by_file() {
    // Both edges share the caller; only the callee and call-site line differ.
    let edge_from_main = |callee: &str, call_site_line| index::StoredEdge {
        caller_file: "src/main.rs".to_string(),
        caller_name: "main".to_string(),
        caller_line: 1,
        callee_name: callee.to_string(),
        call_site_line,
        tagpath_handle: None,
    };
    let edges = vec![edge_from_main("helper", 2), edge_from_main("render", 3)];

    let grouped = format_edge_groups(&edges, false);
    assert_eq!(grouped, vec![" src/main.rs (2): helper, render"]);
}
// Three hits in the same file must form a single group that reports all three hits,
// keeps the first two as samples, and is eligible for collapsing.
#[test]
fn search_hit_groups_preserve_file_counts_and_samples() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let main_rs = root.join("src/main.rs");
    fs::create_dir_all(main_rs.parent().unwrap()).unwrap();
    fs::write(&main_rs, "claudescore-3 anchor\nclaudescore-3 follow-up\n").unwrap();
    let freshness = exact_search_file_timestamp(&main_rs);

    // Every hit points at the same file; only id, rank, score, location and snippet vary.
    let path_text = main_rs.display().to_string();
    let make_hit = |id: &str, rank, score, location: &str, snippet: &str| sift::SearchHit {
        artifact_id: id.to_string(),
        artifact_kind: sift::ContextArtifactKind::File,
        path: path_text.clone(),
        rank,
        score,
        confidence: sift::ScoreConfidence::High,
        location: Some(location.to_string()),
        snippet: snippet.to_string(),
        provenance: sift::ArtifactProvenance {
            adapter: sift::AcquisitionAdapterKind::FileSystem,
            source: "ripgrep -F".to_string(),
            synthetic: false,
        },
        freshness: freshness.clone(),
        budget: sift::ArtifactBudget::from_text(snippet, 1),
    };
    let hits = vec![
        make_hit("a", 1, 10.0, "line 3", "claudescore-3 anchor"),
        make_hit("b", 2, 9.0, "line 7", "claudescore-3 follow-up"),
        make_hit("c", 3, 8.0, "line 9", "claudescore-3 tail"),
    ];

    let groups = group_search_hits(&hits, root, false);
    assert_eq!(groups.len(), 1);
    let only_group = &groups[0];
    assert_eq!(only_group.path, "src/main.rs");
    assert_eq!(only_group.hits, 3);
    assert_eq!(
        only_group.samples,
        vec![
            "line 3: claudescore-3 anchor".to_string(),
            "line 7: claudescore-3 follow-up".to_string()
        ]
    );
    assert!(should_collapse_search_hits(&hits, root, false));
}
// Three callers of `helper` within one file must be reported as collapsible.
#[test]
fn dense_edge_groups_trigger_collapse() {
    // All edges target `helper` from `src/main.rs`; caller name and line numbers vary.
    let call_to_helper = |caller: &str, caller_line, call_site_line| index::StoredEdge {
        caller_file: "src/main.rs".to_string(),
        caller_name: caller.to_string(),
        caller_line,
        callee_name: "helper".to_string(),
        call_site_line,
        tagpath_handle: None,
    };
    let edges = vec![
        call_to_helper("main", 1, 2),
        call_to_helper("beta", 5, 6),
        call_to_helper("gamma", 9, 10),
    ];

    assert!(should_collapse_edge_groups(&edges));
}
// --- workspace indexing ---
// Builds a temporary workspace whose `.gitmodules` declares `src/alpha` and `src/beta`,
// each holding a small `lib.rs`. The returned `TempDir` owns the directory.
fn setup_workspace() -> tempfile::TempDir {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    let gitmodules = r#"[submodule "src/alpha"]
path = src/alpha
url = https://example.com/alpha
[submodule "src/beta"]
path = src/beta
url = https://example.com/beta
"#;
    std::fs::write(root.join(".gitmodules"), gitmodules).unwrap();

    let alpha = root.join("src/alpha");
    let beta = root.join("src/beta");
    for submodule in [&alpha, &beta] {
        std::fs::create_dir_all(submodule).unwrap();
    }

    let alpha_source = "fn alpha_helper() {}\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(alpha.join("lib.rs"), alpha_source).unwrap();
    std::fs::write(beta.join("lib.rs"), "fn beta_func() {}").unwrap();

    workspace
}
// Builds a temporary workspace with two submodules, `pkg/app/foo` and `vendor/foo`, that
// share the leaf directory name `foo`; both define a `shared_name` function.
fn setup_workspace_with_duplicate_leaf_names() -> tempfile::TempDir {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    let gitmodules = r#"[submodule "pkg/app/foo"]
path = pkg/app/foo
url = https://example.com/pkg-app-foo
[submodule "vendor/foo"]
path = vendor/foo
url = https://example.com/vendor-foo
"#;
    std::fs::write(root.join(".gitmodules"), gitmodules).unwrap();

    let pkg_foo = root.join("pkg/app/foo");
    let vendor_foo = root.join("vendor/foo");
    for submodule in [&pkg_foo, &vendor_foo] {
        std::fs::create_dir_all(submodule).unwrap();
    }

    let pkg_source = "fn pkg_only() {}\nfn shared_name() { pkg_only(); }\n";
    std::fs::write(pkg_foo.join("lib.rs"), pkg_source).unwrap();
    let vendor_source = "fn vendor_only() {}\nfn shared_name() { vendor_only(); }\n";
    std::fs::write(vendor_foo.join("lib.rs"), vendor_source).unwrap();

    workspace
}
// After indexing the workspace fixture, an index database must exist for both `alpha`
// and `beta`.
#[test]
fn workspace_index_creates_per_submodule_dbs() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    for scope in ["alpha", "beta"] {
        let db_file = root.join(format!(".tsift/indexes/{scope}/index.db"));
        assert!(db_file.exists());
    }
}
// Indexing with `Some("alpha")` must create the alpha database and leave beta untouched.
#[test]
fn workspace_index_single_submodule() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        false,
        Some("alpha"),
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let alpha_db = root.join(".tsift/indexes/alpha/index.db");
    let beta_db = root.join(".tsift/indexes/beta/index.db");
    assert!(alpha_db.exists());
    assert!(!beta_db.exists());
}
// Indexing with `Some("missing")` must fail with a message that names the unknown scope
// and lists the available ones, without creating a database for it.
#[test]
fn workspace_index_single_submodule_errors_on_unknown_scope() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let failure = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        false,
        Some("missing"),
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    for fragment in ["unknown scope `missing`", "Available scopes: alpha, beta"] {
        assert!(msg.contains(fragment));
    }
    let missing_db = root.join(".tsift/indexes/missing/index.db");
    assert!(!missing_db.exists());
}
// Two submodules sharing the leaf name `foo` must each get an index database under a
// directory derived from their full submodule path.
#[test]
fn workspace_index_uses_unique_scope_ids_when_leaf_names_collide() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let pkg_db = root.join(".tsift/indexes/pkg/app/foo/index.db");
    assert!(pkg_db.exists());
    let vendor_db = root.join(".tsift/indexes/vendor/foo/index.db");
    assert!(vendor_db.exists());
}
// After indexing the workspace, a federated symbol search from the root must find
// `alpha_helper`.
#[test]
fn federated_search_across_submodules() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diagnostics) = federated_symbol_search(root, "alpha_helper", 10, &opts).unwrap();
    assert!(
        !hits.is_empty(),
        "should find alpha_helper via federated search"
    );
}
// With `alpha` configured as tier "isolated", a federated symbol search must not return
// `alpha_helper`.
#[test]
fn federated_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // Write the tier override before indexing.
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config_toml = r#"
[overrides.alpha]
tier = "isolated"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config_toml).unwrap();

    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diagnostics) = federated_symbol_search(root, "alpha_helper", 10, &opts).unwrap();
    assert!(
        hits.is_empty(),
        "isolated submodule should not appear in federated search"
    );
}
// With `alpha` configured as tier "isolated", a federated lexical search for "fn" must
// still return hits, and every hit must come from `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // Write the tier override before indexing.
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config_toml = r#"
[overrides.alpha]
tier = "isolated"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config_toml).unwrap();

    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();
    assert!(
        !response.hits.is_empty(),
        "shared scopes should still contribute lexical hits"
    );

    let only_beta_hits = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta_hits,
        "isolated scope should not leak lexical hits: {:?}",
        response.hits
    );
}
// With `alpha` configured as tier "private", a federated lexical search for "fn" must
// still return hits, and every hit must come from `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_private_tier() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // Write the tier override before indexing.
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config_toml = r#"
[overrides.alpha]
tier = "private"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config_toml).unwrap();

    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();
    assert!(
        !response.hits.is_empty(),
        "shared scopes should still contribute lexical hits"
    );

    let only_beta_hits = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta_hits,
        "private scope should not leak lexical hits: {:?}",
        response.hits
    );
}
// Opens the `alpha` scope database directly and checks that `alpha_main` is the top
// symbol-search hit.
#[test]
fn scoped_search_finds_submodule_symbols() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let cfg = config::Config::load(root).unwrap();
    let alpha_db_path = cfg.db_path_for(root, "alpha");
    let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();

    let hits = alpha_db.symbol_search("alpha_main", 10).unwrap();
    assert!(!hits.is_empty());
    let top_hit = &hits[0];
    assert_eq!(top_hit.name, "alpha_main");
}
// `cmd_search` with scope `missing` must fail with a message naming the unknown scope
// and listing the available ones.
#[test]
fn scoped_search_cmd_errors_on_unknown_scope() {
    let workspace = setup_workspace();
    let failure = cmd_search(
        "alpha_main".to_string(),
        Some(workspace.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("missing".to_string()),
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    for fragment in ["unknown scope `missing`", "Available scopes: alpha, beta"] {
        assert!(msg.contains(fragment));
    }
}
// When two submodules share the leaf name `foo`, searching with scope `foo` must fail
// as ambiguous and mention both full submodule paths.
#[test]
fn scoped_search_cmd_errors_on_ambiguous_legacy_scope_name() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let failure = cmd_search(
        "vendor_only".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("foo".to_string()),
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    for fragment in ["ambiguous scope `foo`", "pkg/app/foo", "vendor/foo"] {
        assert!(msg.contains(fragment));
    }
}
// Opens the `alpha` scope database directly and checks that `alpha_main` lists
// `alpha_helper` among its callees.
#[test]
fn scoped_graph_query() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let cfg = config::Config::load(root).unwrap();
    let alpha_db_path = cfg.db_path_for(root, "alpha");
    let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();

    let callees = alpha_db.callees_of("alpha_main").unwrap();
    let callee_names: Vec<&str> = callees
        .iter()
        .map(|edge| edge.callee_name.as_str())
        .collect();
    assert!(callee_names.contains(&"alpha_helper"));
}
// Shared assertion: the error must ask for `--scope`, list available and indexed scopes,
// and must not be the generic "no index found at" failure.
fn assert_workspace_query_requires_scope(err: anyhow::Error) {
    let msg = err.to_string();
    let required_fragments = [
        "require `--scope <scope>`",
        "Available scopes: alpha, beta",
        "Indexed scopes: alpha, beta",
    ];
    for fragment in required_fragments {
        assert!(msg.contains(fragment), "{msg}");
    }
    let mentions_missing_index = msg.contains("no index found at");
    assert!(
        !mentions_missing_index,
        "workspace query should fail with scope guidance, got: {msg}"
    );
}
// Shared assertion: the error must ask for `--scope` or `--federated`, list available and
// indexed scopes, and must not mention "autoindexing index".
fn assert_workspace_search_requires_explicit_target(err: anyhow::Error) {
    let msg = err.to_string();
    let required_fragments = [
        "requires `--scope <scope>` or `--federated`",
        "Available scopes: alpha, beta",
        "Indexed scopes: alpha, beta",
    ];
    for fragment in required_fragments {
        assert!(msg.contains(fragment), "{msg}");
    }
    let mentions_autoindex = msg.contains("autoindexing index");
    assert!(
        !mentions_autoindex,
        "workspace search should fail before creating a shared root index: {msg}"
    );
}
// Running `cmd_graph` at the workspace root with no scope argument must fail with the
// scope-guidance error.
#[test]
fn graph_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let failure = cmd_graph(
        "alpha_main",
        root,
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .unwrap_err();
    assert_workspace_query_requires_scope(failure);
}
// Running `cmd_graph` from a directory nested under `src/alpha`, with no scope argument,
// must succeed.
#[test]
fn graph_cmd_infers_scope_from_nested_workspace_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let nested = root.join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();
    let outcome = cmd_graph(
        "alpha_main",
        &nested,
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_graph failed: {:?}",
        outcome.as_ref().err()
    );
}
// Running `cmd_communities` at the workspace root with no scope argument must fail with
// the scope-guidance error.
#[test]
fn communities_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let failure = cmd_communities(
        root,
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .unwrap_err();
    assert_workspace_query_requires_scope(failure);
}
// Running `cmd_communities` from a directory nested under `src/alpha`, with no scope
// argument, must succeed.
#[test]
fn communities_cmd_infers_scope_from_nested_workspace_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let nested = root.join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();
    let outcome = cmd_communities(
        &nested,
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_communities failed: {:?}",
        outcome.as_ref().err()
    );
}
// Running `cmd_path` at the workspace root with no scope argument must fail with the
// scope-guidance error.
#[test]
fn path_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let failure = cmd_path(
        "alpha_main",
        "alpha_helper",
        root,
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .unwrap_err();
    assert_workspace_query_requires_scope(failure);
}
// Running `cmd_path` from a directory nested under `src/alpha`, with no scope argument,
// must succeed.
#[test]
fn path_cmd_infers_scope_from_nested_workspace_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let nested = root.join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();
    let outcome = cmd_path(
        "alpha_main",
        "alpha_helper",
        &nested,
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_path failed: {:?}",
        outcome.as_ref().err()
    );
}
// `cmd_path` must still succeed while `hold_rollback_journal_lock` keeps the index
// database locked.
#[test]
fn path_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = setup_graph_index();
    let root = project.path();
    let db_path = root.join(".tsift/index.db");
    let _lock = hold_rollback_journal_lock(&db_path);

    let outcome = cmd_path(
        "main",
        "helper",
        root,
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_path failed: {:?}",
        outcome.as_ref().err()
    );
}
// Running `cmd_explain` at the workspace root with no scope argument must fail with the
// scope-guidance error.
#[test]
fn explain_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let failure = cmd_explain(
        "alpha_main",
        root,
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();
    assert_workspace_query_requires_scope(failure);
}
// Running `cmd_explain` from a directory nested under `src/alpha`, with no scope
// argument, must succeed.
#[test]
fn explain_cmd_infers_scope_from_nested_workspace_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let nested = root.join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();
    let outcome = cmd_explain(
        "alpha_main",
        &nested,
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_explain failed: {:?}",
        outcome.as_ref().err()
    );
}
// `cmd_explain` must still succeed while `hold_rollback_journal_lock` keeps the index
// database locked.
#[test]
fn explain_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = setup_graph_index();
    let root = project.path();
    let db_path = root.join(".tsift/index.db");
    let _lock = hold_rollback_journal_lock(&db_path);

    let outcome = cmd_explain(
        "main",
        root,
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_explain failed: {:?}",
        outcome.as_ref().err()
    );
}
// --- community detection ---
// Community detection over the graph fixture's edges must see at least one node and
// produce at least one community.
#[test]
fn community_detection_groups_related() {
    let project = setup_graph_index();
    let db_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&db_path).unwrap();
    let edges = db.all_edges().unwrap();

    let detected = graph::detect_communities(&edges);
    assert!(detected.node_count > 0);
    assert!(!detected.communities.is_empty());
}
// In an empty temp directory, `cmd_communities` must succeed and leave an index database
// behind at `.tsift/index.db`.
#[test]
fn community_cmd_autoindexes_missing_index_by_default() {
    let empty_project = tempfile::tempdir().unwrap();
    let root = empty_project.path();

    let outcome = cmd_communities(
        root,
        None,
        2,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_communities failed: {:?}",
        outcome.as_ref().err()
    );
    assert!(root.join(".tsift/index.db").exists());
}
// --- path ---
// In the graph fixture, `main` must reach `helper` in exactly one hop.
#[test]
fn path_finds_connected_symbols() {
    let project = setup_graph_index();
    let db_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&db_path).unwrap();
    let edges = db.all_edges().unwrap();

    // `expect` replaces the former `is_some()` check followed by `unwrap()`: one step,
    // and a failure says what was missing.
    let path = graph::shortest_path(&edges, "main", "helper")
        .expect("expected a call path from `main` to `helper`");
    assert_eq!(path.hops, 1);
}
// A path query to a symbol that is not in the graph fixture must return `None`.
#[test]
fn path_returns_none_for_unknown() {
    let project = setup_graph_index();
    let db_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&db_path).unwrap();
    let edges = db.all_edges().unwrap();

    let missing = graph::shortest_path(&edges, "main", "nonexistent");
    assert!(missing.is_none());
}
// In an empty temp directory, `cmd_path` must succeed and leave an index database behind
// at `.tsift/index.db`.
#[test]
fn path_cmd_autoindexes_missing_index_by_default() {
    let empty_project = tempfile::tempdir().unwrap();
    let root = empty_project.path();

    let outcome = cmd_path(
        "a",
        "b",
        root,
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_path failed: {:?}",
        outcome.as_ref().err()
    );
    assert!(root.join(".tsift/index.db").exists());
}
// --- explain ---
// `symbol_info("main")` on the graph fixture must return `main` first, with kind
// "function".
#[test]
fn explain_shows_symbol_info() {
    let project = setup_graph_index();
    let db_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&db_path).unwrap();

    let symbols = db.symbol_info("main").unwrap();
    assert!(!symbols.is_empty());
    let first = &symbols[0];
    assert_eq!(first.name, "main");
    assert_eq!(first.kind, "function");
}
// In an empty temp directory, `cmd_explain` must succeed and leave an index database
// behind at `.tsift/index.db`.
#[test]
fn explain_cmd_autoindexes_missing_index_by_default() {
    let empty_project = tempfile::tempdir().unwrap();
    let root = empty_project.path();

    let outcome = cmd_explain(
        "main",
        root,
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_explain failed: {:?}",
        outcome.as_ref().err()
    );
    assert!(root.join(".tsift/index.db").exists());
}
// Opens the database and starts a `BEGIN IMMEDIATE` transaction that is never committed
// here; the caller keeps the returned connection alive for as long as it is needed.
fn hold_write_lock(db_path: &std::path::Path) -> Connection {
    let connection = Connection::open(db_path).unwrap();
    let begin_sql = "BEGIN IMMEDIATE";
    connection.execute_batch(begin_sql).unwrap();
    connection
}
// Opens the lock file (creating it if needed, without truncating), takes an exclusive
// lock on it, writes this process id into it, and returns the open handle.
// NOTE(review): presumably the lock lasts while the returned file stays open — confirm
// against the fs4 documentation.
fn hold_writer_lock(lock_path: &std::path::Path) -> std::fs::File {
    use fs4::fs_std::FileExt;
    use std::io::Write;

    let mut options = std::fs::OpenOptions::new();
    options.read(true).write(true).create(true).truncate(false);
    let mut lock_file = options.open(lock_path).unwrap();

    assert!(lock_file.try_lock_exclusive().unwrap());
    writeln!(lock_file, "{}", std::process::id()).unwrap();
    lock_file
}
// Switches the database to `journal_mode=DELETE`, starts an exclusive transaction, and
// writes a placeholder file at the rollback-journal path. Returns the open connection.
fn hold_rollback_journal_lock(db_path: &std::path::Path) -> Connection {
    let connection = Connection::open(db_path).unwrap();
    connection
        .execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
        .unwrap();

    let journal_path = substrate::rollback_journal_path(db_path);
    std::fs::write(journal_path, "locked").unwrap();
    connection
}
// Puts the database into WAL mode, inserts a probe row, then starts an exclusive
// transaction under `locking_mode=EXCLUSIVE`. Asserts that the WAL sidecar file exists
// and returns the open connection.
fn hold_wal_database_lock(db_path: &std::path::Path) -> Connection {
    let connection = Connection::open(db_path).unwrap();
    let setup_sql = "PRAGMA journal_mode=WAL;
PRAGMA wal_autocheckpoint=0;
CREATE TABLE IF NOT EXISTS wal_lock_probe (id INTEGER PRIMARY KEY);
INSERT INTO wal_lock_probe DEFAULT VALUES;
PRAGMA locking_mode=EXCLUSIVE;
BEGIN EXCLUSIVE;";
    connection.execute_batch(setup_sql).unwrap();

    let wal_path = substrate::wal_sidecar_path(db_path);
    assert!(wal_path.exists());
    connection
}
// With the database held by `hold_wal_database_lock`, `cmd_index` must fail and its error
// must carry the lock diagnostics checked below.
#[test]
fn index_cmd_reports_wal_sidecar_diagnostics_without_tsift_writer_lock() {
    let project = setup_graph_index();
    let root = project.path();
    let db_path = root.join(".tsift/index.db");
    let _lock = hold_wal_database_lock(&db_path);

    let failure = cmd_index(
        root, false, false, false, false, false, false, None, false, false, false, false, false,
        false,
    )
    .unwrap_err();
    let msg = failure.to_string();

    assert!(msg.contains("indexing"));
    assert!(msg.contains("lock diagnostics:"));
    assert!(msg.contains("lock: absent"));
    // Either sidecar being reported is accepted.
    let reports_sidecar = msg.contains("wal: present") || msg.contains("shm: present");
    assert!(reports_sidecar);
    assert!(msg.contains("wedged writer holding live WAL sidecars"));
    assert!(msg.contains("snapshot fallback"));
}
// `cmd_search` must succeed while `hold_write_lock` keeps a write transaction open on the
// index database.
#[test]
fn search_cmd_succeeds_while_writer_lock_is_held() {
    let project = setup_graph_index();
    let root = project.path();
    let db_path = root.join(".tsift/index.db");
    let _lock = hold_write_lock(&db_path);

    let outcome = cmd_search(
        "main".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        true,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );
}
// With the `install_search_post_precheck_lock` hook installed for the index database,
// `cmd_search` must still succeed.
#[test]
fn search_cmd_uses_snapshot_fallback_when_rollback_journal_lock_appears_after_precheck() {
    let project = setup_graph_index();
    let root = project.path();
    let _hook = install_search_post_precheck_lock(root.join(".tsift/index.db"));

    let outcome = cmd_search(
        "main".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        true,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );
}
// With the `install_search_post_precheck_wal_lock` hook installed for the index database,
// `cmd_search` must still succeed.
#[test]
fn search_cmd_uses_wal_snapshot_fallback_when_lock_appears_after_precheck() {
    let project = setup_graph_index();
    let root = project.path();
    let _hook = install_search_post_precheck_wal_lock(root.join(".tsift/index.db"));

    let outcome = cmd_search(
        "main".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        true,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );
}
// After `main.rs` is rewritten, this `cmd_search` call must fail with an error that
// mentions the aborted search, the stale index, and `--no-autoindex`.
#[test]
fn search_cmd_fails_fast_when_autoindex_disabled_and_index_is_stale() {
    let project = setup_graph_index();
    let root = project.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        root.join("main.rs"),
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
    )
    .unwrap();

    let failure = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    assert!(msg.contains("search aborted"));
    assert!(msg.contains("index is stale"));
    assert!(msg.contains("--no-autoindex"));
}
// With a stale index that is also locked via `hold_rollback_journal_lock`, the error must
// report staleness and must not mention "database is locked".
#[test]
fn search_cmd_reports_stale_when_root_index_is_locked_by_rollback_journal() {
    let project = setup_graph_index();
    let root = project.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        root.join("main.rs"),
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
    )
    .unwrap();
    let _lock = hold_rollback_journal_lock(&root.join(".tsift/index.db"));

    let failure = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    assert!(msg.contains("search aborted"));
    assert!(msg.contains("index is stale"));
    assert!(!msg.contains("database is locked"));
}
// After `main.rs` is rewritten, this `cmd_search` call must succeed and the index must
// afterwards report no new, modified, or deleted files.
#[test]
fn search_cmd_autoindexes_stale_index_by_default() {
    let project = setup_graph_index();
    let root = project.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        root.join("main.rs"),
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
    )
    .unwrap();

    let outcome = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        true,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );

    let db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
// With the writer lock file held, `cmd_search` must still succeed, and the index must
// still report exactly one modified file afterwards.
#[test]
fn search_cmd_keeps_read_only_results_when_active_writer_blocks_autoindex() {
    let project = setup_graph_index();
    let root = project.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        root.join("main.rs"),
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
    )
    .unwrap();
    let _lock = hold_writer_lock(&root.join(".tsift/index.lock"));

    let outcome = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        true,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );

    let db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = db.compute_changes(root).unwrap();
    assert_eq!(pending.modified, 1);
}
// With a stale index locked via `hold_rollback_journal_lock`, this `cmd_search` call must
// fail with an error carrying the autoindex and journal-lock diagnostics checked below.
#[test]
fn search_cmd_autoindex_reports_lock_diagnostics_when_rollback_journal_blocks_writer() {
    let project = setup_graph_index();
    let root = project.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        root.join("main.rs"),
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
    )
    .unwrap();
    let _lock = hold_rollback_journal_lock(&root.join(".tsift/index.db"));

    let failure = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        true,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    let expected_fragments = [
        "autoindexing index",
        "lock diagnostics:",
        "journal: present",
        "next: inspect the host for a wedged rollback-journal writer",
    ];
    for fragment in expected_fragments {
        assert!(msg.contains(fragment));
    }
}
// Searching from a nested directory must succeed without creating an index database
// inside that nested directory.
#[test]
fn search_cmd_uses_ancestor_project_root_for_nested_paths() {
    let project = setup_graph_index();
    let nested = project.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let outcome = cmd_search(
        "helper".to_string(),
        Some(nested.clone()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        true,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );
    assert!(!nested.join(".tsift/index.db").exists());
}
// An exact search for a literal that appears on line 2 of `notes.txt` must return that
// single hit with the matching path, location, and snippet.
#[test]
fn exact_search_returns_literal_matches() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("notes.txt"), "alpha\nclaudescore-3\nbeta\n").unwrap();

    let response = run_exact_search_with_timeout(root, "claudescore-3", 5, 0).unwrap();
    assert_eq!(response.strategy, "exact");
    assert_eq!(response.hits.len(), 1);

    let hit = &response.hits[0];
    assert!(hit.path.ends_with("notes.txt"));
    assert_eq!(hit.location.as_deref(), Some("line 2"));
    assert!(hit.snippet.contains("claudescore-3"));
}
// After `main.rs` is rewritten, an "exact" strategy search must still succeed.
#[test]
fn exact_search_skips_stale_index_precheck() {
    let project = setup_graph_index();
    let root = project.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        root.join("main.rs"),
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n",
    )
    .unwrap();

    let outcome = cmd_search(
        "println!(\"updated\")".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("exact".to_string()),
        None,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );
}
// An "exact" strategy search at the workspace root must succeed without creating a root
// `.tsift/index.db`.
#[test]
fn workspace_exact_search_does_not_require_shared_root_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("exact".to_string()),
        None,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );
    assert!(!root.join(".tsift/index.db").exists());
}
// Lists the queries for which `query_prefers_exact_search` must return true, then those
// for which it must return false.
#[test]
fn identifier_like_query_prefers_exact_search() {
    let exact_candidates = [
        "claudescore-3",
        "alpha_helper",
        "src/main.rs",
        "crate::module",
    ];
    for query in exact_candidates {
        assert!(query_prefers_exact_search(query));
    }

    let non_candidates = ["authenticate", "fn main", "."];
    for query in non_candidates {
        assert!(!query_prefers_exact_search(query));
    }
}
// With no requested strategy, the result depends on the query; an explicitly requested
// strategy is returned as given.
#[test]
fn resolve_search_strategy_auto_promotes_identifier_like_queries() {
    let cases = [
        ("claudescore-3", None, "exact"),
        ("authenticate", None, "lexical"),
        ("claudescore-3", Some("hybrid".to_string()), "hybrid"),
    ];
    for (query, requested, expected) in cases {
        assert_eq!(resolve_search_strategy(query, requested), expected);
    }
}
// Searching for `alpha_helper` at the workspace root with no strategy and no scope must
// succeed without creating a root `.tsift/index.db`.
#[test]
fn workspace_identifier_like_search_auto_uses_exact_backend() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        None,
        None,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_search failed: {:?}",
        outcome.as_ref().err()
    );
    assert!(!root.join(".tsift/index.db").exists());
}
// Indexing from a nested directory must succeed, keep the index database at the project
// root, and not create one inside the nested directory.
#[test]
fn index_cmd_uses_ancestor_project_root_for_nested_paths() {
    let project = setup_graph_index();
    let root = project.path();
    let nested = root.join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();
    std::fs::write(nested.join("extra.rs"), "fn nested_helper() {}\n").unwrap();

    let outcome = cmd_index(
        &nested, false, false, false, false, false, false, None, false, false, false, false,
        false, false,
    );
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_index failed: {:?}",
        outcome.as_ref().err()
    );
    assert!(root.join(".tsift/index.db").exists());
    assert!(!nested.join(".tsift/index.db").exists());
}
// Indexing from `docs/nested` inside the workspace must succeed and create the `alpha`
// and `beta` scope databases at the paths the root config resolves.
#[test]
fn workspace_index_cmd_uses_ancestor_project_root_for_nested_paths() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let nested = root.join("docs/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let outcome = cmd_index(
        &nested, false, false, false, false, false, true, None, false, false, false, false,
        false, false,
    );
    let cfg = config::Config::load(root).unwrap();
    // Include the error in the failure output; a bare `is_ok()` assertion hides it.
    assert!(
        outcome.is_ok(),
        "cmd_index failed: {:?}",
        outcome.as_ref().err()
    );
    assert!(cfg.db_path_for(root, "alpha").exists());
    assert!(cfg.db_path_for(root, "beta").exists());
}
/// When only `alpha` has an index database, `cmd_status` must create the
/// missing `beta` database and the follow-up status check must report `Fresh`.
#[test]
fn status_cmd_autoindexes_missing_workspace_scopes() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let cfg = config::Config::load(root).unwrap();

    // Index `alpha` by hand; the handle stays bound for the rest of the test.
    let alpha = config::Config::resolve_submodule(root, "alpha").unwrap();
    let alpha_db_path = cfg.db_path_for(root, &alpha.id);
    let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();
    alpha_db.apply_changes(&alpha.source_root).unwrap();

    let beta_db_path = cfg.db_path_for(root, "beta");
    assert!(!beta_db_path.exists());

    let options = StatusCommandOptions {
        fix: false,
        no_fix: false,
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        schema: false,
    };
    cmd_status(root, options).unwrap();

    assert!(beta_db_path.exists());
    let after = status::check_status(root).unwrap();
    assert!(matches!(after.index, status::IndexStatus::Fresh { .. }));
}
/// On a workspace that has never been indexed, `cmd_status` must create both
/// the `alpha` and `beta` databases and leave the index reported as `Fresh`.
#[test]
fn status_cmd_autoindexes_workspace_when_all_scopes_are_missing() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let cfg = config::Config::load(root).unwrap();

    let options = StatusCommandOptions {
        fix: false,
        no_fix: false,
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        schema: false,
    };
    cmd_status(root, options).unwrap();

    let alpha_db_path = cfg.db_path_for(root, "alpha");
    let beta_db_path = cfg.db_path_for(root, "beta");
    assert!(alpha_db_path.exists());
    assert!(beta_db_path.exists());
    let after = status::check_status(root).unwrap();
    assert!(matches!(after.index, status::IndexStatus::Fresh { .. }));
}
/// After `main.rs` is rewritten the status check reports `Stale`; running
/// `cmd_status` must bring the index back to `Fresh`.
///
/// NOTE(review): the test name mentions `fix`, yet the options pass
/// `fix: false` — presumably refreshing is the default behaviour; confirm.
#[test]
fn status_cmd_fix_refreshes_stale_index() {
    let project = setup_graph_index();
    let root = project.path();

    // Pause briefly, then rewrite the source that the fixture indexed.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();

    let before = status::check_status(root).unwrap();
    assert!(matches!(before.index, status::IndexStatus::Stale { .. }));

    let options = StatusCommandOptions {
        fix: false,
        no_fix: false,
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        schema: false,
    };
    cmd_status(root, options).unwrap();

    let after = status::check_status(root).unwrap();
    assert!(matches!(after.index, status::IndexStatus::Fresh { .. }));
}
/// While a WAL database lock is held on the index, `cmd_status` must still
/// succeed, the status check must report `Fresh` with `SnapshotFallbackWal`
/// recovery, and the locks report must show no writer lock, at least one
/// sidecar file, and the "wedged writer" recommendation.
#[test]
fn status_cmd_reports_wal_snapshot_recovery_without_tsift_writer_lock() {
    let project = setup_graph_index();
    let root = project.path();
    let index_db = root.join(".tsift/index.db");
    // Bound for the whole test so the lock is held through every check below.
    let _lock = hold_wal_database_lock(&index_db);

    let options = StatusCommandOptions {
        fix: false,
        no_fix: false,
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        schema: false,
    };
    cmd_status(root, options).unwrap();

    let status_report = status::check_status(root).unwrap();
    let recovered_from_snapshot = matches!(
        status_report.index,
        status::IndexStatus::Fresh {
            recovery: Some(index::ReadOnlyRecovery::SnapshotFallbackWal),
            ..
        }
    );
    assert!(recovered_from_snapshot);

    let lock_report = status::check_locks(root, None, None).unwrap();
    let writer_lock_absent = matches!(
        lock_report.writer_lock,
        status::WriterLockStatus::Absent { .. }
    );
    assert!(writer_lock_absent);
    assert!(lock_report.wal_sidecar.present || lock_report.shared_memory_sidecar.present);
    assert!(
        lock_report
            .recommended_action
            .contains("wedged writer holding live WAL sidecars")
    );
}
/// A locks report requested for a nested directory must name the project root
/// as its source root and the root's `.tsift/index.db` as its database.
#[test]
fn locks_report_uses_ancestor_project_root_for_nested_paths() {
    let project = setup_graph_index();
    let subdir = project.path().join("src/nested");
    std::fs::create_dir_all(&subdir).unwrap();

    let resolved_root = lint::resolve_project_root_or_canonical_path(&subdir).unwrap();
    let locks = status::check_locks(&resolved_root, Some(&subdir), None).unwrap();

    let expected_db = project.path().join(".tsift/index.db");
    assert_eq!(locks.source_root, project.path());
    assert_eq!(locks.db_path, expected_db);
}
/// A locks report for a directory nested under `src/alpha` must describe the
/// `alpha` submodule: its label, source root, database path, and reindex
/// command.
#[test]
fn workspace_locks_report_infers_scope_from_nested_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let subdir = root.join("src/alpha/nested");
    std::fs::create_dir_all(&subdir).unwrap();
    let resolved_root = lint::resolve_project_root_or_canonical_path(&subdir).unwrap();
    let locks = status::check_locks(&resolved_root, Some(&subdir), None).unwrap();
    let cfg = config::Config::load(root).unwrap();

    let expected_reindex = format!("tsift index --submodule alpha {}", root.display());
    assert_eq!(locks.label, "submodule `alpha` index");
    assert_eq!(locks.source_root, root.join("src/alpha"));
    assert_eq!(locks.db_path, cfg.db_path_for(root, "alpha"));
    assert_eq!(locks.reindex_command, expected_reindex);
}
/// A lexical search scoped to `alpha`, run after `alpha`'s source changed,
/// must succeed and leave the `alpha` index with no pending new, modified, or
/// deleted files.
#[test]
fn scoped_search_cmd_autoindexes_stale_submodule_index_by_default() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    // Change the source after a short pause so it differs from what was indexed.
    let alpha_source = root.join("src/alpha/lib.rs");
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        &alpha_source,
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
    )
    .unwrap();

    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("alpha".to_string()),
        false, false, true,
        0,
        false, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());

    let cfg = config::Config::load(root).unwrap();
    let alpha_db = index::IndexDb::open_read_only(&cfg.db_path_for(root, "alpha")).unwrap();
    let pending = alpha_db.compute_changes(&root.join("src/alpha")).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// With `alpha`'s source changed and its database held by a rollback-journal
/// lock, a search scoped to `alpha` must fail with an error that mentions
/// "search aborted" and the `alpha` index, and not a raw "database is locked".
#[test]
fn scoped_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let alpha_source = root.join("src/alpha/lib.rs");
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        &alpha_source,
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
    )
    .unwrap();

    let cfg = config::Config::load(root).unwrap();
    // Bound until the end of the test so the lock is held during the search.
    let _lock = hold_rollback_journal_lock(&cfg.db_path_for(root, "alpha"));

    let failure = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("alpha".to_string()),
        false, false, false,
        0,
        false, false, false, false, false, false, false,
    )
    .unwrap_err();

    let message = failure.to_string();
    assert!(message.contains("search aborted"));
    assert!(message.contains("submodule `alpha` index"));
    assert!(!message.contains("database is locked"));
}
/// A lexical search with no scope, run after `alpha`'s source changed, must
/// succeed and leave the `alpha` index with no pending new, modified, or
/// deleted files.
#[test]
fn federated_search_cmd_autoindexes_stale_indexes_by_default() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    // Change the source after a short pause so it differs from what was indexed.
    let alpha_source = root.join("src/alpha/lib.rs");
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        &alpha_source,
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
    )
    .unwrap();

    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        true, false, true,
        0,
        false, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());

    let cfg = config::Config::load(root).unwrap();
    let alpha_db = index::IndexDb::open_read_only(&cfg.db_path_for(root, "alpha")).unwrap();
    let pending = alpha_db.compute_changes(&root.join("src/alpha")).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// With `alpha`'s source changed and its database held by a rollback-journal
/// lock, an unscoped search must fail with an error that mentions "stale" and
/// the `alpha` index, and not a raw "database is locked".
#[test]
fn federated_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let alpha_source = root.join("src/alpha/lib.rs");
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        &alpha_source,
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
    )
    .unwrap();

    let cfg = config::Config::load(root).unwrap();
    // Bound until the end of the test so the lock is held during the search.
    let _lock = hold_rollback_journal_lock(&cfg.db_path_for(root, "alpha"));

    let failure = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        true, false, false,
        30,
        false, false, false, false, false, false, false,
    )
    .unwrap_err();

    let message = failure.to_string();
    assert!(message.contains("stale"));
    assert!(message.contains("submodule `alpha` index"));
    assert!(!message.contains("database is locked"));
}
/// An unscoped lexical search at the workspace root must fail with the
/// "explicit target" error (checked by the shared assertion helper) and must
/// not create `.tsift/index.db` at the root.
#[test]
fn workspace_search_cmd_requires_explicit_target_without_shared_root_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let failure = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, true,
        0,
        false, false, false, false, false, false, false,
    )
    .unwrap_err();

    assert_workspace_search_requires_explicit_target(failure);
    let shared_root_db = root.join(".tsift/index.db");
    assert!(!shared_root_db.exists());
}
/// A lexical search with no explicit scope succeeds when the search path is a
/// directory nested under `src/alpha`.
#[test]
fn workspace_search_cmd_infers_scope_from_nested_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let subdir = root.join("src/alpha/nested");
    std::fs::create_dir_all(&subdir).unwrap();

    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(subdir),
        5,
        Some("lexical".to_string()),
        None,
        false, false, false,
        0,
        false, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// In the duplicate-leaf-name workspace fixture, a path nested under
/// `vendor/foo` must resolve to the database of the `vendor/foo` scope.
#[test]
fn resolve_query_db_path_infers_matching_duplicate_leaf_scope_from_nested_path() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let subdir = root.join("vendor/foo/nested");
    std::fs::create_dir_all(&subdir).unwrap();
    let resolved_root = lint::resolve_project_root_or_canonical_path(&subdir).unwrap();
    let resolved_db = resolve_query_db_path(&resolved_root, &subdir, None).unwrap();

    let cfg = config::Config::load(root).unwrap();
    let expected_db = cfg.db_path_for(root, "vendor/foo");
    assert_eq!(resolved_db, expected_db);
}
/// `cmd_graph` must succeed while the lock taken by `hold_write_lock` on the
/// index database is held.
#[test]
fn graph_cmd_succeeds_while_writer_lock_is_held() {
    let project = setup_graph_index();
    let index_db = project.path().join(".tsift/index.db");
    // Bound until the end of the test so the lock outlives the command.
    let _lock = hold_write_lock(&index_db);

    let outcome = cmd_graph(
        "main",
        project.path(),
        false, false,
        None,
        20,
        false, true, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// Running `cmd_graph` after `main.rs` was rewritten must succeed and leave
/// the index with no pending new, modified, or deleted files.
#[test]
fn graph_cmd_autoindexes_stale_index_by_default() {
    let project = setup_graph_index();
    let root = project.path();

    // Pause briefly, then rewrite the source that the fixture indexed.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();

    let outcome = cmd_graph(
        "helper",
        root,
        true, false,
        None,
        20,
        false, true, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());

    let index_db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = index_db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// `cmd_graph` must succeed while a rollback-journal lock is held on the
/// index database.
#[test]
fn graph_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = setup_graph_index();
    let index_db = project.path().join(".tsift/index.db");
    // Bound until the end of the test so the lock outlives the command.
    let _lock = hold_rollback_journal_lock(&index_db);

    let outcome = cmd_graph(
        "main",
        project.path(),
        false, false,
        None,
        20,
        false, true, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// `cmd_graph` must succeed when given a freshly created directory nested
/// inside an indexed project.
#[test]
fn graph_cmd_uses_ancestor_project_root_for_nested_paths() {
    let project = setup_graph_index();
    let subdir = project.path().join("src/nested");
    std::fs::create_dir_all(&subdir).unwrap();

    let outcome = cmd_graph(
        "helper",
        &subdir,
        true, false,
        None,
        20,
        false, false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// `cmd_communities` must succeed while the writer lock on
/// `.tsift/index.lock` is held.
#[test]
fn communities_cmd_succeeds_while_writer_lock_is_held() {
    let project = setup_graph_index();
    let lock_file = project.path().join(".tsift/index.lock");
    // Bound until the end of the test so the lock outlives the command.
    let _lock = hold_writer_lock(&lock_file);

    let outcome = cmd_communities(
        project.path(),
        None,
        1,
        10,
        false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// `cmd_communities` must succeed while a rollback-journal lock is held on
/// the index database.
#[test]
fn communities_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = setup_graph_index();
    let index_db = project.path().join(".tsift/index.db");
    // Bound until the end of the test so the lock outlives the command.
    let _lock = hold_rollback_journal_lock(&index_db);

    let outcome = cmd_communities(
        project.path(),
        None,
        1,
        10,
        false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// After indexing a project whose `main.rs` defines `alpha_helper`, linting a
/// README that mentions that name in plain text must produce an annotation
/// whose text is `alpha_helper`.
#[test]
fn lint_finds_entities_from_project_root_index_db() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    let readme = root.join("README.md");
    std::fs::write(root.join("main.rs"), "fn alpha_helper() {}\n").unwrap();
    std::fs::write(&readme, "alpha_helper should be backticked.\n").unwrap();

    cmd_index(
        root, false, false, false, false, false, false, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    let project_root = lint::find_project_root_for_path(&readme)
        .unwrap()
        .unwrap();
    let entities = lint::collect_entities_from_index_path(&project_root).unwrap();
    let linted = lint::lint_markdown(&readme, &entities).unwrap();

    let flagged = linted
        .annotations
        .iter()
        .any(|ann| ann.text == "alpha_helper");
    assert!(flagged);
}
// --- search timeout ---
/// A direct lexical search over a one-file directory succeeds and creates the
/// cache directory it was given.
#[test]
fn search_direct_runs_ok() {
    let scratch = tempfile::tempdir().unwrap();
    let search_root = scratch.path().to_path_buf();
    let cache_root = search_root.join(".tsift/search-cache");
    std::fs::write(search_root.join("test.rs"), "fn main() {}").unwrap();

    let outcome = run_sift_search(&search_root, &cache_root, "main", 1, "lexical");

    assert!(outcome.is_ok(), "direct search should succeed");
    assert!(
        cache_root.exists(),
        "search should create the configured cache dir"
    );
}
/// With a timeout argument of `0` the timeout-wrapped search still succeeds
/// and creates the cache directory it was given.
#[test]
fn search_timeout_zero_disables_timeout() {
    let scratch = tempfile::tempdir().unwrap();
    let search_root = scratch.path().to_path_buf();
    let cache_root = search_root.join(".tsift/search-cache");
    std::fs::write(search_root.join("test.rs"), "fn main() {}").unwrap();

    let outcome =
        run_search_with_timeout(&search_root, &cache_root, "main", 1, 0, "lexical", &[]);

    assert!(outcome.is_ok(), "timeout=0 should still work (no timeout)");
    assert!(
        cache_root.exists(),
        "timeout=0 should keep using the stable search cache dir"
    );
}
/// When the index database has been deleted, the timeout message must report
/// the timeout, say the index is missing, and suggest `tsift index` rather
/// than claiming the search root looks fresh.
#[test]
fn search_timeout_message_reports_missing_index_as_rebuild_needed() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    std::fs::write(root.join("main.rs"), "fn main() {}\n").unwrap();
    cmd_index(
        root, false, false, false, false, false, false, None, false, false, false, false, false,
        false,
    )
    .unwrap();

    // Remove the freshly built database so the target points at a missing file.
    let db_path = root.join(".tsift/index.db");
    std::fs::remove_file(&db_path).unwrap();

    let targets = [SearchIndexTarget {
        label: "index".to_string(),
        db_path,
        source_root: root.to_path_buf(),
        scope_name: None,
        reindex_cmd: format!("tsift index {}", root.display()),
    }];
    let text = search_timeout_message(1, "lexical", &targets).unwrap();

    assert!(text.contains("timed out after 1s"));
    assert!(text.contains("index is missing"));
    assert!(text.contains("Run `tsift index"));
    assert!(!text.contains("search root looks fresh"));
}
/// The next search-worker output path must carry a `json` extension.
#[test]
fn search_worker_output_path_uses_json_suffix() {
    let output_path = next_search_worker_output_path();
    let has_json_suffix = output_path.extension().is_some_and(|ext| ext == "json");
    assert!(has_json_suffix);
}
// --- index quiet mode ---
/// Smoke test: `cmd_index` succeeds on the graph fixture with this flag
/// combination. Only the `Ok` result is checked; printed output is not.
#[test]
fn index_quiet_suppresses_file_list() {
    let project = setup_graph_index();
    let outcome = cmd_index(
        project.path(),
        false, true, false, false, true, false,
        None,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// Smoke test: `cmd_index` succeeds on the graph fixture with this flag
/// combination. Only the `Ok` result is checked; printed output is not.
#[test]
fn index_exit_code_implies_quiet() {
    let project = setup_graph_index();
    let outcome = cmd_index(
        project.path(),
        false, true, false, false, false, false,
        None,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// Smoke test: `cmd_index` succeeds on the graph fixture with this flag
/// combination. Only the `Ok` result is checked; printed output is not.
#[test]
fn index_quiet_json_omits_changes() {
    let project = setup_graph_index();
    let outcome = cmd_index(
        project.path(),
        false, true, false, false, true, false,
        None,
        true, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// `tsift workflow` with no arguments parses to the `search` topic with JSON
/// output off.
#[test]
fn cli_workflow_defaults_to_search_topic() {
    let parsed = parse_cli(["tsift", "workflow"]);
    let Some(Commands::Workflow { topic, json }) = parsed.command else {
        panic!("expected Workflow command");
    };
    assert_eq!(topic, "search");
    assert!(!json);
}
/// The search workflow recipe must list its five steps in order, mention the
/// "originating command" in its handle contract, and have steps 1, 2, and 4
/// preserve `sfam-*`, `ecall-*`, and "artifact handles" respectively.
#[test]
fn search_workflow_recipe_preserves_handles_across_expansions() {
    let recipe = workflow::search_workflow_recipe();

    let step_names: Vec<&str> = recipe.steps.iter().map(|step| step.name).collect();
    let expected_names = vec![
        "exact-anchor",
        "semantic-search",
        "explain-symbol",
        "summarize-selection",
        "digest-expansion",
    ];
    assert_eq!(step_names, expected_names);

    let contract_mentions_origin = recipe
        .handle_contract
        .iter()
        .any(|item| item.contains("originating command"));
    assert!(contract_mentions_origin);

    // (step index, text that one of the step's `preserves` entries must contain)
    let preserved = [(1, "sfam-*"), (2, "ecall-*"), (4, "artifact handles")];
    for (step_index, needle) in preserved {
        assert!(
            recipe.steps[step_index]
                .preserves
                .iter()
                .any(|item| item.contains(needle))
        );
    }
}
// --- JSON compact vs pretty ---
/// With both formatting flags off, `to_json` must emit a single line that
/// still contains the `"a"` key.
#[test]
fn to_json_compact_default() {
    let val = serde_json::json!({"a": 1, "b": [2, 3]});
    let compact = to_json(&val, false, false).unwrap();
    assert!(!compact.contains('\n'));
    // `"a":1` and `"a": 1` both contain `"a":` as a substring, so checking the
    // key prefix alone accepts either spacing; listing them separately added
    // nothing to the check.
    assert!(compact.contains("\"a\":"));
}
/// With the second flag on, `to_json` output must contain both a newline and
/// a space.
#[test]
fn to_json_pretty_indents() {
    let input = serde_json::json!({"a": 1, "b": [2, 3]});
    let rendered = to_json(&input, true, false).unwrap();
    let has_newline = rendered.contains('\n');
    let has_space = rendered.contains(" ");
    assert!(has_newline);
    assert!(has_space);
}
/// For the same value, the compact rendering must be strictly shorter than
/// the pretty rendering.
#[test]
fn to_json_compact_is_shorter() {
    let input =
        serde_json::json!({"name": "test", "items": [1, 2, 3], "nested": {"key": "value"}});
    let compact_len = to_json(&input, false, false).unwrap().len();
    let pretty_len = to_json(&input, true, false).unwrap().len();
    assert!(compact_len < pretty_len);
}
/// Terse output carries a `_s` object and a `d` payload in which
/// `caller_file`, `caller_name`, and `call_site_line` appear as `cf`, `cn`,
/// and `csl`.
#[test]
fn terse_renames_keys() {
    let input =
        serde_json::json!({"caller_file": "a.rs", "caller_name": "main", "call_site_line": 10});
    let rendered = to_json(&input, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    assert!(decoded["_s"].is_object());
    let data = &decoded["d"];
    assert_eq!(data["cf"], "a.rs");
    assert_eq!(data["cn"], "main");
    assert_eq!(data["csl"], 10);
}
/// The `_s` schema of terse output maps `n` to `name` and `sc` to `score`,
/// and has no `cf` entry when the input has no matching key.
#[test]
fn terse_schema_only_includes_used_keys() {
    let input = serde_json::json!({"name": "test", "score": 0.5});
    let rendered = to_json(&input, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let legend = decoded["_s"].as_object().unwrap();
    assert_eq!(legend["n"], "name");
    assert_eq!(legend["sc"], "score");
    assert!(!legend.contains_key("cf"));
}
/// Terse renaming also applies to objects inside arrays: `callers` becomes
/// `crs`, and its element keys become `cn` and `cf`.
#[test]
fn terse_nested_arrays() {
    let input = serde_json::json!({"callers": [{"caller_name": "a", "caller_file": "b.rs", "caller_line": 1, "callee_name": "c", "call_site_line": 2}]});
    let rendered = to_json(&input, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let first_caller = &decoded["d"]["crs"][0];
    assert_eq!(first_caller["cn"], "a");
    assert_eq!(first_caller["cf"], "b.rs");
}
/// In terse output `custom_field` keeps its name while `name` is shortened
/// to `n`.
#[test]
fn terse_preserves_unknown_keys() {
    let input = serde_json::json!({"custom_field": "value", "name": "test"});
    let rendered = to_json(&input, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let data = &decoded["d"];
    assert_eq!(data["custom_field"], "value");
    assert_eq!(data["n"], "test");
}
// --- ultra-terse ---
/// For an entry of `nodes`, this `to_json_schema` flag combination keeps
/// `id`, shortens `kind` and `name` to `k` and `n`, and drops `properties`.
#[test]
fn ultra_terse_strips_properties_from_graph_nodes() {
    let input = serde_json::json!({
        "nodes": [{"id": "fn:main", "kind": "fn", "name": "main", "properties": {"line": "10"}}]
    });
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let first_node = &decoded["d"]["nodes"][0];
    assert_eq!(first_node["id"], "fn:main");
    assert_eq!(first_node["k"], "fn");
    assert_eq!(first_node["n"], "main");
    assert!(first_node.get("properties").is_none());
}
/// For an entry of `edges`, this `to_json_schema` flag combination keeps
/// `from_id` and `to_id`, emits the `calls` kind as `k: "c"`, and drops
/// `properties`.
#[test]
fn ultra_terse_strips_properties_from_graph_edges() {
    let input = serde_json::json!({
        "edges": [{"from_id": "a", "to_id": "b", "kind": "calls", "properties": {"weight": "2"}}]
    });
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let first_edge = &decoded["d"]["edges"][0];
    assert_eq!(first_edge["from_id"], "a");
    assert_eq!(first_edge["to_id"], "b");
    assert_eq!(first_edge["k"], "c");
    assert!(first_edge.get("properties").is_none());
}
/// Each listed edge kind must come out under `k` as its short code, and an
/// unrecognised kind (`unknown_edge`) must pass through unchanged.
#[test]
fn ultra_terse_abbreviates_edge_kinds() {
    let input = serde_json::json!({
        "edges": [
            {"from_id": "a", "to_id": "b", "kind": "defines"},
            {"from_id": "a", "to_id": "c", "kind": "contains"},
            {"from_id": "a", "to_id": "d", "kind": "imports"},
            {"from_id": "a", "to_id": "e", "kind": "mentions"},
            {"from_id": "a", "to_id": "f", "kind": "semantic_relation"},
            {"from_id": "a", "to_id": "g", "kind": "belongs_to"},
            {"from_id": "a", "to_id": "h", "kind": "scopes_context"},
            {"from_id": "a", "to_id": "i", "kind": "uses"},
            {"from_id": "a", "to_id": "j", "kind": "parent"},
            {"from_id": "a", "to_id": "k", "kind": "unknown_edge"},
        ]
    });
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let edges = decoded["d"]["edges"].as_array().unwrap();

    // Expected `k` values, in the same order as the input edges above.
    let expected_codes = [
        "d", "ct", "i", "m", "sr", "bt", "sctx", "u", "p", "unknown_edge",
    ];
    for (position, code) in expected_codes.into_iter().enumerate() {
        assert_eq!(edges[position]["k"], code);
    }
}
/// For an entry of `edges`, this `to_json_schema` flag combination drops
/// `provenance` and `freshness` and still emits the kind as `k: "c"`.
#[test]
fn ultra_terse_strips_provenance_freshness_from_edges() {
    let input = serde_json::json!({
        "edges": [{"from_id": "a", "to_id": "b", "kind": "calls", "provenance": [{"source": "tsift"}], "freshness": {"observed_at_unix": 1234567890}}]
    });
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let first_edge = &decoded["d"]["edges"][0];
    for dropped_key in ["provenance", "freshness"] {
        assert!(first_edge.get(dropped_key).is_none());
    }
    assert_eq!(first_edge["k"], "c");
}
/// A 120-character `snippet` must come out under `sn` as an 80-byte string
/// ending in `...`.
#[test]
fn ultra_terse_truncates_snippets() {
    let oversized = "x".repeat(120);
    let input = serde_json::json!({"snippet": oversized});
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let shortened = decoded["d"]["sn"].as_str().unwrap();
    assert_eq!(shortened.len(), 80);
    assert!(shortened.ends_with("..."));
}
/// A 100-character `snippet` must come out under the shortened `sn` key as
/// an 80-byte string ending in `...`.
#[test]
fn ultra_terse_truncates_abbreviated_snippet_key() {
    let oversized = "y".repeat(100);
    let input = serde_json::json!({"snippet": oversized});
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let shortened = decoded["d"]["sn"].as_str().unwrap();
    assert_eq!(shortened.len(), 80);
    assert!(shortened.ends_with("..."));
}
/// For a coverage-snapshot-shaped object, this flag combination keeps `mode`,
/// `total_sector_count`, and `dirty_sector_count` and drops the other six
/// counters and fields.
#[test]
fn ultra_terse_compacts_coverage_snapshot() {
    let input = serde_json::json!({
        "mode": "incremental",
        "total_sector_count": 10,
        "dirty_sector_count": 2,
        "active_rebuild": Some("rebuild-1"),
        "completed_dirty_sector_count": 1,
        "mounted_sector_count": 8,
        "rebuilding_sector_count": 1,
        "resumed_sector_count": 3,
        "reused_sector_count": 5
    });
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let data = &decoded["d"];

    assert_eq!(data["mode"], "incremental");
    assert_eq!(data["total_sector_count"], 10);
    assert_eq!(data["dirty_sector_count"], 2);

    let dropped_keys = [
        "active_rebuild",
        "completed_dirty_sector_count",
        "mounted_sector_count",
        "rebuilding_sector_count",
        "resumed_sector_count",
        "reused_sector_count",
    ];
    for key in dropped_keys {
        assert!(data.get(key).is_none());
    }
}
/// A short `snippet` must come out under `sn` with its text unchanged.
#[test]
fn ultra_terse_short_snippet_unchanged() {
    let input = serde_json::json!({"snippet": "short text"});
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let snippet = &decoded["d"]["sn"];
    assert_eq!(*snippet, "short text");
}
/// A `properties` object nested under `config` (not under `nodes` or `edges`)
/// must survive in the output.
#[test]
fn ultra_terse_non_graph_object_properties_preserved() {
    let input = serde_json::json!({"config": {"properties": {"a": "1"}}});
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let kept = &decoded["d"]["config"]["properties"];
    assert!(kept.is_object());
}
// --- schema-then-values ---
/// With the schema flag on, an array of same-shaped objects becomes a `_c`
/// column list plus `_r` rows. Values are looked up by column name, so the
/// test does not depend on column order.
#[test]
fn schema_converts_homogeneous_arrays() {
    let input = serde_json::json!({"symbols": [
        {"name": "foo", "kind": "fn", "line": 10},
        {"name": "bar", "kind": "fn", "line": 20}
    ]});
    let rendered = to_json_schema(&input, false, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let table = &decoded["symbols"];

    let columns: Vec<&str> = table["_c"]
        .as_array()
        .unwrap()
        .iter()
        .map(|value| value.as_str().unwrap())
        .collect();
    let first_row = table["_r"][0].as_array().unwrap();
    let second_row = table["_r"][1].as_array().unwrap();

    let column_index = |wanted: &str| columns.iter().position(|column| *column == wanted).unwrap();
    let name_at = column_index("name");
    let kind_at = column_index("kind");
    let line_at = column_index("line");

    assert_eq!(first_row[name_at], "foo");
    assert_eq!(first_row[kind_at], "fn");
    assert_eq!(first_row[line_at], 10);
    assert_eq!(second_row[name_at], "bar");
    assert_eq!(second_row[kind_at], "fn");
    assert_eq!(second_row[line_at], 20);
}
/// A one-element object array stays a plain array under the schema flag.
#[test]
fn schema_skips_short_arrays() {
    let input = serde_json::json!({"items": [{"name": "only"}]});
    let rendered = to_json_schema(&input, false, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let items = &decoded["items"];
    assert!(items.is_array());
    assert_eq!(items[0]["name"], "only");
}
/// An array whose objects have different keys stays a plain array under the
/// schema flag.
#[test]
fn schema_skips_heterogeneous_arrays() {
    let input = serde_json::json!({"items": [{"a": 1}, {"b": 2}]});
    let rendered = to_json_schema(&input, false, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let items = &decoded["items"];
    assert!(items.is_array());
    assert_eq!(items[0]["a"], 1);
}
/// With terse and schema both on, the output has a `_s` object and the
/// `callers` array appears as `crs` in `_c`/`_r` form with shortened column
/// names (`cn`, `cf`).
#[test]
fn schema_with_terse_combines() {
    let input = serde_json::json!({"callers": [
        {"caller_name": "a", "caller_file": "x.rs"},
        {"caller_name": "b", "caller_file": "y.rs"}
    ]});
    let rendered = to_json_schema(&input, false, true, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    assert!(decoded["_s"].is_object());
    let table = &decoded["d"]["crs"];
    assert!(table["_c"].is_array());
    assert!(table["_r"].is_array());

    let columns: Vec<&str> = table["_c"]
        .as_array()
        .unwrap()
        .iter()
        .map(|value| value.as_str().unwrap())
        .collect();
    let first_row = table["_r"][0].as_array().unwrap();

    let column_index = |wanted: &str| columns.iter().position(|column| *column == wanted).unwrap();
    let name_at = column_index("cn");
    let file_at = column_index("cf");
    assert_eq!(first_row[name_at], "a");
    assert_eq!(first_row[file_at], "x.rs");
}
/// An array of strings is left unchanged under the schema flag.
#[test]
fn schema_preserves_non_object_arrays() {
    let input = serde_json::json!({"tags": ["a", "b", "c"]});
    let rendered = to_json_schema(&input, false, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let expected_tags = serde_json::json!(["a", "b", "c"]);
    assert_eq!(decoded["tags"], expected_tags);
}
/// `--schema` placed before the subcommand sets `schema` and still parses
/// the `search` subcommand.
#[test]
fn cli_accepts_global_schema_flag() {
    let parsed = parse_cli(["tsift", "--schema", "search", "test"]);
    assert!(parsed.schema);
    let is_search = matches!(parsed.command, Some(Commands::Search { .. }));
    assert!(is_search);
}
/// `--envelope` placed before the subcommand sets `envelope` and still
/// parses the `context-pack` subcommand.
#[test]
fn cli_accepts_global_envelope_flag() {
    let argv = [
        "tsift",
        "--envelope",
        "context-pack",
        "tasks/software/tsift.md",
    ];
    let parsed = parse_cli(argv);
    assert!(parsed.envelope);
    let is_context_pack = matches!(parsed.command, Some(Commands::ContextPack { .. }));
    assert!(is_context_pack);
}
/// `tsift locks` parses to the `Locks` command.
#[test]
fn cli_accepts_locks_command() {
    let parsed = parse_cli(["tsift", "locks"]);
    let is_locks = matches!(parsed.command, Some(Commands::Locks { .. }));
    assert!(is_locks);
}
/// `tsift memory budget-guard` parses `--file`, `--budget-tokens`, and
/// `--json` into the `BudgetGuard` variant.
#[test]
fn cli_parses_memory_budget_guard_command() {
    let parsed = parse_cli([
        "tsift",
        "memory",
        "budget-guard",
        "--file",
        "tool.log",
        "--budget-tokens",
        "1000",
        "--json",
    ]);
    let Some(Commands::Memory {
        command:
            crate::cli::MemoryCommand::BudgetGuard {
                file,
                budget_tokens,
                json,
                ..
            },
    }) = parsed.command
    else {
        panic!("expected memory budget-guard command");
    };
    assert_eq!(file.as_deref(), Some(std::path::Path::new("tool.log")));
    assert_eq!(budget_tokens, 1000);
    assert!(json);
}
/// `tsift memory capture-agent-doc-closeout` parses its positional path and
/// every option into the `CaptureAgentDocCloseout` variant.
#[test]
fn cli_parses_memory_capture_agent_doc_closeout_command() {
    let parsed = parse_cli([
        "tsift",
        "memory",
        "capture-agent-doc-closeout",
        ".",
        "--session-path",
        "tasks/software/tsift.md",
        "--prompt-target",
        "do [#tsiftmemhooks]",
        "--response-summary",
        "wired closeout capture",
        "--commit-hash",
        "abc123",
        "--session-check-status",
        "clean",
        "--json",
    ]);
    let Some(Commands::Memory {
        command:
            crate::cli::MemoryCommand::CaptureAgentDocCloseout {
                path,
                session_path,
                prompt_target,
                response_summary,
                commit_hash,
                session_check_status,
                json,
            },
    }) = parsed.command
    else {
        panic!("expected memory capture-agent-doc-closeout command");
    };
    let expected_session_path = std::path::PathBuf::from("tasks/software/tsift.md");
    assert_eq!(path, std::path::PathBuf::from("."));
    assert_eq!(session_path, expected_session_path);
    assert_eq!(prompt_target, "do [#tsiftmemhooks]");
    assert_eq!(response_summary, "wired closeout capture");
    assert_eq!(commit_hash.as_deref(), Some("abc123"));
    assert_eq!(session_check_status, "clean");
    assert!(json);
}
/// `tsift locks --scope alpha` parses with `scope` set to `alpha`.
#[test]
fn cli_locks_accepts_scope_flag() {
    let parsed = parse_cli(["tsift", "locks", "--scope", "alpha"]);
    let Some(Commands::Locks { scope, .. }) = parsed.command else {
        panic!("expected Locks command");
    };
    assert_eq!(scope.as_deref(), Some("alpha"));
}
/// `tsift search test --autoindex` sets `autoindex` and leaves
/// `no_autoindex` unset.
#[test]
fn cli_search_accepts_autoindex_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--autoindex"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert!(autoindex);
    assert!(!no_autoindex);
}
/// `tsift search test --exact` sets `exact` and leaves `strategy` as `None`.
#[test]
fn cli_search_accepts_exact_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--exact"]);
    let Some(Commands::Search {
        exact, strategy, ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert!(exact);
    assert!(strategy.is_none());
}
/// `tsift diff-digest --json .` parses with `cached` off, no `revision`, and
/// `max_parsed_files` equal to 25.
#[test]
fn cli_parses_diff_digest_command() {
    let parsed = parse_cli(["tsift", "diff-digest", "--json", "."]);
    let Some(Commands::DiffDigest {
        json,
        path,
        cached,
        revision,
        max_parsed_files,
    }) = parsed.command
    else {
        panic!("expected DiffDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert!(!cached);
    assert!(revision.is_none());
    assert_eq!(max_parsed_files, 25);
}
/// Passing both `--cached` and `--revision` to `diff-digest` must fail to
/// parse, and the error text must name both flags.
#[test]
fn cli_rejects_conflicting_diff_digest_modes() {
    let argv = [
        "tsift",
        "diff-digest",
        "--cached",
        "--revision",
        "HEAD",
        ".",
    ];
    let Err(err) = try_parse_cli(argv) else {
        panic!("expected conflicting diff-digest modes to fail");
    };
    let message = err.to_string();
    assert!(message.contains("--cached"));
    assert!(message.contains("--revision"));
}
/// `tsift test-digest` parses `--path`, `--input`, `--runner`, and `--json`.
#[test]
fn cli_parses_test_digest_command() {
    let parsed = parse_cli([
        "tsift",
        "test-digest",
        "--path",
        ".",
        "--input",
        "target/test.log",
        "--runner",
        "cargo",
        "--json",
    ]);
    let Some(Commands::TestDigest {
        json,
        path,
        input,
        runner,
    }) = parsed.command
    else {
        panic!("expected TestDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/test.log")));
    assert_eq!(runner.as_deref(), Some("cargo"));
}
/// `tsift log-digest` parses `--path`, `--input`, and `--json`.
#[test]
fn cli_parses_log_digest_command() {
    let parsed = parse_cli([
        "tsift",
        "log-digest",
        "--path",
        ".",
        "--input",
        "target/build.log",
        "--json",
    ]);
    let Some(Commands::LogDigest { json, path, input }) = parsed.command else {
        panic!("expected LogDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/build.log")));
}
/// `tsift metric-digest` parses its input and baseline paths, the metric
/// lists, the numeric `--history` and `--top` values, and `--json`.
#[test]
fn cli_parses_metric_digest_command() {
    let parsed = parse_cli([
        "tsift",
        "metric-digest",
        "--input",
        "target/runs.json",
        "--baseline",
        "target/prior.json",
        "--metric",
        "session_mae",
        "--lower-is-better",
        "session_mae",
        "--history",
        "4",
        "--top",
        "2",
        "--json",
    ]);
    let Some(Commands::MetricDigest {
        input,
        baseline,
        metrics,
        lower_is_better,
        history,
        top,
        json,
        ..
    }) = parsed.command
    else {
        panic!("expected MetricDigest command");
    };
    assert!(json);
    assert_eq!(input, Some(PathBuf::from("target/runs.json")));
    assert_eq!(baseline, Some(PathBuf::from("target/prior.json")));
    assert_eq!(metrics, vec!["session_mae"]);
    assert_eq!(lower_is_better, vec!["session_mae"]);
    assert_eq!(history, 4);
    assert_eq!(top, 2);
}
/// `dci-benchmark --fixture <path> --json` should populate both fields of
/// `Commands::DciBenchmark`.
#[test]
fn cli_parses_dci_benchmark_command() {
    let parsed = parse_cli([
        "tsift",
        "dci-benchmark",
        "--fixture",
        "fixtures/dci-search-benchmark.json",
        "--json",
    ]);
    if let Some(Commands::DciBenchmark { fixture, json }) = parsed.command {
        assert!(json);
        assert_eq!(fixture, PathBuf::from("fixtures/dci-search-benchmark.json"));
    } else {
        panic!("expected DciBenchmark command");
    }
}
/// `session-digest` should accept `--path`, `--input`, `--source` and `--json`
/// and surface them on `Commands::SessionDigest`.
#[test]
fn cli_parses_session_digest_command() {
    let argv = [
        "tsift",
        "session-digest",
        "--path",
        ".",
        "--input",
        "target/session.md",
        "--source",
        "markdown",
        "--json",
    ];
    let parsed = parse_cli(argv);
    let Some(Commands::SessionDigest {
        json,
        path,
        input,
        source,
    }) = parsed.command
    else {
        panic!("expected SessionDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/session.md")));
    assert_eq!(source.as_deref(), Some("markdown"));
}
/// `session-cost` should accept `--input`, `--source` and `--json` and expose
/// them on `Commands::SessionCost`.
#[test]
fn cli_parses_session_cost_command() {
    let parsed = parse_cli([
        "tsift",
        "session-cost",
        "--input",
        "target/session.jsonl",
        "--source",
        "codex-jsonl",
        "--json",
    ]);
    let Some(Commands::SessionCost {
        json,
        input,
        source,
    }) = parsed.command
    else {
        panic!("expected SessionCost command");
    };
    assert!(json);
    assert_eq!(input, Some(PathBuf::from("target/session.jsonl")));
    assert_eq!(source.as_deref(), Some("codex-jsonl"));
}
/// `session-review <path> --next-context --json` should set both flags and
/// carry the positional path through to `Commands::SessionReview`.
#[test]
fn cli_parses_session_review_command() {
    let parsed = parse_cli([
        "tsift",
        "session-review",
        "tasks/software/tsift.md",
        "--next-context",
        "--json",
    ]);
    if let Some(Commands::SessionReview {
        json,
        next_context,
        path,
        ..
    }) = parsed.command
    {
        assert!(json);
        assert!(next_context);
        assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    } else {
        panic!("expected SessionReview command");
    }
}
/// `search` should parse `--max-items` and `--max-bytes` into optional numeric
/// caps.
#[test]
fn cli_search_accepts_budget_flags() {
    let argv = [
        "tsift",
        "search",
        "alpha_helper",
        "--max-items",
        "3",
        "--max-bytes",
        "96",
    ];
    let parsed = parse_cli(argv);
    let Some(Commands::Search {
        max_items,
        max_bytes,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert_eq!(max_items, Some(3));
    assert_eq!(max_bytes, Some(96));
}
/// `search --budget small` should parse to `ResponseBudgetPreset::Small`.
#[test]
fn cli_search_accepts_budget_preset() {
    let parsed = parse_cli(["tsift", "search", "alpha_helper", "--budget", "small"]);
    let Some(Commands::Search { budget, .. }) = parsed.command else {
        panic!("expected Search command");
    };
    assert_eq!(budget, Some(ResponseBudgetPreset::Small));
}
/// `search` should accept every AST facet flag and collect each one into its
/// own list on `Commands::Search`.
#[test]
fn cli_search_accepts_ast_facet_filters() {
    let argv = [
        "tsift",
        "search",
        "setup",
        "--lang",
        "markdown",
        "--kind",
        "list_item",
        "--node-kind",
        "list_item",
        "--section",
        "Install",
        "--parent",
        "Run setup.",
        "--child",
        "Confirm setup.",
        "--fence-language",
        "rust",
        "--list-depth",
        "1",
        "--heading-level",
        "2",
    ];
    let parsed = parse_cli(argv);
    let Some(Commands::Search {
        lang,
        kind,
        node_kind,
        section,
        parent,
        child,
        fence_language,
        list_depth,
        heading_level,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    // Text-valued facets.
    assert_eq!(lang, vec!["markdown"]);
    assert_eq!(kind, vec!["list_item"]);
    assert_eq!(node_kind, vec!["list_item"]);
    assert_eq!(section, vec!["Install"]);
    assert_eq!(parent, vec!["Run setup."]);
    assert_eq!(child, vec!["Confirm setup."]);
    assert_eq!(fence_language, vec!["rust"]);
    // Numeric facets.
    assert_eq!(list_depth, vec![1]);
    assert_eq!(heading_level, vec![2]);
}
/// Exercises `ResponseBudget::from_cli` three ways: a preset alone, a preset
/// combined with an explicit item cap, and no caps with the final flag set.
#[test]
fn response_budget_presets_fill_defaults_and_preserve_explicit_caps() {
    // Preset only: these are the caps asserted for `Small`.
    let preset_only =
        ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Small), false);
    assert_eq!(preset_only.preview_items(), 3);
    assert_eq!(preset_only.preview_bytes(), 120);
    assert_eq!(preset_only.follow_up_items(), 4);

    // An explicit item cap of 7 is kept, while the byte cap stays at 120.
    let explicit_items =
        ResponseBudget::from_cli(Some(7), None, Some(ResponseBudgetPreset::Small), false);
    assert_eq!(explicit_items.preview_items(), 7);
    assert_eq!(explicit_items.preview_bytes(), 120);
    assert_eq!(explicit_items.follow_up_items(), 7);

    // No caps and no preset, but the last argument is `true`: the budget must
    // still be active.
    // NOTE(review): the last argument is presumably the envelope flag — confirm
    // against `from_cli`'s signature.
    let flag_only = ResponseBudget::from_cli(None, None, None, true);
    assert!(flag_only.is_active());
}
/// `explain` should parse `--max-items` and `--max-bytes` into optional
/// numeric caps.
#[test]
fn cli_explain_accepts_budget_flags() {
    let parsed = parse_cli([
        "tsift",
        "explain",
        "alpha_helper",
        "--max-items",
        "2",
        "--max-bytes",
        "80",
    ]);
    if let Some(Commands::Explain {
        max_items,
        max_bytes,
        ..
    }) = parsed.command
    {
        assert_eq!(max_items, Some(2));
        assert_eq!(max_bytes, Some(80));
    } else {
        panic!("expected Explain command");
    }
}
/// `session-review` should parse `--max-items` and `--max-bytes` into optional
/// numeric caps.
#[test]
fn cli_session_review_accepts_budget_flags() {
    let argv = [
        "tsift",
        "session-review",
        "tasks/software/tsift.md",
        "--max-items",
        "4",
        "--max-bytes",
        "120",
    ];
    let parsed = parse_cli(argv);
    let Some(Commands::SessionReview {
        max_items,
        max_bytes,
        ..
    }) = parsed.command
    else {
        panic!("expected SessionReview command");
    };
    assert_eq!(max_items, Some(4));
    assert_eq!(max_bytes, Some(120));
}
/// `context-pack` should parse its positional path, both digest inputs, the
/// runner, the numeric caps and `--json`; `budget` and `convex_snapshot` were
/// not passed and must be absent.
#[test]
fn cli_parses_context_pack_command() {
    let argv = [
        "tsift",
        "context-pack",
        "tasks/software/tsift.md",
        "--test-input",
        "target/test.log",
        "--runner",
        "cargo",
        "--log-input",
        "target/build.log",
        "--max-items",
        "3",
        "--max-bytes",
        "96",
        "--json",
    ];
    let parsed = parse_cli(argv);
    // Every field is named (no `..`), so this pattern stops compiling if the
    // variant gains a field the test does not cover.
    let Some(Commands::ContextPack {
        path,
        test_input,
        runner,
        log_input,
        json,
        max_items,
        max_bytes,
        budget,
        convex_snapshot,
    }) = parsed.command
    else {
        panic!("expected ContextPack command");
    };
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(test_input, Some(PathBuf::from("target/test.log")));
    assert_eq!(runner.as_deref(), Some("cargo"));
    assert_eq!(log_input, Some(PathBuf::from("target/build.log")));
    assert!(json);
    assert_eq!(max_items, Some(3));
    assert_eq!(max_bytes, Some(96));
    assert!(budget.is_none());
    assert!(convex_snapshot.is_none());
}
/// `token-savings --fixture <path> --fail-under --json` should populate all
/// three fields of `Commands::TokenSavings`.
#[test]
fn cli_parses_token_savings_command() {
    let parsed = parse_cli([
        "tsift",
        "token-savings",
        "--fixture",
        "fixtures/tsift-token-savings.json",
        "--fail-under",
        "--json",
    ]);
    let Some(Commands::TokenSavings {
        fixture,
        fail_under,
        json,
    }) = parsed.command
    else {
        panic!("expected TokenSavings command");
    };
    assert_eq!(fixture, PathBuf::from("fixtures/tsift-token-savings.json"));
    assert!(fail_under);
    assert!(json);
}
/// Builds a single `search` fixture case with twelve raw symbols grouped into
/// two tagpath families and checks that the report passes the 40% savings
/// threshold and records the expected counts.
#[test]
fn token_savings_report_records_fixture_thresholds() {
    // Six spellings each of two identifiers.
    let identifiers = [
        "validate_user",
        "validateUser",
        "ValidateUser",
        "validate-user",
        "VALIDATE_USER",
        "Validate_User",
        "raw_symbol",
        "rawSymbol",
        "RawSymbol",
        "raw-symbol",
        "RAW_SYMBOL",
        "Raw_Symbol",
    ];
    let mut raw_symbols = Vec::new();
    for (idx, identifier) in identifiers.iter().enumerate() {
        raw_symbols.push(TokenSavingsRawSymbol {
            identifier: (*identifier).to_string(),
            file: format!("src/example_{idx}.rs"),
            line: (idx + 1) as u64,
            context: "function".to_string(),
        });
    }

    // Each family stands for six of the raw spellings above.
    let family = |canonical: &str| TokenSavingsFamily {
        canonical: canonical.to_string(),
        count: 6,
        aliases: BTreeMap::new(),
    };
    let search_case = TokenSavingsFixtureCase {
        name: "search-preview".to_string(),
        surface: "search".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols,
        tagpath_families: vec![family("validate_user"), family("raw_symbol")],
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: None,
        markdown_projection_inputs: None,
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![search_case],
    };

    let report = build_token_savings_report(&fixture).unwrap();
    assert!(report.pass);
    let outcome = &report.cases[0];
    assert_eq!(outcome.raw_symbol_count, 12);
    assert_eq!(outcome.family_count, 2);
    assert_eq!(outcome.status, "pass");
    assert!(outcome.byte_delta > 0);
    assert!(outcome.raw_estimated_tokens > outcome.envelope_estimated_tokens);
    assert!(outcome.savings_percent >= 40.0);
}
/// A `source-read` case whose envelope spans the same 121 lines as the raw
/// read, with anchors at lines 40, 120 and 160, must produce a passing report
/// with at least 40% savings.
#[test]
fn token_savings_source_read_inputs_preserve_required_anchors() {
    let read = TokenSavingsSourceReadInput {
        command: "sed -n '40,160p' src/main.rs".to_string(),
        file: "src/main.rs".to_string(),
        raw_start: 40,
        raw_lines: 121,
        raw_excerpt: "line 40\n".repeat(121),
        envelope_start: 40,
        envelope_lines: 121,
        required_line_anchors: vec![40, 120, 160],
    };
    let source_read_case = TokenSavingsFixtureCase {
        name: "source-read".to_string(),
        surface: "source-read".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols: Vec::new(),
        tagpath_families: Vec::new(),
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: Some(TokenSavingsSourceReadInputs { reads: vec![read] }),
        markdown_projection_inputs: None,
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![source_read_case],
    };

    let report = build_token_savings_report(&fixture).unwrap();
    assert!(report.pass);
    let outcome = &report.cases[0];
    assert_eq!(outcome.surface, "source-read");
    assert!(outcome.savings_percent >= 40.0);
}
/// When the envelope (start 1, 80 lines) does not reach required anchor line
/// 120, building the report must fail with an error naming that anchor.
#[test]
fn token_savings_source_read_inputs_fail_when_anchor_is_hidden() {
    let read = TokenSavingsSourceReadInput {
        command: "cat src/main.rs".to_string(),
        file: "src/main.rs".to_string(),
        raw_start: 1,
        raw_lines: 200,
        raw_excerpt: "line\n".repeat(200),
        envelope_start: 1,
        envelope_lines: 80,
        required_line_anchors: vec![120],
    };
    let source_read_case = TokenSavingsFixtureCase {
        name: "source-read".to_string(),
        surface: "source-read".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols: Vec::new(),
        tagpath_families: Vec::new(),
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: Some(TokenSavingsSourceReadInputs { reads: vec![read] }),
        markdown_projection_inputs: None,
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![source_read_case],
    };

    let Err(err) = build_token_savings_report(&fixture) else {
        panic!("hidden anchor should fail the source-read fixture");
    };
    assert!(err.to_string().contains("hides required line anchor 120"));
}
/// A `context-pack` case driven only by Markdown projection inputs (outline
/// nodes, selected nodes and an expand command) must produce a passing report
/// with at least 40% savings.
#[test]
fn token_savings_markdown_projection_inputs_require_outline_and_selected_nodes() {
    let document = TokenSavingsMarkdownProjectionInput {
        command: "context-pack markdown body".to_string(),
        file: "tasks/software/tsift.md".to_string(),
        raw_markdown: "# Heading\n\n".repeat(120),
        outline_nodes: vec!["Heading".to_string(), "Details".to_string()],
        selected_nodes: vec!["mdast-selected".to_string()],
        expand:
            "tsift --envelope markdown-ast tasks/software/tsift.md --node mdast-selected --budget normal"
                .to_string(),
    };
    let projection_case = TokenSavingsFixtureCase {
        name: "markdown-projection".to_string(),
        surface: "context-pack".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols: Vec::new(),
        tagpath_families: Vec::new(),
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: None,
        markdown_projection_inputs: Some(TokenSavingsMarkdownProjectionInputs {
            documents: vec![document],
        }),
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![projection_case],
    };

    let report = build_token_savings_report(&fixture).unwrap();
    assert!(report.pass);
    let outcome = &report.cases[0];
    assert_eq!(outcome.surface, "context-pack");
    assert!(outcome.savings_percent >= 40.0);
}
/// Checks that projecting the same Markdown bytes twice reports a cache miss
/// on the first call and a cache hit on the second, and that section and block
/// span lookups over the same content return the expected counts.
///
/// Statement order matters: the span lookups run between the two projection
/// calls, and the second projection is the one expected to hit the cache.
#[test]
fn markdown_ast_projection_cache_reuses_large_document_section_and_block_lookups() {
// One root heading followed by 96 sections, each holding a list item and a
// fenced Rust block that defines `sample_{idx}`.
let mut content = String::from("# Cache Root\n\n");
for idx in 0..96 {
content.push_str(&format!(
"## Section {idx}\n\n- Item {idx}\n\n```rust\nfn sample_{idx}() {{}}\n```\n\n"
));
}
// First projection of this content: expected to be a cache miss.
let first = markdown_ast_projection("semantic-edit", content.as_bytes()).unwrap();
assert!(!first.cache_hit);
assert!(first.nodes.len() > 200);
let sections = markdown_section_spans(&content).unwrap();
let list_items = markdown_block_spans(&content, "list_item").unwrap();
let code_blocks = markdown_block_spans(&content, "code_block").unwrap();
// Second projection of identical bytes: expected to be served from the cache
// with the same number of nodes.
let second = markdown_ast_projection("semantic-edit", content.as_bytes()).unwrap();
assert!(second.cache_hit);
assert_eq!(second.nodes.len(), first.nodes.len());
// 97 sections: presumably the root heading plus the 96 generated ones.
assert_eq!(sections.len(), 97);
assert_eq!(list_items.len(), 96);
assert_eq!(code_blocks.len(), 96);
// The first code block in document order is the one generated for idx 0.
let first_code = first
.nodes
.iter()
.find(|node| node.kind == "code_block")
.expect("expected a Markdown code block");
let first_code_node = markdown_ast_node(
Path::new("/repo"),
"semantic-edit",
first_code,
content.as_bytes(),
&first.nodes,
8,
);
// The fenced block's Rust function should surface as one embedded symbol.
assert_eq!(first_code_node.metadata.embedded_symbols.len(), 1);
assert_eq!(
first_code_node.metadata.embedded_symbols[0].name,
"sample_0"
);
assert_eq!(
first_code_node.metadata.embedded_symbols[0].language,
"rust"
);
}
/// With a one-item, 12-byte budget the previewed symbol name and tag alias are
/// shortened with a trailing `...`, while the preview still carries an `sfam-`
/// handle, a root-relative file path and a `tsift search` expand command.
#[test]
fn search_budget_report_truncates_symbol_preview_and_emits_stable_handle() {
    let root = Path::new("/repo");
    let long_name = "alpha_helper_with_a_long_name";
    let response = empty_search_response(root, "lexical");
    let hit = index::SymbolHit {
        name: long_name.to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: "/repo/src/lib.rs".to_string(),
        line: 12,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: None,
        score: 0.98,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![hit];
    let tight_budget = ResponseBudget::new(Some(1), Some(12));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        long_name,
        "lexical",
        root,
        &response,
        &symbol_hits,
        tight_budget,
        &no_filters,
    );

    assert_eq!(report.symbols.len(), 1);
    let preview = &report.symbols[0];
    assert!(preview.handle.starts_with("sfam-"));
    assert_eq!(preview.tag_alias.as_deref(), Some("alpha/hel..."));
    assert_eq!(preview.name, "alpha_hel...");
    assert_eq!(preview.file, "src/lib.rs");
    assert!(preview.expand.contains("tsift search"));
}
/// A Rust symbol hit that carries byte offsets should be previewed with an
/// `ast_span` artifact exposing those offsets plus `source-read` and
/// `symbol-read` expand commands, and no `markdown-ast` expansion.
#[test]
fn search_budget_report_promotes_ast_span_artifacts_for_symbols() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).unwrap();
    let source = "fn alpha_helper() {\n beta();\n}\n";
    let file = src_dir.join("lib.rs");
    fs::write(&file, source).unwrap();

    // Body span: from the byte after the opening `{` to the closing `}`.
    let body_start = source.find("{\n").unwrap() + 1;
    let body_end = source.rfind("\n}").unwrap() + 1;
    let as_i64 = |offset: usize| i64::try_from(offset).unwrap();

    let response = empty_search_response(root, "lexical");
    let hit = index::SymbolHit {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: file.to_string_lossy().to_string(),
        line: 0,
        end_line: Some(2),
        node_kind: Some("function_item".to_string()),
        start_byte: Some(0),
        end_byte: Some(as_i64(source.len())),
        body_start_byte: Some(as_i64(body_start)),
        body_end_byte: Some(as_i64(body_end)),
        tags: Some("alpha,helper".to_string()),
        score: 0.98,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![hit];
    let budget = ResponseBudget::new(Some(5), Some(96));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        "alpha helper",
        "lexical",
        root,
        &response,
        &symbol_hits,
        budget,
        &no_filters,
    );

    let symbol = &report.symbols[0];
    assert_eq!(symbol.language, "rust");
    assert_eq!(symbol.end_line, Some(2));
    let ast = symbol
        .ast
        .as_ref()
        .expect("search symbol preview should expose an AST span artifact");
    assert_eq!(ast.artifact_kind, "ast_span");

    // The span mirrors the offsets supplied on the hit.
    let span = &ast.span;
    assert!(span.handle.starts_with("span-"));
    assert_eq!(span.node_kind, "function_item");
    assert_eq!(span.start_byte, 0);
    assert_eq!(span.end_byte, source.len());
    assert_eq!(span.body_start_byte, Some(body_start));
    assert_eq!(span.body_end_byte, Some(body_end));

    // Expand commands offered for the span.
    let expand = &ast.expand;
    assert!(expand.source_window.contains("source-read"));
    assert!(
        expand
            .source_body
            .as_ref()
            .unwrap()
            .contains("source-read")
    );
    assert!(expand.symbol_read.contains("symbol-read"));
    assert!(expand.markdown_ast.is_none());
}
/// A Markdown heading hit should be previewed with an AST span whose
/// `markdown-ast` expand command targets the span's own handle via `--node`,
/// alongside the `source-read` and `symbol-read` commands.
#[test]
fn search_budget_report_links_markdown_spans_to_markdown_ast_expansion() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let source = "# Guide\n\n## Install\n\n- Run setup.\n";
    let file = root.join("README.md");
    fs::write(&file, source).unwrap();

    // The heading span runs from `## Install` to the end of the document; its
    // body starts at the list item.
    let heading_start = source.find("## Install").unwrap();
    let heading_end = source.len();
    let list_start = source.find("- Run setup.").unwrap();
    let as_i64 = |offset: usize| i64::try_from(offset).unwrap();

    let response = empty_search_response(root, "lexical");
    let hit = index::SymbolHit {
        name: "Install".to_string(),
        kind: "heading".to_string(),
        language: "markdown".to_string(),
        file: file.to_string_lossy().to_string(),
        line: 2,
        end_line: Some(4),
        node_kind: Some("atx_heading".to_string()),
        start_byte: Some(as_i64(heading_start)),
        end_byte: Some(as_i64(heading_end)),
        body_start_byte: Some(as_i64(list_start)),
        body_end_byte: Some(as_i64(heading_end)),
        tags: Some("install".to_string()),
        score: 1.0,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![hit];
    let budget = ResponseBudget::new(Some(5), Some(96));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        "Install",
        "lexical",
        root,
        &response,
        &symbol_hits,
        budget,
        &no_filters,
    );

    let ast = report.symbols[0]
        .ast
        .as_ref()
        .expect("Markdown search symbol should expose an AST span artifact");
    assert_eq!(ast.span.node_kind, "atx_heading");
    assert_eq!(ast.span.markdown.as_ref().unwrap().heading_level, Some(2));

    let markdown_ast = ast
        .expand
        .markdown_ast
        .as_ref()
        .expect("Markdown symbols should include markdown-ast expansion");
    // On failure the assertions print the full command for easier diagnosis.
    assert!(markdown_ast.contains("markdown-ast"), "{markdown_ast}");
    assert!(markdown_ast.contains("--node"), "{markdown_ast}");
    assert!(markdown_ast.contains(&ast.span.handle), "{markdown_ast}");
    assert!(ast.expand.source_window.contains("source-read"));
    assert!(ast.expand.symbol_read.contains("symbol-read"));
}
/// A Markdown fenced-code hit should expose the Rust function inside the fence
/// as a single embedded symbol on the span's Markdown metadata.
#[test]
fn search_budget_report_exposes_markdown_embedded_code_symbols() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let source = "# Guide\n\n```rust\nfn sample() {}\n```\n";
    let file = root.join("README.md");
    fs::write(&file, source).unwrap();

    // The fence spans from the opening backticks to the end of the document;
    // its body is exactly the `fn sample() {}` line.
    let fence_start = source.find("```rust").unwrap();
    let body_start = source.find("fn sample").unwrap();
    let body_end = body_start + "fn sample() {}\n".len();
    let as_i64 = |offset: usize| i64::try_from(offset).unwrap();

    let response = empty_search_response(root, "lexical");
    let hit = index::SymbolHit {
        name: "rust".to_string(),
        kind: "code_block".to_string(),
        language: "markdown".to_string(),
        file: file.to_string_lossy().to_string(),
        line: 2,
        end_line: Some(4),
        node_kind: Some("fenced_code_block".to_string()),
        start_byte: Some(as_i64(fence_start)),
        end_byte: Some(as_i64(source.len())),
        body_start_byte: Some(as_i64(body_start)),
        body_end_byte: Some(as_i64(body_end)),
        tags: Some("rust".to_string()),
        score: 1.0,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![hit];
    let budget = ResponseBudget::new(Some(5), Some(96));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        "rust",
        "lexical",
        root,
        &response,
        &symbol_hits,
        budget,
        &no_filters,
    );

    let ast = report.symbols[0].ast.as_ref().unwrap();
    let markdown = ast.span.markdown.as_ref().unwrap();
    let embedded = &markdown.embedded_symbols;
    assert_eq!(embedded.len(), 1);
    let sample = &embedded[0];
    assert_eq!(sample.name, "sample");
    assert_eq!(sample.kind, "function");
    assert_eq!(sample.language, "rust");
    assert_eq!(sample.node_kind, "function_item");
    assert!(sample.handle.starts_with("span-"));
    assert_eq!(sample.start_byte, body_start);
    assert_eq!(sample.start_line, 4);
}
/// Builds a file-kind `sift::SearchHit` for tests.
///
/// `path`, `rank`, `score` and `snippet` are copied onto the hit; the artifact
/// id is `hit-{rank}` and the remaining fields are fixed test values
/// (high confidence, filesystem adapter, location `line 1`).
fn test_lexical_search_hit(
    path: &Path,
    rank: usize,
    score: f64,
    snippet: &str,
) -> sift::SearchHit {
    let freshness = sift::ArtifactFreshness {
        modified_unix_secs: None,
        observed_unix_secs: 0,
    };
    let provenance = sift::ArtifactProvenance {
        adapter: sift::AcquisitionAdapterKind::FileSystem,
        source: "test lexical hit".to_string(),
        synthetic: false,
    };
    sift::SearchHit {
        artifact_id: format!("hit-{rank}"),
        artifact_kind: sift::ContextArtifactKind::File,
        budget: sift::ArtifactBudget::from_text(snippet, 1),
        confidence: sift::ScoreConfidence::High,
        freshness,
        location: Some("line 1".to_string()),
        path: path.to_string_lossy().into_owned(),
        provenance,
        rank,
        score,
        snippet: snippet.to_string(),
    }
}
/// Builds a `summarize::Summary` for tests from a symbol name, file path and
/// summary text; every other field is a fixed placeholder or `None`.
fn test_summary(symbol_name: &str, file_path: &str, summary: &str) -> summarize::Summary {
    let owned = |text: &str| text.to_string();
    summarize::Summary {
        id: 0,
        symbol_name: owned(symbol_name),
        file_path: owned(file_path),
        content_hash: owned("hash"),
        summary: owned(summary),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: owned("2026-06-02T00:00:00Z"),
        model: owned("test"),
        tokens_input: None,
        tokens_output: None,
    }
}
/// A symbol hit with a precise AST span must be ranked above a lexical
/// whole-file hit, even though the file hit was given a much larger raw score
/// (240.0 versus 0.8).
#[test]
fn search_budget_ranked_preview_prioritizes_precise_ast_span_over_broad_file_hit() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).unwrap();
    let source = "fn alpha_helper() {}\n";
    let file = src_dir.join("lib.rs");
    let broad_file = root.join("README.md");
    fs::write(&file, source).unwrap();
    fs::write(
        &broad_file,
        "alpha helper alpha helper alpha helper in prose\n",
    )
    .unwrap();

    // Lexical hit on the prose file.
    let mut response = empty_search_response(root, "lexical");
    let prose_hit = test_lexical_search_hit(
        &broad_file,
        1,
        240.0,
        "alpha helper alpha helper alpha helper in prose",
    );
    response.hits.push(prose_hit);

    // The empty body sits between the braces, so start and end coincide.
    let body_offset = i64::try_from(source.find("{}").unwrap() + 1).unwrap();
    let hit = index::SymbolHit {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: file.to_string_lossy().to_string(),
        line: 0,
        end_line: Some(0),
        node_kind: Some("function_item".to_string()),
        start_byte: Some(0),
        end_byte: Some(i64::try_from(source.len()).unwrap()),
        body_start_byte: Some(body_offset),
        body_end_byte: Some(body_offset),
        tags: Some("alpha,helper".to_string()),
        score: 0.8,
        match_type: "all_tags".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![hit];
    let budget = ResponseBudget::new(Some(5), Some(128));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        "alpha helper",
        "lexical",
        root,
        &response,
        &symbol_hits,
        budget,
        &no_filters,
    );

    let top = &report.ranked[0];
    assert_eq!(top.source, "symbol_span");
    assert_eq!(top.name.as_deref(), Some("alpha_helper"));
    let runner_up = &report.ranked[1];
    assert!(top.score > runner_up.score);
    assert_eq!(runner_up.source, "lexical_file");
}
/// With one stored summary for the fenced-code symbol, the symbol preview
/// should report one summary ref and one graph neighbor, and the top ranked
/// entry should list both as ranking reasons.
#[test]
fn search_budget_ranked_preview_includes_summary_and_graph_evidence() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let source = "# Guide\n\n```rust\nfn sample() {}\n```\n";
    let file = root.join("README.md");
    fs::write(&file, source).unwrap();

    // Seed the summary store under the temp root with one entry for the fence.
    let summary_path = root.join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&summary_path).unwrap();
    let fence_summary = test_summary(
        "rust",
        "README.md",
        "Rust fence contains a sample function.",
    );
    summary_db.insert(&fence_summary).unwrap();

    let fence_start = source.find("```rust").unwrap();
    let body_start = source.find("fn sample").unwrap();
    let body_end = body_start + "fn sample() {}\n".len();
    let as_i64 = |offset: usize| i64::try_from(offset).unwrap();

    let response = empty_search_response(root, "lexical");
    let hit = index::SymbolHit {
        name: "rust".to_string(),
        kind: "code_block".to_string(),
        language: "markdown".to_string(),
        file: file.to_string_lossy().to_string(),
        line: 2,
        end_line: Some(4),
        node_kind: Some("fenced_code_block".to_string()),
        start_byte: Some(as_i64(fence_start)),
        end_byte: Some(as_i64(source.len())),
        body_start_byte: Some(as_i64(body_start)),
        body_end_byte: Some(as_i64(body_end)),
        tags: Some("rust".to_string()),
        score: 1.0,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![hit];
    let budget = ResponseBudget::new(Some(5), Some(128));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        "rust",
        "lexical",
        root,
        &response,
        &symbol_hits,
        budget,
        &no_filters,
    );

    let symbol = &report.symbols[0];
    assert_eq!(symbol.summary_refs, 1);
    assert_eq!(symbol.graph_neighbors, 1);

    // Both evidence counts must also appear as reasons on the top entry.
    let top_has_reason = |wanted: &str| {
        report.ranked[0]
            .reasons
            .iter()
            .any(|reason| reason == wanted)
    };
    assert!(top_has_reason("summary_refs:1"));
    assert!(top_has_reason("graph_neighbors:1"));
}
/// Creates a temporary directory holding a small `README.md` (headings, list
/// items and a fenced Rust block), indexes it into `.tsift/index.db` with
/// `run_index_update`, and returns the directory handle.
///
/// Panics if the file write, directory creation or index update fails.
fn markdown_search_facet_fixture() -> tempfile::TempDir {
let dir = tempfile::tempdir().unwrap();
// The raw string below is the fixture document verbatim; its exact bytes
// drive the facet assertions in the tests that use this fixture.
let source = r#"# Guide
## Install
- Run setup.
- Confirm setup.
```rust
fn sample() {}
```
"#;
fs::write(dir.path().join("README.md"), source).unwrap();
let index_dir = dir.path().join(".tsift");
fs::create_dir_all(&index_dir).unwrap();
// NOTE(review): the meaning of the trailing `None, false, false` arguments
// is not visible here — check `run_index_update`'s signature.
run_index_update(
&index_dir.join("index.db"),
dir.path(),
"indexing markdown search facet fixture".to_string(),
dir.path(),
None,
false,
false,
)
.unwrap();
dir
}
/// Opens the index at `<root>/.tsift/index.db` read-only and returns up to 20
/// symbol hits for `query`. Panics if the index cannot be opened or searched.
fn markdown_search_facet_hits(root: &Path, query: &str) -> Vec<index::SymbolHit> {
    let index_path = root.join(".tsift/index.db");
    let index_db = index::IndexDb::open_read_only_resilient(&index_path).unwrap();
    let hits = index_db.symbol_search(query, 20).unwrap();
    hits
}
/// Filtering a Rust function hit and a Markdown heading hit by language, kind
/// and node kind must keep only the Rust function.
#[test]
fn search_facet_filters_match_scalar_symbol_fields() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();

    let rust_function = index::SymbolHit {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: root.join("src/lib.rs").to_string_lossy().to_string(),
        line: 0,
        end_line: None,
        node_kind: Some("function_item".to_string()),
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: None,
        score: 1.0,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let markdown_heading = index::SymbolHit {
        name: "Install".to_string(),
        kind: "heading".to_string(),
        language: "markdown".to_string(),
        file: root.join("README.md").to_string_lossy().to_string(),
        line: 0,
        end_line: None,
        node_kind: Some("atx_heading".to_string()),
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: None,
        score: 0.9,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let hits = vec![rust_function, markdown_heading];

    // All three scalar facets point at the Rust function.
    let rust_only = SearchFacetFilters {
        languages: vec!["rust".to_string()],
        kinds: vec!["function".to_string()],
        node_kinds: vec!["function_item".to_string()],
        ..SearchFacetFilters::default()
    };
    let filtered = apply_search_facet_filters(root, hits, &rust_only);

    assert_eq!(filtered.len(), 1);
    assert_eq!(filtered[0].name, "alpha_helper");
}
/// Runs five facet queries against the indexed Markdown fixture and expects
/// each to narrow the hits to exactly one symbol: by section/parent/list
/// depth, by child, by heading level, by fence language, and by embedded
/// child symbol.
#[test]
fn search_facet_filters_match_markdown_sections_and_block_metadata() {
    let dir = markdown_search_facet_fixture();
    let root = dir.path();

    // Section + parent + list depth selects the second list item.
    let nested_filters = SearchFacetFilters {
        sections: vec!["Install".to_string()],
        parents: vec!["Run setup.".to_string()],
        list_depths: vec![1],
        ..SearchFacetFilters::default()
    };
    let nested_list = apply_search_facet_filters(
        root,
        markdown_search_facet_hits(root, "setup"),
        &nested_filters,
    );
    assert_eq!(nested_list.len(), 1);
    assert_eq!(nested_list[0].name, "Confirm setup.");

    // Filtering by child selects the item that owns "Confirm setup.".
    let child_filters = SearchFacetFilters {
        children: vec!["Confirm setup.".to_string()],
        ..SearchFacetFilters::default()
    };
    let parent_list = apply_search_facet_filters(
        root,
        markdown_search_facet_hits(root, "setup"),
        &child_filters,
    );
    assert_eq!(parent_list.len(), 1);
    assert_eq!(parent_list[0].name, "Run setup.");

    // Heading level 2 + ATX node kind selects the "Install" heading.
    let heading_filters = SearchFacetFilters {
        heading_levels: vec![2],
        node_kinds: vec!["atx_heading".to_string()],
        ..SearchFacetFilters::default()
    };
    let heading = apply_search_facet_filters(
        root,
        markdown_search_facet_hits(root, "Install"),
        &heading_filters,
    );
    assert_eq!(heading.len(), 1);
    assert_eq!(heading[0].name, "Install");

    // Fence language + kind selects the Rust code block.
    let fence_filters = SearchFacetFilters {
        fence_languages: vec!["rust".to_string()],
        kinds: vec!["code_block".to_string()],
        ..SearchFacetFilters::default()
    };
    let fence = apply_search_facet_filters(
        root,
        markdown_search_facet_hits(root, "rust"),
        &fence_filters,
    );
    assert_eq!(fence.len(), 1);
    assert_eq!(fence[0].kind, "code_block");

    // The function inside the fence counts as a child of the code block.
    let embedded_filters = SearchFacetFilters {
        children: vec!["sample".to_string()],
        kinds: vec!["code_block".to_string()],
        ..SearchFacetFilters::default()
    };
    let embedded_child = apply_search_facet_filters(
        root,
        markdown_search_facet_hits(root, "rust"),
        &embedded_filters,
    );
    assert_eq!(embedded_child.len(), 1);
    assert_eq!(embedded_child[0].name, "rust");
}
/// Three hits sharing the tags `alpha,helper` (two spellings across three
/// files) should collapse into one previewed family that records the raw
/// match, surface and file counts and annotates the name and file with the
/// extra variants.
#[test]
fn search_budget_report_groups_repeated_symbols_by_canonical_tag_family() {
    let root = Path::new("/repo");
    let response = empty_search_response(root, "lexical");

    let snake_in_lib = index::SymbolHit {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: "/repo/src/lib.rs".to_string(),
        line: 12,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("alpha,helper".to_string()),
        score: 0.98,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let camel_in_main = index::SymbolHit {
        name: "alphaHelper".to_string(),
        kind: "method".to_string(),
        language: "rust".to_string(),
        file: "/repo/src/main.rs".to_string(),
        line: 34,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("alpha,helper".to_string()),
        score: 0.93,
        match_type: "tag_overlap".to_string(),
        tagpath_handle: None,
    };
    let snake_in_worker = index::SymbolHit {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: "/repo/src/worker.rs".to_string(),
        line: 56,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("alpha,helper".to_string()),
        score: 0.91,
        match_type: "tag_overlap".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![snake_in_lib, camel_in_main, snake_in_worker];
    let budget = ResponseBudget::new(Some(5), Some(48));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        "alpha helper",
        "lexical",
        root,
        &response,
        &symbol_hits,
        budget,
        &no_filters,
    );

    // One family in the preview, three raw matches behind it.
    assert_eq!(report.symbol_total, 1);
    assert_eq!(report.raw_symbol_total, 3);
    assert_eq!(report.symbols.len(), 1);

    let family = &report.symbols[0];
    assert_eq!(family.tag_alias.as_deref(), Some("alpha/helper"));
    assert_eq!(family.match_count, 3);
    assert_eq!(family.surface_count, 2);
    assert_eq!(family.file_count, 3);
    assert_eq!(
        family.surface_examples,
        vec!["alpha_helper".to_string(), "alphaHelper".to_string()]
    );
    assert!(family.name.contains("(+1 variant)"));
    assert!(family.file.contains("(+2 files)"));
    assert!(family.expand.contains("tsift search"));
    assert!(family.expand.contains("alpha helper"));
}
/// The facet filters passed to the report builder must be echoed back on the
/// report unchanged and summarised as `lang=… kind=… node-kind=…`.
#[test]
fn search_budget_report_carries_active_filters() {
    let root = Path::new("/repo");
    let response = empty_search_response(root, "lexical");
    let hit = index::SymbolHit {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: "/repo/src/lib.rs".to_string(),
        line: 12,
        end_line: None,
        node_kind: Some("function_item".to_string()),
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("alpha,helper".to_string()),
        score: 0.98,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![hit];
    let filters = SearchFacetFilters {
        languages: vec!["rust".to_string()],
        kinds: vec!["function".to_string()],
        node_kinds: vec!["function_item".to_string()],
        ..SearchFacetFilters::default()
    };
    let budget = ResponseBudget::new(Some(5), Some(48));

    let report = build_relative_search_budget_report(
        "alpha helper",
        "lexical",
        root,
        &response,
        &symbol_hits,
        budget,
        &filters,
    );

    assert_eq!(report.filters, filters);
    let summary = search_facet_filters_summary(&report.filters);
    assert_eq!(summary, "lang=rust kind=function node-kind=function_item");
}
/// With 450 indexed artifacts and two symbol matches squeezed into a one-item
/// budget, the report must carry a `high-hit` scale guard whose narrowing
/// commands include an `--exact` form, an `alpha helper` query, and end with a
/// `workflow search` command.
#[test]
fn search_budget_report_warns_on_broad_preview_and_lists_narrowing_commands() {
    let root = Path::new("/repo");
    let mut response = empty_search_response(root, "lexical");
    response.indexed_artifacts = 450;

    let alpha = index::SymbolHit {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: "/repo/src/lib.rs".to_string(),
        line: 12,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("alpha,helper".to_string()),
        score: 0.98,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let beta = index::SymbolHit {
        name: "beta_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: "/repo/src/beta.rs".to_string(),
        line: 21,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("beta,helper".to_string()),
        score: 0.92,
        match_type: "tag_overlap".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![alpha, beta];
    let one_item_budget = ResponseBudget::new(Some(1), Some(64));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        "helper",
        "lexical",
        root,
        &response,
        &symbol_hits,
        one_item_budget,
        &no_filters,
    );

    let guard = report
        .scale_guard
        .as_ref()
        .expect("broad previews should emit a scale guard");
    assert_eq!(guard.level, "high-hit");
    assert_eq!(guard.signals.indexed_artifacts, 450);
    assert_eq!(guard.signals.raw_symbol_matches, 2);

    // At least one suggested command must contain each of these fragments.
    let suggests = |needle: &str| {
        guard
            .narrow_commands
            .iter()
            .any(|command| command.contains(needle))
    };
    assert!(suggests("--exact"));
    assert!(suggests("alpha helper"));

    // The final suggestion must be the workflow command.
    let last_command = guard.narrow_commands.last().unwrap();
    assert!(last_command.contains("workflow search"));
}
// One definition, two caller edges and a three-member community go in; the
// report is expected to keep one caller and one community member, flag
// truncation, and attach handles and tag aliases.
#[test]
fn explain_budget_report_limits_edges_and_members() {
    let definition = index::StoredSymbol {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        signature: None,
        file: "src/lib.rs".to_string(),
        line: 10,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        parent_module: None,
        visibility: None,
        tags: None,
        tagpath_handle: None,
    };
    let symbols = vec![definition];

    let call_from_main = index::StoredEdge {
        caller_file: "src/main.rs".to_string(),
        caller_name: "main".to_string(),
        caller_line: 1,
        callee_name: "alpha_helper".to_string(),
        call_site_line: 3,
        tagpath_handle: None,
    };
    let call_from_worker = index::StoredEdge {
        caller_file: "src/worker.rs".to_string(),
        caller_name: "worker".to_string(),
        caller_line: 5,
        callee_name: "alpha_helper".to_string(),
        call_site_line: 8,
        tagpath_handle: None,
    };
    let callers = vec![call_from_main, call_from_worker];

    let community = graph::Community {
        id: 1,
        members: vec![
            graph::CommunityMember::new("alpha_helper"),
            graph::CommunityMember::new("main"),
            graph::CommunityMember::new("worker"),
        ],
        modularity_contribution: 0.5,
    };

    let budget = ResponseBudget::new(Some(1), Some(24));
    let report = build_explain_budget_report(
        "alpha_helper",
        Path::new("/repo"),
        &symbols,
        &callers,
        2,
        false,
        &[],
        0,
        false,
        Some(&community),
        budget,
    );

    assert_eq!(report.definitions.len(), 1);
    assert_eq!(report.callers.len(), 1);
    assert!(report.truncated);

    let community_preview = report.community.as_ref().unwrap();
    assert_eq!(community_preview.members.len(), 1);

    let first_definition = &report.definitions[0];
    assert_eq!(first_definition.tag_alias.as_deref(), Some("alpha/helper"));

    let first_caller = &report.callers[0];
    assert!(first_caller.handle.starts_with("ecall-"));
    assert_eq!(first_caller.tag_alias.as_deref(), Some("main"));
}
// Builds a fully populated `SessionReviewReport` fixture and runs
// `build_session_review_next_context_budget_report` over it twice:
// first with a small budget, asserting that list fields are cut to a single
// entry while all four digest commands are kept, then with a larger budget,
// asserting the order of the token actions and the commands attached to the
// first one.
#[test]
fn session_review_next_context_budget_limits_lists() {
let report = session_review::SessionReviewReport {
root: "/repo".to_string(),
target: "tasks/software/tsift.md".to_string(),
target_kind: "file".to_string(),
sessions_considered: 1,
sessions_matched: 1,
claude_sessions: 1,
codex_sessions: 0,
agent_doc_logs: 0,
prompt_target_count: 2,
command_groups: 0,
file_groups: 2,
symbol_groups: 1,
failure_groups: 1,
runtime_event_groups: 0,
restart_churn_groups: 0,
closeout_groups: 0,
usage_samples: 1,
prompt_tokens: 120,
cached_input_tokens: 80,
cache_creation_input_tokens: 0,
output_tokens: 40,
reasoning_output_tokens: 0,
total_tokens: 240,
cached_input_ratio: Some(40.0),
largest_turn_total_tokens: 240,
aggregate_cost: session_review::SessionReviewCostSummary {
scope: "bounded_matched_sessions".to_string(),
sessions: 1,
usage_samples: 1,
prompt_tokens: 120,
cached_input_tokens: 80,
cache_creation_input_tokens: 0,
output_tokens: 40,
reasoning_output_tokens: 0,
total_tokens: 240,
cached_input_ratio: Some(40.0),
largest_turn_total_tokens: 240,
},
latest_session_cost: Some(session_review::SessionReviewCostSummary {
scope: "latest_matched_session".to_string(),
sessions: 1,
usage_samples: 1,
prompt_tokens: 120,
cached_input_tokens: 80,
cache_creation_input_tokens: 0,
output_tokens: 40,
reasoning_output_tokens: 0,
total_tokens: 240,
cached_input_ratio: Some(66.67),
largest_turn_total_tokens: 240,
}),
// Guardrails are listed with `cache_resend` first; the assertions at the end
// of this test expect `prompt_budget` to come first in the token actions.
guardrails: vec![
session_cost::SessionCostGuardrail {
kind: "cache_resend".to_string(),
severity: "warn".to_string(),
message: "cached input ratio was high".to_string(),
guidance: "compact or restart the session".to_string(),
},
session_cost::SessionCostGuardrail {
kind: "prompt_budget".to_string(),
severity: "warn".to_string(),
message: "largest prompt turn reached 999999 tokens".to_string(),
guidance: "compact the session before another large turn".to_string(),
},
session_cost::SessionCostGuardrail {
kind: "restart_loop".to_string(),
severity: "warn".to_string(),
message: "restart churn detected".to_string(),
guidance: "restart cleanly".to_string(),
},
session_cost::SessionCostGuardrail {
kind: "noop_closeout".to_string(),
severity: "warn".to_string(),
message: "commit_already_current appeared 8 times".to_string(),
guidance: "avoid reopening without new edits".to_string(),
},
],
loop_clusters: vec![],
file_read_diagnostics: vec![],
prompt_targets: vec![
session_review::SessionReviewPromptTarget {
text: "do one".to_string(),
occurrences: 1,
},
session_review::SessionReviewPromptTarget {
text: "do two".to_string(),
occurrences: 1,
},
],
commands: vec![],
touched_files: vec![],
touched_symbols: vec![],
failures: vec![],
runtime_events: vec![],
restart_churn: vec![],
closeout: vec![],
largest_turns: vec![],
sessions: vec![session_review::SessionReviewSession {
source: "claude_jsonl".to_string(),
path: "/tmp/session.jsonl".to_string(),
matched_by: vec!["path".to_string()],
modified_unix_secs: None,
prompt_target_count: 2,
command_groups: 0,
file_groups: 2,
symbol_groups: 1,
failure_groups: 1,
runtime_event_groups: 0,
restart_churn_groups: 0,
closeout_groups: 0,
usage_samples: 1,
prompt_tokens: 120,
cached_input_tokens: 80,
cache_creation_input_tokens: 0,
output_tokens: 40,
reasoning_output_tokens: 0,
total_tokens: 240,
largest_turn_total_tokens: 240,
}],
// Each list in `next_context` holds two entries (one for failures) so the
// single-entry assertions below can observe trimming.
next_context: session_review::SessionReviewNextContext {
target: "tasks/software/tsift.md".to_string(),
active_prompt_targets: vec!["do one".to_string(), "do two".to_string()],
last_verification: session_review::SessionReviewVerificationState {
status: "green".to_string(),
detail: "cargo test".to_string(),
},
touched_files: vec!["src/lib.rs".to_string(), "src/main.rs".to_string()],
touched_symbols: vec!["alpha_helper".to_string(), "main".to_string()],
unresolved_failures: vec![session_review::SessionReviewFailure {
kind: "timeout".to_string(),
message: "search timed out".to_string(),
occurrences: 1,
command: None,
session_path: None,
}],
// The last two commands are long on purpose: the test asserts that the
// command at index 2 comes back verbatim and that all four are kept.
next_digest_commands: vec![
"tsift session-review --next-context tasks/software/tsift.md".to_string(),
"tsift diff-digest .".to_string(),
"tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log".to_string(),
"tsift log-digest --path . < target/very-long-build-output-file-name-that-must-remain-executable.log".to_string(),
],
},
warnings: vec![],
};
// Small budget: prompt targets, touched files and token actions are each
// expected to shrink to one entry and the report to be marked truncated.
let budget_report = build_session_review_next_context_budget_report(
&report,
ResponseBudget::new(Some(1), Some(12)),
None,
);
assert!(budget_report.truncated);
assert_eq!(budget_report.prompt_targets, vec!["do one"]);
assert_eq!(budget_report.touched_files, vec!["src/lib.rs"]);
assert!(
budget_report.touched_symbol_refs[0]
.handle
.starts_with("ncsym-")
);
assert_eq!(
budget_report.touched_symbol_refs[0].tag_alias.as_deref(),
Some("alpha/helper")
);
assert!(
budget_report.unresolved_failures[0]
.handle
.starts_with("snf-")
);
// Digest commands are not trimmed or shortened by the small budget.
assert_eq!(budget_report.next_digest_commands.len(), 4);
assert_eq!(
budget_report.next_digest_commands[2],
"tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log"
);
assert_eq!(budget_report.next_token_actions.len(), 1);
assert_eq!(budget_report.next_token_actions[0].kind, "prompt_budget");
// Larger budget: all four guardrails are expected as token actions, in the
// order asserted below (which differs from the fixture's guardrail order).
let full_action_report = build_session_review_next_context_budget_report(
&report,
ResponseBudget::new(Some(4), Some(120)),
None,
);
assert_eq!(
full_action_report
.next_token_actions
.iter()
.map(|action| action.kind.as_str())
.collect::<Vec<_>>(),
vec![
"prompt_budget",
"cache_resend",
"restart_loop",
"noop_closeout"
]
);
// The first action carries compact/restart commands quoting the target path,
// plus a context-pack digest command.
assert_eq!(
full_action_report.next_token_actions[0]
.compact_command
.as_deref(),
Some("agent-doc compact \"tasks/software/tsift.md\" --commit")
);
assert_eq!(
full_action_report.next_token_actions[0]
.restart_command
.as_deref(),
Some("agent-doc start \"tasks/software/tsift.md\"")
);
assert!(
full_action_report.next_token_actions[0]
.digest_commands
.iter()
.any(|command| command
== "tsift --envelope context-pack \"tasks/software/tsift.md\" --budget normal")
);
}
// A two-file diff digest previewed under a small budget should keep only the
// first file and shorten its symbol, alias and summary text with "...".
#[test]
fn context_pack_diff_preview_limits_files_and_symbols() {
    let lib_file = diff_digest::DiffDigestFile {
        path: "src/lib.rs".to_string(),
        status: diff_digest::DiffDigestFileStatus::Modified,
        touched_symbols: vec!["alpha_helper".to_string(), "beta_helper".to_string()],
        summary_state: diff_digest::DiffDigestSummaryState::Current,
        current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
            symbol: "alpha_helper".to_string(),
            summary: "alpha helper handles the main alpha workflow".to_string(),
        }],
        added_call_edges: vec!["alpha->beta".to_string()],
        removed_call_edges: vec![],
        warnings: vec!["stale parse".to_string()],
    };
    let main_file = diff_digest::DiffDigestFile {
        path: "src/main.rs".to_string(),
        status: diff_digest::DiffDigestFileStatus::Added,
        touched_symbols: vec!["main".to_string()],
        summary_state: diff_digest::DiffDigestSummaryState::Missing,
        current_summaries: vec![],
        added_call_edges: vec![],
        removed_call_edges: vec![],
        warnings: vec![],
    };
    let digest = diff_digest::DiffDigestReport {
        root: "/repo".to_string(),
        mode: diff_digest::DiffDigestMode::WorkingTree,
        revision: None,
        files_changed: 2,
        files_with_current_summaries: 1,
        symbols_touched: 3,
        call_edges_added: 1,
        call_edges_removed: 0,
        files: vec![lib_file, main_file],
    };

    let budget = ResponseBudget::new(Some(1), Some(11));
    let preview = build_context_pack_diff_preview(&digest, budget, None);

    assert!(preview.truncated);
    assert_eq!(preview.files.len(), 1);

    let file = &preview.files[0];
    assert_eq!(file.path, "src/lib.rs");
    assert_eq!(file.touched_symbols, vec!["alpha_he..."]);

    let symbol_ref = &file.touched_symbol_refs[0];
    assert!(symbol_ref.handle.starts_with("cdsym-"));
    assert_eq!(symbol_ref.tag_alias.as_deref(), Some("alpha/he..."));

    let summary_ref = &file.summary_refs[0];
    assert!(summary_ref.handle.starts_with("cdsum-"));
    assert_eq!(summary_ref.tag_alias.as_deref(), Some("alpha/he..."));
    assert_eq!(summary_ref.summary, "alpha he...");
    assert_eq!(summary_ref.expand, "tsift summarize --file \"src/lib.rs\"");

    assert_eq!(file.warnings, vec!["stale parse"]);
}
// After `main.rs` is rewritten on top of an existing index, exactly one status
// reminder is expected, naming the stale index and the `tsift index .` fix.
#[test]
fn context_pack_status_reminders_include_stale_index_state() {
    let project = setup_graph_index();
    let root = project.path();

    // NOTE(review): the pause presumably ensures the rewrite is observed as
    // newer than the index; confirm against the staleness check.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();

    let reminders = context_pack_status_reminders(root);
    assert_eq!(reminders.len(), 1);

    let reminder = &reminders[0];
    assert!(reminder.contains("index stale"));
    assert!(reminder.contains("tsift index ."));
}
// #gdbgatecold regression-lock: the trusted context-pack pipeline must
// share its index-inspection across `prepare_agent_doc_index_gate` and
// `context_pack_status_reminders` (both call `IndexDb::inspect_read_only`
// on the same `(root, .tsift/index.db)` key). With the scope guard
// active in `build_context_pack_report_with_profile`, the second call
// hits the cache, so we should record one miss and at least one hit.
#[test]
fn build_context_pack_reuses_inspect_within_scope() {
    let project = setup_graph_index();
    let root = project.path();
    init_git_repo(root);

    // The guard stays alive until the end of the test, so the stats read below
    // are taken while the scope is still open.
    let _guard = index::InspectScopeGuard::new();
    let budget = ResponseBudget::new(Some(2), Some(96));
    let _ = build_context_pack_report(root, None, None, None, budget).unwrap();

    let (hits, misses) = index::inspect_scope_stats();
    let saw_cached_inspect = hits >= 1;
    assert!(
        saw_cached_inspect,
        "expected at least one cached inspect within scope (hits={hits}, misses={misses})"
    );
    let saw_initial_miss = misses >= 1;
    assert!(
        saw_initial_miss,
        "expected at least one initial inspect miss (hits={hits}, misses={misses})"
    );
}
// #gdbgatecold scope-isolation: outside of any scope, every call to
// `IndexDb::inspect_read_only` must hit the disk fresh. This locks in
// the contract that the search/status fast-paths never reuse a cached
// inspection across consecutive top-level calls.
#[test]
fn inspect_read_only_outside_scope_does_not_cache() {
    let project = setup_graph_index();
    let root = project.path();
    let index_db = root.join(".tsift/index.db");

    // First inspection: with no scope guard, neither counter should move.
    let _first = index::IndexDb::inspect_read_only(&index_db, root, false).unwrap();
    let stats_after_first = index::inspect_scope_stats();
    assert_eq!(
        stats_after_first,
        (0, 0),
        "no scope guard => no hits/misses recorded"
    );

    // Second inspection of the same key must still not count as a cache hit.
    let _second = index::IndexDb::inspect_read_only(&index_db, root, false).unwrap();
    let (hits_after_second, _) = index::inspect_scope_stats();
    assert_eq!(
        hits_after_second, 0,
        "must not reuse inspection outside of any scope"
    );
}
// Building a context pack over a project whose `main.rs` changed after
// indexing should report an index refresh, drop the stale reminder, and leave
// the index with no pending changes.
#[test]
fn context_pack_refreshes_stale_index_before_handoff() {
    let project = setup_graph_index();
    let root = project.path();
    init_git_repo(root);

    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source = "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();

    let budget = ResponseBudget::new(Some(2), Some(96));
    let report = build_context_pack_report(root, None, None, None, budget).unwrap();
    let reminders = &report.status_reminders;

    let mentions_refresh = reminders.iter().any(|reminder| {
        reminder.contains("index refreshed") && reminder.contains("context-pack handoff")
    });
    assert!(
        mentions_refresh,
        "expected context-pack refresh diagnostic, got {:?}",
        reminders
    );

    let still_stale = reminders
        .iter()
        .any(|reminder| reminder.contains("index stale"));
    assert!(
        !still_stale,
        "stale reminder should be gone after refresh: {:?}",
        reminders
    );

    // Re-open the index read-only and confirm nothing is left to pick up.
    let db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let changes = db.compute_changes(root).unwrap();
    assert_eq!(changes.new + changes.modified + changes.deleted, 0);
}
// Materializing an exploration packet should keep the source-window handle and
// leave one `source_handle` node, one `worker_context` node, and the expected
// edges in `.tsift/graph.db`.
#[test]
fn context_pack_materializes_source_handles_into_graph_store() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    let budget = exploration_budget_for_counts(2, 1);
    let relation = ExplorationRelation {
        from: "file:main.rs".to_string(),
        relation: "touches_symbol".to_string(),
        to: "symbol:helper".to_string(),
        label: Some("modified diff".to_string()),
    };
    let window = ExplorationSourceWindow {
        handle: "xwin-test".to_string(),
        file: "main.rs".to_string(),
        start: 1,
        end: 32,
        reason: "changed file".to_string(),
        expand: "tsift source-read main.rs --path . --start 1 --lines 32".to_string(),
    };
    let worker = ExplorationWorkerContext {
        handle: "xwrk-test".to_string(),
        target: "tasks/software/tsift.md".to_string(),
        summary: "do #kgnv".to_string(),
        expand: "tsift --envelope context-pack tasks/software/tsift.md --budget normal"
            .to_string(),
    };
    let seed_packet = ExplorationPacket {
        budget,
        relationship_map: vec![relation],
        source_windows: vec![window],
        worker_context: vec![worker],
        no_reread_guidance: "use windows".to_string(),
    };

    let stored = materialize_context_pack_exploration_packet(root, seed_packet).unwrap();
    assert_eq!(stored.source_windows[0].handle, "xwin-test");

    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let handle_nodes = store.nodes_by_kind("source_handle").unwrap();
    assert_eq!(handle_nodes.len(), 1);
    assert_eq!(
        handle_nodes[0].properties.get("file"),
        Some(&"main.rs".to_string())
    );

    let file_edges = store
        .outgoing_edges(&exploration_ref_id("file:main.rs"), Some("touches_symbol"))
        .unwrap();
    assert_eq!(file_edges.len(), 1);

    let worker_nodes = store.nodes_by_kind("worker_context").unwrap();
    assert_eq!(worker_nodes.len(), 1);

    let worker_edges = store
        .outgoing_edges("xwrk-test", Some("scopes_source"))
        .unwrap();
    assert_eq!(worker_edges.len(), 1);
}
// The context pack's graph-orchestration section is expected to report a
// current projection but a fail-closed "blocked" readiness with reason
// `summary_cache_empty`, pointing at `tsift summarize --extract .` and
// withholding conflict-matrix follow-ups.
#[test]
fn context_pack_records_graph_orchestration_observability() {
    let project = setup_traversal_project();
    let root = project.path();
    init_git_repo(root);
    let session = root.join("tasks/software/tsift.md");
    refresh_traversal_graph_store(root, &session, None).unwrap();

    let budget = ResponseBudget::new(Some(4), Some(160));
    let report = build_context_pack_report(&session, None, None, None, budget).unwrap();
    let orchestration = &report.graph_orchestration;
    let readiness = &orchestration.readiness;
    let summarize_command = "tsift summarize --extract .";

    assert_eq!(
        orchestration.contract_version,
        CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION
    );
    assert_eq!(orchestration.projection_freshness.status.as_str(), "current");
    assert!(!orchestration.projection_hashes.is_empty());

    assert_eq!(readiness.status, "blocked");
    assert_eq!(readiness.reason, "summary_cache_empty");
    assert!(readiness.fail_closed);
    let readiness_suggests_summarize = readiness
        .next_commands
        .iter()
        .any(|command| command == summarize_command);
    assert!(
        readiness_suggests_summarize,
        "{:?}",
        readiness.next_commands
    );

    let no_evidence_ids = orchestration
        .evidence_packet_ids
        .iter()
        .all(|id| !id.starts_with("gevd-"));
    assert!(
        no_evidence_ids,
        "evidence packet ids should be empty when readiness is blocked: {:?}",
        orchestration.evidence_packet_ids
    );

    let decision_mentions_block = orchestration
        .conflict_matrix_decisions
        .iter()
        .any(|decision| decision.contains("readiness blocked"));
    assert!(
        decision_mentions_block,
        "conflict-matrix decisions should reference readiness block: {:?}",
        orchestration.conflict_matrix_decisions
    );

    let offers_conflict_matrix = orchestration
        .follow_up_commands
        .iter()
        .any(|command| command.contains("conflict-matrix"));
    assert!(
        !offers_conflict_matrix,
        "conflict-matrix command should not appear when readiness is blocked: {:?}",
        orchestration.follow_up_commands
    );

    let follow_up_suggests_summarize = orchestration
        .follow_up_commands
        .iter()
        .any(|command| command == summarize_command);
    assert!(
        follow_up_suggests_summarize,
        "{:?}",
        orchestration.follow_up_commands
    );

    assert!(!orchestration.worker_ownership_blocks.is_empty());
}
// Diffs the local projection against a snapshot that has one extra node and
// only a single foreign edge: the report should be stale and fail-closed,
// tombstone both extras, upsert the local edges, and plan edge deletions
// first.
#[test]
fn convex_sync_report_chunks_upserts_and_tombstones() {
    let project = setup_traversal_project();
    let root = project.path();
    let source_graph = build_traversal_graph_source(root, root, None).unwrap();
    let projection = traversal_projection_from_graph(root, None, &source_graph).unwrap();

    // Start from the current rows, then make the snapshot diverge.
    let mut snapshot = projection.to_convex_rows();
    let stale_node = ConvexNodeRow {
        external_id: "stale-node".to_string(),
        kind: "backlog".to_string(),
        label: "stale".to_string(),
        properties: BTreeMap::new(),
        provenance: Vec::new(),
        freshness: None,
    };
    snapshot.nodes.push(stale_node);

    let stale_edge = ConvexEdgeRow {
        edge_key: "stale-edge".to_string(),
        from_external_id: "stale-node".to_string(),
        to_external_id: "stale-node".to_string(),
        kind: "mentions".to_string(),
        properties: BTreeMap::new(),
        provenance: Vec::new(),
        freshness: None,
    };
    snapshot.edges.clear();
    snapshot.edges.push(stale_edge);

    let snapshot_path = root.join("convex-snapshot.json");
    let encoded_snapshot = serde_json::to_string(&snapshot).unwrap();
    fs::write(&snapshot_path, encoded_snapshot).unwrap();

    let report = build_convex_sync_report(root, None, Some(&snapshot_path), 2).unwrap();

    assert_eq!(report.freshness.status, "stale");
    assert!(report.freshness.fail_closed);
    assert_eq!(report.node_tombstones, vec!["stale-node".to_string()]);
    assert!(
        report.edge_upserts.len() > 1,
        "snapshot without edges should upsert local edges"
    );
    assert_eq!(report.edge_tombstones, vec!["stale-edge".to_string()]);

    let first_operation = report.chunks.first().map(|chunk| chunk.operation.as_str());
    assert_eq!(
        first_operation,
        Some("delete_edges"),
        "edge tombstones should be planned before node tombstones"
    );

    let has_bounded_edge_upserts = report
        .chunks
        .iter()
        .any(|chunk| chunk.operation == "upsert_edges" && chunk.count <= 2);
    assert!(
        has_bounded_edge_upserts,
        "expected chunked edge upserts, got {:?}",
        report.chunks
    );
}
// Verifying against a default (empty) snapshot must return an error stating
// that the Convex graph projection is not current.
#[test]
fn convex_snapshot_validation_fails_closed_when_stale() {
    let project = setup_traversal_project();
    let root = project.path();
    build_traversal_graph(root, root, None).unwrap();

    let empty_rows = ConvexProjectionRows::default();
    let snapshot_path = root.join("empty-convex-snapshot.json");
    let encoded_rows = serde_json::to_string(&empty_rows).unwrap();
    fs::write(&snapshot_path, encoded_rows).unwrap();

    let err = verify_convex_projection_snapshot(root, None, &snapshot_path).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("Convex graph projection is not current"),
        "{err}"
    );
}
// A report built with the final flag set to `false` should not be a dry run,
// should carry no "dry-run only" diagnostic, and should still plan an
// `upsert_nodes` chunk.
#[test]
fn convex_sync_report_marks_live_apply_mode_without_network() {
    let project = setup_traversal_project();
    let report =
        build_convex_sync_report_with_snapshot(project.path(), None, None, 100, false).unwrap();

    assert!(!report.dry_run);

    let claims_dry_run = report
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("dry-run only"));
    assert!(
        !claims_dry_run,
        "apply-mode report should not claim dry-run diagnostics"
    );

    let plans_node_upserts = report
        .chunks
        .iter()
        .any(|chunk| chunk.operation == "upsert_nodes");
    assert!(
        plans_node_upserts,
        "live apply mode should still expose chunked idempotent operations"
    );
}
// Runs `cmd_convex_sync` in apply mode against a throwaway HTTP server bound
// to an ephemeral localhost port. The server accepts exactly as many requests
// as the precomputed report has chunks, records each request's `operation`
// field, and answers `200 OK`. The test then checks that both node and edge
// upserts were sent.
#[test]
fn convex_sync_apply_round_trips_with_http_backend() {
    use std::net::TcpListener;
    use std::sync::{Arc, Mutex};
    let dir = setup_traversal_project();
    let report =
        build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
    let expected_chunks = report.chunks.len();
    assert!(expected_chunks > 0);
    // Port 0 lets the OS pick a free port; the endpoint is built from the
    // address actually bound.
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let endpoint = format!("http://{}", listener.local_addr().unwrap());
    let operations = Arc::new(Mutex::new(Vec::<String>::new()));
    let server_operations = Arc::clone(&operations);
    let server = std::thread::spawn(move || {
        // One connection per expected chunk (responses are sent with
        // `Connection: close`).
        for _ in 0..expected_chunks {
            let (mut stream, _) = listener.accept().unwrap();
            let mut reader = BufReader::new(stream.try_clone().unwrap());
            let mut request_line = String::new();
            reader.read_line(&mut request_line).unwrap();
            assert!(request_line.starts_with("POST "));
            let mut content_length = 0usize;
            // Read headers up to the blank line, picking out Content-Length.
            let mut line = String::new();
            loop {
                line.clear();
                let bytes_read = reader.read_line(&mut line).unwrap();
                // `read_line` returns Ok(0) at EOF and appends nothing. Without
                // this check a client that closes the connection mid-headers
                // would make this loop spin forever and hang the test.
                assert!(
                    bytes_read > 0,
                    "connection closed before the request headers ended"
                );
                if line == "\r\n" {
                    break;
                }
                if let Some(value) = line.to_ascii_lowercase().strip_prefix("content-length:") {
                    content_length = value.trim().parse().unwrap();
                }
            }
            let mut body = vec![0u8; content_length];
            reader.read_exact(&mut body).unwrap();
            let request: serde_json::Value = serde_json::from_slice(&body).unwrap();
            server_operations
                .lock()
                .unwrap()
                .push(request["operation"].as_str().unwrap().to_string());
            let response = br#"{"status":"ok","message":"accepted"}"#;
            write!(
                stream,
                "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
                response.len()
            )
            .unwrap();
            stream.write_all(response).unwrap();
        }
    });
    cmd_convex_sync(
        ConvexSyncOptions {
            path: dir.path(),
            scope: None,
            snapshot: None,
            chunk_size: 100,
            remote_snapshot: false,
            apply: true,
            endpoint: Some(&endpoint),
            auth_token_env: "TSIFT_TEST_CONVEX_AUTH_TOKEN",
        },
        OutputFormat {
            json_output: false,
            compact: true,
            pretty: false,
            terse: false,
            ultra_terse: false,
            schema: false,
            envelope: false,
        },
    )
    .unwrap();
    // Joining surfaces any panic (failed assertion or unwrap) from the server
    // thread as a test failure.
    server.join().unwrap();
    let operations = operations.lock().unwrap().clone();
    assert!(operations.contains(&"upsert_nodes".to_string()));
    assert!(operations.contains(&"upsert_edges".to_string()));
}
// With an `alpha` tag definition on disk under `.naming/tags`, the diff
// preview should attach an ontology ref for it to both the touched symbol and
// its summary.
#[test]
fn context_pack_diff_preview_attaches_tag_ontology_refs() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    // Tag fixture: `+++`-fenced front matter followed by a one-line definition.
    fs::create_dir_all(root.join(".naming/tags")).unwrap();
    let alpha_tag_doc = "+++\ntag = \"alpha\"\ntitle = \"Alpha Domain\"\ndomain = \"fixture\"\n+++\n\nAlpha definition.\n";
    fs::write(root.join(".naming/tags/alpha.md"), alpha_tag_doc).unwrap();
    let ontology = load_tag_ontology_preview_context(root).unwrap();

    let changed_file = diff_digest::DiffDigestFile {
        path: "src/lib.rs".to_string(),
        status: diff_digest::DiffDigestFileStatus::Modified,
        touched_symbols: vec!["alpha_helper".to_string()],
        summary_state: diff_digest::DiffDigestSummaryState::Current,
        current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
            symbol: "alpha_helper".to_string(),
            summary: "alpha helper summary".to_string(),
        }],
        added_call_edges: vec![],
        removed_call_edges: vec![],
        warnings: vec![],
    };
    let digest = diff_digest::DiffDigestReport {
        root: root.display().to_string(),
        mode: diff_digest::DiffDigestMode::WorkingTree,
        revision: None,
        files_changed: 1,
        files_with_current_summaries: 1,
        symbols_touched: 1,
        call_edges_added: 0,
        call_edges_removed: 0,
        files: vec![changed_file],
    };

    let budget = ResponseBudget::new(Some(1), Some(80));
    let preview = build_context_pack_diff_preview(&digest, budget, Some(&ontology));
    let file = &preview.files[0];

    let symbol_ontology = &file.touched_symbol_refs[0].ontology_refs[0];
    assert!(symbol_ontology.handle.starts_with("tont-"));
    assert_eq!(symbol_ontology.tag, "alpha");
    assert_eq!(symbol_ontology.path, ".naming/tags/alpha.md");
    assert_eq!(symbol_ontology.title.as_deref(), Some("Alpha Domain"));
    assert_eq!(symbol_ontology.domain.as_deref(), Some("fixture"));

    let summary_ontology = &file.summary_refs[0].ontology_refs[0];
    assert_eq!(summary_ontology.path, ".naming/tags/alpha.md");
}
// A two-group test digest previewed under a small budget should keep only the
// first failure group, shorten its test name and message with "...", and pass
// the warnings through.
#[test]
fn context_pack_test_preview_limits_failure_groups() {
    let alpha_failure = test_digest::TestDigestFailure {
        tests: vec!["suite::alpha_failure".to_string()],
        message: "assertion failed".to_string(),
        path: Some("src/lib.rs".to_string()),
        line: Some(42),
        column: None,
        occurrences: 1,
        summary_state: test_digest::TestDigestSummaryState::Current,
        current_summaries: vec![test_digest::TestDigestSummarySnippet {
            symbol: "alpha_failure".to_string(),
            summary: "failure summary for alpha test".to_string(),
        }],
    };
    let beta_failure = test_digest::TestDigestFailure {
        tests: vec!["suite::beta_failure".to_string()],
        message: "panic".to_string(),
        path: Some("src/main.rs".to_string()),
        line: Some(7),
        column: None,
        occurrences: 1,
        summary_state: test_digest::TestDigestSummaryState::Missing,
        current_summaries: vec![],
    };
    let digest = test_digest::TestDigestReport {
        root: "/repo".to_string(),
        runner: "cargo".to_string(),
        failures: 2,
        grouped_failures: 2,
        counts: test_digest::TestDigestCounts {
            passed: Some(8),
            failed: Some(2),
            skipped: Some(1),
        },
        failure_groups: vec![alpha_failure, beta_failure],
        warnings: vec!["warning text".to_string()],
    };

    let budget = ResponseBudget::new(Some(1), Some(14));
    let preview = build_context_pack_test_preview(&digest, budget, None);

    assert!(preview.truncated);
    assert_eq!(preview.failure_groups.len(), 1);

    let group = &preview.failure_groups[0];
    assert_eq!(group.tests, vec!["suite::alph..."]);
    assert_eq!(group.message, "assertion f...");

    let summary_ref = &group.summary_refs[0];
    assert!(summary_ref.handle.starts_with("ctsum-"));
    assert_eq!(summary_ref.expand, "tsift summarize --file \"src/lib.rs\"");

    assert_eq!(preview.warnings, vec!["warning text"]);
}
// Builds a `LogDigestReport` fixture with two entries in each of signals,
// repeated lines, file refs and symbol refs, then previews it under a small
// budget. The assertions check that signals and file refs are cut to one
// entry, long text is shortened with "...", summary refs carry the expected
// handle prefixes, and warnings are passed through.
#[test]
fn context_pack_log_preview_limits_signals_and_refs() {
let report = log_digest::LogDigestReport {
root: "/repo".to_string(),
total_lines: 12,
non_empty_lines: 10,
signal_groups: 2,
repeated_line_groups: 2,
repeated_line_occurrences: 3,
file_ref_groups: 2,
symbol_ref_groups: 2,
stack_groups: 1,
// Only the first signal carries a current summary; it is the one the
// assertions below inspect.
signals: vec![
log_digest::LogDigestSignal {
severity: "error".to_string(),
message: "src/lib.rs:42 boom".to_string(),
path: Some("src/lib.rs".to_string()),
line: Some(42),
column: None,
occurrences: 2,
summary_state: log_digest::LogDigestSummaryState::Current,
current_summaries: vec![log_digest::LogDigestSummarySnippet {
symbol: "alpha_helper".to_string(),
summary: "alpha helper cached log summary".to_string(),
}],
},
log_digest::LogDigestSignal {
severity: "warn".to_string(),
message: "slow path".to_string(),
path: None,
line: None,
column: None,
occurrences: 1,
summary_state: log_digest::LogDigestSummaryState::Unavailable,
current_summaries: vec![],
},
],
repeated_lines: vec![
log_digest::LogDigestRepeatedLine {
line: "retrying work item alpha".to_string(),
occurrences: 3,
},
log_digest::LogDigestRepeatedLine {
line: "retrying work item beta".to_string(),
occurrences: 2,
},
],
file_refs: vec![
log_digest::LogDigestFileRef {
path: "src/lib.rs".to_string(),
line: Some(42),
column: None,
occurrences: 2,
summary_state: log_digest::LogDigestSummaryState::Current,
current_summaries: vec![log_digest::LogDigestSummarySnippet {
symbol: "alpha_helper".to_string(),
summary: "alpha helper cached file summary".to_string(),
}],
},
log_digest::LogDigestFileRef {
path: "src/main.rs".to_string(),
line: Some(7),
column: None,
occurrences: 1,
summary_state: log_digest::LogDigestSummaryState::Missing,
current_summaries: vec![],
},
],
symbol_refs: vec![
log_digest::LogDigestSymbolRef {
symbol: "alpha_helper".to_string(),
occurrences: 2,
summary_state: log_digest::LogDigestSummaryState::Current,
current_summaries: vec![log_digest::LogDigestSummarySnippet {
symbol: "alpha_helper".to_string(),
summary: "alpha helper cached symbol summary".to_string(),
}],
},
log_digest::LogDigestSymbolRef {
symbol: "beta_helper".to_string(),
occurrences: 1,
summary_state: log_digest::LogDigestSummaryState::Missing,
current_summaries: vec![],
},
],
stack_traces: vec![log_digest::LogDigestStackGroup {
frames: vec!["frame one".to_string()],
occurrences: 1,
}],
warnings: vec!["warning text".to_string()],
};
let preview =
build_context_pack_log_preview(&report, ResponseBudget::new(Some(1), Some(14)), None);
assert!(preview.truncated);
assert_eq!(preview.signals.len(), 1);
// Long message and repeated-line text is expected back shortened with "...".
assert_eq!(preview.signals[0].message, "src/lib.rs:...");
assert_eq!(preview.repeated_lines[0].line, "retrying wo...");
assert_eq!(preview.file_refs.len(), 1);
// The 12-character symbol name is expected back unshortened.
assert_eq!(preview.symbol_refs[0].symbol, "alpha_helper");
// Signal, file and symbol summary refs each use a distinct handle prefix.
assert!(
preview.signals[0].summary_refs[0]
.handle
.starts_with("clsum-")
);
assert!(
preview.file_refs[0].summary_refs[0]
.handle
.starts_with("clfsum-")
);
assert!(
preview.symbol_refs[0].summary_refs[0]
.handle
.starts_with("clssum-")
);
assert_eq!(
preview.symbol_refs[0].summary_refs[0].tag_alias.as_deref(),
Some("alpha/helper")
);
// The expand command for a symbol summary ref quotes the symbol name.
assert_eq!(
preview.symbol_refs[0].summary_refs[0].expand,
"tsift summarize \"alpha_helper\""
);
assert_eq!(preview.warnings, vec!["warning text"]);
}
// `--exact` combined with `--strategy` must be rejected at parse time.
#[test]
fn cli_search_rejects_exact_with_strategy_flag() {
    let argv = ["tsift", "search", "test", "--exact", "--strategy", "lexical"];
    let parsed = try_parse_cli(argv);
    assert!(parsed.is_err());
}
// With neither flag given, both parse as `false`, and the combined expression
// `autoindex || !no_autoindex` is true.
#[test]
fn cli_search_autoindexes_by_default() {
    let parsed = parse_cli(["tsift", "search", "test"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command")
    };
    assert!(!autoindex);
    assert!(!no_autoindex);
    assert!(autoindex || !no_autoindex);
}
// `--no-autoindex` sets `no_autoindex` and leaves `autoindex` unset.
#[test]
fn cli_search_accepts_no_autoindex_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--no-autoindex"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command")
    };
    assert!(!autoindex);
    assert!(no_autoindex);
}
// Passing both `--autoindex` and `--no-autoindex` must fail to parse.
#[test]
fn cli_search_rejects_conflicting_autoindex_flags() {
    let argv = ["tsift", "search", "test", "--autoindex", "--no-autoindex"];
    let parsed = try_parse_cli(argv);
    assert!(parsed.is_err());
}
// --- relativize paths ---
// `--absolute` is accepted ahead of the subcommand and sets `absolute`.
#[test]
fn cli_accepts_global_absolute_flag() {
    let parsed = parse_cli(["tsift", "--absolute", "status"]);
    assert!(parsed.absolute);
    let is_status = matches!(parsed.command, Some(Commands::Status { .. }));
    assert!(is_status);
}
// `--tabular` is accepted ahead of `search` and sets `tabular`.
#[test]
fn cli_accepts_global_tabular_flag() {
    let parsed = parse_cli(["tsift", "--tabular", "search", "test"]);
    assert!(parsed.tabular);
    let is_search = matches!(parsed.command, Some(Commands::Search { .. }));
    assert!(is_search);
}
// `--tabular` combines with the `graph` subcommand.
#[test]
fn cli_tabular_with_graph() {
    let parsed = parse_cli(["tsift", "--tabular", "graph", "main"]);
    assert!(parsed.tabular);
    let is_graph = matches!(parsed.command, Some(Commands::Graph { .. }));
    assert!(is_graph);
}
// `--tabular` combines with the `communities` subcommand.
#[test]
fn cli_tabular_with_communities() {
    let parsed = parse_cli(["tsift", "--tabular", "communities"]);
    assert!(parsed.tabular);
    let is_communities = matches!(parsed.command, Some(Commands::Communities { .. }));
    assert!(is_communities);
}
// `--tabular` combines with the `explain` subcommand.
#[test]
fn cli_tabular_with_explain() {
    let parsed = parse_cli(["tsift", "--tabular", "explain", "main"]);
    assert!(parsed.tabular);
    let is_explain = matches!(parsed.command, Some(Commands::Explain { .. }));
    assert!(is_explain);
}
// `traverse` parses its positional node plus `--to`, `--path` and
// `--format html`.
#[test]
fn cli_traverse_accepts_path_target_and_html_format() {
    let argv = [
        "tsift", "traverse", "#kgnv", "--to", "main", "--path", ".", "--format", "html",
    ];
    let parsed = parse_cli(argv);
    let Some(Commands::Traverse {
        node,
        to,
        path,
        format,
        ..
    }) = parsed.command
    else {
        panic!("expected Traverse command")
    };
    assert_eq!(node.as_deref(), Some("#kgnv"));
    assert_eq!(to.as_deref(), Some("main"));
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(format, TraverseFormat::Html);
}
// `semantic` parses its query plus `--path`, `--kind`, `--limit` and `--json`.
#[test]
fn cli_parses_semantic_related_command() {
    let argv = [
        "tsift",
        "semantic",
        "graph navigation",
        "--path",
        ".",
        "--kind",
        "all",
        "--limit",
        "3",
        "--json",
    ];
    let parsed = parse_cli(argv);
    let Some(Commands::Semantic {
        query,
        path,
        kind,
        limit,
        json,
        ..
    }) = parsed.command
    else {
        panic!("expected Semantic command")
    };
    assert_eq!(query, "graph navigation");
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(kind, SemanticRelatedKind::All);
    assert_eq!(limit, 3);
    assert!(json);
}
// `convex-sync` parses its path plus `--snapshot`, `--chunk-size` and `--json`.
#[test]
fn cli_parses_convex_sync_command() {
    let argv = [
        "tsift",
        "convex-sync",
        ".",
        "--snapshot",
        "rows.json",
        "--chunk-size",
        "25",
        "--json",
    ];
    let parsed = parse_cli(argv);
    let Some(Commands::ConvexSync {
        path,
        snapshot,
        chunk_size,
        json,
        ..
    }) = parsed.command
    else {
        panic!("expected ConvexSync command")
    };
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(snapshot, Some(PathBuf::from("rows.json")));
    assert_eq!(chunk_size, 25);
    assert!(json);
}
// `convex-sync` parses the live-mode flags `--remote-snapshot`, `--apply`,
// `--endpoint` and `--auth-token-env`.
#[test]
fn cli_parses_convex_sync_live_flags() {
    let argv = [
        "tsift",
        "convex-sync",
        ".",
        "--remote-snapshot",
        "--apply",
        "--endpoint",
        "https://example.test/convex-graph",
        "--auth-token-env",
        "TSIFT_TEST_TOKEN",
    ];
    let parsed = parse_cli(argv);
    let Some(Commands::ConvexSync {
        remote_snapshot,
        apply,
        endpoint,
        auth_token_env,
        ..
    }) = parsed.command
    else {
        panic!("expected ConvexSync command")
    };
    assert!(remote_snapshot);
    assert!(apply);
    assert_eq!(
        endpoint.as_deref(),
        Some("https://example.test/convex-graph")
    );
    assert_eq!(auth_token_env, "TSIFT_TEST_TOKEN");
}
#[test]
fn cli_parses_graph_db_query() {
    // Outer `graph-db` options come before the `neighborhood` subquery; the
    // subquery's own options follow its positional id.
    let argv = [
        "tsift",
        "graph-db",
        "--backend",
        "convex-snapshot",
        "--convex-snapshot",
        "rows.json",
        "--json",
        "neighborhood",
        "gbak-kgnv",
        "--depth",
        "2",
        "--edge-kind",
        "mentions",
        "--property",
        "path=tasks/software/tsift.md",
        "--cursor",
        "gbak-old",
        "--limit",
        "10",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb {
        backend,
        convex_snapshot,
        json,
        query,
        ..
    }) = cli.command
    else {
        panic!("expected GraphDb command");
    };
    assert_eq!(backend, GraphDbBackend::ConvexSnapshot);
    assert_eq!(convex_snapshot, Some(PathBuf::from("rows.json")));
    assert!(json);

    let GraphDbQuery::Neighborhood {
        id,
        depth,
        edge_kind,
        cursor,
        limit,
        property_filters,
    } = query
    else {
        panic!("expected graph-db neighborhood query");
    };
    assert_eq!(id, "gbak-kgnv");
    assert_eq!(depth, 2);
    assert_eq!(edge_kind.as_deref(), Some("mentions"));
    assert_eq!(cursor.as_deref(), Some("gbak-old"));
    assert_eq!(limit, Some(10));
    assert_eq!(
        property_filters,
        vec!["path=tasks/software/tsift.md".to_string()]
    );
}
#[test]
fn cli_parses_graph_db_backend_eval_surrealdb_candidate() {
    // `backend-eval` collects `--candidate` / `--target` values into lists and
    // accepts the `--full-projection` switch.
    let argv = [
        "tsift",
        "graph-db",
        "--json",
        "backend-eval",
        "--candidate",
        "surrealdb",
        "--target",
        "gval",
        "--full-projection",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb { json, query, .. }) = cli.command else {
        panic!("expected GraphDb command");
    };
    assert!(json);

    let GraphDbQuery::BackendEval {
        candidates,
        targets,
        full_projection,
    } = query
    else {
        panic!("expected graph-db backend-eval query");
    };
    assert_eq!(candidates, vec!["surrealdb".to_string()]);
    assert_eq!(targets, vec!["gval".to_string()]);
    assert!(full_projection);
}
#[test]
fn cli_parses_graph_db_tokensave_backend() {
    // `--backend tokensave` must map to the Tokensave backend for a `node` lookup.
    let argv = [
        "tsift",
        "graph-db",
        "--backend",
        "tokensave",
        "--json",
        "node",
        "fn:main",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb {
        backend,
        json,
        query,
        ..
    }) = cli.command
    else {
        panic!("expected GraphDb command");
    };
    assert_eq!(backend, GraphDbBackend::Tokensave);
    assert!(json);

    let GraphDbQuery::Node { id } = query else {
        panic!("expected graph-db node query");
    };
    assert_eq!(id, "fn:main");
}
#[test]
fn cli_parses_analyze_command() {
    // Repeated `--entry` flags accumulate, in order, into `entry_points`.
    let argv = [
        "tsift", "analyze", ".", "--scope", "core", "--entry", "main", "--entry", "run",
        "--limit", "7", "--json",
    ];
    let cli = parse_cli(argv);
    // The pattern deliberately names every field (no `..`), as the original did.
    let Some(Commands::Analyze {
        path,
        scope,
        entry_points,
        limit,
        json,
    }) = cli.command
    else {
        panic!("expected Analyze command");
    };
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(scope.as_deref(), Some("core"));
    assert_eq!(entry_points, vec!["main".to_string(), "run".to_string()]);
    assert_eq!(limit, 7);
    assert!(json);
}
#[test]
fn cli_parses_graph_db_related_query() {
    // `related` takes a free-text query plus kind/depth/seed-limit/limit options.
    let argv = [
        "tsift",
        "graph-db",
        "--json",
        "related",
        "voice avatar memory retrieval",
        "--kind",
        "all",
        "--depth",
        "3",
        "--seed-limit",
        "4",
        "--limit",
        "12",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb { json, query, .. }) = cli.command else {
        panic!("expected GraphDb command");
    };
    assert!(json);

    let GraphDbQuery::Related {
        query,
        kind,
        depth,
        seed_limit,
        limit,
    } = query
    else {
        panic!("expected graph-db related query");
    };
    assert_eq!(query, "voice avatar memory retrieval");
    assert_eq!(kind, SemanticRelatedKind::All);
    assert_eq!(depth, 3);
    assert_eq!(seed_limit, 4);
    assert_eq!(limit, 12);
}
#[test]
fn cli_parses_graph_db_compact_query() {
    // All three `compact` switches must parse as set.
    let argv = [
        "tsift",
        "graph-db",
        "--path",
        ".",
        "compact",
        "--apply",
        "--prune-tombstones",
        "--confirmed-convex-reconciled",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb { query, .. }) = cli.command else {
        panic!("expected GraphDb command");
    };
    let GraphDbQuery::Compact {
        apply,
        prune_tombstones,
        confirmed_convex_reconciled,
    } = query
    else {
        panic!("expected graph-db compact query");
    };
    assert!(apply);
    assert!(prune_tombstones);
    assert!(confirmed_convex_reconciled);
}
#[test]
fn cli_parses_impact_command() {
    // `impact` accepts a positional path together with `--cached` and `--limit`.
    let argv = ["tsift", "impact", ".", "--cached", "--limit", "5"];
    let cli = parse_cli(argv);
    let Some(Commands::Impact {
        path,
        cached,
        limit,
        ..
    }) = cli.command
    else {
        panic!("expected Impact command");
    };
    assert_eq!(path, PathBuf::from("."));
    assert!(cached);
    assert_eq!(limit, 5);
}
#[test]
fn cli_parses_conflict_matrix_command() {
    // Options precede the positional targets; both targets must be kept in order.
    let argv = [
        "tsift",
        "conflict-matrix",
        "--path",
        "tasks/software/tsift.md",
        "--depth",
        "4",
        "--limit",
        "12",
        "--impact-limit",
        "6",
        "--json",
        "pwcm",
        "#g6kf",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::ConflictMatrix {
        targets,
        path,
        depth,
        limit,
        impact_limit,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected ConflictMatrix command");
    };
    assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(depth, 4);
    assert_eq!(limit, 12);
    assert_eq!(impact_limit, 6);
    assert!(json);
}
#[test]
fn cli_parses_dispatch_trace_command() {
    // `dispatch-trace` takes path/format/depth options followed by positional targets.
    let argv = [
        "tsift",
        "dispatch-trace",
        "--path",
        "tasks/software/tsift.md",
        "--format",
        "html",
        "--depth",
        "4",
        "pwcm",
        "#g6kf",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::DispatchTrace {
        targets,
        path,
        format,
        depth,
        ..
    }) = cli.command
    else {
        panic!("expected DispatchTrace command");
    };
    assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(format, DispatchTraceFormat::Html);
    assert_eq!(depth, 4);
}
#[test]
fn cli_parses_dependency_dag_command() {
    // `dependency-dag` takes path/depth/limit/json options followed by positional targets.
    let argv = [
        "tsift",
        "dependency-dag",
        "--path",
        "tasks/software/tsift.md",
        "--depth",
        "5",
        "--limit",
        "20",
        "--json",
        "alpha",
        "#beta",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::DependencyDag {
        targets,
        path,
        depth,
        limit,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected DependencyDag command");
    };
    assert_eq!(targets, vec!["alpha".to_string(), "#beta".to_string()]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(depth, 5);
    assert_eq!(limit, 20);
    assert!(json);
}
#[test]
fn relativize_strips_root_prefix() {
    // A path under the root comes back without the root prefix.
    let root = std::path::Path::new("/home/user/project");
    let relative = relativize("/home/user/project/src/main.rs", root);
    assert_eq!(relative, "src/main.rs");
}
#[test]
fn relativize_leaves_non_matching_path() {
    // A path outside the root is returned unchanged.
    let root = std::path::Path::new("/home/user/project");
    let outside = relativize("/other/path/file.rs", root);
    assert_eq!(outside, "/other/path/file.rs");
}
#[test]
fn relativize_leaves_already_relative() {
    // An input that is already relative passes through untouched.
    let root = std::path::Path::new("/home/user/project");
    let unchanged = relativize("src/main.rs", root);
    assert_eq!(unchanged, "src/main.rs");
}
#[test]
fn relativize_pathbuf_strips_prefix() {
    // The `Path`-based variant strips the root prefix as well.
    let root = std::path::Path::new("/home/user/project");
    let absolute = std::path::Path::new("/home/user/project/src/lib.rs");
    let relative = relativize_pathbuf(absolute, root);
    assert_eq!(relative, PathBuf::from("src/lib.rs"));
}
#[test]
fn relativize_edges_strips_caller_file() {
    // `relativize_edges` rewrites `caller_file` in place, relative to the root.
    let root = std::path::Path::new("/tmp/proj");
    let edge = index::StoredEdge {
        caller_file: "/tmp/proj/src/main.rs".to_string(),
        caller_name: "main".to_string(),
        caller_line: 1,
        callee_name: "helper".to_string(),
        call_site_line: 5,
        tagpath_handle: None,
    };
    let mut edges = vec![edge];
    relativize_edges(&mut edges, root);
    assert_eq!(edges[0].caller_file, "src/main.rs");
}
#[test]
fn relativize_json_paths_strips_known_keys() {
    // Values under `file` / `path` are relativized, including inside nested
    // arrays; a path-looking value under `name` is left alone.
    let root = std::path::Path::new("/tmp/proj");
    let mut payload = serde_json::json!({
        "file": "/tmp/proj/src/main.rs",
        "path": "/tmp/proj/test.rs",
        "name": "/tmp/proj/not-a-path",
        "hits": [{"path": "/tmp/proj/nested.rs", "score": 1.0}]
    });
    relativize_json_paths(&mut payload, root);
    assert_eq!(payload["file"], "src/main.rs");
    assert_eq!(payload["path"], "test.rs");
    assert_eq!(payload["name"], "/tmp/proj/not-a-path");
    assert_eq!(payload["hits"][0]["path"], "nested.rs");
}
// --- limit caps ---
#[test]
fn cli_graph_accepts_limit_flag() {
    // An explicit `--limit` on `graph` is carried through verbatim.
    let argv = ["tsift", "graph", "main", "--limit", "5"];
    let cli = parse_cli(argv);
    let Some(Commands::Graph { limit, .. }) = cli.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 5);
}
#[test]
fn cli_graph_default_limit_is_20() {
    // Without `--limit`, `graph` falls back to 20.
    let argv = ["tsift", "graph", "main"];
    let cli = parse_cli(argv);
    let Some(Commands::Graph { limit, .. }) = cli.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 20);
}
#[test]
fn cli_communities_accepts_limit_flag() {
    // An explicit `--limit` on `communities` is carried through verbatim.
    let argv = ["tsift", "communities", "--limit", "3"];
    let cli = parse_cli(argv);
    let Some(Commands::Communities { limit, .. }) = cli.command else {
        panic!("expected Communities command");
    };
    assert_eq!(limit, 3);
}
#[test]
fn cli_communities_default_limit_is_10() {
    // Without `--limit`, `communities` falls back to 10.
    let argv = ["tsift", "communities"];
    let cli = parse_cli(argv);
    let Some(Commands::Communities { limit, .. }) = cli.command else {
        panic!("expected Communities command");
    };
    assert_eq!(limit, 10);
}
#[test]
fn cli_explain_accepts_limit_flag() {
    // An explicit `--limit` on `explain` is carried through verbatim.
    let argv = ["tsift", "explain", "main", "--limit", "7"];
    let cli = parse_cli(argv);
    let Some(Commands::Explain { limit, .. }) = cli.command else {
        panic!("expected Explain command");
    };
    assert_eq!(limit, 7);
}
#[test]
fn cli_explain_default_limit_is_15() {
    // Without `--limit`, `explain` falls back to 15.
    let argv = ["tsift", "explain", "main"];
    let cli = parse_cli(argv);
    let Some(Commands::Explain { limit, .. }) = cli.command else {
        panic!("expected Explain command");
    };
    assert_eq!(limit, 15);
}
#[test]
fn cli_limit_zero_means_unlimited() {
    // Only the parse is checked here: `--limit 0` must be accepted and stored as 0.
    let argv = ["tsift", "graph", "main", "--limit", "0"];
    let cli = parse_cli(argv);
    let Some(Commands::Graph { limit, .. }) = cli.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 0);
}
#[test]
fn graph_cmd_limit_runs_ok() {
    // Smoke test: `cmd_graph` succeeds against the fixture index with the
    // numeric argument set to 1.
    let dir = setup_graph_index();
    let root = dir.path();
    let result = cmd_graph(
        "main", root, false, false, None, 1,
        false, false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(result.is_ok());
}
#[test]
fn graph_cmd_unlimited_runs_ok() {
    // Smoke test: `cmd_graph` succeeds against the fixture index with the
    // numeric argument set to 0.
    let dir = setup_graph_index();
    let root = dir.path();
    let result = cmd_graph(
        "main", root, false, false, None, 0,
        false, false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(result.is_ok());
}
#[test]
fn graph_cmd_tabular_runs_ok() {
    // Smoke test: `cmd_graph` succeeds with the single `true` switch in the
    // trailing flag list (presumably the tabular flag — confirm against the signature).
    let dir = setup_graph_index();
    let root = dir.path();
    let result = cmd_graph(
        "main", root, false, false, None, 20,
        false, false, false, false, false, true, false,
        TagpathSearchOpts::default(),
    );
    assert!(result.is_ok());
}
#[test]
fn communities_cmd_tabular_runs_ok() {
    // Smoke test: `cmd_communities` succeeds with the single `true` switch in
    // its flag list (presumably the tabular flag — confirm against the signature).
    let dir = setup_graph_index();
    let root = dir.path();
    let result = cmd_communities(
        root, None, 1, 10,
        false, false, false, false, true, false,
        TagpathSearchOpts::default(),
    );
    assert!(result.is_ok());
}
#[test]
fn explain_cmd_tabular_runs_ok() {
    // Smoke test: `cmd_explain` succeeds with the single `true` switch in its
    // flag list (presumably the tabular flag — confirm against the signature).
    let dir = setup_graph_index();
    let root = dir.path();
    let result = cmd_explain(
        "main", root, None, 15,
        false, false, false, false, false, true, false, false,
    );
    assert!(result.is_ok());
}
#[test]
fn traversal_excludes_agent_doc_runtime_paths_from_source_watermark() {
    // #gdbcacheprove: .agent-doc runtime markdown (snapshots, baselines, archives,
    // session docs, runtime logs) must not contribute to the source watermark, or
    // every agent-doc cycle would invalidate the graph-db backend-eval cache and
    // force a full rebuild on the next run.
    let artifact_paths = [
        ".agent-doc",
        ".agent-doc/snapshots/abc.md",
        ".agent-doc/baselines/abc.md",
        ".agent-doc/archives/2026.md",
        ".agent-doc/runtime/run.jsonl",
        "src/foo/.agent-doc",
        "src/foo/.agent-doc/snapshots/x.md",
        "./.agent-doc/snapshots/x.md",
    ];
    // Real source paths must NOT be excluded.
    let source_paths = [
        "src/main.rs",
        "tests/perf_gate.rs",
        "fixtures/x.json",
        "agent-doc/src/lib.rs", // sibling dir without the leading dot
        "src/.agent-doc-helper.rs",
    ];
    for path in artifact_paths {
        let excluded = traversal_relative_path_is_generated_artifact(path);
        assert!(
            excluded,
            "expected `{path}` to be excluded from source watermark"
        );
    }
    for path in source_paths {
        let excluded = traversal_relative_path_is_generated_artifact(path);
        assert!(
            !excluded,
            "expected `{path}` to be included in source watermark"
        );
    }
}
#[test]
fn traversal_excludes_tsift_and_target_runtime_paths_from_source_watermark() {
    // #cachelookupshift: the conflict-matrix preparation cache key hashes
    // file_state snapshot rows + every markdown file under the root. Any
    // .tsift/, target/, or .agent-doc/ path slipping past the filter would
    // shift the watermark every run because those directories mutate as a
    // side effect of running tsift itself. This test locks the artifact
    // filter against regressions for each prefix variant
    // (bare, root-anchored, nested, and './' leading).
    let artifact_paths = [
        ".tsift",
        ".tsift/index.db",
        ".tsift/indexes/foo/index.db",
        ".tsift/conflict-matrix-cache/inputs/abc.json",
        ".tsift/summaries.db",
        "src/foo/.tsift",
        "src/foo/.tsift/graph.db",
        "./.tsift/index.db",
        "target",
        "target/debug/build/x",
        "target/release/tsift",
        "src/foo/target/debug/x",
        "./target/release/x",
    ];
    // Look-alike paths must NOT be excluded — only true artifact dirs.
    let look_alike_paths = [
        "src/ctx-core-dev/lib/a__target/CHANGELOG.md",
        "src/ctx-core-dev/lib/a__target/A__Target/index.d.ts",
        "src/tsift-extras/lib.rs",
        "tsift/README.md",
        "src/targeting.rs",
        "src/.tsiftrc",
        "src/agent-doc-helper.rs",
    ];
    for path in artifact_paths {
        let excluded = traversal_relative_path_is_generated_artifact(path);
        assert!(
            excluded,
            "expected `{path}` to be excluded from source watermark"
        );
    }
    for path in look_alike_paths {
        let excluded = traversal_relative_path_is_generated_artifact(path);
        assert!(
            !excluded,
            "expected `{path}` to be included in source watermark"
        );
    }
}
// Checks three properties of `traversal_source_watermark` on a temp root:
// (1) two back-to-back calls return the same value, (2) rewriting files under
// `.tsift/` and `target/` leaves the value unchanged, and (3) rewriting the
// hinted `README.md` changes it. The steps are order-dependent.
#[test]
fn traversal_source_watermark_is_stable_across_invocations_on_quiescent_root() {
// #cachelookupshift: the conflict-matrix preparation cache only hits
// when traversal_source_watermark returns the same hash for two
// consecutive calls on identical source state. Lock that invariant so
// a future change that folds wall-clock time, a directory mtime, or
// any other non-content input into the hash trips this test before
// regressing the preparation_cache_lookup hit rate. We exercise the
// session_only=true path with a hinted markdown file so the test does
// not need a full index DB to drive the index-snapshot branch.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/main.rs"), "fn main() {}\n").unwrap();
let hint = root.join("README.md");
std::fs::write(&hint, "# stable\n").unwrap();
// Add a generated-artifact directory that must NOT affect the watermark.
std::fs::create_dir_all(root.join(".tsift")).unwrap();
std::fs::write(root.join(".tsift/index.db"), b"placeholder").unwrap();
std::fs::create_dir_all(root.join("target/debug")).unwrap();
std::fs::write(root.join("target/debug/marker"), b"placeholder").unwrap();
// Baseline pair: nothing changes between these two calls.
let first = traversal_source_watermark(root, &hint, None, true)
.expect("first watermark call must succeed")
.expect("first watermark must produce a hash for hinted markdown");
let second = traversal_source_watermark(root, &hint, None, true)
.expect("second watermark call must succeed")
.expect("second watermark must produce a hash for hinted markdown");
assert_eq!(
first, second,
"watermark must be identical across back-to-back invocations on a quiescent root"
);
// Mutating a generated-artifact file must NOT shift the hash.
std::fs::write(root.join(".tsift/index.db"), b"changed").unwrap();
std::fs::write(root.join("target/debug/marker"), b"changed").unwrap();
let third = traversal_source_watermark(root, &hint, None, true)
.expect("third watermark call must succeed")
.expect("third watermark must produce a hash for hinted markdown");
assert_eq!(
first, third,
"watermark must ignore mutations under .tsift/ and target/"
);
// Mutating the hinted markdown file MUST shift the hash so the
// preparation cache invalidates correctly when user state changes.
// Sleep briefly to push the file mtime past the original even on
// coarse-resolution filesystems.
std::thread::sleep(std::time::Duration::from_millis(20));
// The new content is also longer than the original, so the file size changes too.
std::fs::write(&hint, "# stable edited with longer content\n").unwrap();
let fourth = traversal_source_watermark(root, &hint, None, true)
.expect("fourth watermark call must succeed")
.expect("fourth watermark must produce a hash for hinted markdown");
assert_ne!(
first, fourth,
"watermark must invalidate when the hinted markdown file changes"
);
}
// Checks that the watermark is unchanged after a no-op `user_version` pragma
// round-trip on `.tsift/summaries.db`, and that it changes once the stored
// summary row is replaced with one carrying different entities.
#[test]
fn traversal_source_watermark_uses_summary_rows_not_summaries_db_metadata() {
// #gcachemiss: full-projection cache keys must not miss just because the
// SQLite summary cache file header or mtime churned. Only the semantic rows
// that feed traversal projection should participate in the source watermark.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::write(root.join("README.md"), "# stable\n").unwrap();
let summaries_db_path = root.join(".tsift/summaries.db");
// NOTE(review): `.tsift/` is not created explicitly here; presumably
// `SummaryDb::open` creates missing parent directories — confirm.
let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
let mut summary = summarize::Summary {
id: 0,
symbol_name: "main".to_string(),
file_path: "src/main.rs".to_string(),
content_hash: "hash-main".to_string(),
summary: "main wires the CLI".to_string(),
entities: Some(vec![summarize::Entity {
name: "Cli".to_string(),
kind: "type".to_string(),
description: "Command-line interface".to_string(),
}]),
relationships: None,
concept_labels: Some(vec!["cli".to_string()]),
extracted_at: "1700000000".to_string(),
model: "test-model".to_string(),
tokens_input: Some(10),
tokens_output: Some(5),
};
summary_db.insert(&summary).unwrap();
// Close the handle before taking the baseline watermark.
drop(summary_db);
let hint = root.join("README.md");
let first = traversal_source_watermark(root, &hint, None, true)
.expect("first watermark call must succeed")
.expect("first watermark must produce a hash");
std::thread::sleep(std::time::Duration::from_millis(20));
// Metadata-only churn: set `user_version` to 1 and back to 0 so no row
// data changes.
let conn = Connection::open(&summaries_db_path).unwrap();
conn.pragma_update(None, "user_version", 1).unwrap();
conn.pragma_update(None, "user_version", 0).unwrap();
drop(conn);
let second = traversal_source_watermark(root, &hint, None, true)
.expect("second watermark call must succeed")
.expect("second watermark must produce a hash");
assert_eq!(
first, second,
"metadata-only summaries.db churn must not invalidate the source watermark"
);
// Semantic change: replace the row for src/main.rs with different entities.
summary.entities = Some(vec![summarize::Entity {
name: "GraphCache".to_string(),
kind: "type".to_string(),
description: "Stable full-projection cache input".to_string(),
}]);
let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
summary_db.delete_by_file("src/main.rs").unwrap();
summary_db.insert(&summary).unwrap();
drop(summary_db);
let third = traversal_source_watermark(root, &hint, None, true)
.expect("third watermark call must succeed")
.expect("third watermark must produce a hash");
assert_ne!(
first, third,
"semantic summary row changes must invalidate the source watermark"
);
}
// Rewrites a source file with identical bytes after a short sleep, re-applies
// index changes, and asserts the full-projection source watermark value is
// the same as before.
#[test]
fn full_projection_source_watermark_ignores_source_mtime_when_index_rows_unchanged() {
// #gfullhot: backend-eval full-projection cache keys should be based on
// the indexed graph inputs, not file_state mtimes. Touching a source file
// without changing extracted symbols/call edges must still hit the cache.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::create_dir_all(root.join(".tsift")).unwrap();
let source = root.join("src/lib.rs");
let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
std::fs::write(&source, source_body).unwrap();
// Build the index once, then release the handle before reading the watermark.
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.rebuild(root).unwrap();
drop(db);
let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
.unwrap()
.value;
// Same bytes written again after a pause: only file metadata can differ.
std::thread::sleep(std::time::Duration::from_millis(20));
std::fs::write(&source, source_body).unwrap();
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.apply_changes(root).unwrap();
drop(db);
let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
.unwrap()
.value;
assert_eq!(
first, second,
"mtime-only source index churn must not invalidate the full-projection cache"
);
}
// Edits one backlog line in a task markdown document between two watermark
// reads and asserts the full-projection source watermark value is unchanged.
#[test]
fn full_projection_source_watermark_ignores_session_markdown_churn() {
// #gfullhot: the full-projection performance cache isolates code graph
// and semantic-summary inputs. Current session evidence is measured by
// the bounded real dataset, so unrelated task-doc edits must not force a
// million-row full-projection rebuild.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::create_dir_all(root.join("tasks/software")).unwrap();
std::fs::create_dir_all(root.join(".tsift")).unwrap();
std::fs::write(root.join("src/lib.rs"), "pub fn alpha() {}\n").unwrap();
let task_doc = root.join("tasks/software/tsift.md");
std::fs::write(
&task_doc,
"---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Initial item\n",
)
.unwrap();
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.rebuild(root).unwrap();
drop(db);
let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
.unwrap()
.value;
// Only the backlog item text differs; the index is not touched again.
std::fs::write(
&task_doc,
"---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Edited item\n",
)
.unwrap();
let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
.unwrap()
.value;
assert_eq!(
first, second,
"session markdown churn must not invalidate the full-projection code/summary cache"
);
}
// Runs the profiled full projection twice around a same-bytes rewrite of the
// source file. The first run must report a cache miss, the second a hit, and
// on the hit the `source_graph_build` and `projection_rows` phases must be
// present with a duration of zero microseconds.
#[test]
fn full_projection_cache_hit_skips_provider_neutral_rebuild_after_mtime_churn() {
// #gfullhot: once a full-project projection is cached, repeated samples
// with unchanged graph inputs must report zero source_graph_build and
// projection_rows work even if indexed file mtimes changed.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::create_dir_all(root.join(".tsift")).unwrap();
let source = root.join("src/lib.rs");
let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
std::fs::write(&source, source_body).unwrap();
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.rebuild(root).unwrap();
drop(db);
// Cold run: expected to populate the cache.
let (_projection, _warnings, _phases, first_stats) =
graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
assert!(
!first_stats.hit,
"the first full-projection run should populate the cache"
);
// Same bytes written again after a pause, then re-indexed.
std::thread::sleep(std::time::Duration::from_millis(20));
std::fs::write(&source, source_body).unwrap();
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.apply_changes(root).unwrap();
drop(db);
// Warm run: must hit the cache and report the two phases with zero duration.
let (_projection, _warnings, phases, second_stats) =
graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
assert!(second_stats.hit, "mtime-only churn should still cache-hit");
let source_graph_build = phases
.iter()
.find(|phase| phase.name == "full_projection.source_graph_build")
.expect("cache hit must report source_graph_build");
let projection_rows = phases
.iter()
.find(|phase| phase.name == "full_projection.projection_rows")
.expect("cache hit must report projection_rows");
assert_eq!(source_graph_build.duration_micros, 0);
assert_eq!(projection_rows.duration_micros, 0);
}
#[test]
fn build_token_capped_preview_within_cap() {
    // A three-line body under a generous cap is returned whole and uncapped.
    let body: Vec<&str> = vec!["fn foo() {", "  1 + 2", "}"];
    let preview = build_token_capped_preview(&body, 1, 3, 160, 1000);
    assert!(!preview.was_capped);
    assert_eq!(preview.preview.len(), 3);
    assert_eq!(preview.capped_end, 3);
}
#[test]
fn build_token_capped_preview_truncates_long_body() {
    // 200 generated lines against a cap of 100 must be cut short but not emptied.
    let generated: Vec<String> = (0..200)
        .map(|i| format!("    let line_{i} = {i};"))
        .collect();
    let body: Vec<&str> = generated.iter().map(String::as_str).collect();
    let preview = build_token_capped_preview(&body, 1, 200, 160, 100);
    assert!(preview.was_capped);
    assert!(preview.preview.len() < 200);
    assert!(preview.capped_end < 200);
    assert!(!preview.preview.is_empty());
}
#[test]
fn build_token_capped_preview_respects_start_offset() {
    // Starting at line 50, the capped preview must begin there and end
    // somewhere in 50..100.
    let generated: Vec<String> = (0..100).map(|i| format!("line {i}")).collect();
    let body: Vec<&str> = generated.iter().map(String::as_str).collect();
    let preview = build_token_capped_preview(&body, 50, 100, 160, 50);
    assert!(preview.was_capped);
    assert!(preview.capped_end >= 50);
    assert!(preview.capped_end < 100);
    assert_eq!(preview.preview[0].line, 50);
}
#[test]
fn response_budget_body_token_cap_defaults() {
    // Each preset maps to a fixed body token cap; checked in the original order.
    let expectations = [
        (ResponseBudgetPreset::Normal, 1500),
        (ResponseBudgetPreset::Small, 500),
        (ResponseBudgetPreset::Deep, 3000),
    ];
    for (preset, expected_cap) in expectations {
        let budget = ResponseBudget::from_cli(None, None, Some(preset), true);
        assert_eq!(budget.body_token_cap(), expected_cap);
    }
}
#[test]
fn build_token_capped_preview_empty_input() {
    // No lines in means an empty, uncapped preview out.
    let body: Vec<&str> = Vec::new();
    let preview = build_token_capped_preview(&body, 1, 0, 160, 1000);
    assert!(!preview.was_capped);
    assert!(preview.preview.is_empty());
}
#[test]
fn build_token_capped_preview_single_long_line_fits() {
    // NOTE(review): despite the test name, the single input line is the short
    // literal "short"; the test only shows that a one-line body fits under the cap.
    let body: Vec<&str> = vec!["short"];
    let preview = build_token_capped_preview(&body, 1, 1, 160, 100);
    assert!(!preview.was_capped);
    assert_eq!(preview.preview.len(), 1);
    assert_eq!(preview.capped_end, 1);
}
#[test]
fn edge_index_replaces_from_id_to_id_with_positions() {
    // Edge endpoints that match a node id become `from` / `to` positions in
    // `nodes`, and the original `from_id` / `to_id` keys are removed.
    let graph = serde_json::json!({
        "nodes": [
            {"id": "symbol:src/lib.rs:foo"},
            {"id": "symbol:src/lib.rs:bar"},
            {"id": "symbol:src/lib.rs:baz"}
        ],
        "edges": [
            {"from_id": "symbol:src/lib.rs:foo", "to_id": "symbol:src/lib.rs:bar", "k": "calls"},
            {"from_id": "symbol:src/lib.rs:bar", "to_id": "symbol:src/lib.rs:baz", "k": "calls"}
        ]
    });
    let transformed = edge_index_transform(graph);
    let edges = transformed.get("edges").unwrap().as_array().unwrap();
    assert_eq!(edges.len(), 2);

    let (first, second) = (&edges[0], &edges[1]);
    assert_eq!(first["from"], 0);
    assert_eq!(first["to"], 1);
    assert_eq!(second["from"], 1);
    assert_eq!(second["to"], 2);
    assert!(first.get("from_id").is_none());
    assert!(first.get("to_id").is_none());
}
#[test]
fn edge_index_preserves_unresolved_ids_as_strings() {
    // An endpoint with no matching node keeps its string `to_id`; the
    // resolvable endpoint is still replaced by a position.
    let graph = serde_json::json!({
        "nodes": [{"id": "symbol:src/lib.rs:foo"}],
        "edges": [
            {"from_id": "symbol:src/lib.rs:foo", "to_id": "symbol:other.rs:missing", "k": "ref"}
        ]
    });
    let transformed = edge_index_transform(graph);
    let only_edge = &transformed["edges"][0];
    assert_eq!(only_edge["from"], 0);
    assert_eq!(only_edge["to_id"], "symbol:other.rs:missing");
}
#[test]
fn edge_index_noop_without_nodes_and_edges() {
    // Without top-level `nodes` / `edges`, nested `from_id` values stay untouched.
    let document =
        serde_json::json!({"report": {"entries": [{"from_id": "a", "to_id": "b"}]}});
    let transformed = edge_index_transform(document);
    assert_eq!(transformed["report"]["entries"][0]["from_id"], "a");
}
}
// --- SQL introspection ---
/// Serializable summary of one database table, as produced by `schema_overview`.
#[derive(Serialize)]
struct TableInfo {
/// Table name as read from `sqlite_master`.
name: String,
/// Column metadata for the table, from `table_columns`.
columns: Vec<ColumnInfo>,
/// Result of `SELECT COUNT(*)` on the table.
row_count: i64,
}
/// Serializable description of one table column, built from a
/// `PRAGMA table_info` row by `table_columns`.
#[derive(Serialize)]
struct ColumnInfo {
/// Column name.
name: String,
/// Declared column type; serialized under the key `type`.
#[serde(rename = "type")]
col_type: String,
/// Whether the pragma reports the column as NOT NULL.
notnull: bool,
/// Whether the pragma's `pk` value is greater than zero.
pk: bool,
/// Default value reported by the pragma; omitted from the output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
default_value: Option<String>,
}
/// Opens the SQLite database at `path` for reading only.
///
/// The connection is opened with `SQLITE_OPEN_READ_ONLY | SQLITE_OPEN_NO_MUTEX`.
///
/// # Errors
/// Returns the underlying open failure, annotated with the database path.
pub(crate) fn open_db(path: &std::path::Path) -> Result<Connection> {
    let read_only_flags =
        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX;
    Connection::open_with_flags(path, read_only_flags)
        .with_context(|| format!("opening database: {}", path.display()))
}
/// Lists every user table with its column metadata and row count.
///
/// Tables are read from `sqlite_master` in name order. Only names that start
/// with the reserved `sqlite_` prefix are skipped.
///
/// # Errors
/// Fails if the catalog query, a column lookup, or a row count query fails.
pub(crate) fn schema_overview(conn: &Connection) -> Result<Vec<TableInfo>> {
    // `_` is a single-character wildcard in LIKE. Unescaped, the pattern also
    // hid user tables such as `sqlitefoo`, so the underscore is escaped here.
    let mut stmt = conn.prepare(
        "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\' ORDER BY name",
    )?;
    let table_names: Vec<String> = stmt
        .query_map([], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;
    let mut tables = Vec::with_capacity(table_names.len());
    for tbl in table_names {
        let columns = table_columns(conn, &tbl)?;
        // Double any embedded `"` so a table name containing a quote cannot
        // break out of the quoted identifier.
        let quoted = tbl.replace('"', "\"\"");
        let row_count: i64 =
            conn.query_row(&format!("SELECT COUNT(*) FROM \"{quoted}\""), [], |row| {
                row.get(0)
            })?;
        tables.push(TableInfo {
            name: tbl,
            columns,
            row_count,
        });
    }
    Ok(tables)
}
/// Returns column metadata for a single table via `PRAGMA table_info`.
///
/// The pragma row is read positionally: index 1 is the name, 2 the declared
/// type, 3 the NOT NULL flag, 4 the default value, and 5 the primary-key
/// position. Type, NOT NULL, and primary-key values that cannot be read fall
/// back to empty / `false` rather than failing the whole lookup.
///
/// # Errors
/// Fails if the pragma cannot be prepared or a row's name or default value
/// cannot be read.
pub(crate) fn table_columns(conn: &Connection, table: &str) -> Result<Vec<ColumnInfo>> {
    // Double any embedded `"` so a table name containing a quote cannot break
    // out of the quoted identifier.
    let quoted = table.replace('"', "\"\"");
    let mut stmt = conn.prepare(&format!("PRAGMA table_info(\"{quoted}\")"))?;
    let cols = stmt
        .query_map([], |row| {
            Ok(ColumnInfo {
                name: row.get(1)?,
                col_type: row.get::<_, String>(2).unwrap_or_default(),
                notnull: row.get::<_, bool>(3).unwrap_or(false),
                pk: row.get::<_, i32>(5).unwrap_or(0) > 0,
                default_value: row.get(4)?,
            })
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;
    Ok(cols)
}
/// Runs an arbitrary SQL statement and returns its column names together
/// with every row converted to JSON values.
///
/// NULL becomes JSON null, integers and reals become numbers, text becomes a
/// string (lossily decoded as UTF-8), and blobs become a
/// `<blob N bytes>` placeholder string.
///
/// # Errors
/// Fails if the statement cannot be prepared, or if stepping or reading a
/// row fails.
pub(crate) fn execute_query(
    conn: &Connection,
    sql: &str,
) -> Result<(Vec<String>, Vec<Vec<serde_json::Value>>)> {
    /// Converts one borrowed SQLite cell into its JSON representation.
    fn cell_to_json(cell: rusqlite::types::ValueRef<'_>) -> serde_json::Value {
        use rusqlite::types::ValueRef;
        match cell {
            ValueRef::Null => serde_json::Value::Null,
            ValueRef::Integer(n) => serde_json::json!(n),
            ValueRef::Real(f) => serde_json::json!(f),
            ValueRef::Text(s) => {
                serde_json::Value::String(String::from_utf8_lossy(s).into_owned())
            }
            ValueRef::Blob(b) => serde_json::Value::String(format!("<blob {} bytes>", b.len())),
        }
    }

    let mut statement = conn.prepare(sql).context("preparing SQL query")?;
    let headers: Vec<String> = statement
        .column_names()
        .into_iter()
        .map(str::to_owned)
        .collect();
    let width = headers.len();

    let mut records = Vec::new();
    let mut cursor = statement.query([])?;
    while let Some(row) = cursor.next()? {
        let mut cells = Vec::with_capacity(width);
        for column in 0..width {
            cells.push(cell_to_json(row.get_ref(column)?));
        }
        records.push(cells);
    }
    Ok((headers, records))
}
/// Kind of digest runner. The textual forms accepted by `parse` and returned
/// by `as_str` are `test` and `log`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DigestRunnerKind {
/// Textual form `test`.
Test,
/// Textual form `log`.
Log,
}
impl DigestRunnerKind {
    /// Parses a runner kind, ignoring surrounding whitespace and ASCII case.
    ///
    /// # Errors
    /// Fails for any value other than `test` or `log`; the error quotes the
    /// trimmed, lower-cased input.
    fn parse(raw: &str) -> Result<Self> {
        let normalized = raw.trim().to_ascii_lowercase();
        if normalized == "test" {
            return Ok(Self::Test);
        }
        if normalized == "log" {
            return Ok(Self::Log);
        }
        let other = normalized.as_str();
        bail!("unsupported digest runner kind `{other}`; expected test or log")
    }

    /// Returns the canonical lower-case name of this kind.
    fn as_str(self) -> &'static str {
        match self {
            Self::Log => "log",
            Self::Test => "test",
        }
    }
}
/// Splits a command line into whitespace-separated words, keeping a leading
/// single- or double-quoted run together as one word.
///
/// Quotes are only recognized at the start of a word, and the returned slices
/// still include the quote characters. An unterminated quote extends to the
/// end of the input. No escape sequences are interpreted.
pub(crate) fn shell_split(s: &str) -> Vec<&str> {
    let raw = s.as_bytes();
    let total = raw.len();
    let mut words = Vec::new();
    let mut cursor = 0;
    // Each pass finds the next word start, measures the word, and records it.
    while let Some(skipped) = raw[cursor..]
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
    {
        let begin = cursor + skipped;
        let end = match raw[begin] {
            // Quoted word: runs through the matching closing quote, if any.
            delimiter @ (b'"' | b'\'') => raw[begin + 1..]
                .iter()
                .position(|&byte| byte == delimiter)
                .map_or(total, |offset| begin + 1 + offset + 1),
            // Bare word: runs up to the next ASCII whitespace byte.
            _ => raw[begin..]
                .iter()
                .position(|byte| byte.is_ascii_whitespace())
                .map_or(total, |offset| begin + offset),
        };
        words.push(&s[begin..end]);
        cursor = end;
    }
    words
}
/// Wraps a string in double quotes for shell use.
///
/// One pair of matching surrounding quotes (single or double) is stripped
/// first. The result is always double-quoted, with backslashes and double
/// quotes in the content backslash-escaped.
///
/// NOTE(review): `$` and backticks are not escaped, so the shell may still
/// expand them inside the double quotes — confirm callers never pass
/// untrusted text.
pub(crate) fn shell_quote(s: &str) -> String {
    // Require at least two bytes: a lone `"` or `'` both starts and ends with
    // the quote, and slicing `1..0` would panic.
    let is_wrapped_in = |quote: char| s.len() >= 2 && s.starts_with(quote) && s.ends_with(quote);
    let unquoted = if is_wrapped_in('"') || is_wrapped_in('\'') {
        &s[1..s.len() - 1]
    } else {
        s
    };
    // Escaping is a no-op for content without backslashes or double quotes,
    // so a single code path covers the former "safe characters" branch.
    format!(
        "\"{}\"",
        unquoted.replace('\\', "\\\\").replace('"', "\\\"")
    )
}
/// Builds a coverage snapshot for a search that touched nothing: every
/// sector counter is zero, the mode is `Sealed`, and no rebuild is active.
fn empty_search_coverage() -> sift::SearchCoverageSnapshot {
    let mode = sift::SearchCoverageMode::Sealed;
    sift::SearchCoverageSnapshot {
        mode,
        active_rebuild: None,
        total_sector_count: 0,
        mounted_sector_count: 0,
        reused_sector_count: 0,
        dirty_sector_count: 0,
        completed_dirty_sector_count: 0,
        rebuilding_sector_count: 0,
        resumed_sector_count: 0,
    }
}
/// Combines the coverage snapshots of several search responses into one.
///
/// Every sector counter is the sum over all responses. `active_rebuild` is
/// taken from the first response that reports one. The mode is `Sealed` when
/// nothing is dirty or rebuilding, `Converging` when any completed-dirty,
/// rebuilding, or resumed sectors exist, and `Frontier` otherwise.
fn aggregate_search_coverage(responses: &[sift::SearchResponse]) -> sift::SearchCoverageSnapshot {
    let mut total_sector_count = 0;
    let mut mounted_sector_count = 0;
    let mut reused_sector_count = 0;
    let mut dirty_sector_count = 0;
    let mut completed_dirty_sector_count = 0;
    let mut rebuilding_sector_count = 0;
    let mut resumed_sector_count = 0;
    let mut active_rebuild = None;

    // Single pass over the responses, accumulating every counter at once.
    for response in responses {
        let coverage = &response.coverage;
        total_sector_count += coverage.total_sector_count;
        mounted_sector_count += coverage.mounted_sector_count;
        reused_sector_count += coverage.reused_sector_count;
        dirty_sector_count += coverage.dirty_sector_count;
        completed_dirty_sector_count += coverage.completed_dirty_sector_count;
        rebuilding_sector_count += coverage.rebuilding_sector_count;
        resumed_sector_count += coverage.resumed_sector_count;
        // Keep only the first reported rebuild.
        if active_rebuild.is_none() {
            active_rebuild = coverage.active_rebuild.clone();
        }
    }

    let nothing_pending = dirty_sector_count == 0 && rebuilding_sector_count == 0;
    let making_progress = completed_dirty_sector_count > 0
        || rebuilding_sector_count > 0
        || resumed_sector_count > 0;
    let mode = match (nothing_pending, making_progress) {
        (true, _) => sift::SearchCoverageMode::Sealed,
        (false, true) => sift::SearchCoverageMode::Converging,
        (false, false) => sift::SearchCoverageMode::Frontier,
    };

    sift::SearchCoverageSnapshot {
        mode,
        total_sector_count,
        mounted_sector_count,
        reused_sector_count,
        dirty_sector_count,
        completed_dirty_sector_count,
        rebuilding_sector_count,
        resumed_sector_count,
        active_rebuild,
    }
}
/// Build a response for `root` / `strategy` that carries no hits, zero
/// artifact counters, and an empty coverage snapshot.
fn empty_search_response(root: &Path, strategy: &str) -> sift::SearchResponse {
    let strategy = strategy.to_string();
    let root = root.display().to_string();
    sift::SearchResponse {
        strategy,
        root,
        indexed_artifacts: 0,
        skipped_artifacts: 0,
        coverage: empty_search_coverage(),
        hits: Vec::new(),
    }
}
/// Rewrite every relative hit path in `response` so it is joined onto
/// `search_root`; paths that are already absolute are left untouched.
fn absolutize_search_hit_paths(response: &mut sift::SearchResponse, search_root: &Path) {
    for hit in response.hits.iter_mut() {
        if Path::new(&hit.path).is_absolute() {
            continue;
        }
        let joined = search_root.join(&hit.path);
        hit.path = joined.display().to_string();
    }
}
/// Merge per-scope search responses into one ranked response.
///
/// Artifact counters are summed and coverage is aggregated across
/// `responses` (an empty input yields the empty coverage snapshot). All hits
/// are pooled, sorted by descending score with `path` and then `location` as
/// tie-breakers, truncated to `limit`, and re-ranked starting at 1. The
/// returned response reports the caller-supplied `root` and `strategy`.
///
/// A NaN score is ranked below every real score. Comparing NaN with
/// `partial_cmp(..).unwrap_or(Equal)` does not form a total order, which
/// `sort_by` requires (it may otherwise produce an arbitrary order or panic).
fn merge_search_responses(
    root: &Path,
    strategy: &str,
    limit: usize,
    responses: Vec<sift::SearchResponse>,
) -> sift::SearchResponse {
    let indexed_artifacts = responses
        .iter()
        .map(|response| response.indexed_artifacts)
        .sum();
    let skipped_artifacts = responses
        .iter()
        .map(|response| response.skipped_artifacts)
        .sum();
    let coverage = if responses.is_empty() {
        empty_search_coverage()
    } else {
        aggregate_search_coverage(&responses)
    };
    let mut hits: Vec<sift::SearchHit> = responses
        .into_iter()
        .flat_map(|response| response.hits)
        .collect();
    // Map NaN to the lowest possible key so the comparator below never sees an
    // incomparable pair; every non-NaN score compares exactly as before.
    let rank_key = |score: f64| if score.is_nan() { f64::NEG_INFINITY } else { score };
    hits.sort_by(|left, right| {
        rank_key(right.score)
            .partial_cmp(&rank_key(left.score))
            .unwrap_or(Ordering::Equal)
            .then_with(|| left.path.cmp(&right.path))
            .then_with(|| left.location.cmp(&right.location))
    });
    hits.truncate(limit);
    for (index, hit) in hits.iter_mut().enumerate() {
        hit.rank = index + 1;
    }
    sift::SearchResponse {
        strategy: strategy.to_string(),
        root: root.display().to_string(),
        indexed_artifacts,
        skipped_artifacts,
        coverage,
        hits,
    }
}
/// Run a sift search across every resolved search-index target under `root`
/// and merge the results.
///
/// When no targets resolve, the behavior depends on the workspace: with no
/// submodule directories the search runs directly against `root` (with an
/// empty target list); otherwise an empty response is returned. With targets,
/// each one is searched from its own `source_root`, its hit paths are made
/// absolute, its `root` is rewritten to the workspace root, and all responses
/// are merged and truncated to `limit`.
pub(crate) fn federated_sift_search(
    root: &Path,
    cache_dir: &Path,
    query: &str,
    limit: usize,
    timeout_secs: u64,
    strategy: &str,
) -> Result<sift::SearchResponse> {
    let targets = resolve_search_index_targets(root, root, None, true)?;

    if !targets.is_empty() {
        let workspace_root = root.display().to_string();
        let mut per_target = Vec::with_capacity(targets.len());
        for target in targets.iter() {
            let mut found = run_search_with_timeout(
                &target.source_root,
                cache_dir,
                query,
                limit,
                timeout_secs,
                strategy,
                std::slice::from_ref(target),
            )?;
            absolutize_search_hit_paths(&mut found, &target.source_root);
            found.root = workspace_root.clone();
            per_target.push(found);
        }
        return Ok(merge_search_responses(root, strategy, limit, per_target));
    }

    let has_submodules = !config::Config::submodule_dirs(root)?.is_empty();
    if has_submodules {
        Ok(empty_search_response(root, strategy))
    } else {
        run_search_with_timeout(root, cache_dir, query, limit, timeout_secs, strategy, &[])
    }
}
/// Symbol search fanned out over every federated scope of the workspace.
///
/// Each scope listed by `Config::submodule_dirs` that has federation enabled
/// and an existing index database is queried for up to `limit` symbols. Hits
/// are annotated with tagpath data inside the per-scope loop, against that
/// scope's own `source_root`, and the per-scope diagnostics are folded into
/// one: `loaded` is true when any scope loaded, and `stale` is set (with the
/// reason of the first stale scope) when any scope was stale. The pooled hits
/// are sorted by descending score and truncated to `limit`.
pub(crate) fn federated_symbol_search(
    root: &std::path::Path,
    query: &str,
    limit: usize,
    tagpath_opts: &TagpathSearchOpts,
) -> Result<(Vec<index::SymbolHit>, TagpathAnnotationDiagnostic)> {
    let cfg = config::Config::load(root)?;
    let scopes = config::Config::submodule_dirs(root)?;
    let mut merged_hits: Vec<index::SymbolHit> = Vec::new();
    let mut merged_diag = TagpathAnnotationDiagnostic::default();

    for scope in &scopes {
        if cfg.federation_for_scope(scope) {
            let db_path = cfg.db_path_for(root, &scope.id);
            if db_path.exists() {
                let db = index::IndexDb::open_read_only(&db_path)?;
                let mut scope_hits = db.symbol_search(query, limit)?;
                let scope_diag =
                    annotate_hits_with_tagpath(&mut scope_hits, &scope.source_root, tagpath_opts)?;
                merged_diag.loaded |= scope_diag.loaded;
                // Only the first stale scope contributes its reason.
                if scope_diag.stale && !merged_diag.stale {
                    merged_diag.stale = true;
                    merged_diag.reason = scope_diag.reason;
                }
                merged_hits.extend(scope_hits);
            }
        }
    }

    merged_hits.sort_by(|lhs, rhs| {
        rhs.score
            .partial_cmp(&lhs.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    merged_hits.truncate(limit);
    Ok((merged_hits, merged_diag))
}
/// One line of `rg --json` output, discriminated by its `"type"` field.
///
/// Only `"match"` events are decoded; every other event type deserializes to
/// [`RipgrepJsonEvent::Other`] and is skipped by the parser.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum RipgrepJsonEvent {
/// A matching line together with its file path and line number.
Match {
data: RipgrepMatchData,
},
/// Any event whose `"type"` is not `"match"`.
#[serde(other)]
Other,
}
/// Payload of a ripgrep `"match"` event: only the fields the exact-search
/// parser reads.
#[derive(Debug, Deserialize)]
struct RipgrepMatchData {
// Path of the file containing the match.
path: RipgrepTextField,
// Text of the matching line(s); trailing CR/LF is trimmed by the parser.
lines: RipgrepTextField,
// Line number of the match, when ripgrep reports one.
line_number: Option<usize>,
}
/// A ripgrep JSON value that may carry a `text` member.
///
/// NOTE(review): ripgrep appears to emit a `bytes` member instead of `text`
/// for data that is not valid UTF-8, which would leave `text` as `None` here —
/// confirm against the ripgrep JSON format documentation.
#[derive(Debug, Deserialize)]
struct RipgrepTextField {
text: Option<String>,
}
/// Run the exact (ripgrep) search in every federated scope under `root` and
/// merge the per-scope responses into one `"exact"` response.
///
/// Scopes with federation disabled are skipped. Each scope is searched from
/// its own `source_root`; its hit paths are made absolute and its `root` is
/// rewritten to the workspace root before merging.
pub(crate) fn federated_exact_search(
    root: &Path,
    query: &str,
    limit: usize,
    timeout_secs: u64,
) -> Result<sift::SearchResponse> {
    let cfg = config::Config::load(root)?;
    let scopes = config::Config::submodule_dirs(root)?;
    let mut per_scope = Vec::new();
    for scope in scopes {
        if cfg.federation_for_scope(&scope) {
            let mut found =
                run_exact_search_with_timeout(&scope.source_root, query, limit, timeout_secs)?;
            absolutize_search_hit_paths(&mut found, &scope.source_root);
            found.root = root.display().to_string();
            per_scope.push(found);
        }
    }
    Ok(merge_search_responses(root, "exact", limit, per_scope))
}
/// Run a sift search in-process (no timeout) over `search_path`, using
/// `cache_dir` as the engine cache and the given `limit` and `strategy`.
pub(crate) fn run_sift_search(
    search_path: &Path,
    cache_dir: &Path,
    query: &str,
    limit: usize,
    strategy: &str,
) -> Result<sift::SearchResponse> {
    let engine = Sift::builder().with_cache_dir(cache_dir).build();
    let request = SearchInput::new(search_path, query).with_options(
        SearchOptions::default()
            .with_limit(limit)
            .with_strategy(strategy.to_string()),
    );
    engine.search(request).context("sift search failed")
}
/// User-facing error text for an exact search that exceeded `timeout_secs`.
fn exact_search_timeout_message(timeout_secs: u64) -> String {
    let mut message = format!(
        "tsift search timed out after {}s (strategy: exact). ",
        timeout_secs
    );
    message.push_str(
        "Re-run with `--timeout 0` to disable the timeout or narrow `--path` / `--scope`.",
    );
    message
}
/// Build (without spawning) the `rg` invocation used for exact search:
/// JSON output, fixed-string matching, line numbers, hidden files included.
/// The `--` separator keeps a query that begins with `-` from being parsed as
/// a flag.
fn exact_search_command(search_path: &Path, query: &str) -> Command {
    let mut rg = Command::new("rg");
    rg.args(["--json", "--fixed-strings", "--line-number", "--hidden", "--"]);
    rg.arg(query);
    rg.arg(search_path);
    rg
}
/// Freshness record for a file hit: the current time as the observation
/// timestamp, plus the file's modification time when it can be read.
///
/// Both values are whole seconds since the Unix epoch. A clock before the
/// epoch yields an observation time of 0; any failure to read the
/// modification time yields `None`.
fn exact_search_file_timestamp(path: &Path) -> sift::ArtifactFreshness {
    let observed_unix_secs = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs() as i64,
        Err(_) => 0,
    };
    let modified_unix_secs = match fs::metadata(path).and_then(|metadata| metadata.modified()) {
        Ok(modified) => modified
            .duration_since(UNIX_EPOCH)
            .ok()
            .map(|since_epoch| since_epoch.as_secs() as i64),
        Err(_) => None,
    };
    sift::ArtifactFreshness {
        observed_unix_secs,
        modified_unix_secs,
    }
}
/// Convert raw `rg --json` output into an `"exact"` search response.
///
/// Each line of `raw` is parsed as one ripgrep event; a line that fails to
/// parse aborts with an error. Only `match` events that carry both a textual
/// path and textual line content become hits. Parsing stops as soon as `limit`
/// hits have been collected, and a `limit` of 0 returns an empty response
/// without parsing anything. Hits are ranked in output order, with scores
/// descending from `limit`.
fn parse_exact_search_output(
    search_path: &Path,
    limit: usize,
    raw: &str,
) -> Result<sift::SearchResponse> {
    // Shared response shape for both the early return and the normal path.
    let respond = |hits: Vec<sift::SearchHit>| sift::SearchResponse {
        strategy: "exact".to_string(),
        root: search_path.display().to_string(),
        indexed_artifacts: hits.len(),
        skipped_artifacts: 0,
        coverage: empty_search_coverage(),
        hits,
    };
    if limit == 0 {
        return Ok(respond(Vec::new()));
    }

    let mut collected = Vec::new();
    for line in raw.lines() {
        let data = match serde_json::from_str::<RipgrepJsonEvent>(line)
            .context("parsing ripgrep exact-search output")?
        {
            RipgrepJsonEvent::Match { data } => data,
            RipgrepJsonEvent::Other => continue,
        };
        let (Some(path_text), Some(lines_text)) = (data.path.text, data.lines.text) else {
            continue;
        };
        let line_number = data.line_number;
        let path = PathBuf::from(path_text);
        let snippet = lines_text.trim_end_matches(['\r', '\n']).to_string();
        let rank = collected.len() + 1;
        let freshness = exact_search_file_timestamp(&path);
        let budget = sift::ArtifactBudget::from_text(&snippet, 1);
        collected.push(sift::SearchHit {
            artifact_id: format!(
                "exact:{}:{}:{}",
                path.display(),
                line_number.unwrap_or(0),
                rank
            ),
            artifact_kind: sift::ContextArtifactKind::File,
            path: path.display().to_string(),
            rank,
            // First hit scores `limit`, the next `limit - 1`, and so on.
            score: (limit.saturating_sub(rank).saturating_add(1)) as f64,
            confidence: sift::ScoreConfidence::High,
            location: line_number.map(|line| format!("line {}", line)),
            snippet,
            provenance: sift::ArtifactProvenance {
                adapter: sift::AcquisitionAdapterKind::FileSystem,
                source: "ripgrep -F".to_string(),
                synthetic: false,
            },
            freshness,
            budget,
        });
        if collected.len() >= limit {
            break;
        }
    }
    Ok(respond(collected))
}
/// Turn a finished ripgrep process into a search response.
///
/// A successful exit and exit code 1 are both accepted and their stdout is
/// parsed. Any other status is an error that carries ripgrep's trimmed stderr,
/// or a generic status message when stderr is blank. Stdout must be valid
/// UTF-8.
fn exact_search_response_from_process(
    search_path: &Path,
    limit: usize,
    status: std::process::ExitStatus,
    stdout: &[u8],
    stderr: &[u8],
) -> Result<sift::SearchResponse> {
    let accepted = status.success() || status.code() == Some(1);
    if !accepted {
        let stderr_text = String::from_utf8_lossy(stderr);
        match stderr_text.trim() {
            "" => bail!("ripgrep exact search exited with status {}", status),
            detail => bail!("{}", detail),
        }
    }
    let decoded =
        String::from_utf8(stdout.to_vec()).context("decoding ripgrep exact-search output")?;
    parse_exact_search_output(search_path, limit, &decoded)
}
/// Run ripgrep to completion (no timeout) and convert its captured output
/// into a search response.
fn run_exact_search(search_path: &Path, query: &str, limit: usize) -> Result<sift::SearchResponse> {
    let std::process::Output {
        status,
        stdout,
        stderr,
    } = exact_search_command(search_path, query)
        .output()
        .context("running exact search with ripgrep")?;
    exact_search_response_from_process(search_path, limit, status, &stdout, &stderr)
}
pub(crate) fn run_exact_search_with_timeout(
search_path: &Path,
query: &str,
limit: usize,
timeout_secs: u64,
) -> Result<sift::SearchResponse> {
if timeout_secs == 0 {
return run_exact_search(search_path, query, limit);
}
let mut child = exact_search_command(search_path, query)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.context("spawning timed exact search worker")?;
let timeout = Duration::from_secs(timeout_secs);
let status = wait_for_child_exit(&mut child, timeout)
.context("waiting for timed exact search worker")?;
if status.is_none() {
let _ = child.kill();
let _ = child.wait();
bail!("{}", exact_search_timeout_message(timeout_secs));
}
let status = status.unwrap();
let stdout = read_child_stdout(&mut child)?;
let stderr = read_child_stderr(&mut child)?;
exact_search_response_from_process(
search_path,
limit,
status,
stdout.as_bytes(),
stderr.as_bytes(),
)
}
pub(crate) fn run_search_with_timeout(
search_path: &Path,
cache_dir: &Path,
query: &str,
limit: usize,
timeout_secs: u64,
strategy: &str,
search_targets: &[SearchIndexTarget],
) -> Result<sift::SearchResponse> {
if timeout_secs == 0 {
return run_sift_search(search_path, cache_dir, query, limit, strategy);
}
let output_path = next_search_worker_output_path();
let mut child = Command::new(
std::env::current_exe().context("resolving tsift executable for timed search")?,
)
.arg("__search-worker")
.arg("--path")
.arg(search_path)
.arg("--cache-dir")
.arg(cache_dir)
.arg("--query")
.arg(query)
.arg("--limit")
.arg(limit.to_string())
.arg("--strategy")
.arg(strategy)
.arg("--output")
.arg(&output_path)
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::piped())
.spawn()
.context("spawning timed sift search worker")?;
let timeout = Duration::from_secs(timeout_secs);
let status =
wait_for_child_exit(&mut child, timeout).context("waiting for timed sift search worker")?;
if status.is_none() {
let _ = child.kill();
let _ = child.wait();
let _ = fs::remove_file(&output_path);
bail!(
"{}",
search_timeout_message(timeout_secs, strategy, search_targets)?
);
}
let status = status.unwrap();
let stderr = read_child_stderr(&mut child)?;
if !status.success() {
let _ = fs::remove_file(&output_path);
let message = stderr.trim();
if message.is_empty() {
bail!("sift search worker exited with status {}", status);
}
bail!("{}", message);
}
let raw = fs::read_to_string(&output_path)
.with_context(|| format!("reading search worker output: {}", output_path.display()))?;
let _ = fs::remove_file(&output_path);
serde_json::from_str(&raw).context("parsing search worker output")
}
/// Path in the system temp directory for a search worker's JSON output,
/// named from the current process id and the current time in nanoseconds
/// since the Unix epoch (0 if the clock reads before the epoch).
fn next_search_worker_output_path() -> PathBuf {
    let nanos_since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let file_name = format!(
        "tsift-search-{}-{}.json",
        std::process::id(),
        nanos_since_epoch
    );
    let mut path = std::env::temp_dir();
    path.push(file_name);
    path
}
/// Poll `child` until it exits or `timeout` elapses.
///
/// Returns `Ok(Some(status))` once the child has exited and `Ok(None)` when
/// the timeout is reached first; the child is neither killed nor reaped in
/// that case. Polling sleeps for at most 10 ms between checks, and never past
/// the deadline.
fn wait_for_child_exit(
    child: &mut std::process::Child,
    timeout: Duration,
) -> Result<Option<std::process::ExitStatus>> {
    let poll_interval = Duration::from_millis(10);
    let started = Instant::now();
    loop {
        match child.try_wait()? {
            Some(status) => return Ok(Some(status)),
            None => {
                let elapsed = started.elapsed();
                if elapsed >= timeout {
                    return Ok(None);
                }
                let remaining = timeout.saturating_sub(elapsed);
                std::thread::sleep(remaining.min(poll_interval));
            }
        }
    }
}
/// Take the child's stderr pipe, if it still has one, and read it to the end
/// as UTF-8 text. Returns an empty string when there is no pipe to read.
fn read_child_stderr(child: &mut std::process::Child) -> Result<String> {
    let Some(mut pipe) = child.stderr.take() else {
        return Ok(String::new());
    };
    let mut captured = String::new();
    pipe.read_to_string(&mut captured)
        .context("reading search worker stderr")?;
    Ok(captured)
}
/// Take the child's stdout pipe, if it still has one, and read it to the end
/// as UTF-8 text. Returns an empty string when there is no pipe to read.
fn read_child_stdout(child: &mut std::process::Child) -> Result<String> {
    let Some(mut pipe) = child.stdout.take() else {
        return Ok(String::new());
    };
    let mut captured = String::new();
    pipe.read_to_string(&mut captured)
        .context("reading search worker stdout")?;
    Ok(captured)
}
/// Apply the environment-driven test hooks for the search worker process.
///
/// * `TSIFT_TEST_SEARCH_WORKER_PID_FILE`: when set, the current process id is
///   written to that path.
/// * `TSIFT_TEST_SEARCH_WORKER_SLEEP_MS`: when set, the calling thread sleeps
///   for that many milliseconds; a value that is not a `u64` is an error.
///
/// With neither variable set this is a no-op.
pub(crate) fn maybe_apply_search_worker_test_hooks() -> Result<()> {
    let pid_file = std::env::var("TSIFT_TEST_SEARCH_WORKER_PID_FILE").ok();
    if let Some(path) = pid_file {
        let pid = std::process::id().to_string();
        fs::write(&path, pid)
            .with_context(|| format!("writing search worker pid file: {path}"))?;
    }

    let sleep_request = std::env::var("TSIFT_TEST_SEARCH_WORKER_SLEEP_MS").ok();
    if let Some(ms) = sleep_request {
        let delay_ms: u64 = ms
            .parse()
            .with_context(|| format!("parsing TSIFT_TEST_SEARCH_WORKER_SLEEP_MS={ms}"))?;
        std::thread::sleep(Duration::from_millis(delay_ms));
    }
    Ok(())
}
// Test-only, per-thread slot holding at most one pending lock hook.
// `install_search_post_precheck_lock_hook` fills it,
// `maybe_apply_search_post_precheck_test_hooks` consumes it, and dropping a
// `SearchPostPrecheckLockGuard` clears whatever is left.
#[cfg(test)]
thread_local! {
static SEARCH_POST_PRECHECK_LOCK_HOOK: RefCell<Option<SearchPostPrecheckLockHook>> = const { RefCell::new(None) };
}
/// Which SQLite journaling mode the test lock hook uses while holding its
/// exclusive lock.
#[cfg(test)]
enum SearchPostPrecheckLockMode {
/// Lock under `journal_mode=DELETE` and write a rollback-journal marker file.
RollbackJournal,
/// Lock under `journal_mode=WAL` after forcing a WAL sidecar into existence.
Wal,
}
/// A pending test hook: the database to lock and the journaling mode to lock
/// it under.
#[cfg(test)]
struct SearchPostPrecheckLockHook {
// Database file the hook thread opens and locks.
db_path: PathBuf,
// Journaling mode applied before the exclusive lock is taken.
mode: SearchPostPrecheckLockMode,
}
/// Guard returned when a lock hook is installed; dropping it clears any hook
/// still pending on the current thread.
#[cfg(test)]
struct SearchPostPrecheckLockGuard;
#[cfg(test)]
impl Drop for SearchPostPrecheckLockGuard {
    fn drop(&mut self) {
        // Discard the hook if the code under test never consumed it.
        SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| *slot.borrow_mut() = None);
    }
}
/// Install a lock hook for `db_path` that locks in rollback-journal mode.
#[cfg(test)]
fn install_search_post_precheck_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::RollbackJournal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
/// Install a lock hook for `db_path` that locks in WAL mode.
#[cfg(test)]
fn install_search_post_precheck_wal_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::Wal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
/// Store a lock hook in the current thread's slot and return the guard that
/// clears it on drop.
///
/// # Panics
///
/// Panics if a hook is already installed on this thread.
#[cfg(test)]
fn install_search_post_precheck_lock_hook(
    db_path: PathBuf,
    mode: SearchPostPrecheckLockMode,
) -> SearchPostPrecheckLockGuard {
    SEARCH_POST_PRECHECK_LOCK_HOOK.with(|cell| {
        let mut slot = cell.borrow_mut();
        assert!(
            slot.is_none(),
            "search post-precheck lock hook already installed"
        );
        *slot = Some(SearchPostPrecheckLockHook { db_path, mode });
    });
    SearchPostPrecheckLockGuard
}
/// Test build: fire the lock hook installed on this thread, if any.
///
/// The hook is taken out of the thread-local slot (so it fires at most once)
/// and a background thread opens the hook's database and acquires an exclusive
/// lock in the configured journaling mode. This function returns once that
/// thread signals the lock is held, or fails if no signal arrives within one
/// second. The background thread keeps the lock for 200 ms, then drops the
/// connection and removes the rollback-journal marker file.
///
/// Returns `Ok(())` immediately when no hook is installed.
#[cfg(test)]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
// `take()` consumes the hook so a later call on this thread is a no-op.
let Some(hook) = SEARCH_POST_PRECHECK_LOCK_HOOK.with(|hook| hook.borrow_mut().take()) else {
return Ok(());
};
// The hook thread sends one message once the lock is held.
let (ready_tx, ready_rx) = std::sync::mpsc::sync_channel(1);
std::thread::spawn(move || {
let conn = Connection::open(&hook.db_path).expect("opening db for search lock hook");
match hook.mode {
SearchPostPrecheckLockMode::RollbackJournal => {
conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
.expect("acquiring rollback-journal hook lock");
// Write a marker file at the rollback-journal path while the lock is held.
fs::write(substrate::rollback_journal_path(&hook.db_path), "locked")
.expect("writing rollback journal marker");
}
SearchPostPrecheckLockMode::Wal => {
// Switch to WAL, disable auto-checkpointing, and perform a write before
// taking the exclusive lock; the assertion below checks that the WAL
// sidecar file exists afterwards.
conn.execute_batch(
"PRAGMA journal_mode=WAL;
PRAGMA wal_autocheckpoint=0;
CREATE TABLE IF NOT EXISTS search_wal_lock_probe (id INTEGER PRIMARY KEY);
INSERT INTO search_wal_lock_probe DEFAULT VALUES;
PRAGMA locking_mode=EXCLUSIVE;
BEGIN EXCLUSIVE;",
)
.expect("acquiring WAL hook lock");
assert!(substrate::wal_sidecar_path(&hook.db_path).exists());
}
}
ready_tx.send(()).expect("signaling search lock hook");
// Hold the lock for 200 ms before dropping the connection.
std::thread::sleep(Duration::from_millis(200));
drop(conn);
// Best-effort cleanup; runs in both modes, and the result is ignored.
let _ = fs::remove_file(substrate::rollback_journal_path(&hook.db_path));
});
// Block until the lock is held so the caller's next step runs against it.
ready_rx
.recv_timeout(Duration::from_secs(1))
.context("waiting for search post-precheck lock hook")?;
Ok(())
}
/// Non-test build: there are no post-precheck hooks, so this always succeeds
/// without doing anything.
#[cfg(not(test))]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
Ok(())
}