1mod cli;
2mod commands;
3mod community_detection;
4mod conflict_matrix;
5mod context_pack;
6mod output;
7mod rewrite;
8mod search_budget;
9mod semantic_edit;
10mod session_review_budget;
11mod token_savings;
12mod workflow;
13
14pub use rewrite::rewrite_command;
15pub(crate) use rewrite::{apply_rewrite_output_format, execute_rewritten_command, no_rewrite_message};
16pub(crate) use community_detection::{
17 CommunityDetectionReport, annotate_community_members_with_context,
18 community_tagpath_cache_part, community_tagpath_cache_part_for_loaded,
19 detect_communities_cached, file_communities_from_callers,
20 graph_effectiveness_blocked, graph_effectiveness_ready,
21 resolve_tagpath_handle_for_callee_edge, update_community_annotation_diagnostics,
22};
23#[allow(unused_imports)]
24pub(crate) use conflict_matrix::{
25 ConflictMatrixCandidate, ConflictMatrixGraphPreparedInputs,
26 ConflictMatrixPreparedInputs, ConflictMatrixReport,
27 ConflictMatrixSemanticRef, ConflictMatrixSharedPreparationSummary,
28 ConflictMatrixWorkerFeedback, ConflictMatrixWorkerPromptPacket,
29 build_conflict_matrix_report, build_conflict_matrix_report_from_prepared_graph,
30 cmd_conflict_matrix, collect_conflict_matrix_evidence_packets,
31 conflict_matrix_candidate_from_evidence, conflict_matrix_graph_index,
32 conflict_matrix_semantic_ref, conflict_matrix_shared_preparation_summary,
33 conflict_matrix_source_handle, conflict_matrix_target_scoped_graph_snapshot,
34 conflict_matrix_worker_feedback,
35 conflict_risk_label, extract_conflict_target_refs, hash_bytes_hex,
36 is_planner_config_path, normalize_conflict_target,
37 prepare_conflict_matrix_graph_orchestration,
38 prepare_conflict_matrix_inputs, resolve_conflict_matrix_targets,
39 sorted_intersection, sorted_set,
40};
41#[allow(unused_imports)]
42pub(crate) use context_pack::{
43 ContextPackReport, ContextPackSummaryRefPreview,
44 build_context_pack_diff_preview, build_context_pack_log_preview,
45 build_context_pack_report, build_context_pack_report_with_profile,
46 build_context_pack_test_preview, context_pack_status_reminders,
47 exploration_ref_id, materialize_context_pack_exploration_packet,
48 print_context_pack_human,
49};
50pub(crate) use search_budget::{
51 SearchBudgetReportInput,
52 apply_search_facet_filters, build_search_budget_follow_up, build_search_budget_report,
53 print_search_budget_human,
54};
55#[allow(unused_imports)]
56pub(crate) use session_review_budget::{
57 SessionReviewBudgetFailurePreview, SessionReviewBudgetReport,
58 SessionReviewNextContextBudgetReport, SessionReviewNextTokenAction,
59 build_session_review_budget_report, build_session_review_next_context_budget_report,
60 print_session_review_budget_human, print_session_review_next_context_budget_human,
61};
62#[cfg(test)]
63use search_budget::{SearchBudgetReport, search_facet_filters_summary};
64pub(crate) use semantic_edit::{
65 AstSpanPreview, EditBatch, EditResult, EditStatus,
66 MarkdownEmbeddedSymbol, MarkdownSpanMetadata, MetricDigestOptions,
67 SemanticEditVerifyOptions, apply_edit_plan_atomically, build_edit_plan, cmd_edit_intents,
68};
69
70#[cfg(test)]
71use rewrite::{apply_output_cap, effective_rewrite_run_command, resolve_digest_context_path, rewrite_output_cap, OutputCap};
72#[cfg(test)]
73use std::io::{BufRead as _, BufReader};
74#[cfg(test)]
75use token_savings::{
76 TokenSavingsFamily, TokenSavingsFixture, TokenSavingsFixtureCase,
77 TokenSavingsMarkdownProjectionInput, TokenSavingsMarkdownProjectionInputs,
78 TokenSavingsRawSymbol, TokenSavingsSourceReadInput, TokenSavingsSourceReadInputs,
79 build_token_savings_report,
80};
81
82use anyhow::{Context, Result, bail};
83use clap::Parser;
84use cli::{Cli, Commands, DispatchTraceFormat, GraphDbQuery, SemanticRelatedKind};
85#[cfg(test)]
86use cli::{GraphDbBackend, TraverseFormat};
87use commands::digests::{
88 cmd_context_pack, cmd_diff_digest, cmd_log_digest, cmd_metric_digest, cmd_session_cost,
89 cmd_session_digest, cmd_session_review_with_budget, cmd_test_digest,
90};
91#[cfg(test)]
92use commands::graph::cmd_explain;
93use commands::graph::{
94 cmd_analyze, cmd_communities, cmd_explain_with_budget, cmd_graph, cmd_path, cmd_traverse,
95};
96#[cfg(test)]
97use commands::index_search::cmd_search;
98use commands::index_search::{cmd_index, cmd_search_with_budget, cmd_search_worker};
99use commands::infra::{
100 StatusCommandOptions, cmd_convex_sync, cmd_edit, cmd_graph_db, cmd_init, cmd_locks,
101 cmd_rewrite, cmd_route, cmd_sql, cmd_status,
102};
103use commands::memory::cmd_memory;
104use commands::quality::{cmd_audit, cmd_audit_tagpath, cmd_lint};
105use commands::summarize::cmd_summarize;
106use flate2::{Compression, read::GzDecoder, write::GzEncoder};
107use output::tagpath::{
108 TagpathAnnotationDiagnostic, TagpathSearchOpts,
109 annotate_communities_with_tagpath, annotate_hits_with_tagpath,
110 annotate_path_nodes_with_tagpath, annotate_stored_edges_with_tagpath,
111 annotate_stored_symbols_with_tagpath,
112};
113#[cfg(test)]
114use output::ResponseBudgetPreset;
115use output::{
116 OutputFormat, ResponseBudget, ToolEnvelope, ToolEnvelopeMetric,
117 ToolEnvelopeSummary, TranscriptArtifactRef,
118};
119use rusqlite::{Connection, OptionalExtension};
120use serde::{Deserialize, Serialize};
121use sift::{SearchInput, SearchOptions, Sift};
122#[cfg(test)]
123use std::cell::RefCell;
124use std::cmp::Ordering;
125use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
126use std::env;
127use std::fs;
128use std::io::{Read as _, Write as _};
129use std::path::{Path, PathBuf};
130use std::process::{Command, Stdio};
131use std::sync::{Mutex, OnceLock};
132use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
133use substrate::{
134 ConvexEdgeRow, ConvexNodeRow, ConvexProjectionRows, GraphEdge as SubstrateGraphEdge,
135 GraphFreshness, GraphNode as SubstrateGraphNode, GraphProjection, GraphPropertyFilter,
136 GraphProvenance, GraphQueryOptions, GraphQueryPage, GraphStore, SQLITE_GRAPH_SCHEMA_VERSION,
137 SqliteGraphStore, SqliteProjectionRefresh,
138 TerseGraphNode as SubstrateTerseGraphNode, TerseGraphEdge as SubstrateTerseGraphEdge,
139};
140use tsift_core::{NeighborhoodScoring, RankedNeighborhoodOptions};
141use tagpath::{family as tagpath_family, ontology as tagpath_ontology};
142#[cfg(test)]
143use tsift_agent_doc::session_cost;
144#[cfg(test)]
145use tsift_agent_doc::session_review;
146use tsift_digest::{diff_digest, log_digest, metric_digest, test_digest};
147use tsift_graph as graph;
148use tsift_index::{config, index, init, multiplicity, walk};
149use tsift_memory::{MemoryEvent, default_memory_db_path, read_memory_events};
150use tsift_quality::{cycle_packet_cache, dci_benchmark, lint, perf_gate, token_gate};
151use tsift_resolution as resolution;
152use tsift_search::{impact, sift};
153use tsift_sqlite as substrate;
154use tsift_status::status;
155use tsift_summarize::summarize;
156#[cfg(feature = "backend-surrealdb")]
157use tsift_surrealdb::SurrealdbGraphStore;
158use tsift_tokensave::TokensaveDb;
159
/// Graph database engines that can be named as experimental `backend-eval` candidates.
///
/// `GraphDbExperimentalBackend::parse` maps textual spellings onto these variants and
/// `GraphDbExperimentalBackend::name` returns the canonical spelling of each one.
/// The derived `Ord`/`PartialOrd` follow declaration order, so reordering the variants
/// changes how values of this type sort.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize)]
pub(crate) enum GraphDbExperimentalBackend {
    /// DuckDB/DuckPGQ candidate (canonical name `duckdb-duckpgq`).
    DuckdbDuckpgq,
    /// FalkorDB candidate (canonical name `falkordb`).
    Falkordb,
    /// Ladybug candidate (canonical name `ladybug`).
    Ladybug,
    /// Kuzu (Vela-Engineering/kuzu) candidate (canonical name `kuzu`).
    Kuzu,
    /// SurrealDB candidate (canonical name `surrealdb`).
    Surrealdb,
}
168
/// Facet filter lists attached to a search request.
///
/// `run` builds this value from the fields of `Commands::Search`. Every field is a list,
/// and a field whose list is empty is left out when the struct is serialized.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
pub(crate) struct SearchFacetFilters {
    /// Language facet values (filled from the `lang` field of `Commands::Search`).
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) languages: Vec<String>,
    /// Kind facet values (filled from the `kind` field of `Commands::Search`).
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) kinds: Vec<String>,
    /// Node-kind facet values (filled from the `node_kind` field of `Commands::Search`).
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) node_kinds: Vec<String>,
    /// Section facet values; a non-empty list makes `needs_ast_context` return `true`.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) sections: Vec<String>,
    /// Parent facet values; a non-empty list makes `needs_ast_context` return `true`.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) parents: Vec<String>,
    /// Child facet values; a non-empty list makes `needs_ast_context` return `true`.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) children: Vec<String>,
    /// Fence-language facet values; a non-empty list makes `needs_ast_context` return `true`.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) fence_languages: Vec<String>,
    /// List-depth facet values; a non-empty list makes `needs_ast_context` return `true`.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) list_depths: Vec<usize>,
    /// Heading-level facet values; a non-empty list makes `needs_ast_context` return `true`.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub(crate) heading_levels: Vec<usize>,
}
190
191impl SearchFacetFilters {
192 pub(crate) fn is_empty(&self) -> bool {
193 self.languages.is_empty()
194 && self.kinds.is_empty()
195 && self.node_kinds.is_empty()
196 && self.sections.is_empty()
197 && self.parents.is_empty()
198 && self.children.is_empty()
199 && self.fence_languages.is_empty()
200 && self.list_depths.is_empty()
201 && self.heading_levels.is_empty()
202 }
203
204 fn needs_ast_context(&self) -> bool {
205 !self.sections.is_empty()
206 || !self.parents.is_empty()
207 || !self.children.is_empty()
208 || !self.fence_languages.is_empty()
209 || !self.list_depths.is_empty()
210 || !self.heading_levels.is_empty()
211 }
212}
213
/// Serializable summary of the promotion gate for one experimental graph backend.
///
/// Values are produced by `GraphDbExperimentalBackend::promotion_gate`.
#[derive(Serialize)]
struct GraphDbBackendPromotionGate {
    /// Gate state label; `promotion_gate` sets it to `"hold_native_adapter_required"`.
    status: String,
    /// Native-adapter requirement flag; `promotion_gate` sets it to `true`.
    native_adapter_required: bool,
    /// Identifier strings of the required checks, in the order `promotion_gate` lists them.
    required_checks: Vec<String>,
}
220
221impl GraphDbExperimentalBackend {
222 fn name(self) -> &'static str {
223 match self {
224 Self::DuckdbDuckpgq => "duckdb-duckpgq",
225 Self::Falkordb => "falkordb",
226 Self::Ladybug => "ladybug",
227 Self::Kuzu => "kuzu",
228 Self::Surrealdb => "surrealdb",
229 }
230 }
231
232 fn adapter_label(self) -> &'static str {
233 match self {
234 Self::DuckdbDuckpgq => "DuckDB/DuckPGQ read-only prototype",
235 Self::Falkordb => "FalkorDB read-only prototype",
236 Self::Ladybug => "Ladybug read-only prototype",
237 Self::Kuzu => "Kuzu (Vela-Engineering/kuzu) read-only prototype",
238 Self::Surrealdb => "SurrealDB read-only prototype",
239 }
240 }
241
242 fn projection_load(self) -> &'static str {
243 match self {
244 Self::Falkordb => {
245 "provider-neutral rows loaded into a FalkorDB-shaped read snapshot for parity and timing only; production FalkorDB storage remains behind backend-eval until a real adapter passes the full-projection gate"
246 }
247 Self::Kuzu => {
248 "provider-neutral rows loaded into a Kuzu-compatible in-process read snapshot for parity and performance gates; production Vela-Engineering/kuzu storage remains behind a future optional adapter"
249 }
250 Self::Surrealdb => {
251 "provider-neutral rows loaded into a SurrealDB-compatible read snapshot for parity and timing only; production SurrealDB storage remains behind backend-eval until a real optional adapter passes the full-projection gate"
252 }
253 _ => {
254 "provider-neutral rows loaded into a dependency-free in-process read snapshot for parity and performance gates"
255 }
256 }
257 }
258
259 fn lock_behavior(self) -> &'static str {
260 match self {
261 Self::Falkordb => {
262 "read-only FalkorDB prototype snapshot; production promotion must prove multi-process writer behavior and local fallback semantics before replacing SQLite"
263 }
264 Self::Kuzu => {
265 "read-only Kuzu prototype snapshot; no SQLite writer lock is taken during benchmarks, and production Vela-Engineering/kuzu promotion must prove concurrent writer semantics before replacing SQLite"
266 }
267 Self::Surrealdb => {
268 "read-only SurrealDB prototype snapshot; production promotion must prove embedded/file-backed writer and read-only lock behavior before replacing SQLite"
269 }
270 _ => "read-only snapshot/row adapter; no writer lock is taken during query benchmarks",
271 }
272 }
273
274 fn install_portability(self) -> &'static str {
275 match self {
276 Self::Falkordb => {
277 "prototype is dependency-free in this binary; production FalkorDB promotion must keep install optional and preserve cargo build/install without a service"
278 }
279 Self::Kuzu => {
280 "prototype is dependency-free in this binary; production Vela-Engineering/kuzu integration must stay optional so cargo build/install works without a native Kuzu toolchain"
281 }
282 Self::Surrealdb => {
283 "prototype is dependency-free in this binary; production SurrealDB integration must stay optional so cargo build/install works without pulling SurrealDB into the default build"
284 }
285 _ => {
286 "prototype is dependency-free in this binary; a production engine adapter must remain optional before promotion"
287 }
288 }
289 }
290
291 fn prototype_hold_reason(self) -> Option<&'static str> {
292 match self {
293 Self::DuckdbDuckpgq => Some(
294 "DuckDB/DuckPGQ remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior",
295 ),
296 Self::Falkordb => Some(
297 "FalkorDB remains behind backend-eval until a production adapter beats SQLite on full_projection conflict-matrix, evidence, dispatch-trace, path tiers, install portability, and lock behavior",
298 ),
299 Self::Ladybug => Some(
300 "Ladybug remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior",
301 ),
302 Self::Kuzu => Some(
303 "Kuzu remains behind backend-eval until a native optional adapter proves projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior",
304 ),
305 Self::Surrealdb => Some(
306 "SurrealDB remains behind backend-eval until a feature-gated optional adapter proves provider-neutral projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior",
307 ),
308 }
309 }
310
311 fn promotion_gate(self) -> GraphDbBackendPromotionGate {
312 match self {
313 Self::DuckdbDuckpgq => GraphDbBackendPromotionGate {
314 status: "hold_native_adapter_required".to_string(),
315 native_adapter_required: true,
316 required_checks: vec![
317 "native_duckdb_duckpgq_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
318 .to_string(),
319 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
320 .to_string(),
321 "embedded_or_service_lock_behavior_match_or_beat_sqlite".to_string(),
322 "operator_install_cost_keeps_cargo_build_install_duckdb_extension_free_by_default"
323 .to_string(),
324 ],
325 },
326 Self::Falkordb => GraphDbBackendPromotionGate {
327 status: "hold_native_adapter_required".to_string(),
328 native_adapter_required: true,
329 required_checks: vec![
330 "native_falkordb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
331 .to_string(),
332 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
333 .to_string(),
334 "multi_process_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
335 .to_string(),
336 "operator_install_cost_keeps_cargo_build_install_service_free_by_default"
337 .to_string(),
338 ],
339 },
340 Self::Ladybug => GraphDbBackendPromotionGate {
341 status: "hold_native_adapter_required".to_string(),
342 native_adapter_required: true,
343 required_checks: vec![
344 "native_ladybug_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
345 .to_string(),
346 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
347 .to_string(),
348 "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
349 .to_string(),
350 "operator_install_cost_keeps_cargo_build_install_ladybug_free_by_default"
351 .to_string(),
352 ],
353 },
354 Self::Kuzu => GraphDbBackendPromotionGate {
355 status: "hold_native_adapter_required".to_string(),
356 native_adapter_required: true,
357 required_checks: vec![
358 "native_kuzu_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
359 .to_string(),
360 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
361 .to_string(),
362 "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
363 .to_string(),
364 "operator_install_cost_keeps_cargo_build_install_native_kuzu_free_by_default"
365 .to_string(),
366 ],
367 },
368 Self::Surrealdb => GraphDbBackendPromotionGate {
369 status: "hold_native_adapter_required".to_string(),
370 native_adapter_required: true,
371 required_checks: vec![
372 "native_surrealdb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
373 .to_string(),
374 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
375 .to_string(),
376 "embedded_file_backed_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
377 .to_string(),
378 "operator_install_cost_keeps_cargo_build_install_surrealdb_free_by_default"
379 .to_string(),
380 ],
381 },
382 }
383 }
384
385 fn parse(raw: &str) -> Result<Self> {
386 match raw {
387 "duckdb-duckpgq" | "duckdb" | "duckpgq" => Ok(Self::DuckdbDuckpgq),
388 "falkordb" | "falkor" => Ok(Self::Falkordb),
389 "ladybug" => Ok(Self::Ladybug),
390 "kuzu" | "vela-kuzu" => Ok(Self::Kuzu),
391 "surrealdb" | "surreal" | "surreal-db" => Ok(Self::Surrealdb),
392 _ => {
393 bail!(
394 "unknown backend-eval candidate {raw:?}; expected duckdb-duckpgq, falkordb, ladybug, kuzu, or surrealdb"
395 )
396 }
397 }
398 }
399}
400
401
402pub fn run() -> Result<()> {
403 let cli = Cli::parse();
404 let compact = cli.compact;
405 let pretty = cli.pretty;
406 let terse = cli.terse || cli.ultra_terse;
407 let ultra_terse = cli.ultra_terse;
408 let absolute = cli.absolute;
409 let tabular = cli.tabular;
410 let schema = cli.schema;
411 let envelope = cli.envelope;
412 match cli.command {
413 Some(Commands::Search {
414 query,
415 path,
416 limit,
417 strategy,
418 exact,
419 scope,
420 federated,
421 lang,
422 kind,
423 node_kind,
424 section,
425 parent,
426 child,
427 fence_language,
428 list_depth,
429 heading_level,
430 json,
431 autoindex,
432 no_autoindex,
433 timeout,
434 max_items,
435 max_bytes,
436 budget,
437 no_tagpath,
438 tagpath_strict,
439 }) => cmd_search_with_budget(
440 query,
441 path,
442 limit,
443 if exact {
444 Some("exact".to_string())
445 } else {
446 strategy
447 },
448 scope,
449 federated,
450 json || terse || schema || envelope,
451 autoindex || !no_autoindex,
452 timeout,
453 compact,
454 pretty,
455 terse,
456 ultra_terse,
457 absolute,
458 tabular,
459 schema,
460 envelope,
461 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
462 TagpathSearchOpts {
463 no_tagpath,
464 strict: tagpath_strict,
465 },
466 SearchFacetFilters {
467 languages: lang,
468 kinds: kind,
469 node_kinds: node_kind,
470 sections: section,
471 parents: parent,
472 children: child,
473 fence_languages: fence_language,
474 list_depths: list_depth,
475 heading_levels: heading_level,
476 },
477 ),
478 Some(Commands::SearchWorker {
479 path,
480 cache_dir,
481 query,
482 limit,
483 strategy,
484 output,
485 }) => cmd_search_worker(&path, &cache_dir, &query, limit, &strategy, &output),
486 Some(Commands::DigestRunner {
487 kind,
488 path,
489 runner,
490 shell_command,
491 json,
492 }) => cmd_digest_runner(
493 &kind,
494 &path,
495 runner.as_deref(),
496 &shell_command,
497 OutputFormat {
498 json_output: json || terse || schema || envelope,
499 compact,
500 pretty,
501 terse,
502 ultra_terse,
503 schema,
504 envelope,
505 },
506 ),
507 Some(Commands::Edit { dry_run, file }) => {
508 cmd_edit(dry_run, file, compact, pretty, terse, schema)
509 }
510 Some(Commands::EditIntents {
511 path,
512 scope,
513 file,
514 json,
515 apply,
516 verify,
517 verify_command,
518 max_items,
519 max_bytes,
520 budget,
521 }) => cmd_edit_intents(
522 &path,
523 scope.as_deref(),
524 file,
525 apply,
526 SemanticEditVerifyOptions {
527 enabled: verify,
528 command: verify_command.as_deref(),
529 },
530 OutputFormat {
531 json_output: json || terse || schema || envelope,
532 compact,
533 pretty,
534 terse,
535 ultra_terse,
536 schema,
537 envelope,
538 },
539 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
540 ),
541 Some(Commands::Index {
542 path,
543 rebuild,
544 check,
545 exit_code,
546 prune,
547 quiet,
548 workspace,
549 submodule,
550 json,
551 }) => cmd_index(
552 &path,
553 rebuild,
554 check,
555 exit_code,
556 prune,
557 quiet,
558 workspace,
559 submodule.as_deref(),
560 json || terse || schema || envelope,
561 compact,
562 pretty,
563 terse,
564 absolute,
565 schema,
566 ),
567 Some(Commands::Rewrite { command, run }) => cmd_rewrite(
568 &command,
569 run,
570 OutputFormat {
571 json_output: terse || schema || envelope,
572 compact,
573 pretty,
574 terse,
575 ultra_terse,
576 schema,
577 envelope,
578 },
579 ),
580 Some(Commands::Route { task, id }) => cmd_route(&task, id),
581 Some(Commands::Memory { command }) => {
582 let json = command.json_output();
583 cmd_memory(
584 command,
585 OutputFormat {
586 json_output: json || terse || schema || envelope,
587 compact,
588 pretty,
589 terse,
590 ultra_terse,
591 schema,
592 envelope,
593 },
594 )
595 }
596 Some(Commands::Finding { command }) => match command {
597 cli::FindingCommand::Add {
598 path,
599 kind,
600 title,
601 body,
602 about,
603 confidence,
604 status,
605 relates,
606 scope,
607 json,
608 } => commands::finding::cmd_finding_add(
609 &path,
610 &kind,
611 &title,
612 &body,
613 &about,
614 confidence,
615 &status,
616 relates.as_deref(),
617 scope.as_deref(),
618 json || terse || schema || envelope,
619 pretty,
620 ),
621 cli::FindingCommand::List {
622 path,
623 about,
624 kind,
625 status,
626 include_stale,
627 scope,
628 json,
629 } => commands::finding::cmd_finding_list(
630 &path,
631 about.as_deref(),
632 kind.as_deref(),
633 status.as_deref(),
634 include_stale,
635 scope.as_deref(),
636 json || terse || schema || envelope,
637 pretty,
638 ),
639 cli::FindingCommand::Harvest { path, scope, json } => {
640 commands::finding::cmd_finding_harvest(
641 &path,
642 scope.as_deref(),
643 json || terse || schema || envelope,
644 pretty,
645 )
646 }
647 cli::FindingCommand::Promote { id, path, json } => {
648 commands::finding::cmd_finding_promote(
649 &path,
650 &id,
651 json || terse || schema || envelope,
652 pretty,
653 )
654 }
655 },
656 Some(Commands::Graph {
657 symbol,
658 path,
659 callers,
660 callees,
661 scope,
662 limit,
663 json,
664 no_tagpath,
665 tagpath_strict,
666 }) => cmd_graph(
667 &symbol,
668 &path,
669 callers,
670 callees,
671 scope.as_deref(),
672 limit,
673 json || terse || schema || envelope,
674 compact,
675 pretty,
676 terse,
677 absolute,
678 tabular,
679 schema,
680 TagpathSearchOpts {
681 no_tagpath,
682 strict: tagpath_strict,
683 },
684 ),
685 Some(Commands::Sql {
686 db,
687 query,
688 table,
689 json,
690 }) => cmd_sql(
691 &db,
692 query,
693 table,
694 json || terse || schema || envelope,
695 compact,
696 pretty,
697 terse,
698 schema,
699 ),
700 Some(Commands::Communities {
701 path,
702 scope,
703 min_size,
704 limit,
705 json,
706 no_tagpath,
707 tagpath_strict,
708 }) => cmd_communities(
709 &path,
710 scope.as_deref(),
711 min_size,
712 limit,
713 json || terse || schema || envelope,
714 compact,
715 pretty,
716 terse,
717 tabular,
718 schema,
719 TagpathSearchOpts {
720 no_tagpath,
721 strict: tagpath_strict,
722 },
723 ),
724 Some(Commands::Analyze {
725 path,
726 scope,
727 entry_points,
728 limit,
729 json,
730 }) => cmd_analyze(
731 &path,
732 scope.as_deref(),
733 &entry_points,
734 limit,
735 OutputFormat {
736 json_output: json || terse || schema || envelope,
737 compact,
738 pretty,
739 terse,
740 ultra_terse,
741 schema,
742 envelope,
743 },
744 ),
745 Some(Commands::Path {
746 from,
747 to,
748 path,
749 scope,
750 json,
751 no_tagpath,
752 tagpath_strict,
753 }) => cmd_path(
754 &from,
755 &to,
756 &path,
757 scope.as_deref(),
758 json || terse || schema || envelope,
759 compact,
760 pretty,
761 terse,
762 schema,
763 TagpathSearchOpts {
764 no_tagpath,
765 strict: tagpath_strict,
766 },
767 ),
768 Some(Commands::Explain {
769 symbol,
770 path,
771 scope,
772 limit,
773 json,
774 max_items,
775 max_bytes,
776 budget,
777 no_tagpath,
778 tagpath_strict,
779 }) => cmd_explain_with_budget(
780 &symbol,
781 &path,
782 scope.as_deref(),
783 limit,
784 json || terse || schema || envelope,
785 compact,
786 pretty,
787 terse,
788 ultra_terse,
789 absolute,
790 tabular,
791 schema,
792 envelope,
793 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
794 TagpathSearchOpts {
795 no_tagpath,
796 strict: tagpath_strict,
797 },
798 ),
799 Some(Commands::Traverse {
800 node,
801 to,
802 path,
803 scope,
804 depth,
805 limit,
806 format,
807 convex_snapshot,
808 }) => cmd_traverse(
809 node.as_deref(),
810 to.as_deref(),
811 &path,
812 scope.as_deref(),
813 depth,
814 limit,
815 format,
816 pretty,
817 terse,
818 schema,
819 convex_snapshot.as_deref(),
820 ),
821 Some(Commands::ConvexSync {
822 path,
823 scope,
824 snapshot,
825 chunk_size,
826 remote_snapshot,
827 apply,
828 endpoint,
829 auth_token_env,
830 json,
831 }) => cmd_convex_sync(
832 ConvexSyncOptions {
833 path: &path,
834 scope: scope.as_deref(),
835 snapshot: snapshot.as_deref(),
836 chunk_size,
837 remote_snapshot,
838 apply,
839 endpoint: endpoint.as_deref(),
840 auth_token_env: &auth_token_env,
841 },
842 OutputFormat {
843 json_output: json || terse || schema || envelope,
844 compact,
845 pretty,
846 terse,
847 ultra_terse,
848 schema,
849 envelope,
850 },
851 ),
852 Some(Commands::GraphDb {
853 path,
854 scope,
855 backend,
856 convex_snapshot,
857 json,
858 query,
859 }) => cmd_graph_db(
860 &path,
861 scope.as_deref(),
862 backend,
863 convex_snapshot.as_deref(),
864 query,
865 OutputFormat {
866 json_output: json || terse || schema || envelope,
867 compact,
868 pretty,
869 terse,
870 ultra_terse,
871 schema,
872 envelope,
873 },
874 ),
875 Some(Commands::SourceRead {
876 file,
877 path,
878 start,
879 lines,
880 end,
881 scope,
882 json,
883 max_items,
884 max_bytes,
885 budget,
886 }) => cmd_source_read(
887 &file,
888 &path,
889 start,
890 lines,
891 end,
892 scope.as_deref(),
893 OutputFormat {
894 json_output: json || terse || schema || envelope,
895 compact,
896 pretty,
897 terse,
898 ultra_terse,
899 schema,
900 envelope,
901 },
902 absolute,
903 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
904 ),
905 Some(Commands::MarkdownAst {
906 file,
907 path,
908 node,
909 json,
910 max_items,
911 max_bytes,
912 budget,
913 }) => cmd_markdown_ast(
914 &file,
915 &path,
916 node.as_deref(),
917 OutputFormat {
918 json_output: json || terse || schema || envelope,
919 compact,
920 pretty,
921 terse,
922 ultra_terse,
923 schema,
924 envelope,
925 },
926 absolute,
927 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
928 ),
929 Some(Commands::SymbolRead {
930 symbol,
931 file,
932 path,
933 scope,
934 json,
935 max_items,
936 max_bytes,
937 budget,
938 }) => cmd_symbol_read(
939 &symbol,
940 file.as_deref(),
941 &path,
942 scope.as_deref(),
943 OutputFormat {
944 json_output: json || terse || schema || envelope,
945 compact,
946 pretty,
947 terse,
948 ultra_terse,
949 schema,
950 envelope,
951 },
952 absolute,
953 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
954 ),
955 Some(Commands::Audit {
956 skills_dir,
957 manifest,
958 usage,
959 cleanup,
960 report,
961 json,
962 }) => cmd_audit(
963 &skills_dir,
964 manifest,
965 usage,
966 cleanup,
967 report,
968 json || terse || schema || envelope,
969 compact,
970 pretty,
971 terse,
972 schema,
973 ),
974 Some(Commands::AuditTagpath { path, scope, json }) => cmd_audit_tagpath(
975 &path,
976 scope.as_deref(),
977 json || terse || schema || envelope,
978 pretty,
979 terse,
980 schema,
981 ),
982 Some(Commands::Init {
983 path,
984 codex,
985 opencode,
986 workspace,
987 }) => cmd_init(&path, codex, opencode, workspace),
988 Some(Commands::Lint {
989 file,
990 index,
991 entities_from,
992 json,
993 }) => cmd_lint(
994 &file,
995 index,
996 entities_from,
997 json || terse || schema || envelope,
998 compact,
999 pretty,
1000 terse,
1001 schema,
1002 ),
1003 Some(Commands::Summarize {
1004 symbol,
1005 file,
1006 extract,
1007 diff,
1008 stats,
1009 path,
1010 json,
1011 }) => cmd_summarize(
1012 symbol,
1013 file,
1014 extract,
1015 diff,
1016 stats,
1017 &path,
1018 json || terse || schema || envelope,
1019 compact,
1020 pretty,
1021 terse,
1022 schema,
1023 ),
1024 Some(Commands::Semantic {
1025 query,
1026 path,
1027 scope,
1028 limit,
1029 kind,
1030 json,
1031 }) => cmd_semantic_related(
1032 &query,
1033 &path,
1034 scope.as_deref(),
1035 limit,
1036 kind,
1037 json || terse || schema || envelope,
1038 compact,
1039 pretty,
1040 terse,
1041 schema,
1042 ),
1043 Some(Commands::DiffDigest {
1044 path,
1045 cached,
1046 revision,
1047 max_parsed_files,
1048 json,
1049 }) => cmd_diff_digest(
1050 &path,
1051 cached,
1052 revision.as_deref(),
1053 max_parsed_files,
1054 OutputFormat {
1055 json_output: json || terse || schema || envelope,
1056 compact,
1057 pretty,
1058 terse,
1059 ultra_terse,
1060 schema,
1061 envelope,
1062 },
1063 ),
1064 Some(Commands::Impact {
1065 path,
1066 cached,
1067 revision,
1068 scope,
1069 limit,
1070 json,
1071 }) => cmd_impact(
1072 &path,
1073 cached,
1074 revision.as_deref(),
1075 scope.as_deref(),
1076 limit,
1077 OutputFormat {
1078 json_output: json || terse || schema || envelope,
1079 compact,
1080 pretty,
1081 terse,
1082 ultra_terse,
1083 schema,
1084 envelope,
1085 },
1086 ),
1087 Some(Commands::TestDigest {
1088 path,
1089 input,
1090 runner,
1091 json,
1092 }) => cmd_test_digest(
1093 &path,
1094 input.as_deref(),
1095 runner.as_deref(),
1096 OutputFormat {
1097 json_output: json || terse || schema || envelope,
1098 compact,
1099 pretty,
1100 terse,
1101 ultra_terse,
1102 schema,
1103 envelope,
1104 },
1105 ),
1106 Some(Commands::LogDigest { path, input, json }) => cmd_log_digest(
1107 &path,
1108 input.as_deref(),
1109 OutputFormat {
1110 json_output: json || terse || schema || envelope,
1111 compact,
1112 pretty,
1113 terse,
1114 ultra_terse,
1115 schema,
1116 envelope,
1117 },
1118 ),
1119 Some(Commands::ContextPack {
1120 path,
1121 test_input,
1122 runner,
1123 log_input,
1124 json,
1125 max_items,
1126 max_bytes,
1127 budget,
1128 convex_snapshot,
1129 }) => cmd_context_pack(
1130 &path,
1131 test_input.as_deref(),
1132 runner.as_deref(),
1133 log_input.as_deref(),
1134 OutputFormat {
1135 json_output: json || terse || schema || envelope,
1136 compact,
1137 pretty,
1138 terse,
1139 ultra_terse,
1140 schema,
1141 envelope,
1142 },
1143 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
1144 convex_snapshot.as_deref(),
1145 ),
1146 Some(Commands::ConflictMatrix {
1147 targets,
1148 path,
1149 scope,
1150 depth,
1151 limit,
1152 impact_limit,
1153 json,
1154 }) => cmd_conflict_matrix(
1155 &path,
1156 scope.as_deref(),
1157 &targets,
1158 depth,
1159 limit,
1160 impact_limit,
1161 OutputFormat {
1162 json_output: json || terse || schema || envelope,
1163 compact,
1164 pretty,
1165 terse,
1166 ultra_terse,
1167 schema,
1168 envelope,
1169 },
1170 ),
1171 Some(Commands::DispatchTrace {
1172 targets,
1173 path,
1174 scope,
1175 depth,
1176 limit,
1177 impact_limit,
1178 format,
1179 json,
1180 }) => cmd_dispatch_trace(
1181 DispatchTraceOptions {
1182 path: &path,
1183 scope: scope.as_deref(),
1184 raw_targets: &targets,
1185 depth,
1186 limit,
1187 impact_limit,
1188 trace_format: if json {
1189 DispatchTraceFormat::Json
1190 } else {
1191 format
1192 },
1193 },
1194 OutputFormat {
1195 json_output: json || terse || schema || envelope,
1196 compact,
1197 pretty,
1198 terse,
1199 ultra_terse,
1200 schema,
1201 envelope,
1202 },
1203 ),
1204 Some(Commands::DependencyDag {
1205 targets,
1206 path,
1207 scope,
1208 depth,
1209 limit,
1210 json,
1211 }) => cmd_dependency_dag(
1212 &path,
1213 scope.as_deref(),
1214 &targets,
1215 depth,
1216 limit,
1217 OutputFormat {
1218 json_output: json || terse || schema || envelope,
1219 compact,
1220 pretty,
1221 terse,
1222 ultra_terse,
1223 schema,
1224 envelope,
1225 },
1226 ),
1227 Some(Commands::TokenSavings {
1228 fixture,
1229 fail_under,
1230 json,
1231 }) => token_savings::cmd_token_savings(
1232 &fixture,
1233 fail_under,
1234 OutputFormat {
1235 json_output: json || terse || schema || envelope,
1236 compact,
1237 pretty,
1238 terse,
1239 ultra_terse,
1240 schema,
1241 envelope,
1242 },
1243 ),
1244 Some(Commands::MetricDigest {
1245 input,
1246 baseline,
1247 metrics,
1248 lower_is_better,
1249 higher_is_better,
1250 history,
1251 top,
1252 json,
1253 }) => cmd_metric_digest(
1254 MetricDigestOptions {
1255 input_path: input.as_deref(),
1256 baseline_path: baseline.as_deref(),
1257 metrics: &metrics,
1258 lower_is_better: &lower_is_better,
1259 higher_is_better: &higher_is_better,
1260 history,
1261 top,
1262 },
1263 OutputFormat {
1264 json_output: json || terse || schema || envelope,
1265 compact,
1266 pretty,
1267 terse,
1268 ultra_terse,
1269 schema,
1270 envelope,
1271 },
1272 ),
1273 Some(Commands::DciBenchmark { fixture, json }) => cmd_dci_benchmark(
1274 &fixture,
1275 OutputFormat {
1276 json_output: json || terse || schema || envelope,
1277 compact,
1278 pretty,
1279 terse,
1280 ultra_terse,
1281 schema,
1282 envelope,
1283 },
1284 ),
1285 Some(Commands::TokenGate { command }) => {
1286 cmd_token_gate(command, OutputFormat {
1287 json_output: true,
1288 compact,
1289 pretty,
1290 terse,
1291 ultra_terse,
1292 schema,
1293 envelope,
1294 })?;
1295 Ok(())
1296 },
1297 Some(Commands::Workflow { topic, json }) => workflow::cmd_workflow(
1298 &topic,
1299 OutputFormat {
1300 json_output: json || terse || schema || envelope,
1301 compact,
1302 pretty,
1303 terse,
1304 ultra_terse,
1305 schema,
1306 envelope,
1307 },
1308 ),
1309 Some(Commands::SessionDigest {
1310 path,
1311 input,
1312 source,
1313 json,
1314 }) => cmd_session_digest(
1315 &path,
1316 input.as_deref(),
1317 source.as_deref(),
1318 OutputFormat {
1319 json_output: json || terse || schema || envelope,
1320 compact,
1321 pretty,
1322 terse,
1323 ultra_terse,
1324 schema,
1325 envelope,
1326 },
1327 ),
1328 Some(Commands::SessionCost {
1329 input,
1330 source,
1331 json,
1332 }) => cmd_session_cost(
1333 input.as_deref(),
1334 source.as_deref(),
1335 OutputFormat {
1336 json_output: json || terse || schema || envelope,
1337 compact,
1338 pretty,
1339 terse,
1340 ultra_terse,
1341 schema,
1342 envelope,
1343 },
1344 ),
1345 Some(Commands::SessionReview {
1346 path,
1347 next_context,
1348 json,
1349 max_items,
1350 max_bytes,
1351 budget,
1352 }) => cmd_session_review_with_budget(
1353 &path,
1354 next_context,
1355 OutputFormat {
1356 json_output: json || terse || schema || envelope,
1357 compact,
1358 pretty,
1359 terse,
1360 ultra_terse,
1361 schema,
1362 envelope,
1363 },
1364 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
1365 ),
1366 Some(Commands::Status {
1367 path,
1368 fix,
1369 no_fix,
1370 json,
1371 }) => cmd_status(
1372 &path,
1373 StatusCommandOptions {
1374 fix,
1375 no_fix,
1376 json_output: json || terse || schema || envelope,
1377 compact,
1378 pretty,
1379 terse,
1380 schema,
1381 },
1382 ),
1383 Some(Commands::Locks { path, scope, json }) => cmd_locks(
1384 &path,
1385 scope.as_deref(),
1386 json || terse || schema || envelope,
1387 compact,
1388 pretty,
1389 terse,
1390 schema,
1391 ),
1392 None => {
1393 println!("tsift v{}", env!("CARGO_PKG_VERSION"));
1394 println!("Run `tsift --help` for usage.");
1395 Ok(())
1396 }
1397 }
1398}
1399
/// Chooses a model for a free-form task description.
///
/// The task is lower-cased and scanned for keyword substrings. Any
/// analysis/design keyword selects the `"opus"` pair; otherwise any
/// editing keyword selects the `"sonnet"` pair; otherwise the `"haiku"`
/// pair is returned. Matching is by substring, so a keyword embedded in a
/// longer word also counts.
///
/// Returns `(short name, full model id)`.
pub fn classify_task(task: &str) -> (&'static str, &'static str) {
    const ANALYSIS_SIGNALS: &[&str] = &[
        "architect",
        "architecture",
        "design",
        "plan",
        "strateg",
        "analy",
        "review",
        "evaluate",
        "assess",
    ];
    // "add " keeps its trailing space so a bare "add" does not match.
    const EDITING_SIGNALS: &[&str] = &[
        "edit",
        "write",
        "fix",
        "change",
        "update",
        "create",
        "add ",
        "remove",
        "delete",
        "modify",
        "refactor",
        "implement",
        "build",
    ];

    let normalized = task.to_lowercase();
    let mentions_any =
        |signals: &[&str]| signals.iter().any(|signal| normalized.contains(*signal));

    // Analysis keywords take priority over editing keywords.
    if mentions_any(ANALYSIS_SIGNALS) {
        ("opus", "claude-opus-4-6")
    } else if mentions_any(EDITING_SIGNALS) {
        ("sonnet", "claude-sonnet-4-6")
    } else {
        ("haiku", "claude-haiku-4-5-20251001")
    }
}
1443
/// Test-only shorthand for [`to_json_schema`] with the ultra-terse and
/// schema transforms switched off.
#[cfg(test)]
fn to_json<T: serde::Serialize>(val: &T, pretty: bool, terse: bool) -> anyhow::Result<String> {
    let (ultra_terse, schema) = (false, false);
    to_json_schema(val, pretty, terse, ultra_terse, schema)
}
1448
1449pub(crate) fn inject_tagpath_stale_into_json(
1456 value: &mut serde_json::Value,
1457 stale: bool,
1458 reason: Option<&str>,
1459) {
1460 if !stale {
1461 return;
1462 }
1463 if let Some(obj) = value.as_object_mut() {
1464 obj.insert(
1465 "tagpath_index_stale".to_string(),
1466 serde_json::Value::Bool(true),
1467 );
1468 if let Some(reason) = reason {
1469 obj.insert(
1470 "tagpath_stale_reason".to_string(),
1471 serde_json::Value::String(reason.to_string()),
1472 );
1473 }
1474 }
1475}
1476
1477pub(crate) fn to_json_schema<T: serde::Serialize>(
1478 val: &T,
1479 pretty: bool,
1480 terse: bool,
1481 ultra_terse: bool,
1482 schema: bool,
1483) -> anyhow::Result<String> {
1484 if terse || schema {
1485 let value = serde_json::to_value(val)?;
1486 let mut transformed = if terse { terse_transform(value) } else { value };
1487 if ultra_terse {
1488 transformed = ultra_terse_transform(transformed);
1489 transformed = edge_index_transform(transformed);
1490 }
1491 if schema {
1492 transformed = schema_transform(transformed);
1493 }
1494 if terse {
1495 let terse_schema = terse_schema_for(&transformed);
1496 let wrapped = serde_json::json!({"_s": terse_schema, "d": transformed});
1497 if pretty {
1498 Ok(serde_json::to_string_pretty(&wrapped)?)
1499 } else {
1500 Ok(serde_json::to_string(&wrapped)?)
1501 }
1502 } else if pretty {
1503 Ok(serde_json::to_string_pretty(&transformed)?)
1504 } else {
1505 Ok(serde_json::to_string(&transformed)?)
1506 }
1507 } else if pretty {
1508 Ok(serde_json::to_string_pretty(val)?)
1509 } else {
1510 Ok(serde_json::to_string(val)?)
1511 }
1512}
1513
1514pub(crate) fn envelope_metric(label: &str, value: impl ToString) -> ToolEnvelopeMetric {
1515 ToolEnvelopeMetric {
1516 label: label.to_string(),
1517 value: value.to_string(),
1518 }
1519}
1520
/// Removes duplicate strings, keeping the first occurrence of each and the
/// original relative order.
pub(crate) fn dedupe_preserve_order(values: Vec<String>) -> Vec<String> {
    let mut seen: HashSet<String> = HashSet::new();
    values
        .into_iter()
        // `insert` returns true only the first time a value is seen.
        .filter(|candidate| seen.insert(candidate.clone()))
        .collect()
}
1531
1532pub(crate) fn print_json_or_envelope<T: Serialize>(
1533 report: &T,
1534 format: &OutputFormat,
1535 tool: &str,
1536 view: &str,
1537 summary: ToolEnvelopeSummary,
1538 truncated: bool,
1539 follow_up: Vec<String>,
1540) -> Result<()> {
1541 if format.envelope {
1542 let envelope = ToolEnvelope {
1543 tool,
1544 view,
1545 summary,
1546 truncated,
1547 follow_up: dedupe_preserve_order(follow_up),
1548 report,
1549 };
1550 println!(
1551 "{}",
1552 to_json_schema(
1553 &envelope,
1554 format.pretty,
1555 format.terse,
1556 format.ultra_terse,
1557 format.schema
1558 )?
1559 );
1560 } else {
1561 println!(
1562 "{}",
1563 to_json_schema(
1564 report,
1565 format.pretty,
1566 format.terse,
1567 format.ultra_terse,
1568 format.schema
1569 )?
1570 );
1571 }
1572 Ok(())
1573}
1574
/// Estimates a token count from a byte count at four bytes per token,
/// rounding up.
pub(crate) fn estimated_tokens_from_bytes(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 4;
    bytes.div_ceil(BYTES_PER_TOKEN)
}
1578
1579fn cmd_token_gate(
1580 command: cli::TokenGateCommand,
1581 format: OutputFormat,
1582) -> Result<()> {
1583 match command {
1584 cli::TokenGateCommand::Sample {
1585 surface,
1586 path,
1587 scope,
1588 target,
1589 depth,
1590 sample_index,
1591 json: _,
1592 } => cmd_token_gate_sample(&surface, &path, scope.as_deref(), target.as_deref(), depth, sample_index),
1593 cli::TokenGateCommand::Evaluate {
1594 history,
1595 allowed_regression_percent,
1596 json: _,
1597 } => cmd_token_gate_evaluate(history.as_deref(), allowed_regression_percent, &format),
1598 }
1599}
1600
1601fn cmd_token_gate_sample(
1602 surface: &str,
1603 path: &Path,
1604 scope: Option<&str>,
1605 target: Option<&str>,
1606 depth: usize,
1607 sample_index: usize,
1608) -> Result<()> {
1609 if !token_gate::TOKEN_GATE_SURFACES.contains(&surface) {
1610 bail!(
1611 "unknown surface `{}`; expected one of: {}",
1612 surface,
1613 token_gate::TOKEN_GATE_SURFACES.join(", ")
1614 );
1615 }
1616
1617 let path_str = path.to_string_lossy().to_string();
1618 let tsift_bin = std::env::current_exe()?;
1619
1620 let args: Vec<String> = match surface {
1621 "context_pack" => vec![
1622 "context-pack".to_string(),
1623 "--json".to_string(),
1624 path_str,
1625 ],
1626 "session_review_next_context" => vec![
1627 "session-review".to_string(),
1628 "--json".to_string(),
1629 "--next-context".to_string(),
1630 path_str,
1631 ],
1632 "graph_db_evidence" => {
1633 let tgt = target.unwrap_or("default").to_string();
1634 vec![
1635 "graph-db".to_string(),
1636 "--json".to_string(),
1637 "--path".to_string(),
1638 path_str,
1639 "evidence".to_string(),
1640 tgt,
1641 "--depth".to_string(),
1642 depth.to_string(),
1643 ]
1644 }
1645 "conflict_matrix" => {
1646 let tgt = target.unwrap_or("default").to_string();
1647 let mut a = vec![
1648 "conflict-matrix".to_string(),
1649 "--json".to_string(),
1650 "--path".to_string(),
1651 path_str,
1652 "--depth".to_string(),
1653 depth.to_string(),
1654 ];
1655 if let Some(s) = scope {
1656 a.push("--scope".to_string());
1657 a.push(s.to_string());
1658 }
1659 a.push(tgt);
1660 a
1661 }
1662 "dispatch_trace" => {
1663 let tgt = target.unwrap_or("default").to_string();
1664 vec![
1665 "dispatch-trace".to_string(),
1666 "--json".to_string(),
1667 "--path".to_string(),
1668 path_str,
1669 tgt,
1670 ]
1671 }
1672 _ => bail!("unhandled surface: {}", surface),
1673 };
1674
1675 let start = Instant::now();
1676 let child = Command::new(&tsift_bin)
1677 .args(&args)
1678 .stdout(Stdio::piped())
1679 .stderr(Stdio::piped())
1680 .env("TSIFT_QUIET", "1")
1681 .spawn();
1682 let output = match child {
1683 Ok(c) => c.wait_with_output()?,
1684 Err(e) => bail!("failed to spawn tsift for surface {}: {}", surface, e),
1685 };
1686 let runtime_micros = start.elapsed().as_micros() as f64;
1687
1688 let stdout = String::from_utf8_lossy(&output.stdout);
1689 let envelope_bytes = stdout.trim().len() as f64;
1690 let prompt_tokens = estimated_tokens_from_bytes(stdout.trim().len()) as f64;
1691
1692 let cache_hit_rate_percent = 0.0;
1693 let raw_read_avoidance = 0.0;
1694 let useful_hit_density = if prompt_tokens > 0.0 { 0.5 } else { 0.0 };
1695
1696 let timestamp = iso_timestamp_now();
1697 let id = format!(
1698 "{surface}-baseline-{}-sample-{sample_index}",
1699 ×tamp[..10]
1700 );
1701 let label = format!(
1702 "token-gate baseline {surface} sample {sample_index} for {}",
1703 path.display()
1704 );
1705
1706 let mut metrics = BTreeMap::new();
1707 metrics.insert("prompt_tokens".to_string(), prompt_tokens);
1708 metrics.insert("envelope_bytes".to_string(), envelope_bytes);
1709 metrics.insert("runtime_micros".to_string(), runtime_micros);
1710 metrics.insert("cache_hit_rate_percent".to_string(), cache_hit_rate_percent);
1711 metrics.insert("raw_read_avoidance".to_string(), raw_read_avoidance);
1712 metrics.insert("useful_hit_density".to_string(), useful_hit_density);
1713
1714 let sample = token_gate::TokenGateSample {
1715 label,
1716 id,
1717 timestamp: Some(timestamp),
1718 surface: surface.to_string(),
1719 metrics,
1720 };
1721
1722 println!("{}", serde_json::to_string_pretty(&sample)?);
1723 Ok(())
1724}
1725
1726fn cmd_token_gate_evaluate(
1727 history_path: Option<&Path>,
1728 allowed_regression_percent: f64,
1729 format: &OutputFormat,
1730) -> Result<()> {
1731 let history_path = history_path
1732 .map(PathBuf::from)
1733 .unwrap_or_else(|| {
1734 let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1735 p.push("../../fixtures/token-gate-history.json");
1736 p
1737 });
1738
1739 let raw = std::fs::read_to_string(&history_path)
1740 .with_context(|| format!("failed to read token gate history: {}", history_path.display()))?;
1741 let samples = token_gate::parse_token_history(&raw)?;
1742 let report = token_gate::evaluate_token_gate(&samples, allowed_regression_percent);
1743
1744 if format.json_output {
1745 println!("{}", to_json_schema(&report, format.pretty, format.terse, false, format.schema)?);
1746 } else {
1747 println!("Token Gate Report");
1748 println!(" min_samples: {}", report.min_samples);
1749 println!(" allowed_regression: {:.1}%", report.allowed_regression_percent);
1750 println!(" decision: {:?}", report.decision);
1751 for eval in &report.surface_evaluations {
1752 println!(
1753 " {} ({} samples): {:?}",
1754 eval.display_name, eval.sample_count, eval.verdict
1755 );
1756 for me in &eval.metric_evaluations {
1757 println!(
1758 " {} ({:?}): {}",
1759 me.metric, me.direction, me.diagnostic
1760 );
1761 }
1762 }
1763 for d in &report.diagnostics {
1764 println!(" ! {}", d);
1765 }
1766 }
1767 Ok(())
1768}
1769
1770fn iso_timestamp_now() -> String {
1771 let dur = SystemTime::now()
1772 .duration_since(UNIX_EPOCH)
1773 .unwrap_or_default();
1774 let total_secs = dur.as_secs();
1775 let days_since_epoch = total_secs / 86400;
1776 let (year, month, day) = days_to_ymd(days_since_epoch);
1777 let time_of_day = total_secs % 86400;
1778 let hour = (time_of_day / 3600) as u8;
1779 let minute = ((time_of_day % 3600) / 60) as u8;
1780 let second = (time_of_day % 60) as u8;
1781 format!(
1782 "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
1783 year, month, day, hour, minute, second
1784 )
1785}
1786
/// Converts a count of days since 1970-01-01 into `(year, month, day)`.
///
/// `month` and `day` are 1-based. The year is found by subtracting whole
/// years one at a time, then the month by subtracting whole months.
fn days_to_ymd(mut days: u64) -> (u64, u8, u8) {
    const COMMON_YEAR_MONTHS: [u8; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    const LEAP_YEAR_MONTHS: [u8; 12] = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

    let year_length = |year: u64| -> u64 { if is_leap(year) { 366 } else { 365 } };

    let mut year: u64 = 1970;
    while days >= year_length(year) {
        days -= year_length(year);
        year += 1;
    }

    // `days` is now the zero-based day of the year.
    let month_lengths = if is_leap(year) {
        LEAP_YEAR_MONTHS
    } else {
        COMMON_YEAR_MONTHS
    };
    let mut month: u8 = 1;
    for length in month_lengths {
        let length = u64::from(length);
        if days < length {
            break;
        }
        days -= length;
        month += 1;
    }

    (year, month, days as u8 + 1)
}

/// Returns whether `year` is a leap year under the Gregorian rules.
fn is_leap(year: u64) -> bool {
    if year.is_multiple_of(100) {
        // Century years are leap years only when divisible by 400.
        year.is_multiple_of(400)
    } else {
        year.is_multiple_of(4)
    }
}
1818
1819fn persist_transcript_artifact(
1820 root: &Path,
1821 prefix: &str,
1822 suffix: &str,
1823 key: &str,
1824 body: &str,
1825 expand: String,
1826) -> Result<TranscriptArtifactRef> {
1827 let handle = stable_handle(prefix, key);
1828 let artifacts_dir = root.join(".tsift/artifacts");
1829 fs::create_dir_all(&artifacts_dir).with_context(|| {
1830 format!(
1831 "creating transcript artifacts dir: {}",
1832 artifacts_dir.display()
1833 )
1834 })?;
1835 let file_name = format!("{handle}.{suffix}");
1836 let artifact_path = artifacts_dir.join(file_name);
1837 fs::write(&artifact_path, body)
1838 .with_context(|| format!("writing transcript artifact: {}", artifact_path.display()))?;
1839 let rel_path = relativize_pathbuf(&artifact_path, root);
1840 Ok(TranscriptArtifactRef {
1841 handle,
1842 path: rel_path.display().to_string(),
1843 bytes: body.len(),
1844 lines: body.lines().count(),
1845 expand,
1846 })
1847}
1848
1849fn terse_key(key: &str) -> &str {
1850 match key {
1851 "name" => "n",
1852 "kind" => "k",
1853 "file" => "f",
1854 "line" => "l",
1855 "path" => "p",
1856 "from" => "fr",
1857 "type" => "ty",
1858 "text" => "tx",
1859 "new" => "nw",
1860 "run" => "r",
1861 "use" => "u",
1862 "score" => "sc",
1863 "language" => "la",
1864 "status" => "st",
1865 "state" => "stt",
1866 "error" => "err",
1867 "errors" => "ers",
1868 "hops" => "hp",
1869 "tags" => "tg",
1870 "model" => "ml",
1871 "skill" => "sk",
1872 "count" => "ct",
1873 "total" => "tot",
1874 "column" => "col",
1875 "description" => "dsc",
1876 "end_line" => "el",
1877 "signature" => "sig",
1878 "parent_module" => "pm",
1879 "visibility" => "vis",
1880 "match_type" => "mt",
1881 "caller_file" => "cf",
1882 "caller_name" => "cn",
1883 "caller_line" => "cl",
1884 "callee_name" => "en",
1885 "call_site_line" => "csl",
1886 "members" => "m",
1887 "refs" => "refs",
1888 "role" => "rl",
1889 "peer" => "pr",
1890 "modularity" => "q",
1891 "modularity_contribution" => "mc",
1892 "iterations" => "it",
1893 "node_count" => "nc",
1894 "edge_count" => "ec",
1895 "community_count" => "cc",
1896 "communities" => "cms",
1897 "community" => "cm",
1898 "community_diagnostics" => "cd",
1899 "cache_hit" => "cah",
1900 "tagpath_state" => "tps",
1901 "tagpath_stale_reason" => "tsr",
1902 "annotated_community_count" => "acc",
1903 "annotated_member_count" => "amc",
1904 "ambiguous_member_count" => "ambc",
1905 "ambiguous_members" => "amb",
1906 "candidate_count" => "cand",
1907 "tagpath_candidate_count" => "tcand",
1908 "evidence" => "ev",
1909 "chosen_file" => "chf",
1910 "symbol" => "s",
1911 "symbols" => "sy",
1912 "definitions" => "df",
1913 "callers" => "crs",
1914 "callees" => "ces",
1915 "total_tracked" => "tt",
1916 "modified" => "md",
1917 "deleted" => "dl",
1918 "unchanged" => "uc",
1919 "changes" => "ch",
1920 "prune_stats" => "ps",
1921 "hits" => "h",
1922 "rank" => "rk",
1923 "snippet" => "sn",
1924 "confidence" => "co",
1925 "index" => "ix",
1926 "summaries" => "sms",
1927 "recommendations" => "rec",
1928 "total_files" => "tf",
1929 "stale_files" => "sf",
1930 "last_indexed_secs_ago" => "age",
1931 "cached_files" => "caf",
1932 "total_indexed_files" => "tif",
1933 "coverage_pct" => "cov",
1934 "symbol_name" => "syn",
1935 "file_path" => "fp",
1936 "content_hash" => "hsh",
1937 "summary" => "sum",
1938 "tool" => "tl",
1939 "view" => "vw",
1940 "truncated" => "tr",
1941 "follow_up" => "fu",
1942 "report" => "rp",
1943 "metrics" => "ms",
1944 "label" => "lb",
1945 "value" => "v",
1946 "command" => "cmd",
1947 "exit_code" => "xc",
1948 "success" => "ok",
1949 "artifact" => "art",
1950 "digest" => "dg",
1951 "bytes" => "bt",
1952 "lines" => "lns",
1953 "expand" => "xp",
1954 "entities" => "ent",
1955 "relationships" => "rel",
1956 "concept_labels" => "cls",
1957 "extracted_at" => "at",
1958 "tokens_input" => "ti",
1959 "tokens_output" => "tout",
1960 "total_summaries" => "ts",
1961 "stale_count" => "stc",
1962 "total_tokens_input" => "tti",
1963 "total_tokens_output" => "tto",
1964 "estimated_tokens_saved" => "ets",
1965 "files_processed" => "fps",
1966 "symbols_extracted" => "se",
1967 "skills_dir" => "sd",
1968 "healthy" => "ok",
1969 "broken" => "brk",
1970 "skills" => "sks",
1971 "manifest_diffs" => "mdf",
1972 "similar_pairs" => "sim",
1973 "usage" => "usg",
1974 "cleanup" => "cln",
1975 "has_skill_md" => "hsm",
1976 "is_symlink" => "isl",
1977 "issues" => "iss",
1978 "invocation_count" => "inv",
1979 "reasons" => "rsn",
1980 "token_estimate" => "te",
1981 "skill_a" => "sa",
1982 "skill_b" => "sb",
1983 "desc_a" => "da",
1984 "desc_b" => "db",
1985 "annotations" => "ann",
1986 "entity" => "ety",
1987 "suggestion" => "sug",
1988 "columns" => "cols",
1989 "row_count" => "rc",
1990 "notnull" => "nn",
1991 "default_value" => "dv",
1992 "replace_all" => "ra",
1993 other => other,
1994 }
1995}
1996
1997fn terse_transform(val: serde_json::Value) -> serde_json::Value {
1998 match val {
1999 serde_json::Value::Object(map) => {
2000 let mut new_map = serde_json::Map::new();
2001 for (k, v) in map {
2002 new_map.insert(terse_key(&k).to_string(), terse_transform(v));
2003 }
2004 serde_json::Value::Object(new_map)
2005 }
2006 serde_json::Value::Array(arr) => {
2007 serde_json::Value::Array(arr.into_iter().map(terse_transform).collect())
2008 }
2009 other => other,
2010 }
2011}
2012
2013fn ultra_terse_transform(val: serde_json::Value) -> serde_json::Value {
2014 match val {
2015 serde_json::Value::Object(mut map) => {
2016 let is_graph_node =
2017 map.contains_key("id") && map.contains_key("k") && map.contains_key("n");
2018 let is_graph_edge =
2019 map.contains_key("from_id") && map.contains_key("to_id") && map.contains_key("k");
2020 if is_graph_node || is_graph_edge {
2021 map.remove("properties");
2022 map.remove("provenance");
2023 map.remove("freshness");
2024 }
2025 if is_graph_edge
2026 && let Some(serde_json::Value::String(s)) = map.get_mut("k") {
2027 *s = abbreviate_edge_kind(s).to_string();
2028 }
2029 let is_coverage = map.contains_key("mode")
2030 && (map.contains_key("total_sector_count")
2031 || map.contains_key("dirty_sector_count"));
2032 if is_coverage {
2033 map.remove("active_rebuild");
2034 map.remove("completed_dirty_sector_count");
2035 map.remove("mounted_sector_count");
2036 map.remove("rebuilding_sector_count");
2037 map.remove("resumed_sector_count");
2038 map.remove("reused_sector_count");
2039 }
2040 if let Some(serde_json::Value::String(s)) = map.get_mut("sn") {
2041 *s = truncate_for_ultra_terse(s, 80);
2042 }
2043 if let Some(serde_json::Value::String(s)) = map.get_mut("snippet") {
2044 *s = truncate_for_ultra_terse(s, 80);
2045 }
2046 let new_map: serde_json::Map<String, serde_json::Value> = map
2047 .into_iter()
2048 .map(|(k, v)| (k, ultra_terse_transform(v)))
2049 .collect();
2050 serde_json::Value::Object(new_map)
2051 }
2052 serde_json::Value::Array(arr) => {
2053 serde_json::Value::Array(arr.into_iter().map(ultra_terse_transform).collect())
2054 }
2055 other => other,
2056 }
2057}
2058
2059fn edge_index_transform(val: serde_json::Value) -> serde_json::Value {
2060 match val {
2061 serde_json::Value::Object(mut map) => {
2062 let node_ids: Option<Vec<String>> = map.get("nodes").and_then(|nodes| {
2063 nodes.as_array().map(|arr| {
2064 arr.iter()
2065 .filter_map(|n| n.get("id").and_then(|v| v.as_str()).map(String::from))
2066 .collect()
2067 })
2068 });
2069 if let Some(ref ids) = node_ids {
2070 let id_map: std::collections::HashMap<&str, usize> = ids
2071 .iter()
2072 .enumerate()
2073 .map(|(i, id)| (id.as_str(), i))
2074 .collect();
2075 if let Some(serde_json::Value::Array(edges)) = map.get_mut("edges") {
2076 for edge in edges.iter_mut() {
2077 if let serde_json::Value::Object(edge_map) = edge {
2078 if let Some(serde_json::Value::String(fid)) = edge_map.remove("from_id") {
2079 if let Some(&idx) = id_map.get(fid.as_str()) {
2080 edge_map.insert("from".to_string(), serde_json::Value::Number(idx.into()));
2081 } else {
2082 edge_map.insert("from_id".to_string(), serde_json::Value::String(fid));
2083 }
2084 }
2085 if let Some(serde_json::Value::String(tid)) = edge_map.remove("to_id") {
2086 if let Some(&idx) = id_map.get(tid.as_str()) {
2087 edge_map.insert("to".to_string(), serde_json::Value::Number(idx.into()));
2088 } else {
2089 edge_map.insert("to_id".to_string(), serde_json::Value::String(tid));
2090 }
2091 }
2092 }
2093 }
2094 }
2095 }
2096 let new_map: serde_json::Map<String, serde_json::Value> = map
2097 .into_iter()
2098 .map(|(k, v)| (k, edge_index_transform(v)))
2099 .collect();
2100 serde_json::Value::Object(new_map)
2101 }
2102 serde_json::Value::Array(arr) => {
2103 serde_json::Value::Array(arr.into_iter().map(edge_index_transform).collect())
2104 }
2105 other => other,
2106 }
2107}
2108
/// Shortens `s` to at most `max_len` characters, ending in `...` when it
/// had to be cut.
///
/// The limit is measured in characters, matching how the kept prefix is
/// taken. Strings within the limit are returned unchanged, including
/// multi-byte text whose byte length exceeds `max_len`. When `max_len` is
/// below three and the string is too long, the result is just `...`.
fn truncate_for_ultra_terse(s: &str, max_len: usize) -> String {
    // The byte length bounds the character count, so the first test is a
    // cheap fast path; `nth` settles the multi-byte case without a full scan.
    if s.len() <= max_len || s.chars().nth(max_len).is_none() {
        return s.to_string();
    }
    let kept: String = s.chars().take(max_len.saturating_sub(3)).collect();
    format!("{kept}...")
}
2117
2118fn terse_schema_for(val: &serde_json::Value) -> serde_json::Value {
2119 let mut keys = HashSet::new();
2120 collect_terse_keys(val, &mut keys);
2121 let mut schema = serde_json::Map::new();
2122 for (long, short) in TERSE_PAIRS {
2123 if keys.contains(*short) {
2124 schema.insert(
2125 short.to_string(),
2126 serde_json::Value::String(long.to_string()),
2127 );
2128 }
2129 }
2130 serde_json::Value::Object(schema)
2131}
2132
2133fn collect_terse_keys(val: &serde_json::Value, keys: &mut HashSet<String>) {
2134 match val {
2135 serde_json::Value::Object(map) => {
2136 for (k, v) in map {
2137 keys.insert(k.clone());
2138 collect_terse_keys(v, keys);
2139 }
2140 }
2141 serde_json::Value::Array(arr) => {
2142 for v in arr {
2143 collect_terse_keys(v, keys);
2144 }
2145 }
2146 _ => {}
2147 }
2148}
2149
2150fn schema_transform(val: serde_json::Value) -> serde_json::Value {
2151 match val {
2152 serde_json::Value::Array(arr) if arr.len() >= 2 => {
2153 if let Some(cols) = homogeneous_keys(&arr) {
2154 let rows: Vec<serde_json::Value> = arr
2155 .into_iter()
2156 .map(|item| {
2157 if let serde_json::Value::Object(map) = item {
2158 let vals: Vec<serde_json::Value> = cols
2159 .iter()
2160 .map(|c| map.get(c).cloned().unwrap_or(serde_json::Value::Null))
2161 .collect();
2162 serde_json::Value::Array(vals)
2163 } else {
2164 item
2165 }
2166 })
2167 .collect();
2168 let col_vals: Vec<serde_json::Value> =
2169 cols.into_iter().map(serde_json::Value::String).collect();
2170 serde_json::json!({"_c": col_vals, "_r": rows})
2171 } else {
2172 serde_json::Value::Array(arr.into_iter().map(schema_transform).collect())
2173 }
2174 }
2175 serde_json::Value::Array(arr) => {
2176 serde_json::Value::Array(arr.into_iter().map(schema_transform).collect())
2177 }
2178 serde_json::Value::Object(map) => {
2179 let new_map: serde_json::Map<String, serde_json::Value> = map
2180 .into_iter()
2181 .map(|(k, v)| (k, schema_transform(v)))
2182 .collect();
2183 serde_json::Value::Object(new_map)
2184 }
2185 other => other,
2186 }
2187}
2188
2189fn homogeneous_keys(arr: &[serde_json::Value]) -> Option<Vec<String>> {
2190 let first = arr.first()?.as_object()?;
2191 let keys: Vec<String> = first.keys().cloned().collect();
2192 for item in &arr[1..] {
2193 let obj = item.as_object()?;
2194 if obj.len() != keys.len() {
2195 return None;
2196 }
2197 for k in &keys {
2198 if !obj.contains_key(k) {
2199 return None;
2200 }
2201 }
2202 }
2203 Some(keys)
2204}
2205
/// `(long name, short name)` pairs for terse JSON output.
///
/// `terse_schema_for` walks this table to build the legend that maps each
/// short key present in a payload back to its long name, so the entries
/// must agree with the abbreviations applied by `terse_key`.
///
/// NOTE(review): `"success"` and `"healthy"` both abbreviate to `"ok"`, so
/// the legend cannot tell them apart (the later entry overwrites the
/// earlier one when the legend is built), and `"refs"` maps to itself.
const TERSE_PAIRS: &[(&str, &str)] = &[
    ("name", "n"),
    ("kind", "k"),
    ("file", "f"),
    ("line", "l"),
    ("path", "p"),
    ("from", "fr"),
    ("type", "ty"),
    ("text", "tx"),
    ("new", "nw"),
    ("run", "r"),
    ("use", "u"),
    ("score", "sc"),
    ("language", "la"),
    ("status", "st"),
    ("state", "stt"),
    ("error", "err"),
    ("errors", "ers"),
    ("hops", "hp"),
    ("tags", "tg"),
    ("model", "ml"),
    ("skill", "sk"),
    ("count", "ct"),
    ("total", "tot"),
    ("column", "col"),
    ("description", "dsc"),
    ("end_line", "el"),
    ("signature", "sig"),
    ("parent_module", "pm"),
    ("visibility", "vis"),
    ("match_type", "mt"),
    ("caller_file", "cf"),
    ("caller_name", "cn"),
    ("caller_line", "cl"),
    ("callee_name", "en"),
    ("call_site_line", "csl"),
    ("members", "m"),
    ("refs", "refs"),
    ("role", "rl"),
    ("peer", "pr"),
    ("modularity", "q"),
    ("modularity_contribution", "mc"),
    ("iterations", "it"),
    ("node_count", "nc"),
    ("edge_count", "ec"),
    ("community_count", "cc"),
    ("communities", "cms"),
    ("community", "cm"),
    ("community_diagnostics", "cd"),
    ("cache_hit", "cah"),
    ("tagpath_state", "tps"),
    ("tagpath_stale_reason", "tsr"),
    ("annotated_community_count", "acc"),
    ("annotated_member_count", "amc"),
    ("ambiguous_member_count", "ambc"),
    ("ambiguous_members", "amb"),
    ("candidate_count", "cand"),
    ("tagpath_candidate_count", "tcand"),
    ("evidence", "ev"),
    ("chosen_file", "chf"),
    ("symbol", "s"),
    ("symbols", "sy"),
    ("definitions", "df"),
    ("callers", "crs"),
    ("callees", "ces"),
    ("total_tracked", "tt"),
    ("modified", "md"),
    ("deleted", "dl"),
    ("unchanged", "uc"),
    ("changes", "ch"),
    ("prune_stats", "ps"),
    ("hits", "h"),
    ("rank", "rk"),
    ("snippet", "sn"),
    ("confidence", "co"),
    ("index", "ix"),
    ("summaries", "sms"),
    ("recommendations", "rec"),
    ("total_files", "tf"),
    ("stale_files", "sf"),
    ("last_indexed_secs_ago", "age"),
    ("cached_files", "caf"),
    ("total_indexed_files", "tif"),
    ("coverage_pct", "cov"),
    ("symbol_name", "syn"),
    ("file_path", "fp"),
    ("content_hash", "hsh"),
    ("summary", "sum"),
    ("tool", "tl"),
    ("view", "vw"),
    ("truncated", "tr"),
    ("follow_up", "fu"),
    ("report", "rp"),
    ("metrics", "ms"),
    ("label", "lb"),
    ("value", "v"),
    ("command", "cmd"),
    ("exit_code", "xc"),
    ("success", "ok"),
    ("artifact", "art"),
    ("digest", "dg"),
    ("bytes", "bt"),
    ("lines", "lns"),
    ("expand", "xp"),
    ("entities", "ent"),
    ("relationships", "rel"),
    ("concept_labels", "cls"),
    ("extracted_at", "at"),
    ("tokens_input", "ti"),
    ("tokens_output", "tout"),
    ("total_summaries", "ts"),
    ("stale_count", "stc"),
    ("total_tokens_input", "tti"),
    ("total_tokens_output", "tto"),
    ("estimated_tokens_saved", "ets"),
    ("files_processed", "fps"),
    ("symbols_extracted", "se"),
    ("skills_dir", "sd"),
    ("healthy", "ok"),
    ("broken", "brk"),
    ("skills", "sks"),
    ("manifest_diffs", "mdf"),
    ("similar_pairs", "sim"),
    ("usage", "usg"),
    ("cleanup", "cln"),
    ("has_skill_md", "hsm"),
    ("is_symlink", "isl"),
    ("issues", "iss"),
    ("invocation_count", "inv"),
    ("reasons", "rsn"),
    ("token_estimate", "te"),
    ("skill_a", "sa"),
    ("skill_b", "sb"),
    ("desc_a", "da"),
    ("desc_b", "db"),
    ("annotations", "ann"),
    ("entity", "ety"),
    ("suggestion", "sug"),
    ("columns", "cols"),
    ("row_count", "rc"),
    ("notnull", "nn"),
    ("default_value", "dv"),
    ("replace_all", "ra"),
];
2350
/// Strips the `root` directory prefix from `path`, if present.
///
/// The prefix is `root` with trailing `/` characters removed, followed by
/// one `/`. When `path` does not start with that prefix (including when it
/// equals `root` itself), it is returned unchanged.
pub(crate) fn relativize(path: &str, root: &std::path::Path) -> String {
    let root_text = root.to_string_lossy();
    let mut prefix = String::from(root_text.trim_end_matches('/'));
    prefix.push('/');

    match path.strip_prefix(prefix.as_str()) {
        Some(relative) => relative.to_string(),
        None => path.to_string(),
    }
}
2356
2357fn transcript_artifact_root(path: &Path) -> Result<PathBuf> {
2358 let canonical = path
2359 .canonicalize()
2360 .with_context(|| format!("canonicalizing {}", path.display()))?;
2361 let start = if canonical.is_dir() {
2362 canonical.clone()
2363 } else {
2364 canonical
2365 .parent()
2366 .map(Path::to_path_buf)
2367 .unwrap_or_else(|| canonical.clone())
2368 };
2369
2370 for ancestor in start.ancestors() {
2371 if ancestor.join(".git").exists() || ancestor.join(".gitmodules").is_file() {
2372 return Ok(ancestor.to_path_buf());
2373 }
2374 }
2375
2376 Ok(start)
2377}
2378
/// Returns `path` relative to `root`, or `path` unchanged when it does not
/// lie under `root`.
pub(crate) fn relativize_pathbuf(path: &std::path::Path, root: &std::path::Path) -> PathBuf {
    path.strip_prefix(root).unwrap_or(path).to_path_buf()
}
2384
2385pub(crate) fn relativize_edges(edges: &mut [index::StoredEdge], root: &std::path::Path) {
2386 for edge in edges {
2387 edge.caller_file = relativize(&edge.caller_file, root);
2388 }
2389}
2390
2391pub(crate) fn relativize_symbols(symbols: &mut [index::StoredSymbol], root: &std::path::Path) {
2392 for sym in symbols {
2393 sym.file = relativize(&sym.file, root);
2394 }
2395}
2396
2397pub(crate) fn relativize_symbol_hits(hits: &mut [index::SymbolHit], root: &std::path::Path) {
2398 for hit in hits {
2399 hit.file = relativize(&hit.file, root);
2400 }
2401}
2402
2403
/// Selects one end of a call edge.
// NOTE(review): no use of this enum is visible in this part of the file;
// the variant descriptions below are inferred from their names. Confirm
// against the call sites.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EdgeSide {
    /// The calling end of the edge (presumably).
    Caller,
    /// The called end of the edge (presumably).
    Callee,
}
2411
/// Object keys whose string values are treated as file paths by
/// `relativize_json_inner`, which strips the root prefix from them.
const JSON_PATH_KEYS: &[&str] = &["file", "path", "caller_file", "file_path"];
2413
2414pub(crate) fn relativize_json_paths(val: &mut serde_json::Value, root: &std::path::Path) {
2415 let root_str = root.to_string_lossy();
2416 let prefix = format!("{}/", root_str.trim_end_matches('/'));
2417 relativize_json_inner(val, &prefix);
2418}
2419
2420fn relativize_json_inner(val: &mut serde_json::Value, prefix: &str) {
2421 match val {
2422 serde_json::Value::Array(arr) => {
2423 for v in arr {
2424 relativize_json_inner(v, prefix);
2425 }
2426 }
2427 serde_json::Value::Object(map) => {
2428 for (k, v) in map.iter_mut() {
2429 if JSON_PATH_KEYS.contains(&k.as_str())
2430 && let serde_json::Value::String(s) = v
2431 && let Some(rest) = s.strip_prefix(prefix)
2432 {
2433 *s = rest.to_string();
2434 }
2435 relativize_json_inner(v, prefix);
2436 }
2437 }
2438 _ => {}
2439 }
2440}
2441
/// Formats `score` with two decimal places in compact mode and four
/// otherwise.
pub(crate) fn format_score(score: f64, compact: bool) -> String {
    let precision: usize = if compact { 2 } else { 4 };
    format!("{:.*}", precision, score)
}
2449
/// Trims `input` and shortens it to at most `max_chars` characters.
///
/// Text that already fits is returned as-is (trimmed). Longer text is cut to
/// `max_chars - 3` characters followed by `"..."`. When `max_chars` is 3 or
/// less there is no room for any text, so the result is `max_chars` dots;
/// this keeps the output within the requested cap and matches the small-cap
/// handling in `truncate_for_budget`.
pub(crate) fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    let trimmed = input.trim();
    // `nth(max_chars)` only exists when there are more than `max_chars`
    // characters, so long inputs are not scanned to the end just to count.
    if trimmed.char_indices().nth(max_chars).is_none() {
        return trimmed.to_string();
    }
    if max_chars <= 3 {
        return ".".repeat(max_chars);
    }
    // Byte offset of the first character that no longer fits before "...".
    let cut = trimmed
        .char_indices()
        .nth(max_chars - 3)
        .map_or(trimmed.len(), |(offset, _)| offset);
    format!("{}...", &trimmed[..cut])
}
2459
2460pub(crate) fn compact_snippet(snippet: &str) -> Option<String> {
2461 snippet
2462 .lines()
2463 .find(|line| !line.trim().is_empty())
2464 .map(|line| truncate_for_compact(line, 100))
2465}
2466
2467pub(crate) fn compact_members(members: &[graph::CommunityMember], limit: usize) -> String {
2468 let names: Vec<&str> = members.iter().map(|m| m.name.as_str()).collect();
2469 if names.len() <= limit {
2470 return names.join(", ");
2471 }
2472 format!(
2473 "{} (+{} more)",
2474 names[..limit].join(", "),
2475 names.len() - limit
2476 )
2477}
2478
2479pub(crate) fn stable_handle(prefix: &str, key: &str) -> String {
2480 let mut hasher = blake3::Hasher::new();
2481 hasher.update(prefix.as_bytes());
2482 hasher.update(&[0]);
2483 hasher.update(key.as_bytes());
2484 let hex = hasher.finalize().to_hex();
2485 format!("{prefix}-{}", &hex[..10])
2486}
2487
/// Canonical name of a tag family together with its derived tag alias.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CanonicalTagFamily {
    /// Canonical form copied from the `tagpath_family::TagFamily`.
    canonical: String,
    /// Alias built by `canonical_family_from_tagpath_family`: dimension tags
    /// joined with `.`, dimensions joined with `/`.
    tag_alias: String,
}
2493
2494fn canonical_family_from_tagpath_family(
2495 family: tagpath_family::TagFamily,
2496) -> Option<CanonicalTagFamily> {
2497 let tag_alias = if family.dimensions.is_empty() {
2498 family.tags.join("/")
2499 } else {
2500 family
2501 .dimensions
2502 .iter()
2503 .filter(|dimension| !dimension.tags.is_empty())
2504 .map(|dimension| dimension.tags.join("."))
2505 .collect::<Vec<_>>()
2506 .join("/")
2507 };
2508
2509 if tag_alias.is_empty() {
2510 None
2511 } else {
2512 Some(CanonicalTagFamily {
2513 canonical: family.canonical,
2514 tag_alias,
2515 })
2516 }
2517}
2518
2519fn canonical_tag_family_from_name(name: &str) -> Option<CanonicalTagFamily> {
2520 let trimmed = name.trim();
2521 if trimmed.is_empty() {
2522 return None;
2523 }
2524
2525 canonical_family_from_tagpath_family(tagpath_family::generate_family(trimmed))
2526}
2527
2528fn canonical_tag_family_from_tags(tags: &str) -> Option<CanonicalTagFamily> {
2529 let canonical = tags
2530 .split(',')
2531 .map(str::trim)
2532 .filter(|tag| !tag.is_empty())
2533 .collect::<Vec<_>>()
2534 .join("_");
2535 if canonical.is_empty() {
2536 None
2537 } else {
2538 canonical_family_from_tagpath_family(tagpath_family::generate_family(&canonical))
2539 }
2540}
2541
2542pub(crate) fn canonical_tag_family_from_symbol(name: &str, tags: Option<&str>) -> Option<CanonicalTagFamily> {
2543 tags.and_then(canonical_tag_family_from_tags)
2544 .or_else(|| canonical_tag_family_from_name(name))
2545}
2546
2547fn tag_alias_from_name(name: &str) -> Option<String> {
2548 canonical_tag_family_from_name(name).map(|family| family.tag_alias)
2549}
2550
2551fn tag_alias_from_tags(name: &str, tags: Option<&str>) -> Option<String> {
2552 canonical_tag_family_from_symbol(name, tags).map(|family| family.tag_alias)
2553}
2554
/// Turns a tag alias into a space-separated query string.
///
/// The alias is split on `/` and `.`, each piece is trimmed, empty pieces
/// are dropped, and the rest are joined with single spaces. Returns `None`
/// when nothing remains.
pub(crate) fn family_query_from_tag_alias(tag_alias: &str) -> Option<String> {
    let mut query = String::new();
    let pieces = tag_alias
        .split(|ch: char| ch == '/' || ch == '.')
        .map(str::trim)
        .filter(|piece| !piece.is_empty());
    for piece in pieces {
        if !query.is_empty() {
            query.push(' ');
        }
        query.push_str(piece);
    }
    if query.is_empty() {
        None
    } else {
        Some(query)
    }
}
2564
/// Serializable preview of one ontology tag reference.
///
/// `title` and `domain` are omitted from serialized output when `None`.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactOntologyRefPreview {
    /// Stable handle; `ontology_refs_for_alias` derives it from the tag and path.
    handle: String,
    /// Ontology tag name.
    tag: String,
    /// Path of the ontology entry, as produced by `relativize_ontology_path`.
    path: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    title: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    domain: Option<String>,
}
2575
/// Ontology data loaded once and reused while building symbol previews.
#[derive(Clone, Debug)]
struct TagOntologyPreviewContext {
    /// Project path reported by `tagpath_ontology::load_project`; ontology
    /// paths are made relative to it.
    project_root: PathBuf,
    /// Ontology tags keyed by their `tag` string.
    tags: BTreeMap<String, tagpath_ontology::OntologyTag>,
}
2581
/// Serializable compact reference to a symbol.
///
/// `tag_alias` is omitted when `None` and `ontology_refs` when empty.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactSymbolRefPreview {
    /// Stable handle built by `stable_handle`.
    handle: String,
    /// Symbol name, truncated to the byte budget by the builder.
    name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ontology_refs: Vec<CompactOntologyRefPreview>,
}
2591
2592fn build_compact_symbol_ref(
2593 prefix: &str,
2594 key: &str,
2595 name: &str,
2596 tags: Option<&str>,
2597 max_bytes: usize,
2598) -> CompactSymbolRefPreview {
2599 build_compact_symbol_ref_with_ontology(prefix, key, name, tags, max_bytes, None)
2600}
2601
2602fn build_compact_symbol_ref_with_ontology(
2603 prefix: &str,
2604 key: &str,
2605 name: &str,
2606 tags: Option<&str>,
2607 max_bytes: usize,
2608 ontology: Option<&TagOntologyPreviewContext>,
2609) -> CompactSymbolRefPreview {
2610 let tag_alias = tag_alias_from_tags(name, tags);
2611 let ontology_refs = tag_alias
2612 .as_deref()
2613 .map(|alias| ontology_refs_for_alias(ontology, alias))
2614 .unwrap_or_default();
2615 CompactSymbolRefPreview {
2616 handle: stable_handle(prefix, key),
2617 name: truncate_for_budget(name, max_bytes),
2618 tag_alias: tag_alias.map(|alias| truncate_for_budget(&alias, max_bytes)),
2619 ontology_refs,
2620 }
2621}
2622
2623fn load_tag_ontology_preview_context(root: &Path) -> Option<TagOntologyPreviewContext> {
2624 let report = tagpath_ontology::load_project(root).ok()?;
2625 if report.tags.is_empty() {
2626 return None;
2627 }
2628 Some(TagOntologyPreviewContext {
2629 project_root: report.project_path,
2630 tags: report
2631 .tags
2632 .into_iter()
2633 .map(|tag| (tag.tag.clone(), tag))
2634 .collect(),
2635 })
2636}
2637
2638fn ontology_refs_for_alias(
2639 ontology: Option<&TagOntologyPreviewContext>,
2640 alias: &str,
2641) -> Vec<CompactOntologyRefPreview> {
2642 let Some(ontology) = ontology else {
2643 return Vec::new();
2644 };
2645 let mut seen = BTreeSet::new();
2646 alias
2647 .split('/')
2648 .flat_map(|part| part.split('.'))
2649 .map(str::trim)
2650 .filter(|tag| !tag.is_empty())
2651 .filter_map(|tag| {
2652 let key = tag.to_ascii_lowercase();
2653 if !seen.insert(key.clone()) {
2654 return None;
2655 }
2656 let ontology_tag = ontology.tags.get(&key)?;
2657 let path = relativize_ontology_path(&ontology_tag.path, &ontology.project_root);
2658 Some(CompactOntologyRefPreview {
2659 handle: stable_handle("tont", &format!("{}:{path}", ontology_tag.tag)),
2660 tag: ontology_tag.tag.clone(),
2661 path,
2662 title: ontology_tag.title.clone(),
2663 domain: ontology_tag.domain.clone(),
2664 })
2665 })
2666 .collect()
2667}
2668
/// Renders `path` relative to `root` as a forward-slash string.
///
/// When `path` is not under `root` it is used as-is. Backslashes in the lossy
/// string form are replaced with `/`.
fn relativize_ontology_path(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
2675
/// Formats `<handle> <name>`, appending ` tag:<alias>` when an alias is given.
fn format_symbol_preview_line(handle: &str, name: &str, tag_alias: Option<&str>) -> String {
    let mut line = format!("{handle} {name}");
    if let Some(alias) = tag_alias {
        line.push_str(" tag:");
        line.push_str(alias);
    }
    line
}
2682
2683fn format_summary_ref_line(summary: &ContextPackSummaryRefPreview) -> String {
2684 match summary.tag_alias.as_deref() {
2685 Some(alias) => format!(
2686 "{} {} tag:{} expand:{}",
2687 summary.handle, summary.symbol, alias, summary.expand
2688 ),
2689 None => format!(
2690 "{} {} expand:{}",
2691 summary.handle, summary.symbol, summary.expand
2692 ),
2693 }
2694}
2695
2696fn compact_symbol_ref_token(symbol: &CompactSymbolRefPreview) -> String {
2697 match symbol.tag_alias.as_deref() {
2698 Some(alias) => format!("{}@{}", symbol.handle, alias),
2699 None => format!("{}@{}", symbol.handle, symbol.name),
2700 }
2701}
2702
/// Trims `input` and shortens it to at most `max_bytes` bytes.
///
/// Text that fits is returned trimmed. With a budget of three bytes or less
/// the result is `max_bytes` dots. Otherwise the longest whole-character
/// prefix that fits in `max_bytes - 3` bytes is kept and `"..."` appended, so
/// a multi-byte character is never split.
pub(crate) fn truncate_for_budget(input: &str, max_bytes: usize) -> String {
    const ELLIPSIS: &str = "...";

    let trimmed = input.trim();
    if trimmed.len() <= max_bytes {
        return trimmed.to_owned();
    }
    if max_bytes <= ELLIPSIS.len() {
        return ".".repeat(max_bytes);
    }

    let room = max_bytes - ELLIPSIS.len();
    // End offset of the last character that still fits; 0 when none does.
    let cut = trimmed
        .char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
        .take_while(|&boundary| boundary <= room)
        .last()
        .unwrap_or(0);

    let mut out = String::with_capacity(cut + ELLIPSIS.len());
    out.push_str(&trimmed[..cut]);
    out.push_str(ELLIPSIS);
    out
}
2727
/// Result of `build_token_capped_preview`.
struct TokenCappedPreview {
    /// Lines that fit within the token cap.
    preview: Vec<SourceLinePreview>,
    /// Last line number included; equals the requested `end` when not capped.
    capped_end: usize,
    /// Whether the preview stopped early because of the token cap.
    was_capped: bool,
}
2733
2734fn build_token_capped_preview(
2735 all_lines: &[&str],
2736 start: usize,
2737 end: usize,
2738 max_bytes: usize,
2739 token_cap: usize,
2740) -> TokenCappedPreview {
2741 let mut preview = Vec::new();
2742 let mut accumulated_tokens = 0usize;
2743 let mut capped_end = end;
2744 let mut was_capped = false;
2745
2746 for (idx, line) in all_lines[(start - 1)..end].iter().enumerate() {
2747 let truncated = truncate_for_budget(line, max_bytes);
2748 let line_tokens = estimated_tokens_from_bytes(truncated.len());
2749 if accumulated_tokens + line_tokens > token_cap && !preview.is_empty() {
2750 capped_end = start + idx - 1;
2751 was_capped = true;
2752 break;
2753 }
2754 accumulated_tokens += line_tokens;
2755 preview.push(SourceLinePreview {
2756 line: start + idx,
2757 text: truncated,
2758 });
2759 }
2760
2761 TokenCappedPreview {
2762 preview,
2763 capped_end,
2764 was_capped,
2765 }
2766}
2767
/// Maps a symbol kind to its short display form.
///
/// Kinds without a shorter form (for example `struct`, `trait`, `impl`,
/// `mod`, `alias`, or anything unrecognized) are returned unchanged.
pub(crate) fn abbreviate_kind(kind: &str) -> &str {
    // Only kinds whose short form differs from the input are listed.
    const SHORT_FORMS: &[(&str, &str)] = &[
        ("function", "fn"),
        ("method", "meth"),
        ("module", "mod"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
    ];
    SHORT_FORMS
        .iter()
        .find(|(long, _)| *long == kind)
        .map(|&(_, short)| short)
        .unwrap_or(kind)
}
2790
/// Maps an edge kind to its short display code.
///
/// Unrecognized kinds are returned unchanged.
pub(crate) fn abbreviate_edge_kind(kind: &str) -> &str {
    const SHORT_CODES: &[(&str, &str)] = &[
        ("calls", "c"),
        ("defines", "d"),
        ("contains", "ct"),
        ("imports", "i"),
        ("mentions", "m"),
        ("mentions_concept", "mc"),
        ("mentions_entity", "me"),
        ("semantic_relation", "sr"),
        ("belongs_to", "bt"),
        ("scopes_context", "sctx"),
        ("scopes_source", "ssrc"),
        ("requests_context", "rctx"),
        ("explains_result", "er"),
        ("tagged_concept", "tc"),
        ("tagged_entity", "te"),
        ("related_concept", "relc"),
        ("handled_by", "hb"),
        ("defines_route", "dr"),
        ("handles_route", "hr"),
        ("targets", "tgt"),
        ("has_vector_handle", "hv"),
        ("parent", "p"),
        ("child", "ch"),
        ("uses", "u"),
        ("projects_source", "psrc"),
        ("records_memory_source", "rms"),
        ("records_memory_event", "rme"),
        ("has_ast_span", "ha"),
        ("represents_symbol", "rs"),
        ("contains_embedded_symbol", "ces"),
        ("embedded_in_fence", "ef"),
        ("contains_markdown_block", "cmb"),
        ("contains_embedded_code", "cec"),
        ("enclosing_module", "em"),
        ("enclosing_section", "es"),
        ("previous_sibling", "psib"),
        ("next_sibling", "nsib"),
        ("explicit_depends_on", "edo"),
        ("worker_result_follow_up", "wrf"),
        ("shared_resource", "shr"),
        ("community_member", "cm"),
    ];
    SHORT_CODES
        .iter()
        .find(|(long, _)| *long == kind)
        .map(|&(_, short)| short)
        .unwrap_or(kind)
}
2837
/// Shortens a match-type label: `exact_name` becomes `exact` and
/// `partial_tags` becomes `partial`; every other value (including
/// `all_tags`) is returned unchanged.
pub(crate) fn abbreviate_match_type(mt: &str) -> &str {
    if mt == "exact_name" {
        "exact"
    } else if mt == "partial_tags" {
        "partial"
    } else {
        mt
    }
}
2846
2847pub(crate) fn symbol_path_summary(path: &[graph::PathNode]) -> String {
2848 path.iter()
2849 .map(|n| n.name.as_str())
2850 .collect::<Vec<_>>()
2851 .join(" -> ")
2852}
2853
/// Maximum number of sample snippets `group_search_hits` keeps per group.
const SEARCH_GROUP_SAMPLE_LIMIT: usize = 2;
2855
/// Search hits that share one path, as aggregated by `group_search_hits`.
struct SearchHitGroup {
    /// Path shared by the grouped hits (absolute or relativized by the builder).
    path: String,
    /// Lowest rank among the grouped hits.
    first_rank: usize,
    /// Highest score among the grouped hits.
    top_score: f64,
    /// `Debug` rendering of the confidence of the first hit seen for this path.
    confidence: String,
    /// Number of hits in the group.
    hits: usize,
    /// Up to `SEARCH_GROUP_SAMPLE_LIMIT` distinct formatted samples.
    samples: Vec<String>,
}
2864
2865fn format_search_sample(hit: &sift::SearchHit) -> Option<String> {
2866 let snippet = compact_snippet(&hit.snippet)?;
2867 Some(match hit.location.as_deref() {
2868 Some(location) => format!("{location}: {snippet}"),
2869 None => snippet,
2870 })
2871}
2872
2873pub(crate) fn group_search_hits(
2874 hits: &[sift::SearchHit],
2875 root: &Path,
2876 absolute: bool,
2877) -> Vec<SearchHitGroup> {
2878 let mut positions = BTreeMap::new();
2879 let mut groups = Vec::new();
2880 for hit in hits {
2881 let path = if absolute {
2882 hit.path.clone()
2883 } else {
2884 relativize(&hit.path, root)
2885 };
2886 let entry = positions.entry(path.clone()).or_insert_with(|| {
2887 groups.push(SearchHitGroup {
2888 path: path.clone(),
2889 first_rank: hit.rank,
2890 top_score: hit.score,
2891 confidence: format!("{:?}", hit.confidence),
2892 hits: 0,
2893 samples: Vec::new(),
2894 });
2895 groups.len() - 1
2896 });
2897 let group = &mut groups[*entry];
2898 group.hits += 1;
2899 if hit.rank < group.first_rank {
2900 group.first_rank = hit.rank;
2901 }
2902 if hit.score > group.top_score {
2903 group.top_score = hit.score;
2904 }
2905 if let Some(sample) = format_search_sample(hit)
2906 && group.samples.len() < SEARCH_GROUP_SAMPLE_LIMIT
2907 && !group.samples.contains(&sample)
2908 {
2909 group.samples.push(sample);
2910 }
2911 }
2912 groups.sort_by_key(|group| group.first_rank);
2913 groups
2914}
2915
2916pub(crate) fn should_collapse_search_hits(
2917 hits: &[sift::SearchHit],
2918 root: &Path,
2919 absolute: bool,
2920) -> bool {
2921 let groups = group_search_hits(hits, root, absolute);
2922 let max_hits_per_file = groups.iter().map(|group| group.hits).max().unwrap_or(0);
2923 max_hits_per_file >= 3 || (hits.len() >= 6 && groups.len() < hits.len())
2924}
2925
2926pub(crate) fn format_edge_groups(edges: &[index::StoredEdge], use_callers: bool) -> Vec<String> {
2927 let mut grouped: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
2928 for edge in edges {
2929 let key = edge.caller_file.as_str();
2930 let name = if use_callers {
2931 edge.caller_name.as_str()
2932 } else {
2933 edge.callee_name.as_str()
2934 };
2935 let names = grouped.entry(key).or_default();
2936 if !names.contains(&name) {
2937 names.push(name);
2938 }
2939 }
2940
2941 grouped
2942 .into_iter()
2943 .map(|(file, names)| format!(" {} ({}): {}", file, names.len(), names.join(", ")))
2944 .collect()
2945}
2946
2947pub(crate) fn should_collapse_edge_groups(edges: &[index::StoredEdge]) -> bool {
2948 let mut grouped: BTreeMap<&str, usize> = BTreeMap::new();
2949 for edge in edges {
2950 *grouped.entry(edge.caller_file.as_str()).or_default() += 1;
2951 }
2952 let max_hits_per_file = grouped.values().copied().max().unwrap_or(0);
2953 max_hits_per_file >= 3 || (edges.len() >= 6 && grouped.len() < edges.len())
2954}
2955
2956
/// Picks the index that a read-only query under `root` should use.
///
/// Candidates are tried in this order, and the first match wins:
/// 1. An explicit `scope`: a submodule with that name, then a cargo package
///    with that name. If neither matches, `Config::resolve_submodule` is
///    called and any error it returns is propagated.
/// 2. A submodule inferred from `path_hint`.
/// 3. A cargo package inferred from `path_hint`.
/// 4. A submodule inferred by `infer_agent_doc_task_submodule`.
/// 5. The shared root index at `.tsift/index.db`, when that file exists.
/// 6. The (not yet existing) root index, when no submodule dirs are configured.
///
/// # Errors
///
/// Propagates configuration/lookup errors, and fails with a message listing
/// the available and already-indexed scopes when the workspace has submodule
/// scopes but no shared root index.
fn resolve_query_index_target(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<SearchIndexTarget> {
    let cfg = config::Config::load(root)?;
    if let Some(scope_name) = scope {
        if let Some(scope) = config::Config::find_submodule(root, scope_name)? {
            return Ok(SearchIndexTarget {
                label: format!("submodule `{}` index", scope.id),
                db_path: cfg.db_path_for(root, &scope.id),
                source_root: scope.source_root.clone(),
                scope_name: Some(scope.id.clone()),
                reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
            });
        }
        if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
            return Ok(cargo_package_index_target(root, package));
        }
        // NOTE(review): presumably this reports the unknown scope as an error;
        // if it returns Ok, resolution falls through to path inference below.
        config::Config::resolve_submodule(root, scope_name)?;
    }

    if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
        return Ok(SearchIndexTarget {
            label: format!("submodule `{}` index", scope.id),
            db_path: cfg.db_path_for(root, &scope.id),
            source_root: scope.source_root.clone(),
            scope_name: Some(scope.id.clone()),
            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
        });
    }

    if let Some(package) = multiplicity::infer_cargo_package_from_path(root, path_hint)? {
        return Ok(cargo_package_index_target(root, package));
    }

    if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
        return Ok(SearchIndexTarget {
            label: format!("submodule `{}` index", scope.id),
            db_path: cfg.db_path_for(root, &scope.id),
            source_root: scope.source_root.clone(),
            scope_name: Some(scope.id.clone()),
            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
        });
    }

    // No scoped index matched: fall back to the shared root index.
    let db_path = root.join(".tsift/index.db");
    if db_path.exists() {
        return Ok(SearchIndexTarget {
            label: "index".to_string(),
            db_path,
            source_root: root.to_path_buf(),
            scope_name: None,
            reindex_cmd: format!("tsift index {}", root.display()),
        });
    }

    // Without configured submodule dirs the root index is the only candidate,
    // even though its database file does not exist yet.
    let scopes = config::Config::submodule_dirs(root)?;
    if scopes.is_empty() {
        return Ok(SearchIndexTarget {
            label: "index".to_string(),
            db_path,
            source_root: root.to_path_buf(),
            scope_name: None,
            reindex_cmd: format!("tsift index {}", root.display()),
        });
    }

    // Scopes exist but none was selected: build a message naming the options.
    let available_scopes = scopes
        .iter()
        .map(|scope| scope.id.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let indexed_scopes = scopes
        .iter()
        .filter(|scope| cfg.db_path_for(root, &scope.id).exists())
        .map(|scope| scope.id.as_str())
        .collect::<Vec<_>>();
    let indexed_label = if indexed_scopes.is_empty() {
        "none".to_string()
    } else {
        indexed_scopes.join(", ")
    };

    bail!(
        "workspace root {} has no shared root index at {}. Read-only graph queries require `--scope <scope>` when the workspace is indexed into `.tsift/indexes/*/index.db`. Available scopes: {}. Indexed scopes: {}.",
        root.display(),
        db_path.display(),
        available_scopes,
        indexed_label
    );
}
3049
3050pub(crate) fn resolve_query_db_path(root: &Path, path_hint: &Path, scope: Option<&str>) -> Result<PathBuf> {
3051 Ok(resolve_query_index_target(root, path_hint, scope)?.db_path)
3052}
3053
3054fn ensure_query_index_current(root: &Path, target: &SearchIndexTarget) -> Result<()> {
3055 let state = inspect_search_index(target)?;
3056 let Some(reason) = index_reason_for_state(state) else {
3057 return Ok(());
3058 };
3059
3060 match apply_search_index_update(root, target) {
3061 Ok(_) => {
3062 index::inspect_scope_invalidate_all();
3063 Ok(())
3064 }
3065 Err(err) if is_active_writer_lock_error(&err) && target.db_path.exists() => {
3066 eprintln!(
3067 "note: active tsift writer detected; skipping graph-query autoindex because {}. \
3068 Continuing with the current read-only index snapshot; graph results may lag. \
3069 Retry `{}` after the active writer finishes for fresh graph results.",
3070 index_reason_detail(target, reason),
3071 target.reindex_cmd
3072 );
3073 Ok(())
3074 }
3075 Err(err) => Err(err),
3076 }
3077}
3078
3079pub(crate) fn open_index_db(path: &std::path::Path, scope: Option<&str>) -> Result<index::IndexDb> {
3080 let root = lint::resolve_project_root_or_canonical_path(path)?;
3081 let target = resolve_query_index_target(&root, path, scope)?;
3082 ensure_query_index_current(&root, &target)?;
3083 let db_path = target.db_path;
3084 if !db_path.exists() {
3085 bail!(
3086 "no index found at {}. Run `tsift index` first.",
3087 db_path.display()
3088 );
3089 }
3090 index::IndexDb::open_read_only_resilient(&db_path)
3091}
3092
3093pub(crate) fn query_tagpath_root(
3094 root: &std::path::Path,
3095 path_hint: &std::path::Path,
3096 scope: Option<&str>,
3097) -> Result<PathBuf> {
3098 if let Some(scope_name) = scope {
3099 if let Some(scope) = config::Config::find_submodule(root, scope_name)? {
3100 return Ok(scope.source_root);
3101 }
3102 if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
3103 return Ok(package.package_root);
3104 }
3105 config::Config::resolve_submodule(root, scope_name)?;
3106 }
3107 if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
3108 return Ok(scope.source_root);
3109 }
3110 if let Some(package) = multiplicity::infer_cargo_package_from_path(root, path_hint)? {
3111 return Ok(package.package_root);
3112 }
3113 Ok(root.to_path_buf())
3114}
3115
/// One node of the traversal graph.
///
/// Optional fields are omitted from serialized output when `None`, and
/// `properties` when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalNode {
    /// Node handle; `TraversalGraphBuild` keys its node map by this value.
    handle: String,
    kind: String,
    label: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    ref_id: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    line: Option<i64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    properties: BTreeMap<String, String>,
    expand: String,
}
3133
/// One directed, typed edge of the traversal graph.
///
/// `label` is omitted from serialized output when `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalEdge {
    /// Handle of the source node.
    from: String,
    /// Handle of the destination node.
    to: String,
    relation: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    label: Option<String>,
    weight: usize,
}
3143
/// Accumulator used while assembling a traversal graph.
#[derive(Clone, Debug, Default)]
struct TraversalGraphBuild {
    /// Nodes keyed by handle.
    nodes: BTreeMap<String, TraversalNode>,
    /// Edges in insertion order.
    edges: Vec<TraversalEdge>,
    /// `(from, to, relation)` triples already added, used to skip duplicates.
    edge_keys: BTreeSet<(String, String, String)>,
    warnings: Vec<String>,
}
3151
// Version/contract identifier strings and graph-db limits. Their consumers
// are outside this section of the file; the names are the only documentation
// of intent visible here.
pub(crate) const GRAPH_PROJECTION_VERSION: &str = "tsift-traversal-v1";
const GRAPH_DB_EVIDENCE_CONTRACT_VERSION: &str = "graph-db-evidence-v1";
const WORKER_PROMPT_PACKET_CONTRACT_VERSION: &str = "worker-prompt-packet-v1";
const CONFLICT_MATRIX_CONTRACT_VERSION: &str = "conflict-matrix-v1";
const CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION: &str =
    "context-pack-graph-orchestration-v1";
const SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION: &str = "session-review-follow-up-v1";
const DISPATCH_TRACE_CONTRACT_VERSION: &str = "dispatch-trace-v1";
const DEPENDENCY_DAG_CONTRACT_VERSION: &str = "dependency-dag-v1";
const GRAPH_PROJECTION_META_KIND: &str = "projection_meta";
const GRAPH_DB_RANKED_NEIGHBOR_CAP: usize = 12;
const GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP: usize = 16;
const GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP: usize = 64;
3165
/// Node and edge counts reported alongside a traversal.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalTotals {
    nodes: usize,
    edges: usize,
}
3171
/// Serializable description of a path between two traversal nodes.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalPathReport {
    /// Start node of the path.
    from: TraversalNode,
    /// End node of the path.
    to: TraversalNode,
    hops: usize,
    nodes: Vec<TraversalNode>,
    edges: Vec<TraversalEdge>,
}
3180
/// Serializable recommendation entry of a traversal report: a node handle,
/// kind and label with a reason, a score, and an `expand` string.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalRecommendation {
    handle: String,
    kind: String,
    label: String,
    reason: String,
    score: usize,
    expand: String,
}
3190
/// Top-level serializable result of a traversal query.
///
/// `scope`, `query`, `target` and `shortest_path` are omitted from serialized
/// output when `None`, and `warnings` when empty.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    mode: String,
    totals: TraversalTotals,
    #[serde(skip_serializing_if = "Option::is_none")]
    query: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    target: Option<String>,
    nodes: Vec<TraversalNode>,
    edges: Vec<TraversalEdge>,
    #[serde(skip_serializing_if = "Option::is_none")]
    shortest_path: Option<TraversalPathReport>,
    recommendations: Vec<TraversalRecommendation>,
    exploration: ExplorationPacket,
    truncated: bool,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
3212
/// Serializable result of a semantic-related query.
///
/// `scope` is omitted from serialized output when `None`, and `warnings`
/// when empty.
#[derive(Debug, Serialize, PartialEq)]
struct SemanticRelatedReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    query: String,
    embedding_model: String,
    count: usize,
    items: Vec<SemanticRelatedItem>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
3225
/// One scored entry of a [`SemanticRelatedReport`].
///
/// `file_path`, `source_symbol` and `detail` are omitted from serialized
/// output when `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct SemanticRelatedItem {
    handle: String,
    kind: String,
    label: String,
    score: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    file_path: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_symbol: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    expand: String,
}
3240
/// Symbol entry of the traversal lookup index: a handle, its node, and a set
/// of tokens.
#[derive(Clone)]
struct TraversalSymbolIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
3247
/// File entry of the traversal lookup index: a handle, its node, and a set
/// of tokens.
#[derive(Clone)]
struct TraversalFileIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
3254
/// Route entry of the traversal lookup index: a handle, its node, and a set
/// of tokens.
#[derive(Clone)]
struct TraversalRouteIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
3261
/// AST-span entry of the traversal lookup index.
///
/// Holds the span's own handle, the handle of the symbol it belongs to, an
/// optional file handle, and descriptive fields including the byte range.
#[derive(Clone)]
struct TraversalAstSpanIndexEntry {
    handle: String,
    symbol_handle: String,
    file_handle: Option<String>,
    file: String,
    name: String,
    kind: String,
    language: String,
    node_kind: String,
    // NOTE(review): whether `end_byte` is inclusive or exclusive is not
    // visible here — confirm against the producer.
    start_byte: usize,
    end_byte: usize,
    parent_module: Option<String>,
    markdown: Option<MarkdownSpanMetadata>,
}
3277
/// Multiplicity entry of the traversal lookup index: a handle, its node, and
/// a set of tokens.
#[derive(Clone)]
struct TraversalMultiplicityIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
3284
/// Borrowed view over the traversal index entries plus string-keyed lookup
/// maps.
struct TraversalCodeLookup<'a> {
    symbols: &'a [TraversalSymbolIndexEntry],
    files: &'a [TraversalFileIndexEntry],
    routes: &'a [TraversalRouteIndexEntry],
    multiplicities: &'a [TraversalMultiplicityIndexEntry],
    // NOTE(review): the `usize` values are presumably positions into the
    // matching slice above, keyed by token — confirm against the builder.
    symbol_index: HashMap<String, Vec<usize>>,
    file_index: HashMap<String, Vec<usize>>,
    route_index: HashMap<String, Vec<usize>>,
    multiplicity_index: HashMap<String, Vec<usize>>,
    // NOTE(review): presumably maps a file path to its handle — confirm.
    file_path_index: HashMap<String, String>,
}
3296
/// Limits carried by an [`ExplorationPacket`]: a project-size label, source
/// window count and size, and a relationship limit.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationBudget {
    project_size: String,
    max_source_windows: usize,
    lines_per_window: usize,
    relationship_limit: usize,
}
3304
/// One `from --relation--> to` entry of an exploration relationship map.
///
/// `label` is omitted from serialized output when `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationRelation {
    from: String,
    relation: String,
    to: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    label: Option<String>,
}
3313
/// A source window of an exploration packet: a file, a `start`/`end` range,
/// the reason it was chosen, and an `expand` string.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationSourceWindow {
    handle: String,
    file: String,
    start: usize,
    end: usize,
    reason: String,
    expand: String,
}
3323
/// Worker-context entry of an exploration packet: a handle, a target, a
/// summary, and an `expand` string.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationWorkerContext {
    handle: String,
    target: String,
    summary: String,
    expand: String,
}
3331
/// Exploration guidance attached to a [`TraversalReport`].
///
/// `worker_context` is omitted from serialized output when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationPacket {
    budget: ExplorationBudget,
    relationship_map: Vec<ExplorationRelation>,
    source_windows: Vec<ExplorationSourceWindow>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    worker_context: Vec<ExplorationWorkerContext>,
    no_reread_guidance: String,
}
3341
3342impl TraversalGraphBuild {
3343 fn add_node(&mut self, node: TraversalNode) {
3344 self.nodes.entry(node.handle.clone()).or_insert(node);
3345 }
3346
3347 fn add_edge(
3348 &mut self,
3349 from: &str,
3350 to: &str,
3351 relation: &str,
3352 label: Option<String>,
3353 weight: usize,
3354 ) {
3355 if from == to || !self.nodes.contains_key(from) || !self.nodes.contains_key(to) {
3356 return;
3357 }
3358 let key = (from.to_string(), to.to_string(), relation.to_string());
3359 if self.edge_keys.insert(key) {
3360 self.edges.push(TraversalEdge {
3361 from: from.to_string(),
3362 to: to.to_string(),
3363 relation: relation.to_string(),
3364 label,
3365 weight,
3366 });
3367 }
3368 }
3369}
3370
/// Returns the graph database path under `root`.
///
/// With a scope this is `.tsift/indexes/<scope>/graph.db`; without one it is
/// `.tsift/graph.db`.
pub(crate) fn graph_substrate_db_path(root: &Path, scope: Option<&str>) -> PathBuf {
    let Some(scope) = scope else {
        return root.join(".tsift/graph.db");
    };
    root.join(".tsift/indexes").join(scope).join("graph.db")
}
3377
/// Returns the projection metadata id for `scope`, using `root` as the label
/// when no scope is given.
fn graph_projection_meta_id(scope: Option<&str>) -> String {
    let scope_label = match scope {
        Some(name) => name,
        None => "root",
    };
    format!("projection:tsift-traversal:{scope_label}")
}
3381
3382pub(crate) fn content_hash<T: Serialize>(value: &T) -> Result<String> {
3383 let bytes = serde_json::to_vec(value)?;
3384 Ok(blake3::hash(&bytes).to_hex().to_string())
3385}
3386
3387fn node_with_content_freshness(mut node: SubstrateGraphNode) -> Result<SubstrateGraphNode> {
3388 let mut hashable = node.clone();
3389 hashable.freshness = None;
3390 node.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
3391 Ok(node)
3392}
3393
3394fn edge_with_content_freshness(mut edge: SubstrateGraphEdge) -> Result<SubstrateGraphEdge> {
3395 let mut hashable = edge.clone();
3396 hashable.freshness = None;
3397 edge.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
3398 Ok(edge)
3399}
3400
/// Number of components in the vectors produced by `semantic_embedding`.
const SEMANTIC_EMBEDDING_DIM: usize = 32;
/// Model name stored in the `embedding_model` property of semantic nodes.
const SEMANTIC_EMBEDDING_MODEL: &str = "tsift-local-hash-v1";
// NOTE(review): consumer not visible in this section; by its name, a
// per-table row limit for claude-mem graph import — confirm.
const CLAUDE_MEM_GRAPH_LIMIT_PER_TABLE: usize = 200;
3404
3405fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
3406 match kind {
3407 SemanticRelatedKind::Concept => "concept",
3408 SemanticRelatedKind::Entity => "entity",
3409 SemanticRelatedKind::All => "all",
3410 }
3411}
3412
3413fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
3414 format!(
3415 "tsift semantic {} --path {} --kind {} --limit 10",
3416 shell_quote(query),
3417 shell_quote(root.to_string_lossy().as_ref()),
3418 semantic_related_kind_name(kind)
3419 )
3420}
3421
3422fn semantic_embedding(input: &str) -> Vec<f64> {
3423 let mut vector = vec![0.0; SEMANTIC_EMBEDDING_DIM];
3424 let mut tokens = traversal_tokens(input);
3425 if tokens.is_empty() {
3426 let trimmed = input.trim().to_ascii_lowercase();
3427 if !trimmed.is_empty() {
3428 tokens.insert(trimmed);
3429 }
3430 }
3431
3432 for token in tokens {
3433 let hash = blake3::hash(token.as_bytes());
3434 let bytes = hash.as_bytes();
3435 let idx = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
3436 let sign = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
3437 vector[idx] += sign;
3438 }
3439
3440 let norm = vector.iter().map(|value| value * value).sum::<f64>().sqrt();
3441 if norm > 0.0 {
3442 for value in &mut vector {
3443 *value /= norm;
3444 }
3445 }
3446 vector
3447}
3448
3449fn semantic_embedding_property(input: &str) -> String {
3450 semantic_embedding(input)
3451 .iter()
3452 .map(|value| format!("{value:.6}"))
3453 .collect::<Vec<_>>()
3454 .join(",")
3455}
3456
3457fn parse_semantic_embedding_property(value: &str) -> Option<Vec<f64>> {
3458 let parsed = value
3459 .split(',')
3460 .map(str::trim)
3461 .map(str::parse::<f64>)
3462 .collect::<std::result::Result<Vec<_>, _>>()
3463 .ok()?;
3464 (parsed.len() == SEMANTIC_EMBEDDING_DIM).then_some(parsed)
3465}
3466
/// Returns the dot product of two equal-length vectors, or `0.0` when the
/// lengths differ.
///
/// No normalization is done here, so the result is a cosine similarity only
/// when both inputs are already unit-length.
fn semantic_cosine(left: &[f64], right: &[f64]) -> f64 {
    if left.len() == right.len() {
        left.iter()
            .zip(right)
            .map(|(lhs, rhs)| lhs * rhs)
            .sum::<f64>()
    } else {
        0.0
    }
}
3476
3477fn semantic_entity_handle(name: &str, kind: &str) -> String {
3478 stable_handle(
3479 "gent",
3480 &format!(
3481 "entity:{}:{}",
3482 kind.trim().to_ascii_lowercase(),
3483 name.trim().to_ascii_lowercase()
3484 ),
3485 )
3486}
3487
3488fn semantic_concept_handle(label: &str) -> String {
3489 stable_handle(
3490 "gcon",
3491 &format!("concept:{}", label.trim().to_ascii_lowercase()),
3492 )
3493}
3494
3495fn summary_source_handles(
3496 summary: &summarize::Summary,
3497 file_node_by_path: &BTreeMap<String, String>,
3498 symbol_node_by_file_label: &BTreeMap<(String, String), String>,
3499) -> Vec<String> {
3500 let mut handles = Vec::new();
3501 if let Some(handle) = file_node_by_path.get(&summary.file_path) {
3502 handles.push(handle.clone());
3503 }
3504 if let Some(handle) =
3505 symbol_node_by_file_label.get(&(summary.file_path.clone(), summary.symbol_name.clone()))
3506 && !handles.iter().any(|existing| existing == handle)
3507 {
3508 handles.push(handle.clone());
3509 }
3510 handles
3511}
3512
3513fn semantic_entity_node(
3514 root: &Path,
3515 summary: &summarize::Summary,
3516 name: &str,
3517 kind: &str,
3518 description: &str,
3519 provenance: &GraphProvenance,
3520) -> SubstrateGraphNode {
3521 let handle = semantic_entity_handle(name, kind);
3522 let detail = if description.trim().is_empty() {
3523 format!("{kind} entity from cached summaries")
3524 } else {
3525 format!("{kind}: {description}")
3526 };
3527 SubstrateGraphNode::new(handle.clone(), "semantic_entity", name.to_string())
3528 .with_property("handle", handle)
3529 .with_property("ref_id", name.to_string())
3530 .with_property("detail", detail)
3531 .with_property("entity_kind", kind.to_string())
3532 .with_property("description", description.to_string())
3533 .with_property("source_file", summary.file_path.clone())
3534 .with_property("source_symbol", summary.symbol_name.clone())
3535 .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
3536 .with_property(
3537 "embedding",
3538 semantic_embedding_property(&format!("{name} {kind} {description}")),
3539 )
3540 .with_property(
3541 "expand",
3542 semantic_related_command(root, name, SemanticRelatedKind::Entity),
3543 )
3544 .with_provenance(provenance.clone())
3545}
3546
3547fn semantic_concept_node(
3548 root: &Path,
3549 summary: &summarize::Summary,
3550 label: &str,
3551 provenance: &GraphProvenance,
3552) -> SubstrateGraphNode {
3553 let handle = semantic_concept_handle(label);
3554 SubstrateGraphNode::new(handle.clone(), "semantic_concept", label.to_string())
3555 .with_property("handle", handle)
3556 .with_property("ref_id", label.to_string())
3557 .with_property("detail", "concept label from cached summaries".to_string())
3558 .with_property("source_file", summary.file_path.clone())
3559 .with_property("source_symbol", summary.symbol_name.clone())
3560 .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
3561 .with_property("embedding", semantic_embedding_property(label))
3562 .with_property(
3563 "expand",
3564 semantic_related_command(root, label, SemanticRelatedKind::Concept),
3565 )
3566 .with_provenance(provenance.clone())
3567}
3568
3569fn insert_semantic_edge(
3570 edge_map: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
3571 edge: SubstrateGraphEdge,
3572) {
3573 edge_map
3574 .entry((edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone()))
3575 .or_insert(edge);
3576}
3577
3578fn memory_event_key(event: &MemoryEvent) -> String {
3579 match (event.imported_from.as_deref(), event.imported_id.as_deref()) {
3580 (Some(imported_from), Some(imported_id)) => {
3581 format!("{imported_from}:{imported_id}")
3582 }
3583 _ => event.stable_id(),
3584 }
3585}
3586
3587fn memory_event_label(event: &MemoryEvent) -> String {
3588 let first_line = event
3589 .text
3590 .lines()
3591 .map(str::trim)
3592 .find(|line| !line.is_empty())
3593 .unwrap_or(event.kind.as_str());
3594 match event.kind.as_str() {
3595 "imported_observation" => {
3596 let observation_type = event
3597 .metadata
3598 .get("observation_type")
3599 .map(String::as_str)
3600 .unwrap_or("observation");
3601 truncate_for_compact(&format!("{observation_type}: {first_line}"), 80)
3602 }
3603 "imported_session_summary" => truncate_for_compact(&format!("summary: {first_line}"), 80),
3604 "imported_user_prompt" => truncate_for_compact(&format!("prompt: {first_line}"), 80),
3605 _ => truncate_for_compact(first_line, 80),
3606 }
3607}
3608
3609fn append_tsift_memory_graph_projection_rows(
3610 root: &Path,
3611 nodes: &mut Vec<SubstrateGraphNode>,
3612 edges: &mut Vec<SubstrateGraphEdge>,
3613) -> Result<()> {
3614 let memory_db = default_memory_db_path(root);
3615 if !memory_db.exists() {
3616 return Ok(());
3617 }
3618 let events = match read_memory_events(&memory_db, CLAUDE_MEM_GRAPH_LIMIT_PER_TABLE * 3) {
3619 Ok(events) => events,
3620 Err(_) => return Ok(()),
3621 };
3622 if events.is_empty() {
3623 return Ok(());
3624 }
3625
3626 let mut seen_sessions = BTreeSet::new();
3627 let mut edge_map = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
3628
3629 for event in &events {
3630 let event_id = event.stable_id();
3631 let event_key = memory_event_key(event);
3632 let source_handle = stable_handle("tmemsrc", &event_key);
3633 let semantic_handle = stable_handle("tmemsem", &event_key);
3634 let provenance = GraphProvenance::new("tsift-memory", &event.source_ref);
3635 let imported_from = event.imported_from.as_deref().unwrap_or("native");
3636
3637 if let Some(session_id) = &event.session_id {
3638 let session_handle =
3639 format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
3640 if seen_sessions.insert(session_id.clone()) {
3641 let session_node = SubstrateGraphNode::new(
3642 session_handle.clone(),
3643 "memory_session",
3644 truncate_for_compact(session_id, 80),
3645 )
3646 .with_property("handle", session_handle.clone())
3647 .with_property("ref_id", session_id.clone())
3648 .with_property("session_id", session_id.clone())
3649 .with_property("provider", "tsift-memory")
3650 .with_property(
3651 "expand",
3652 format!(
3653 "tsift memory status {} --json",
3654 shell_quote(root.to_string_lossy().as_ref())
3655 ),
3656 )
3657 .with_provenance(provenance.clone());
3658 nodes.push(node_with_content_freshness(session_node)?);
3659 }
3660
3661 insert_semantic_edge(
3662 &mut edge_map,
3663 SubstrateGraphEdge::new(
3664 session_handle.clone(),
3665 event_id.clone(),
3666 "records_memory_event",
3667 )
3668 .with_property("label", "tsift-memory session event")
3669 .with_provenance(provenance.clone()),
3670 );
3671 insert_semantic_edge(
3672 &mut edge_map,
3673 SubstrateGraphEdge::new(
3674 session_handle,
3675 source_handle.clone(),
3676 "records_memory_source",
3677 )
3678 .with_property("label", "tsift-memory session source")
3679 .with_provenance(provenance.clone()),
3680 );
3681 }
3682
3683 let label = memory_event_label(event);
3684 let mut event_node =
3685 SubstrateGraphNode::new(event_id.clone(), "memory_event", event.kind.as_str())
3686 .with_property("handle", event_id.clone())
3687 .with_property("ref_id", event.source_ref.clone())
3688 .with_property("source_ref", event.source_ref.clone())
3689 .with_property("provider", "tsift-memory")
3690 .with_property("memory_kind", event.kind.as_str())
3691 .with_property("imported_from", imported_from)
3692 .with_property("text_preview", truncate_for_compact(&event.text, 240))
3693 .with_property("token_estimate", event.token_estimate.to_string())
3694 .with_property(
3695 "expand",
3696 format!(
3697 "tsift memory status {} --json",
3698 shell_quote(root.to_string_lossy().as_ref())
3699 ),
3700 )
3701 .with_provenance(provenance.clone());
3702 if let Some(session_id) = &event.session_id {
3703 event_node = event_node.with_property("session_id", session_id.clone());
3704 }
3705 if let Some(observed_at_unix) = event.observed_at_unix {
3706 event_node = event_node.with_property("observed_at_unix", observed_at_unix.to_string());
3707 }
3708 if let Some(imported_id) = &event.imported_id {
3709 event_node = event_node.with_property("imported_id", imported_id.clone());
3710 }
3711 nodes.push(node_with_content_freshness(event_node)?);
3712
3713 let mut source_node =
3714 SubstrateGraphNode::new(source_handle.clone(), "source_handle", label.clone())
3715 .with_property("handle", source_handle.clone())
3716 .with_property("ref_id", event.source_ref.clone())
3717 .with_property("source_ref", event.source_ref.clone())
3718 .with_property("provider", "tsift-memory")
3719 .with_property("memory_kind", event.kind.as_str())
3720 .with_property("imported_from", imported_from)
3721 .with_property("text_preview", truncate_for_compact(&event.text, 240))
3722 .with_property("token_estimate", event.token_estimate.to_string())
3723 .with_property(
3724 "expand",
3725 format!(
3726 "tsift memory status {} --json",
3727 shell_quote(root.to_string_lossy().as_ref())
3728 ),
3729 )
3730 .with_provenance(provenance.clone());
3731 if let Some(session_id) = &event.session_id {
3732 source_node = source_node.with_property("session_id", session_id.clone());
3733 }
3734 if let Some(observed_at_unix) = event.observed_at_unix {
3735 source_node =
3736 source_node.with_property("observed_at_unix", observed_at_unix.to_string());
3737 }
3738 if let Some(imported_id) = &event.imported_id {
3739 source_node = source_node.with_property("imported_id", imported_id.clone());
3740 }
3741 nodes.push(node_with_content_freshness(source_node)?);
3742
3743 insert_semantic_edge(
3744 &mut edge_map,
3745 SubstrateGraphEdge::new(event_id.clone(), source_handle.clone(), "projects_source")
3746 .with_property("label", "tsift-memory source projection")
3747 .with_provenance(provenance.clone()),
3748 );
3749
3750 let semantic_text = format!("{} {}", label, event.text);
3751 let semantic_node =
3752 SubstrateGraphNode::new(semantic_handle.clone(), "semantic_concept", label.clone())
3753 .with_property("handle", semantic_handle.clone())
3754 .with_property("ref_id", event.source_ref.clone())
3755 .with_property("detail", "semantic row from tsift-memory")
3756 .with_property("source_ref", event.source_ref.clone())
3757 .with_property("provider", "tsift-memory")
3758 .with_property("memory_kind", event.kind.as_str())
3759 .with_property("imported_from", imported_from)
3760 .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
3761 .with_property("embedding", semantic_embedding_property(&semantic_text))
3762 .with_property(
3763 "expand",
3764 semantic_related_command(root, &label, SemanticRelatedKind::Concept),
3765 )
3766 .with_provenance(provenance.clone());
3767 nodes.push(node_with_content_freshness(semantic_node)?);
3768
3769 insert_semantic_edge(
3770 &mut edge_map,
3771 SubstrateGraphEdge::new(
3772 source_handle.clone(),
3773 semantic_handle.clone(),
3774 "mentions_concept",
3775 )
3776 .with_property("label", "tsift-memory semantic source")
3777 .with_provenance(provenance.clone()),
3778 );
3779 }
3780
3781 for edge in edge_map.into_values() {
3782 edges.push(edge_with_content_freshness(edge)?);
3783 }
3784
3785 Ok(())
3786}
3787
3788fn append_summary_semantic_projection_rows(
3789 root: &Path,
3790 graph: &TraversalGraphBuild,
3791 provenance: &GraphProvenance,
3792 nodes: &mut Vec<SubstrateGraphNode>,
3793 edges: &mut Vec<SubstrateGraphEdge>,
3794) -> Result<()> {
3795 let summaries_db = root.join(".tsift/summaries.db");
3796 if !summaries_db.exists() {
3797 return Ok(());
3798 }
3799
3800 let summary_db = summarize::SummaryDb::open_read_only_resilient(&summaries_db)?;
3801 let summaries = summary_db.all()?;
3802 if summaries.is_empty() {
3803 return Ok(());
3804 }
3805
3806 let file_node_by_path = graph
3807 .nodes
3808 .values()
3809 .filter(|node| node.kind == "file")
3810 .filter_map(|node| {
3811 node.path
3812 .as_ref()
3813 .map(|path| (path.clone(), node.handle.clone()))
3814 })
3815 .collect::<BTreeMap<_, _>>();
3816 let symbol_node_by_file_label = graph
3817 .nodes
3818 .values()
3819 .filter(|node| node.kind == "symbol")
3820 .filter_map(|node| {
3821 Some((
3822 (node.path.clone()?, node.label.clone()),
3823 node.handle.clone(),
3824 ))
3825 })
3826 .collect::<BTreeMap<_, _>>();
3827
3828 let mut semantic_nodes = BTreeMap::<String, SubstrateGraphNode>::new();
3829 let mut semantic_edges = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
3830
3831 for summary in &summaries {
3832 let source_handles =
3833 summary_source_handles(summary, &file_node_by_path, &symbol_node_by_file_label);
3834 let mut entity_ids_by_name = BTreeMap::<String, String>::new();
3835
3836 if let Some(entities) = &summary.entities {
3837 for entity in entities {
3838 let node = semantic_entity_node(
3839 root,
3840 summary,
3841 &entity.name,
3842 &entity.kind,
3843 &entity.description,
3844 provenance,
3845 );
3846 let entity_id = node.id.clone();
3847 entity_ids_by_name.insert(entity.name.to_ascii_lowercase(), entity_id.clone());
3848 semantic_nodes.entry(entity_id.clone()).or_insert(node);
3849
3850 for source_handle in &source_handles {
3851 insert_semantic_edge(
3852 &mut semantic_edges,
3853 SubstrateGraphEdge::new(
3854 source_handle.clone(),
3855 entity_id.clone(),
3856 "mentions_entity",
3857 )
3858 .with_property("label", format!("summary entity: {}", entity.name))
3859 .with_property("source_file", summary.file_path.clone())
3860 .with_provenance(provenance.clone()),
3861 );
3862 }
3863 }
3864 }
3865
3866 let mut concept_ids = Vec::new();
3867 if let Some(labels) = &summary.concept_labels {
3868 for label in labels
3869 .iter()
3870 .map(|label| label.trim())
3871 .filter(|label| !label.is_empty())
3872 {
3873 let node = semantic_concept_node(root, summary, label, provenance);
3874 let concept_id = node.id.clone();
3875 semantic_nodes.entry(concept_id.clone()).or_insert(node);
3876 concept_ids.push(concept_id.clone());
3877
3878 for source_handle in &source_handles {
3879 insert_semantic_edge(
3880 &mut semantic_edges,
3881 SubstrateGraphEdge::new(
3882 source_handle.clone(),
3883 concept_id.clone(),
3884 "mentions_concept",
3885 )
3886 .with_property("label", format!("summary concept: {label}"))
3887 .with_property("source_file", summary.file_path.clone())
3888 .with_provenance(provenance.clone()),
3889 );
3890 }
3891 }
3892 }
3893
3894 for entity_id in entity_ids_by_name.values() {
3895 for concept_id in &concept_ids {
3896 insert_semantic_edge(
3897 &mut semantic_edges,
3898 SubstrateGraphEdge::new(
3899 entity_id.clone(),
3900 concept_id.clone(),
3901 "tagged_concept",
3902 )
3903 .with_property("label", "entity concept label".to_string())
3904 .with_property("source_file", summary.file_path.clone())
3905 .with_provenance(provenance.clone()),
3906 );
3907 }
3908 }
3909
3910 for idx in 0..concept_ids.len() {
3911 for next_idx in (idx + 1)..concept_ids.len() {
3912 insert_semantic_edge(
3913 &mut semantic_edges,
3914 SubstrateGraphEdge::new(
3915 concept_ids[idx].clone(),
3916 concept_ids[next_idx].clone(),
3917 "related_concept",
3918 )
3919 .with_property("label", format!("co-occurs in {}", summary.symbol_name))
3920 .with_property("source_file", summary.file_path.clone())
3921 .with_provenance(provenance.clone()),
3922 );
3923 }
3924 }
3925
3926 if let Some(relationships) = &summary.relationships {
3927 for relationship in relationships {
3928 let from_id = entity_ids_by_name
3929 .get(&relationship.from.to_ascii_lowercase())
3930 .cloned()
3931 .unwrap_or_else(|| {
3932 let node = semantic_entity_node(
3933 root,
3934 summary,
3935 &relationship.from,
3936 "unknown",
3937 "",
3938 provenance,
3939 );
3940 let id = node.id.clone();
3941 semantic_nodes.entry(id.clone()).or_insert(node);
3942 id
3943 });
3944 let to_id = entity_ids_by_name
3945 .get(&relationship.to.to_ascii_lowercase())
3946 .cloned()
3947 .unwrap_or_else(|| {
3948 let node = semantic_entity_node(
3949 root,
3950 summary,
3951 &relationship.to,
3952 "unknown",
3953 "",
3954 provenance,
3955 );
3956 let id = node.id.clone();
3957 semantic_nodes.entry(id.clone()).or_insert(node);
3958 id
3959 });
3960 insert_semantic_edge(
3961 &mut semantic_edges,
3962 SubstrateGraphEdge::new(from_id, to_id, "semantic_relation")
3963 .with_property("relationship_kind", relationship.kind.clone())
3964 .with_property("label", relationship.kind.clone())
3965 .with_property("source_file", summary.file_path.clone())
3966 .with_property("source_symbol", summary.symbol_name.clone())
3967 .with_provenance(provenance.clone()),
3968 );
3969 }
3970 }
3971 }
3972
3973 for node in semantic_nodes.into_values() {
3974 nodes.push(node_with_content_freshness(node)?);
3975 }
3976 for edge in semantic_edges.into_values() {
3977 edges.push(edge_with_content_freshness(edge)?);
3978 }
3979
3980 Ok(())
3981}
3982
3983fn projection_content_hash(
3984 nodes: &[SubstrateGraphNode],
3985 edges: &[SubstrateGraphEdge],
3986) -> Result<String> {
3987 #[derive(Serialize)]
3988 struct Payload<'a> {
3989 version: &'static str,
3990 nodes: &'a [SubstrateGraphNode],
3991 edges: &'a [SubstrateGraphEdge],
3992 }
3993
3994 content_hash(&Payload {
3995 version: GRAPH_PROJECTION_VERSION,
3996 nodes,
3997 edges,
3998 })
3999}
4000
4001pub(crate) fn graph_projection_content_hash(projection: &GraphProjection) -> Option<String> {
4002 projection
4003 .nodes
4004 .iter()
4005 .find(|node| node.kind == GRAPH_PROJECTION_META_KIND)
4006 .and_then(|node| node.properties.get("content_hash").cloned())
4007}
4008
4009fn traversal_projection_from_graph(
4010 root: &Path,
4011 scope: Option<&str>,
4012 graph: &TraversalGraphBuild,
4013) -> Result<GraphProjection> {
4014 let provenance = GraphProvenance::new(
4015 "tsift.traverse",
4016 format!("{}:{}", root.display(), scope.unwrap_or("root")),
4017 );
4018 let mut nodes = Vec::with_capacity(graph.nodes.len() + 1);
4019 for node in graph.nodes.values() {
4020 let mut projected =
4021 SubstrateGraphNode::new(node.handle.clone(), node.kind.clone(), node.label.clone())
4022 .with_property("handle", node.handle.clone())
4023 .with_property("expand", node.expand.clone())
4024 .with_provenance(provenance.clone());
4025 if let Some(ref_id) = &node.ref_id {
4026 projected = projected.with_property("ref_id", ref_id.clone());
4027 }
4028 if let Some(path) = &node.path {
4029 projected = projected.with_property("path", path.clone());
4030 }
4031 if let Some(line) = node.line {
4032 projected = projected.with_property("line", line.to_string());
4033 }
4034 if let Some(detail) = &node.detail {
4035 projected = projected.with_property("detail", detail.clone());
4036 }
4037 for (key, value) in &node.properties {
4038 projected = projected.with_property(key.clone(), value.clone());
4039 }
4040 nodes.push(node_with_content_freshness(projected)?);
4041 }
4042
4043 let mut edges = Vec::with_capacity(graph.edges.len());
4044 for edge in &graph.edges {
4045 let mut projected =
4046 SubstrateGraphEdge::new(edge.from.clone(), edge.to.clone(), edge.relation.clone())
4047 .with_property("weight", edge.weight.to_string())
4048 .with_provenance(provenance.clone());
4049 if let Some(label) = &edge.label {
4050 projected = projected.with_property("label", label.clone());
4051 }
4052 edges.push(edge_with_content_freshness(projected)?);
4053 }
4054
4055 append_traversal_context_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
4056 append_summary_semantic_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
4057 append_tsift_memory_graph_projection_rows(root, &mut nodes, &mut edges)?;
4058
4059 let projection_hash = projection_content_hash(&nodes, &edges)?;
4060 let meta = SubstrateGraphNode::new(
4061 graph_projection_meta_id(scope),
4062 GRAPH_PROJECTION_META_KIND,
4063 "tsift traversal projection",
4064 )
4065 .with_property("projection_version", GRAPH_PROJECTION_VERSION)
4066 .with_property("content_hash", projection_hash.clone())
4067 .with_property("root", root.to_string_lossy().to_string())
4068 .with_property("scope", scope.unwrap_or("root"))
4069 .with_property("node_count", graph.nodes.len().to_string())
4070 .with_property("edge_count", graph.edges.len().to_string())
4071 .with_provenance(provenance)
4072 .with_freshness(GraphFreshness::content_hash(projection_hash));
4073 nodes.push(meta);
4074
4075 Ok(GraphProjection { nodes, edges })
4076}
4077
4078#[allow(clippy::too_many_arguments)]
4079fn ensure_traversal_source_handle(
4080 root: &Path,
4081 provenance: &GraphProvenance,
4082 file_node_by_path: &BTreeMap<String, String>,
4083 node: &TraversalNode,
4084 budget: &ExplorationBudget,
4085 source_handle_by_node: &mut BTreeMap<String, String>,
4086 seen_windows: &mut BTreeMap<(String, usize, usize), String>,
4087 nodes: &mut Vec<SubstrateGraphNode>,
4088 edges: &mut Vec<SubstrateGraphEdge>,
4089) -> Result<Option<String>> {
4090 if let Some(handle) = source_handle_by_node.get(&node.handle) {
4091 return Ok(Some(handle.clone()));
4092 }
4093 let Some(window) = exploration_source_window_for_node(root, node, budget) else {
4094 return Ok(None);
4095 };
4096 let window_key = (window.file.clone(), window.start, window.end);
4097 let handle = if let Some(handle) = seen_windows.get(&window_key) {
4098 handle.clone()
4099 } else {
4100 let label = format!("{}:{}-{}", window.file, window.start, window.end);
4101 let projected = SubstrateGraphNode::new(window.handle.clone(), "source_handle", label)
4102 .with_property("handle", window.handle.clone())
4103 .with_property("file", window.file.clone())
4104 .with_property("start", window.start.to_string())
4105 .with_property("end", window.end.to_string())
4106 .with_property("reason", window.reason.clone())
4107 .with_property("expand", window.expand.clone())
4108 .with_provenance(provenance.clone());
4109 nodes.push(node_with_content_freshness(projected)?);
4110
4111 if let Some(file_handle) = file_node_by_path.get(&window.file) {
4112 let edge = SubstrateGraphEdge::new(
4113 window.handle.clone(),
4114 file_handle.clone(),
4115 "expands_source",
4116 )
4117 .with_property("label", window.reason.clone())
4118 .with_provenance(provenance.clone());
4119 edges.push(edge_with_content_freshness(edge)?);
4120 }
4121 if node.kind != "file" {
4122 let edge = SubstrateGraphEdge::new(
4123 window.handle.clone(),
4124 node.handle.clone(),
4125 "anchors_source",
4126 )
4127 .with_property("label", window.reason.clone())
4128 .with_provenance(provenance.clone());
4129 edges.push(edge_with_content_freshness(edge)?);
4130 }
4131 seen_windows.insert(window_key, window.handle.clone());
4132 window.handle
4133 };
4134 source_handle_by_node.insert(node.handle.clone(), handle.clone());
4135 Ok(Some(handle))
4136}
4137
4138fn push_traversal_backlog_target_handles<'a>(
4139 backlog: &TraversalNode,
4140 edges_by_from: &BTreeMap<&'a str, Vec<&'a TraversalEdge>>,
4141 node_by_handle: &BTreeMap<&'a str, &'a TraversalNode>,
4142 max_handles: usize,
4143 seen_target_nodes: &mut BTreeSet<String>,
4144 target_node_handles: &mut Vec<String>,
4145) {
4146 for edge in edges_by_from
4147 .get(backlog.handle.as_str())
4148 .into_iter()
4149 .flatten()
4150 .filter(|edge| edge.relation == "mentions")
4151 {
4152 let Some(target_node) = node_by_handle.get(edge.to.as_str()) else {
4153 continue;
4154 };
4155 if !matches!(
4156 target_node.kind.as_str(),
4157 "file" | "symbol" | "route" | "cargo_package" | "cargo_workspace"
4158 ) {
4159 continue;
4160 }
4161 if target_node
4162 .path
4163 .as_deref()
4164 .zip(backlog.path.as_deref())
4165 .is_some_and(|(target_path, backlog_path)| {
4166 target_path == backlog_path && target_path.ends_with(".md")
4167 })
4168 {
4169 continue;
4170 }
4171 if seen_target_nodes.insert(target_node.handle.clone()) {
4172 target_node_handles.push(target_node.handle.clone());
4173 }
4174 if target_node_handles.len() >= max_handles {
4175 break;
4176 }
4177 }
4178}
4179
4180fn append_traversal_context_projection_rows(
4181 root: &Path,
4182 graph: &TraversalGraphBuild,
4183 provenance: &GraphProvenance,
4184 nodes: &mut Vec<SubstrateGraphNode>,
4185 edges: &mut Vec<SubstrateGraphEdge>,
4186) -> Result<()> {
4187 let budget = exploration_budget_for_counts(graph.nodes.len(), graph.edges.len());
4188 let file_node_by_path = graph
4189 .nodes
4190 .values()
4191 .filter(|node| node.kind == "file")
4192 .filter_map(|node| {
4193 node.path
4194 .as_ref()
4195 .map(|path| (path.clone(), node.handle.clone()))
4196 })
4197 .collect::<BTreeMap<_, _>>();
4198
4199 let node_by_handle = graph
4200 .nodes
4201 .values()
4202 .map(|node| (node.handle.as_str(), node))
4203 .collect::<BTreeMap<_, _>>();
4204 let mut edges_by_from = BTreeMap::<&str, Vec<&TraversalEdge>>::new();
4205 for edge in &graph.edges {
4206 edges_by_from
4207 .entry(edge.from.as_str())
4208 .or_default()
4209 .push(edge);
4210 }
4211 for rows in edges_by_from.values_mut() {
4212 rows.sort_by(|left, right| {
4213 right
4214 .weight
4215 .cmp(&left.weight)
4216 .then(left.relation.cmp(&right.relation))
4217 .then(left.to.cmp(&right.to))
4218 });
4219 }
4220
4221 let mut seen_windows = BTreeMap::<(String, usize, usize), String>::new();
4222 let mut source_handle_by_node = BTreeMap::<String, String>::new();
4223
4224 let mut code_context_count = 0usize;
4225 let code_context_limit = budget.relationship_limit.min(8);
4226 for node in graph.nodes.values() {
4227 if !matches!(
4228 node.kind.as_str(),
4229 "backlog" | "job_packet" | "worker_result"
4230 ) {
4231 continue;
4232 }
4233 let mut target_node_handles = Vec::new();
4234 let mut fallback_target_handles = Vec::new();
4235 let mut seen_target_nodes = BTreeSet::new();
4236 if node.kind == "backlog" || node.kind == "worker_result" {
4237 push_traversal_backlog_target_handles(
4238 node,
4239 &edges_by_from,
4240 &node_by_handle,
4241 budget.max_source_windows,
4242 &mut seen_target_nodes,
4243 &mut target_node_handles,
4244 );
4245 fallback_target_handles.push(node.handle.clone());
4246 } else {
4247 for edge in edges_by_from
4248 .get(node.handle.as_str())
4249 .into_iter()
4250 .flatten()
4251 .filter(|edge| edge.relation == "targets")
4252 {
4253 let Some(backlog) = node_by_handle.get(edge.to.as_str()) else {
4254 continue;
4255 };
4256 fallback_target_handles.push(backlog.handle.clone());
4257 push_traversal_backlog_target_handles(
4258 backlog,
4259 &edges_by_from,
4260 &node_by_handle,
4261 budget.max_source_windows,
4262 &mut seen_target_nodes,
4263 &mut target_node_handles,
4264 );
4265 if target_node_handles.len() >= budget.max_source_windows {
4266 break;
4267 }
4268 }
4269 if fallback_target_handles.is_empty() {
4270 continue;
4271 }
4272 }
4273 let code_context = !target_node_handles.is_empty();
4274 if target_node_handles.is_empty() {
4275 target_node_handles = dedupe_preserve_order(fallback_target_handles);
4276 } else if code_context_count >= code_context_limit {
4277 continue;
4278 }
4279
4280 let mut worker_source_handles = Vec::new();
4281 let mut seen_worker_handles = BTreeSet::new();
4282 for target_handle in target_node_handles {
4283 if worker_source_handles.len() >= budget.max_source_windows {
4284 break;
4285 }
4286 let Some(target_node) = node_by_handle.get(target_handle.as_str()) else {
4287 continue;
4288 };
4289 let Some(handle) = ensure_traversal_source_handle(
4290 root,
4291 provenance,
4292 &file_node_by_path,
4293 target_node,
4294 &budget,
4295 &mut source_handle_by_node,
4296 &mut seen_windows,
4297 nodes,
4298 edges,
4299 )?
4300 else {
4301 continue;
4302 };
4303 if seen_worker_handles.insert(handle.clone()) {
4304 worker_source_handles.push(handle);
4305 }
4306 }
4307 if worker_source_handles.is_empty() {
4308 continue;
4309 }
4310 let target = node
4311 .path
4312 .clone()
4313 .unwrap_or_else(|| root.to_string_lossy().to_string());
4314 let summary = node.detail.clone().unwrap_or_else(|| node.label.clone());
4315 let handle = stable_handle("xwrk", &format!("{}:{}:{}", target, node.handle, summary));
4316 let projected = SubstrateGraphNode::new(handle.clone(), "worker_context", summary.clone())
4317 .with_property("handle", handle.clone())
4318 .with_property("target", target.clone())
4319 .with_property("summary", summary)
4320 .with_property(
4321 "source_handle_count",
4322 worker_source_handles.len().to_string(),
4323 )
4324 .with_property(
4325 "expand",
4326 format!(
4327 "tsift --envelope context-pack {} --budget normal",
4328 shell_quote(&target)
4329 ),
4330 )
4331 .with_provenance(provenance.clone());
4332 nodes.push(node_with_content_freshness(projected)?);
4333
4334 let request_edge =
4335 SubstrateGraphEdge::new(node.handle.clone(), handle.clone(), "requests_context")
4336 .with_property("label", "bounded worker context".to_string())
4337 .with_provenance(provenance.clone());
4338 edges.push(edge_with_content_freshness(request_edge)?);
4339
4340 for source_handle in &worker_source_handles {
4341 let scope_edge =
4342 SubstrateGraphEdge::new(handle.clone(), source_handle.clone(), "scopes_source")
4343 .with_property("label", "bounded worker source window".to_string())
4344 .with_provenance(provenance.clone());
4345 edges.push(edge_with_content_freshness(scope_edge)?);
4346 }
4347 if code_context {
4348 code_context_count += 1;
4349 }
4350 }
4351
4352 Ok(())
4353}
4354
4355fn traversal_node_from_graph_node(root: &Path, node: SubstrateGraphNode) -> TraversalNode {
4356 let handle = node
4357 .properties
4358 .get("handle")
4359 .cloned()
4360 .unwrap_or_else(|| node.id.clone());
4361 TraversalNode {
4362 expand: node
4363 .properties
4364 .get("expand")
4365 .cloned()
4366 .unwrap_or_else(|| traversal_expand_command(root, &handle)),
4367 handle,
4368 kind: node.kind,
4369 label: node.label,
4370 ref_id: node.properties.get("ref_id").cloned(),
4371 path: node.properties.get("path").cloned(),
4372 line: node
4373 .properties
4374 .get("line")
4375 .and_then(|value| value.parse::<i64>().ok()),
4376 detail: node.properties.get("detail").cloned(),
4377 properties: node.properties,
4378 }
4379}
4380
4381fn traversal_graph_from_store(root: &Path, store: &impl GraphStore) -> Result<TraversalGraphBuild> {
4382 let mut graph = TraversalGraphBuild::default();
4383 for node in store.all_nodes()? {
4384 if node.kind == GRAPH_PROJECTION_META_KIND {
4385 continue;
4386 }
4387 graph.add_node(traversal_node_from_graph_node(root, node));
4388 }
4389 for edge in store.all_edges()? {
4390 graph.add_edge(
4391 &edge.from_id,
4392 &edge.to_id,
4393 &edge.kind,
4394 edge.properties.get("label").cloned(),
4395 edge.properties
4396 .get("weight")
4397 .and_then(|value| value.parse::<usize>().ok())
4398 .unwrap_or(1),
4399 );
4400 }
4401 Ok(graph)
4402}
4403
4404pub(crate) fn convex_rows_from_graph_store(
4405 store: &impl GraphStore,
4406) -> Result<ConvexProjectionRows> {
4407 Ok(GraphProjection {
4408 nodes: store.all_nodes()?,
4409 edges: store.all_edges()?,
4410 }
4411 .to_convex_rows())
4412}
4413
/// An index description: the table it belongs to, its name, and its fields.
///
/// Appears in `ConvexSnapshotMeta::indexes` and
/// `ConvexSyncReport::required_indexes`. The derived ordering compares
/// `table`, then `name`, then `fields`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
struct ConvexRequiredIndex {
    /// Table the index is defined on.
    table: String,
    /// Index name.
    name: String,
    /// Indexed field names. NOTE(review): presumably in index order; confirm.
    fields: Vec<String>,
}
4420
/// Serializable description of one chunk of a sync; listed in
/// `ConvexSyncReport::chunks`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncChunk {
    /// Name of the operation this chunk belongs to.
    operation: String,
    /// Chunk number. NOTE(review): base (0 or 1) is not visible here.
    chunk: usize,
    /// NOTE(review): presumably the number of entries in `keys`; confirm.
    count: usize,
    /// Row keys covered by this chunk.
    keys: Vec<String>,
    /// Maximum number of attempts for this chunk.
    max_attempts: usize,
    /// Textual description of the retry policy.
    retry_policy: String,
}
4430
/// Serializable summary of the transport configuration and outcome.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportSummary {
    /// Name of the environment variable consulted for the endpoint.
    endpoint_env: String,
    /// Whether an endpoint was configured.
    endpoint_configured: bool,
    /// Name of the environment variable consulted for the auth token.
    auth_token_env: String,
    /// Whether auth was configured.
    auth_configured: bool,
    /// NOTE(review): presumably whether a remote snapshot was fetched; confirm.
    remote_snapshot: bool,
    /// Number of chunks that were applied.
    applied_chunks: usize,
}
4440
/// Serializable record of one transport attempt for one chunk.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportReceipt {
    /// Name of the operation that was attempted.
    operation: String,
    /// Chunk number the attempt refers to.
    chunk: usize,
    /// Attempt number. NOTE(review): base (0 or 1) is not visible here.
    attempt: usize,
    /// Outcome of the attempt, as text.
    status: String,
    /// Optional message accompanying the outcome.
    message: Option<String>,
}
4449
/// Request payload sent over the Convex transport.
///
/// Field names are serialized in camelCase. `projection_meta_id`, `cursor`
/// and `limit` are omitted from the output when `None`; `projection_hash` is
/// always written (as null when `None`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportRequest<'a> {
    /// Name of the requested operation.
    operation: &'a str,
    /// Chunk number this request carries.
    chunk: usize,
    /// Projection version string.
    projection_version: &'a str,
    /// Projection content hash, when known.
    projection_hash: Option<&'a str>,
    /// Id of the projection meta node, when applicable.
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_meta_id: Option<&'a str>,
    /// Node rows carried by this request.
    node_rows: Vec<ConvexNodeRow>,
    /// Edge rows carried by this request.
    edge_rows: Vec<ConvexEdgeRow>,
    /// Row keys carried by this request.
    keys: Vec<String>,
    /// Paging cursor. NOTE(review): presumably for snapshot reads; confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    cursor: Option<String>,
    /// Page size limit. NOTE(review): presumably for snapshot reads; confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    limit: Option<usize>,
}
4467
/// Response payload received over the Convex transport.
///
/// Field names are read in camelCase. Every field is optional.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportResponse {
    /// Status text reported by the remote side, if any.
    status: Option<String>,
    /// Message reported by the remote side, if any.
    message: Option<String>,
    /// Projection rows returned inline, if any.
    rows: Option<ConvexProjectionRows>,
    /// Snapshot metadata, if any.
    #[serde(default)]
    meta: Option<ConvexSnapshotMeta>,
    /// One page of snapshot rows, if any.
    #[serde(default)]
    page: Option<ConvexSnapshotPage>,
}
4479
/// Snapshot metadata carried in `ConvexTransportResponse::meta`.
///
/// Field names are read in camelCase and every field falls back to its
/// default when absent. All fields except `projection_hash` are marked
/// `#[allow(dead_code)]`, i.e. they are deserialized but not necessarily read.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotMeta {
    /// Indexes reported by the snapshot.
    #[serde(default)]
    #[allow(dead_code)]
    indexes: Vec<ConvexRequiredIndex>,
    /// Node count reported by the snapshot.
    #[serde(default)]
    #[allow(dead_code)]
    node_count: Option<usize>,
    /// Edge count reported by the snapshot.
    #[serde(default)]
    #[allow(dead_code)]
    edge_count: Option<usize>,
    /// Projection content hash reported by the snapshot.
    #[serde(default)]
    projection_hash: Option<String>,
    /// Page size reported by the snapshot.
    #[serde(default)]
    #[allow(dead_code)]
    page_size: Option<usize>,
}
4501
/// One page of a paginated snapshot read.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotPage {
    /// Raw JSON rows; decoded into node or edge rows by the caller.
    rows: Vec<serde_json::Value>,
    /// Cursor for the next page; `None` (or absent) ends the pagination loop.
    #[serde(default)]
    next_cursor: Option<String>,
}
4513
/// Outcome of comparing the local graph projection with a Convex snapshot.
///
/// Produced by `convex_projection_freshness`, which emits the statuses
/// `"current"`, `"unchecked"` (no snapshot supplied) and `"stale"`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexProjectionFreshness {
    /// Freshness status string.
    status: String,
    /// Set for the `"stale"` status; callers treat it as "refuse to proceed".
    fail_closed: bool,
    /// Content hash found on the local projection meta node, if any.
    local_hash: Option<String>,
    /// Content hash found on the snapshot's projection meta node, if any.
    snapshot_hash: Option<String>,
    /// External ids of local nodes absent from the snapshot.
    missing_nodes: Vec<String>,
    /// External ids of local nodes whose snapshot row differs.
    stale_nodes: Vec<String>,
    /// Edge keys of local edges absent from the snapshot.
    missing_edges: Vec<String>,
    /// Edge keys of local edges whose snapshot row differs.
    stale_edges: Vec<String>,
    /// Human-readable explanations of the status.
    diagnostics: Vec<String>,
}
4526
/// Environment variable consulted for the Convex transport endpoint when no
/// explicit endpoint is passed to `ConvexHttpTransport::from_options`.
const DEFAULT_CONVEX_GRAPH_URL_ENV: &str = "TSIFT_CONVEX_GRAPH_URL";
4528
4529impl ConvexProjectionFreshness {
4530 fn current(local_hash: Option<String>, snapshot_hash: Option<String>) -> Self {
4531 Self {
4532 status: "current".to_string(),
4533 fail_closed: false,
4534 local_hash,
4535 snapshot_hash,
4536 missing_nodes: Vec::new(),
4537 stale_nodes: Vec::new(),
4538 missing_edges: Vec::new(),
4539 stale_edges: Vec::new(),
4540 diagnostics: Vec::new(),
4541 }
4542 }
4543}
4544
/// Full plan (and, after an apply, outcome) of syncing the local graph
/// projection to Convex.
///
/// `build_convex_sync_report_with_snapshot` fills in everything except
/// `transport` and `receipts`, which it leaves as `None` / empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncReport {
    /// Resolved project root, rendered lossily as a string.
    root: String,
    /// Optional scope name; omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Path of the local graph database the rows were read from.
    graph_db: String,
    /// Whether this report describes a dry run.
    dry_run: bool,
    /// Projection version constant the plan was built against.
    projection_version: String,
    /// Content hash of the local projection, if its meta node carries one.
    projection_hash: Option<String>,
    /// Indexes the Convex tables are expected to provide.
    required_indexes: Vec<ConvexRequiredIndex>,
    /// Local node rows that are missing from or differ in the snapshot.
    node_upserts: Vec<ConvexNodeRow>,
    /// Local edge rows that are missing from or differ in the snapshot.
    edge_upserts: Vec<ConvexEdgeRow>,
    /// External ids present in the snapshot but not locally.
    node_tombstones: Vec<String>,
    /// Edge keys present in the snapshot but not locally.
    edge_tombstones: Vec<String>,
    /// Ordered batches: edge deletes, node upserts, edge upserts, node deletes.
    chunks: Vec<ConvexSyncChunk>,
    /// Comparison of the local projection with the snapshot.
    freshness: ConvexProjectionFreshness,
    /// Transport summary; `None` when the report is first built.
    transport: Option<ConvexTransportSummary>,
    /// Per-chunk receipts; empty when the report is first built.
    receipts: Vec<ConvexTransportReceipt>,
    /// Plan-level notes such as ordering rules and dry-run notices.
    diagnostics: Vec<String>,
    /// Warnings forwarded from the graph refresh.
    warnings: Vec<String>,
}
4566
4567fn convex_required_indexes() -> Vec<ConvexRequiredIndex> {
4568 vec![
4569 ConvexRequiredIndex {
4570 table: "nodes".to_string(),
4571 name: "by_external_id".to_string(),
4572 fields: vec!["externalId".to_string()],
4573 },
4574 ConvexRequiredIndex {
4575 table: "nodes".to_string(),
4576 name: "by_kind".to_string(),
4577 fields: vec!["kind".to_string()],
4578 },
4579 ConvexRequiredIndex {
4580 table: "edges".to_string(),
4581 name: "by_edge_key".to_string(),
4582 fields: vec!["edgeKey".to_string()],
4583 },
4584 ConvexRequiredIndex {
4585 table: "edges".to_string(),
4586 name: "by_from_kind".to_string(),
4587 fields: vec!["fromExternalId".to_string(), "kind".to_string()],
4588 },
4589 ConvexRequiredIndex {
4590 table: "edges".to_string(),
4591 name: "by_to_kind".to_string(),
4592 fields: vec!["toExternalId".to_string(), "kind".to_string()],
4593 },
4594 ]
4595}
4596
4597pub(crate) fn load_convex_projection_rows(path: &Path) -> Result<ConvexProjectionRows> {
4598 let content = fs::read_to_string(path)
4599 .with_context(|| format!("reading Convex projection snapshot {}", path.display()))?;
4600 serde_json::from_str(&content)
4601 .with_context(|| format!("parsing Convex projection snapshot {}", path.display()))
4602}
4603
4604fn convex_projection_row_diagnostics(rows: &ConvexProjectionRows) -> Vec<String> {
4605 let mut diagnostics = Vec::new();
4606 let mut node_counts = BTreeMap::<&str, usize>::new();
4607 for row in &rows.nodes {
4608 *node_counts.entry(row.external_id.as_str()).or_default() += 1;
4609 }
4610 for (external_id, count) in node_counts.iter().filter(|(_, count)| **count > 1) {
4611 diagnostics.push(format!(
4612 "Convex snapshot contains duplicate node externalId {external_id} ({count} rows)"
4613 ));
4614 }
4615
4616 let node_ids = node_counts.keys().copied().collect::<BTreeSet<_>>();
4617 let mut edge_counts = BTreeMap::<&str, usize>::new();
4618 for edge in &rows.edges {
4619 *edge_counts.entry(edge.edge_key.as_str()).or_default() += 1;
4620 if !node_ids.contains(edge.from_external_id.as_str()) {
4621 diagnostics.push(format!(
4622 "Convex snapshot edge {} references missing from node {}",
4623 edge.edge_key, edge.from_external_id
4624 ));
4625 }
4626 if !node_ids.contains(edge.to_external_id.as_str()) {
4627 diagnostics.push(format!(
4628 "Convex snapshot edge {} references missing to node {}",
4629 edge.edge_key, edge.to_external_id
4630 ));
4631 }
4632 let expected_key =
4633 ConvexEdgeRow::stable_key(&edge.from_external_id, &edge.to_external_id, &edge.kind);
4634 if edge.edge_key != expected_key {
4635 diagnostics.push(format!(
4636 "Convex snapshot edge {} has non-canonical key; expected {} for ({}, {}, {})",
4637 edge.edge_key, expected_key, edge.from_external_id, edge.kind, edge.to_external_id
4638 ));
4639 }
4640 }
4641 for (edge_key, count) in edge_counts.iter().filter(|(_, count)| **count > 1) {
4642 diagnostics.push(format!(
4643 "Convex snapshot contains duplicate edgeKey {edge_key} ({count} rows)"
4644 ));
4645 }
4646 diagnostics
4647}
4648
4649pub(crate) fn validate_convex_projection_rows(rows: &ConvexProjectionRows) -> Result<()> {
4650 let diagnostics = convex_projection_row_diagnostics(rows);
4651 if diagnostics.is_empty() {
4652 Ok(())
4653 } else {
4654 bail!("{}", diagnostics.join("; "))
4655 }
4656}
4657
/// HTTP client configuration for talking to the Convex graph transport endpoint.
pub(crate) struct ConvexHttpTransport {
    /// URL every request is POSTed to.
    endpoint: String,
    /// Name of the environment variable the auth token was looked up in.
    auth_token_env: String,
    /// Bearer token sent in the `Authorization` header, when one was found.
    auth_token: Option<String>,
}
4663
4664impl ConvexHttpTransport {
4665 fn from_options(endpoint: Option<&str>, auth_token_env: &str) -> Result<Self> {
4666 let endpoint = endpoint
4667 .map(str::to_string)
4668 .or_else(|| env::var(DEFAULT_CONVEX_GRAPH_URL_ENV).ok())
4669 .context("Convex transport requires --endpoint or TSIFT_CONVEX_GRAPH_URL")?;
4670 let auth_token = env::var(auth_token_env)
4671 .ok()
4672 .filter(|value| !value.trim().is_empty());
4673 Ok(Self {
4674 endpoint,
4675 auth_token_env: auth_token_env.to_string(),
4676 auth_token,
4677 })
4678 }
4679
4680 fn summary(&self, remote_snapshot: bool, applied_chunks: usize) -> ConvexTransportSummary {
4681 ConvexTransportSummary {
4682 endpoint_env: DEFAULT_CONVEX_GRAPH_URL_ENV.to_string(),
4683 endpoint_configured: true,
4684 auth_token_env: self.auth_token_env.clone(),
4685 auth_configured: self.auth_token.is_some(),
4686 remote_snapshot,
4687 applied_chunks,
4688 }
4689 }
4690
4691 fn post(&self, request: &ConvexTransportRequest<'_>) -> Result<ConvexTransportResponse> {
4692 let mut builder = ureq::post(&self.endpoint);
4693 if let Some(token) = &self.auth_token {
4694 builder = builder.header("Authorization", &format!("Bearer {token}"));
4695 }
4696 builder
4697 .send_json(request)
4698 .with_context(|| format!("calling Convex graph transport {}", self.endpoint))?
4699 .body_mut()
4700 .read_json::<ConvexTransportResponse>()
4701 .with_context(|| format!("parsing Convex graph transport response {}", self.endpoint))
4702 }
4703
4704 fn fetch_snapshot(
4715 &self,
4716 projection_version: &str,
4717 scope: Option<&str>,
4718 local_hash: Option<&str>,
4719 local_rows: Option<&ConvexProjectionRows>,
4720 ) -> Result<(ConvexProjectionRows, Vec<String>)> {
4721 match self.fetch_snapshot_paginated(projection_version, scope, local_hash, local_rows) {
4722 Ok(rows) => Ok(rows),
4723 Err(err) => {
4724 let msg = format!("{err:#}");
4729 let is_unknown_op = msg.contains("unknown operation")
4730 || msg.contains("snapshot_meta")
4731 || msg.contains("404");
4732 if !is_unknown_op {
4733 return Err(err);
4734 }
4735 self.fetch_snapshot_legacy(projection_version)
4736 .map(|rows| (rows, Vec::new()))
4737 }
4738 }
4739 }
4740
4741 fn fetch_snapshot_legacy(&self, projection_version: &str) -> Result<ConvexProjectionRows> {
4742 let response = self.post(&ConvexTransportRequest {
4743 operation: "snapshot",
4744 chunk: 0,
4745 projection_version,
4746 projection_hash: None,
4747 projection_meta_id: None,
4748 node_rows: Vec::new(),
4749 edge_rows: Vec::new(),
4750 keys: Vec::new(),
4751 cursor: None,
4752 limit: None,
4753 })?;
4754 response
4755 .rows
4756 .context("Convex snapshot response did not include rows")
4757 }
4758
4759 fn fetch_snapshot_paginated(
4760 &self,
4761 projection_version: &str,
4762 scope: Option<&str>,
4763 local_hash: Option<&str>,
4764 local_rows: Option<&ConvexProjectionRows>,
4765 ) -> Result<(ConvexProjectionRows, Vec<String>)> {
4766 let projection_meta_id = graph_projection_meta_id(scope);
4767 let meta_response = self.post(&ConvexTransportRequest {
4768 operation: "snapshot_meta",
4769 chunk: 0,
4770 projection_version,
4771 projection_hash: None,
4772 projection_meta_id: Some(&projection_meta_id),
4773 node_rows: Vec::new(),
4774 edge_rows: Vec::new(),
4775 keys: Vec::new(),
4776 cursor: None,
4777 limit: None,
4778 })?;
4779 if matches!(meta_response.status.as_deref(), Some("error")) {
4780 anyhow::bail!(
4781 "Convex snapshot_meta returned error: {}",
4782 meta_response.message.unwrap_or_default()
4783 );
4784 }
4785 let meta = meta_response
4786 .meta
4787 .context("Convex snapshot_meta response did not include meta")?;
4788 if let (Some(remote_hash), Some(local_hash), Some(local_rows)) =
4789 (meta.projection_hash.as_deref(), local_hash, local_rows)
4790 && remote_hash == local_hash
4791 {
4792 return Ok((
4793 local_rows.clone(),
4794 vec![
4795 "remote projection hash matched local graph; skipped full row-page snapshot diff"
4796 .to_string(),
4797 ],
4798 ));
4799 }
4800
4801 let mut nodes: Vec<ConvexNodeRow> = Vec::with_capacity(meta.node_count.unwrap_or_default());
4802 let mut node_cursor: Option<String> = None;
4803 loop {
4804 let response = self.post(&ConvexTransportRequest {
4805 operation: "snapshot_nodes_page",
4806 chunk: 0,
4807 projection_version,
4808 projection_hash: None,
4809 projection_meta_id: None,
4810 node_rows: Vec::new(),
4811 edge_rows: Vec::new(),
4812 keys: Vec::new(),
4813 cursor: node_cursor.clone(),
4814 limit: None,
4815 })?;
4816 let page = response
4817 .page
4818 .context("Convex snapshot_nodes_page response did not include page")?;
4819 for raw in page.rows {
4820 let row: ConvexNodeRow =
4821 serde_json::from_value(raw).context("decoding Convex snapshot node row")?;
4822 nodes.push(row);
4823 }
4824 match page.next_cursor {
4825 Some(next) => node_cursor = Some(next),
4826 None => break,
4827 }
4828 }
4829
4830 let mut edges: Vec<ConvexEdgeRow> = Vec::with_capacity(meta.edge_count.unwrap_or_default());
4831 let mut edge_cursor: Option<String> = None;
4832 loop {
4833 let response = self.post(&ConvexTransportRequest {
4834 operation: "snapshot_edges_page",
4835 chunk: 0,
4836 projection_version,
4837 projection_hash: None,
4838 projection_meta_id: None,
4839 node_rows: Vec::new(),
4840 edge_rows: Vec::new(),
4841 keys: Vec::new(),
4842 cursor: edge_cursor.clone(),
4843 limit: None,
4844 })?;
4845 let page = response
4846 .page
4847 .context("Convex snapshot_edges_page response did not include page")?;
4848 for raw in page.rows {
4849 let row: ConvexEdgeRow =
4850 serde_json::from_value(raw).context("decoding Convex snapshot edge row")?;
4851 edges.push(row);
4852 }
4853 match page.next_cursor {
4854 Some(next) => edge_cursor = Some(next),
4855 None => break,
4856 }
4857 }
4858
4859 Ok((ConvexProjectionRows { nodes, edges }, Vec::new()))
4860 }
4861
4862 fn apply_chunk(
4863 &self,
4864 report: &ConvexSyncReport,
4865 chunk: &ConvexSyncChunk,
4866 ) -> Result<ConvexTransportReceipt> {
4867 let node_rows = if chunk.operation == "upsert_nodes" {
4868 report
4869 .node_upserts
4870 .iter()
4871 .filter(|row| chunk.keys.contains(&row.external_id))
4872 .cloned()
4873 .collect()
4874 } else {
4875 Vec::new()
4876 };
4877 let edge_rows = if chunk.operation == "upsert_edges" {
4878 report
4879 .edge_upserts
4880 .iter()
4881 .filter(|row| chunk.keys.contains(&row.edge_key))
4882 .cloned()
4883 .collect()
4884 } else {
4885 Vec::new()
4886 };
4887 let request = ConvexTransportRequest {
4888 operation: &chunk.operation,
4889 chunk: chunk.chunk,
4890 projection_version: &report.projection_version,
4891 projection_hash: report.projection_hash.as_deref(),
4892 projection_meta_id: None,
4893 node_rows,
4894 edge_rows,
4895 keys: chunk.keys.clone(),
4896 cursor: None,
4897 limit: None,
4898 };
4899 let mut last_error = None;
4900 for attempt in 1..=chunk.max_attempts {
4901 match self.post(&request) {
4902 Ok(response) => {
4903 return Ok(ConvexTransportReceipt {
4904 operation: chunk.operation.clone(),
4905 chunk: chunk.chunk,
4906 attempt,
4907 status: response.status.unwrap_or_else(|| "ok".to_string()),
4908 message: response.message,
4909 });
4910 }
4911 Err(err) => {
4912 last_error = Some(err);
4913 if attempt < chunk.max_attempts {
4914 std::thread::sleep(Duration::from_millis(100 * attempt as u64));
4915 }
4916 }
4917 }
4918 }
4919 Err(last_error.unwrap_or_else(|| anyhow::anyhow!("Convex transport chunk failed")))
4920 .with_context(|| format!("applying Convex {} chunk {}", chunk.operation, chunk.chunk))
4921 }
4922}
4923
4924fn convex_projection_hash(rows: &ConvexProjectionRows, scope: Option<&str>) -> Option<String> {
4925 let meta_id = graph_projection_meta_id(scope);
4926 rows.nodes
4927 .iter()
4928 .find(|row| row.external_id == meta_id && row.kind == GRAPH_PROJECTION_META_KIND)
4929 .and_then(|row| row.properties.get("content_hash").cloned())
4930}
4931
4932fn convex_projection_freshness(
4933 local: &ConvexProjectionRows,
4934 snapshot: Option<&ConvexProjectionRows>,
4935 scope: Option<&str>,
4936) -> ConvexProjectionFreshness {
4937 let local_hash = convex_projection_hash(local, scope);
4938 let Some(snapshot) = snapshot else {
4939 return ConvexProjectionFreshness {
4940 status: "unchecked".to_string(),
4941 fail_closed: false,
4942 local_hash,
4943 snapshot_hash: None,
4944 missing_nodes: Vec::new(),
4945 stale_nodes: Vec::new(),
4946 missing_edges: Vec::new(),
4947 stale_edges: Vec::new(),
4948 diagnostics: vec![
4949 "no Convex snapshot supplied; sync output is a local dry-run plan".to_string(),
4950 ],
4951 };
4952 };
4953
4954 let snapshot_hash = convex_projection_hash(snapshot, scope);
4955 let snapshot_nodes = snapshot
4956 .nodes
4957 .iter()
4958 .map(|row| (row.external_id.as_str(), row))
4959 .collect::<BTreeMap<_, _>>();
4960 let snapshot_edges = snapshot
4961 .edges
4962 .iter()
4963 .map(|row| (row.edge_key.as_str(), row))
4964 .collect::<BTreeMap<_, _>>();
4965
4966 let mut missing_nodes = Vec::new();
4967 let mut stale_nodes = Vec::new();
4968 for row in &local.nodes {
4969 match snapshot_nodes.get(row.external_id.as_str()) {
4970 Some(snapshot_row) if *snapshot_row == row => {}
4971 Some(_) => stale_nodes.push(row.external_id.clone()),
4972 None => missing_nodes.push(row.external_id.clone()),
4973 }
4974 }
4975
4976 let mut missing_edges = Vec::new();
4977 let mut stale_edges = Vec::new();
4978 for row in &local.edges {
4979 match snapshot_edges.get(row.edge_key.as_str()) {
4980 Some(snapshot_row) if *snapshot_row == row => {}
4981 Some(_) => stale_edges.push(row.edge_key.clone()),
4982 None => missing_edges.push(row.edge_key.clone()),
4983 }
4984 }
4985
4986 let hash_current = local_hash.is_some() && local_hash == snapshot_hash;
4987 let rows_current = missing_nodes.is_empty()
4988 && stale_nodes.is_empty()
4989 && missing_edges.is_empty()
4990 && stale_edges.is_empty();
4991 if hash_current && rows_current {
4992 return ConvexProjectionFreshness::current(local_hash, snapshot_hash);
4993 }
4994
4995 let mut diagnostics = Vec::new();
4996 if local_hash != snapshot_hash {
4997 diagnostics.push(format!(
4998 "projection hash mismatch: local={} snapshot={}",
4999 local_hash.as_deref().unwrap_or("missing"),
5000 snapshot_hash.as_deref().unwrap_or("missing")
5001 ));
5002 }
5003 if !missing_nodes.is_empty() || !missing_edges.is_empty() {
5004 diagnostics.push(format!(
5005 "Convex snapshot is missing {} node(s) and {} edge(s)",
5006 missing_nodes.len(),
5007 missing_edges.len()
5008 ));
5009 }
5010 if !stale_nodes.is_empty() || !stale_edges.is_empty() {
5011 diagnostics.push(format!(
5012 "Convex snapshot has {} stale node row(s) and {} stale edge row(s)",
5013 stale_nodes.len(),
5014 stale_edges.len()
5015 ));
5016 }
5017
5018 ConvexProjectionFreshness {
5019 status: "stale".to_string(),
5020 fail_closed: true,
5021 local_hash,
5022 snapshot_hash,
5023 missing_nodes,
5024 stale_nodes,
5025 missing_edges,
5026 stale_edges,
5027 diagnostics,
5028 }
5029}
5030
5031pub(crate) fn verify_convex_projection_snapshot(
5032 root: &Path,
5033 scope: Option<&str>,
5034 snapshot_path: &Path,
5035) -> Result<()> {
5036 let graph_db = graph_substrate_db_path(root, scope);
5037 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
5038 let local = convex_rows_from_graph_store(&store)?;
5039 let snapshot = load_convex_projection_rows(snapshot_path)?;
5040 validate_convex_projection_rows(&snapshot)?;
5041 let freshness = convex_projection_freshness(&local, Some(&snapshot), scope);
5042 if freshness.fail_closed {
5043 bail!(
5044 "Convex graph projection is not current for {}: {}",
5045 root.display(),
5046 freshness.diagnostics.join("; ")
5047 );
5048 }
5049 Ok(())
5050}
5051
5052fn convex_rows_diff(
5053 local: &ConvexProjectionRows,
5054 snapshot: Option<&ConvexProjectionRows>,
5055) -> (
5056 Vec<ConvexNodeRow>,
5057 Vec<ConvexEdgeRow>,
5058 Vec<String>,
5059 Vec<String>,
5060) {
5061 let Some(snapshot) = snapshot else {
5062 return (
5063 local.nodes.clone(),
5064 local.edges.clone(),
5065 Vec::new(),
5066 Vec::new(),
5067 );
5068 };
5069 let local_nodes = local
5070 .nodes
5071 .iter()
5072 .map(|row| (row.external_id.as_str(), row))
5073 .collect::<BTreeMap<_, _>>();
5074 let local_edges = local
5075 .edges
5076 .iter()
5077 .map(|row| (row.edge_key.as_str(), row))
5078 .collect::<BTreeMap<_, _>>();
5079 let snapshot_nodes = snapshot
5080 .nodes
5081 .iter()
5082 .map(|row| (row.external_id.as_str(), row))
5083 .collect::<BTreeMap<_, _>>();
5084 let snapshot_edges = snapshot
5085 .edges
5086 .iter()
5087 .map(|row| (row.edge_key.as_str(), row))
5088 .collect::<BTreeMap<_, _>>();
5089
5090 let node_upserts = local
5091 .nodes
5092 .iter()
5093 .filter(|row| {
5094 snapshot_nodes
5095 .get(row.external_id.as_str())
5096 .is_none_or(|snapshot_row| *snapshot_row != *row)
5097 })
5098 .cloned()
5099 .collect::<Vec<_>>();
5100 let edge_upserts = local
5101 .edges
5102 .iter()
5103 .filter(|row| {
5104 snapshot_edges
5105 .get(row.edge_key.as_str())
5106 .is_none_or(|snapshot_row| *snapshot_row != *row)
5107 })
5108 .cloned()
5109 .collect::<Vec<_>>();
5110 let node_tombstones = snapshot
5111 .nodes
5112 .iter()
5113 .filter(|row| !local_nodes.contains_key(row.external_id.as_str()))
5114 .map(|row| row.external_id.clone())
5115 .collect::<Vec<_>>();
5116 let edge_tombstones = snapshot
5117 .edges
5118 .iter()
5119 .filter(|row| !local_edges.contains_key(row.edge_key.as_str()))
5120 .map(|row| row.edge_key.clone())
5121 .collect::<Vec<_>>();
5122
5123 (node_upserts, edge_upserts, node_tombstones, edge_tombstones)
5124}
5125
5126fn push_sync_chunks(
5127 chunks: &mut Vec<ConvexSyncChunk>,
5128 operation: &str,
5129 keys: Vec<String>,
5130 size: usize,
5131) {
5132 if keys.is_empty() {
5133 return;
5134 }
5135 for (idx, chunk) in keys.chunks(size).enumerate() {
5136 chunks.push(ConvexSyncChunk {
5137 operation: operation.to_string(),
5138 chunk: idx + 1,
5139 count: chunk.len(),
5140 keys: chunk.to_vec(),
5141 max_attempts: 3,
5142 retry_policy:
5143 "retry the whole chunk; rows are idempotent by externalId/edgeKey, stop on a repeated partial failure"
5144 .to_string(),
5145 });
5146 }
5147}
5148
5149pub(crate) fn build_convex_sync_report_with_snapshot(
5150 path: &Path,
5151 scope: Option<&str>,
5152 snapshot: Option<ConvexProjectionRows>,
5153 chunk_size: usize,
5154 dry_run: bool,
5155) -> Result<ConvexSyncReport> {
5156 if chunk_size == 0 {
5157 bail!("--chunk-size must be greater than zero");
5158 }
5159 let root = lint::resolve_project_root_or_canonical_path(path)?;
5160 let (graph, _refresh) = write_traversal_graph_store(&root, path, scope)?;
5161 let graph_db = graph_substrate_db_path(&root, scope);
5162 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
5163 let local = convex_rows_from_graph_store(&store)?;
5164 let freshness = convex_projection_freshness(&local, snapshot.as_ref(), scope);
5165 let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
5166 convex_rows_diff(&local, snapshot.as_ref());
5167
5168 let mut chunks = Vec::new();
5169 push_sync_chunks(
5170 &mut chunks,
5171 "delete_edges",
5172 edge_tombstones.clone(),
5173 chunk_size,
5174 );
5175 push_sync_chunks(
5176 &mut chunks,
5177 "upsert_nodes",
5178 node_upserts
5179 .iter()
5180 .map(|row| row.external_id.clone())
5181 .collect(),
5182 chunk_size,
5183 );
5184 push_sync_chunks(
5185 &mut chunks,
5186 "upsert_edges",
5187 edge_upserts
5188 .iter()
5189 .map(|row| row.edge_key.clone())
5190 .collect(),
5191 chunk_size,
5192 );
5193 push_sync_chunks(
5194 &mut chunks,
5195 "delete_nodes",
5196 node_tombstones.clone(),
5197 chunk_size,
5198 );
5199
5200 let mut diagnostics = vec![
5201 "apply node upserts before edge upserts; apply edge tombstones before node tombstones"
5202 .to_string(),
5203 ];
5204 if dry_run {
5205 diagnostics.push("dry-run only: no Convex network mutation was attempted".to_string());
5206 }
5207 if freshness.fail_closed {
5208 diagnostics.push(
5209 "Convex-backed traverse/context-pack reads must fail closed until this plan is applied"
5210 .to_string(),
5211 );
5212 }
5213
5214 Ok(ConvexSyncReport {
5215 root: root.to_string_lossy().to_string(),
5216 scope: scope.map(str::to_string),
5217 graph_db: graph_db.to_string_lossy().to_string(),
5218 dry_run,
5219 projection_version: GRAPH_PROJECTION_VERSION.to_string(),
5220 projection_hash: convex_projection_hash(&local, scope),
5221 required_indexes: convex_required_indexes(),
5222 node_upserts,
5223 edge_upserts,
5224 node_tombstones,
5225 edge_tombstones,
5226 chunks,
5227 freshness,
5228 transport: None,
5229 receipts: Vec::new(),
5230 diagnostics,
5231 warnings: graph.warnings,
5232 })
5233}
5234
/// Test-only convenience wrapper: loads the optional snapshot file and builds
/// a dry-run sync report from it.
#[cfg(test)]
fn build_convex_sync_report(
    path: &Path,
    scope: Option<&str>,
    snapshot_path: Option<&Path>,
    chunk_size: usize,
) -> Result<ConvexSyncReport> {
    let snapshot = match snapshot_path {
        Some(snapshot_file) => Some(load_convex_projection_rows(snapshot_file)?),
        None => None,
    };
    build_convex_sync_report_with_snapshot(path, scope, snapshot, chunk_size, true)
}
5245
5246pub(crate) fn print_convex_sync_human(report: &ConvexSyncReport, compact: bool) {
5247 if compact {
5248 println!(
5249 "convex-sync nodes:+{} -{} edges:+{} -{} chunks:{} freshness:{}",
5250 report.node_upserts.len(),
5251 report.node_tombstones.len(),
5252 report.edge_upserts.len(),
5253 report.edge_tombstones.len(),
5254 report.chunks.len(),
5255 report.freshness.status
5256 );
5257 return;
5258 }
5259
5260 println!(
5261 "Convex graph sync {}",
5262 if report.dry_run { "dry-run" } else { "apply" }
5263 );
5264 println!("root: {}", report.root);
5265 println!("graph_db: {}", report.graph_db);
5266 println!(
5267 "upserts: {} node(s), {} edge(s)",
5268 report.node_upserts.len(),
5269 report.edge_upserts.len()
5270 );
5271 println!(
5272 "tombstones: {} node(s), {} edge(s)",
5273 report.node_tombstones.len(),
5274 report.edge_tombstones.len()
5275 );
5276 println!("chunks: {}", report.chunks.len());
5277 println!("freshness: {}", report.freshness.status);
5278 if let Some(transport) = &report.transport {
5279 println!(
5280 "transport: endpoint_env={} auth_env={} applied_chunks={}",
5281 transport.endpoint_env, transport.auth_token_env, transport.applied_chunks
5282 );
5283 }
5284 for receipt in &report.receipts {
5285 println!(
5286 "receipt: {} chunk {} attempt {} {}",
5287 receipt.operation, receipt.chunk, receipt.attempt, receipt.status
5288 );
5289 }
5290 for diagnostic in report
5291 .diagnostics
5292 .iter()
5293 .chain(report.freshness.diagnostics.iter())
5294 {
5295 println!("- {}", diagnostic);
5296 }
5297}
5298
/// Borrowed bundle of options for a Convex sync run.
///
/// NOTE(review): the consumer of this struct is outside the visible part of
/// the file; the field notes below are inferred from the matching parameters
/// of the sync helpers above and should be confirmed against it.
pub(crate) struct ConvexSyncOptions<'a> {
    /// Path inside the project to sync.
    path: &'a Path,
    /// Optional scope name.
    scope: Option<&'a str>,
    /// Optional path to a snapshot file — presumably loaded with `load_convex_projection_rows`.
    snapshot: Option<&'a Path>,
    /// Maximum number of keys per sync chunk.
    chunk_size: usize,
    /// Presumably requests fetching the snapshot over the transport — confirm.
    remote_snapshot: bool,
    /// Presumably requests applying the plan rather than a dry run — confirm.
    apply: bool,
    /// Optional explicit transport endpoint.
    endpoint: Option<&'a str>,
    /// Name of the environment variable holding the auth token.
    auth_token_env: &'a str,
}
5309
/// Serializable description of one field in the graph-db schema listing
/// (used for both node and edge fields in `GraphDbSchema`).
#[derive(Serialize)]
struct GraphDbSchemaField {
    /// Field name.
    name: &'static str,
    /// Textual name of the field's value type.
    value_type: &'static str,
    /// Human-readable description of the field.
    description: &'static str,
}
5316
/// Serializable description of one operation listed in `GraphDbSchema`.
#[derive(Serialize)]
struct GraphDbSchemaOperation {
    /// Command text identifying the operation.
    command: &'static str,
    /// Human-readable description of the operation.
    description: &'static str,
}
5322
/// Serializable name/version pair for one contract listed in
/// `GraphDbSchema::contract_versions`.
#[derive(Serialize)]
struct GraphDbSchemaContract {
    /// Contract name.
    name: &'static str,
    /// Contract version string.
    version: &'static str,
    /// Human-readable description of the contract.
    description: &'static str,
}
5329
/// Serializable schema listing carried in the optional `schema` field of
/// `GraphDbReport`.
#[derive(Serialize)]
struct GraphDbSchema {
    /// Contracts and their versions.
    contract_versions: Vec<GraphDbSchemaContract>,
    /// Fields available on graph nodes.
    node_fields: Vec<GraphDbSchemaField>,
    /// Fields available on graph edges.
    edge_fields: Vec<GraphDbSchemaField>,
    /// Operations the graph-db command set offers.
    operations: Vec<GraphDbSchemaOperation>,
}
5337
/// Freshness section of a `GraphDbReport`.
///
/// NOTE(review): the code that fills this in is outside the visible part of
/// the file; the status vocabulary should be confirmed there.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFreshnessReport {
    /// Freshness status string.
    status: String,
    /// Whether consumers should refuse to use the graph data.
    fail_closed: bool,
    /// Projection version recorded for the graph, if known.
    projection_version: Option<String>,
    /// Content hash recorded for the graph, if known.
    content_hash: Option<String>,
    /// Source watermark recorded for the graph, if known.
    source_watermark: Option<String>,
    /// Human-readable explanations of the status.
    diagnostics: Vec<String>,
}
5347
/// Crate-visible readiness verdict carried in the optional `readiness` field
/// of `GraphDbReport`.
///
/// NOTE(review): presumably built by the `graph_effectiveness_ready` /
/// `graph_effectiveness_blocked` helpers re-exported at the top of the file —
/// confirm.
#[derive(Clone, Debug, Serialize)]
pub(crate) struct GraphEffectivenessReadiness {
    /// Readiness status string.
    pub(crate) status: String,
    /// Whether consumers should refuse to proceed.
    pub(crate) fail_closed: bool,
    /// Short reason for the status.
    pub(crate) reason: String,
    /// Additional human-readable details.
    pub(crate) diagnostics: Vec<String>,
    /// Suggested follow-up commands.
    pub(crate) next_commands: Vec<String>,
}
5356
/// A key/value pair used to filter graph-db query results by property.
///
/// NOTE(review): the matching semantics (exact vs. partial) are defined where
/// the filter is applied, outside the visible part of the file.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPropertyFilter {
    /// Property name to match.
    key: String,
    /// Property value to match.
    value: String,
}
5362
/// Optional paging and filtering inputs for a graph-db query.
///
/// The derived `Default` yields no cursor, no limit and no filters.
#[derive(Clone, Debug, Default)]
struct GraphDbQueryOptions {
    /// Cursor to resume from, if any.
    cursor: Option<String>,
    /// Maximum number of results to return, if any.
    limit: Option<usize>,
    /// Property filters to apply.
    property_filters: Vec<GraphDbPropertyFilter>,
}
5369
/// Paging section of a `GraphDbReport`.
///
/// `cursor`, `limit` and `next_cursor` are omitted from the serialized output
/// when `None`; `diagnostics` is omitted when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPageReport {
    /// Cursor the page was requested with.
    #[serde(skip_serializing_if = "Option::is_none")]
    cursor: Option<String>,
    /// Limit the page was requested with.
    #[serde(skip_serializing_if = "Option::is_none")]
    limit: Option<usize>,
    /// Cursor for requesting the following page, if there is one.
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
    /// Number of nodes included in this page.
    returned_nodes: usize,
    /// Number of edges included in this page.
    returned_edges: usize,
    /// Whether results were cut short.
    truncated: bool,
    /// Property filters that were in effect.
    property_filters: Vec<GraphDbPropertyFilter>,
    /// Human-readable notes about paging.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    diagnostics: Vec<String>,
}
5385
/// Local alias for `resolution::RankedNeighbor`, the element type of
/// `GraphDbReport::ranked_neighbors`.
type GraphDbRankedNeighbor = resolution::RankedNeighbor;
5387
/// Counts describing how communities were affected when results were pruned;
/// carried optionally inside `GraphDbRankedNeighborhoodComparison`.
///
/// NOTE(review): the exact definitions of "kept" and "pruned" live where this
/// summary is computed, outside the visible part of the file.
#[derive(Clone, Debug, Serialize)]
struct CommunityTruncationSummary {
    /// Number of communities considered.
    total_communities: usize,
    /// Communities with no members pruned.
    fully_kept: usize,
    /// Communities with some members pruned.
    partially_pruned: usize,
    /// Communities with all members pruned.
    fully_pruned: usize,
    /// Kinds of the pruned communities.
    pruned_community_kinds: Vec<String>,
    /// Top labels of the pruned communities.
    pruned_community_top_labels: Vec<String>,
}
5397
/// Metrics comparing a ranked neighborhood traversal with its unranked
/// counterpart; carried optionally in `GraphDbReport`.
///
/// NOTE(review): the metric definitions (overlap, hit density, handle
/// coverage) are computed outside the visible part of the file — the field
/// notes below restate the names only and should be confirmed there.
#[derive(Clone, Debug, Serialize)]
struct GraphDbRankedNeighborhoodComparison {
    /// Node count of the traversal.
    traversal_nodes: usize,
    /// Edge count of the traversal.
    traversal_edges: usize,
    /// Number of items pruned.
    pruned_count: usize,
    /// Number of items discovered in total.
    total_discovered: usize,
    /// Measured latency in microseconds.
    latency_micros: u128,
    /// Overlap with the unranked result, as a percentage.
    overlap_with_unranked_pct: f64,
    /// Useful-hit density of the ranked result.
    useful_hit_density_ranked: f64,
    /// Useful-hit density of the unranked result.
    useful_hit_density_unranked: f64,
    /// Duplicate-name count in the ranked result.
    duplicate_name_count_ranked: usize,
    /// Duplicate-name count in the unranked result.
    duplicate_name_count_unranked: usize,
    /// Handle coverage of the ranked result, as a percentage.
    handle_coverage_ranked_pct: f64,
    /// Handle coverage of the unranked result, as a percentage.
    handle_coverage_unranked_pct: f64,
    /// Community pruning summary; omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    community_truncation_summary: Option<CommunityTruncationSummary>,
    /// Human-readable notes about the comparison.
    diagnostics: Vec<String>,
}
5416
/// One entry describing output that was dropped to stay within a budget;
/// listed in `GraphDbOutputBudgetReport::dropped_by_budget`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
struct GraphDbDroppedByBudget {
    /// Identifier of what was dropped.
    item: String,
    /// Kind of the dropped item.
    kind: String,
    /// How many were dropped.
    dropped: usize,
    /// Why they were dropped.
    reason: String,
}
5424
/// Output-budget section of a `GraphDbReport`: what was selected versus what
/// was available under a token limit.
///
/// NOTE(review): how tokens are estimated is decided outside the visible part
/// of the file.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
struct GraphDbOutputBudgetReport {
    /// Token budget that applied.
    max_tokens: usize,
    /// Estimated token size of the selected output.
    estimated_tokens: usize,
    /// Nodes included in the output.
    selected_nodes: usize,
    /// Edges included in the output.
    selected_edges: usize,
    /// Nodes that were candidates for inclusion.
    candidate_nodes: usize,
    /// Edges that were candidates for inclusion.
    candidate_edges: usize,
    /// Entries describing what was dropped.
    dropped_by_budget: Vec<GraphDbDroppedByBudget>,
    /// Human-readable notes about budgeting.
    diagnostics: Vec<String>,
}
5436
/// Knowledge-retrieval section of a `GraphDbReport`, recording the parameters
/// and size of a retrieval run.
///
/// NOTE(review): the values allowed for the string fields (`mode`,
/// `seed_kind`, `traversal`, the two boundaries) are set outside the visible
/// part of the file — confirm there before relying on them.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbKnowledgeRetrieval {
    /// Retrieval mode.
    mode: String,
    /// Query text.
    query: String,
    /// Kind of seed used.
    seed_kind: String,
    /// Maximum number of seeds.
    seed_limit: usize,
    /// Number of seeds actually used.
    seed_count: usize,
    /// Traversal depth.
    depth: usize,
    /// Result limit.
    limit: usize,
    /// Number of nodes in the result.
    node_count: usize,
    /// Number of edges in the result.
    edge_count: usize,
    /// Whether the result was cut short.
    truncated: bool,
    /// Description of the traversal used.
    traversal: String,
    /// Description of the freshness boundary.
    freshness_boundary: String,
    /// Description of the privacy boundary.
    privacy_boundary: String,
    /// Human-readable notes about the retrieval.
    diagnostics: Vec<String>,
}
5454
/// In-memory subgraph result: full node and edge records plus a truncation
/// flag and diagnostics. It is not serializable.
///
/// NOTE(review): presumably the output of a semantically seeded traversal —
/// confirm where it is constructed.
struct GraphDbSemanticSeededSubgraph {
    /// Nodes in the subgraph.
    nodes: Vec<SubstrateGraphNode>,
    /// Edges in the subgraph.
    edges: Vec<SubstrateGraphEdge>,
    /// Whether the subgraph was cut short.
    truncated: bool,
    /// Human-readable notes about how the subgraph was built.
    diagnostics: Vec<String>,
}
5461
/// Local alias for `resolution::NeighborhoodRankingGate`, the type of
/// `GraphDbReport::neighborhood_ranking_gate`.
type GraphDbNeighborhoodRankingGate = resolution::NeighborhoodRankingGate;
5463
/// Top-level serializable result of a graph-db query.
///
/// `root`, `backend`, `query` and `freshness` are always serialized. Every
/// other section is optional and is left out of the output when it is `None`
/// or an empty list, so one report shape can serve the different query kinds.
#[derive(Serialize)]
struct GraphDbReport {
    /// Project root the query ran against.
    root: String,
    /// Optional scope name.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Name of the backend that answered the query.
    backend: String,
    /// Text identifying the query that was run.
    query: String,
    /// Freshness of the graph data used.
    freshness: GraphDbFreshnessReport,
    /// Readiness verdict, when one was computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    readiness: Option<GraphEffectivenessReadiness>,
    /// Schema listing, when requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    schema: Option<GraphDbSchema>,
    /// Single node result.
    #[serde(skip_serializing_if = "Option::is_none")]
    node: Option<SubstrateTerseGraphNode>,
    /// Single edge result.
    #[serde(skip_serializing_if = "Option::is_none")]
    edge: Option<SubstrateTerseGraphEdge>,
    /// Node list result.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    nodes: Vec<SubstrateTerseGraphNode>,
    /// Edge list result.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    edges: Vec<SubstrateTerseGraphEdge>,
    /// Ranked neighbor results.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ranked_neighbors: Vec<GraphDbRankedNeighbor>,
    /// Semantically related items.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    semantic_related: Vec<SemanticRelatedItem>,
    /// Ranking gate information, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    neighborhood_ranking_gate: Option<GraphDbNeighborhoodRankingGate>,
    /// Ranked-versus-unranked comparison metrics, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    ranked_neighborhood_comparison: Option<GraphDbRankedNeighborhoodComparison>,
    /// Knowledge-retrieval details, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    knowledge_retrieval: Option<GraphDbKnowledgeRetrieval>,
    /// Output-budget details, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    output_budget: Option<GraphDbOutputBudgetReport>,
    /// Path result, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<substrate::GraphPath>,
    /// Paging details, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    page: Option<GraphDbPageReport>,
    /// Warnings raised while answering the query.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
5503
/// In-memory, read-only graph store built from projection rows for
/// evaluating an experimental backend.
///
/// All indexes are built up front by `from_rows`; the mutating `GraphStore`
/// methods reject every call.
struct ExperimentalReadOnlyGraphStore {
    /// Backend this store stands in for; its name appears in error messages.
    backend: GraphDbExperimentalBackend,
    /// Nodes keyed by node id.
    nodes: BTreeMap<String, SubstrateGraphNode>,
    /// Edges keyed by the value of `graph_db_edge_key` for the edge.
    edges: BTreeMap<String, SubstrateGraphEdge>,
    /// Sorted node ids grouped by node kind.
    node_ids_by_kind: BTreeMap<String, Vec<String>>,
    /// Keys into `edges` grouped by source node id, ordered by target id,
    /// then edge kind, then key.
    outgoing_edge_keys_by_from: BTreeMap<String, Vec<String>>,
}
5511
5512impl ExperimentalReadOnlyGraphStore {
5513 fn from_rows(backend: GraphDbExperimentalBackend, rows: &ConvexProjectionRows) -> Result<Self> {
5514 validate_convex_projection_rows(rows)?;
5515 let nodes = rows
5516 .nodes
5517 .iter()
5518 .map(|row| {
5519 let node = SubstrateGraphNode {
5520 id: row.external_id.clone(),
5521 kind: row.kind.clone(),
5522 label: row.label.clone(),
5523 properties: row.properties.clone(),
5524 provenance: row.provenance.clone(),
5525 freshness: row.freshness.clone(),
5526 };
5527 (node.id.clone(), node)
5528 })
5529 .collect::<BTreeMap<_, _>>();
5530 let edges = rows
5531 .edges
5532 .iter()
5533 .map(|row| {
5534 let edge = SubstrateGraphEdge {
5535 id: row.edge_key.clone(),
5536 from_id: row.from_external_id.clone(),
5537 to_id: row.to_external_id.clone(),
5538 kind: row.kind.clone(),
5539 properties: row.properties.clone(),
5540 provenance: row.provenance.clone(),
5541 freshness: row.freshness.clone(),
5542 };
5543 (graph_db_edge_key(&edge), edge)
5544 })
5545 .collect::<BTreeMap<_, _>>();
5546 let mut node_ids_by_kind = BTreeMap::<String, Vec<String>>::new();
5547 for node in nodes.values() {
5548 node_ids_by_kind
5549 .entry(node.kind.clone())
5550 .or_default()
5551 .push(node.id.clone());
5552 }
5553 for ids in node_ids_by_kind.values_mut() {
5554 ids.sort();
5555 }
5556 let mut outgoing_edge_keys_by_from = BTreeMap::<String, Vec<String>>::new();
5557 for edge in edges.values() {
5558 outgoing_edge_keys_by_from
5559 .entry(edge.from_id.clone())
5560 .or_default()
5561 .push(graph_db_edge_key(edge));
5562 }
5563 for edge_keys in outgoing_edge_keys_by_from.values_mut() {
5564 edge_keys.sort_by(|left_key, right_key| {
5565 let left = &edges[left_key];
5566 let right = &edges[right_key];
5567 left.to_id
5568 .cmp(&right.to_id)
5569 .then(left.kind.cmp(&right.kind))
5570 .then(left_key.cmp(right_key))
5571 });
5572 }
5573 Ok(Self {
5574 backend,
5575 nodes,
5576 edges,
5577 node_ids_by_kind,
5578 outgoing_edge_keys_by_from,
5579 })
5580 }
5581}
5582
5583impl GraphStore for ExperimentalReadOnlyGraphStore {
5584 fn upsert_node(&self, _node: &SubstrateGraphNode) -> Result<()> {
5585 bail!("{} backend-eval adapter is read-only", self.backend.name())
5586 }
5587
5588 fn upsert_edge(&self, _edge: &SubstrateGraphEdge) -> Result<()> {
5589 bail!("{} backend-eval adapter is read-only", self.backend.name())
5590 }
5591
5592 fn delete_node(&self, _id: &str) -> Result<usize> {
5593 bail!("{} backend-eval adapter is read-only", self.backend.name())
5594 }
5595
5596 fn delete_edge(&self, _from_id: &str, _to_id: &str, _kind: &str) -> Result<usize> {
5597 bail!("{} backend-eval adapter is read-only", self.backend.name())
5598 }
5599
5600 fn node(&self, id: &str) -> Result<Option<SubstrateGraphNode>> {
5601 Ok(self.nodes.get(id).cloned())
5602 }
5603
5604 fn all_nodes(&self) -> Result<Vec<SubstrateGraphNode>> {
5605 Ok(self.nodes.values().cloned().collect())
5606 }
5607
5608 fn all_edges(&self) -> Result<Vec<SubstrateGraphEdge>> {
5609 let mut edges = self.edges.values().cloned().collect::<Vec<_>>();
5610 edges.sort_by(|left, right| {
5611 left.from_id
5612 .cmp(&right.from_id)
5613 .then(left.kind.cmp(&right.kind))
5614 .then(left.to_id.cmp(&right.to_id))
5615 });
5616 Ok(edges)
5617 }
5618
5619 fn graph_counts(&self) -> Result<(usize, usize)> {
5620 Ok((self.nodes.len(), self.edges.len()))
5621 }
5622
5623 fn sample_edge(&self, kind: Option<&str>) -> Result<Option<SubstrateGraphEdge>> {
5624 let mut edges = self
5625 .edges
5626 .values()
5627 .filter(|edge| edge.from_id != edge.to_id)
5628 .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
5629 .cloned()
5630 .collect::<Vec<_>>();
5631 edges.sort_by(|left, right| {
5632 left.from_id
5633 .cmp(&right.from_id)
5634 .then(left.kind.cmp(&right.kind))
5635 .then(left.to_id.cmp(&right.to_id))
5636 });
5637 Ok(edges.into_iter().next())
5638 }
5639
5640 fn sample_edge_with_property(
5641 &self,
5642 ) -> Result<Option<(SubstrateGraphEdge, GraphPropertyFilter)>> {
5643 Ok(self
5644 .edges
5645 .values()
5646 .filter(|edge| edge.from_id != edge.to_id)
5647 .filter_map(|edge| {
5648 edge.properties.iter().next().map(|(key, value)| {
5649 (
5650 edge,
5651 GraphPropertyFilter {
5652 key: key.clone(),
5653 value: value.clone(),
5654 },
5655 )
5656 })
5657 })
5658 .min_by(|(left_edge, left_filter), (right_edge, right_filter)| {
5659 left_filter
5660 .key
5661 .cmp(&right_filter.key)
5662 .then(left_filter.value.cmp(&right_filter.value))
5663 .then_with(|| graph_db_edge_key(left_edge).cmp(&graph_db_edge_key(right_edge)))
5664 })
5665 .map(|(edge, filter)| (edge.clone(), filter)))
5666 }
5667
5668 fn nodes_by_kind(&self, kind: &str) -> Result<Vec<SubstrateGraphNode>> {
5669 Ok(self
5670 .node_ids_by_kind
5671 .get(kind)
5672 .into_iter()
5673 .flatten()
5674 .filter_map(|id| self.nodes.get(id).cloned())
5675 .collect())
5676 }
5677
5678 fn outgoing_edges(&self, from_id: &str, kind: Option<&str>) -> Result<Vec<SubstrateGraphEdge>> {
5679 Ok(self
5680 .outgoing_edge_keys_by_from
5681 .get(from_id)
5682 .into_iter()
5683 .flatten()
5684 .filter_map(|key| self.edges.get(key))
5685 .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
5686 .cloned()
5687 .collect())
5688 }
5689
5690 fn edges_between_nodes(&self, node_ids: &BTreeSet<String>) -> Result<Vec<SubstrateGraphEdge>> {
5691 Ok(self
5692 .edges
5693 .values()
5694 .filter(|edge| node_ids.contains(&edge.from_id) && node_ids.contains(&edge.to_id))
5695 .cloned()
5696 .collect())
5697 }
5698
5699 fn shortest_path(
5700 &self,
5701 from_id: &str,
5702 to_id: &str,
5703 kind: Option<&str>,
5704 ) -> Result<Option<substrate::GraphPath>> {
5705 if from_id == to_id {
5706 return Ok(Some(substrate::GraphPath {
5707 nodes: vec![from_id.to_string()],
5708 hops: 0,
5709 }));
5710 }
5711
5712 let mut queue = VecDeque::new();
5713 let mut parent = BTreeMap::<String, String>::new();
5714 parent.insert(from_id.to_string(), String::new());
5715 queue.push_back(from_id.to_string());
5716
5717 while let Some(current) = queue.pop_front() {
5718 for edge in self.outgoing_edges(¤t, kind)? {
5719 if parent.contains_key(&edge.to_id) {
5720 continue;
5721 }
5722 parent.insert(edge.to_id.clone(), current.clone());
5723 if edge.to_id == to_id {
5724 let mut nodes = vec![to_id.to_string()];
5725 let mut cursor = to_id;
5726 while let Some(previous) = parent.get(cursor) {
5727 if previous.is_empty() {
5728 break;
5729 }
5730 nodes.push(previous.clone());
5731 cursor = previous;
5732 }
5733 nodes.reverse();
5734 return Ok(Some(substrate::GraphPath {
5735 hops: nodes.len().saturating_sub(1),
5736 nodes,
5737 }));
5738 }
5739 queue.push_back(edge.to_id);
5740 }
5741 }
5742
5743 Ok(None)
5744 }
5745
5746 fn reachable_nodes_by_kinds(
5747 &self,
5748 from_id: &str,
5749 kinds: &[&str],
5750 depth: usize,
5751 limit: usize,
5752 ) -> Result<BTreeMap<String, Vec<(SubstrateGraphNode, substrate::GraphPath)>>> {
5753 let requested = kinds.iter().copied().collect::<BTreeSet<_>>();
5754 let mut rows = requested
5755 .iter()
5756 .map(|kind| {
5757 (
5758 (*kind).to_string(),
5759 BTreeMap::<String, (SubstrateGraphNode, substrate::GraphPath)>::new(),
5760 )
5761 })
5762 .collect::<BTreeMap<_, _>>();
5763 if requested.is_empty() {
5764 return Ok(BTreeMap::new());
5765 }
5766
5767 let mut seen = BTreeSet::from([from_id.to_string()]);
5768 let mut queue = VecDeque::from([(from_id.to_string(), vec![from_id.to_string()])]);
5769 while let Some((current, path)) = queue.pop_front() {
5770 let current_depth = path.len().saturating_sub(1);
5771 if current_depth >= depth {
5772 continue;
5773 }
5774 for edge in self.outgoing_edges(¤t, None)? {
5775 if !seen.insert(edge.to_id.clone()) {
5776 continue;
5777 }
5778 let Some(node) = self.nodes.get(&edge.to_id).cloned() else {
5779 continue;
5780 };
5781 let mut next_path = path.clone();
5782 next_path.push(edge.to_id.clone());
5783 let graph_path = substrate::GraphPath {
5784 hops: next_path.len().saturating_sub(1),
5785 nodes: next_path.clone(),
5786 };
5787 if requested.contains(node.kind.as_str()) {
5788 rows.entry(node.kind.clone())
5789 .or_default()
5790 .entry(node.id.clone())
5791 .or_insert((node.clone(), graph_path));
5792 }
5793 queue.push_back((edge.to_id, next_path));
5794 }
5795 }
5796
5797 Ok(rows
5798 .into_iter()
5799 .map(|(kind, values)| {
5800 let mut values = values.into_values().collect::<Vec<_>>();
5801 values.sort_by(|(left_node, left_path), (right_node, right_path)| {
5802 left_path
5803 .hops
5804 .cmp(&right_path.hops)
5805 .then(left_node.label.cmp(&right_node.label))
5806 .then(left_node.id.cmp(&right_node.id))
5807 });
5808 if limit > 0 && values.len() > limit {
5809 values.truncate(limit);
5810 }
5811 (kind, values)
5812 })
5813 .collect())
5814 }
5815}
5816
// Tunables and cache-version tags for backend-eval and conflict-matrix code.
// NOTE(review): their use sites are outside this section; the descriptions
// below are inferred from the names and values — confirm against callers.

/// Maximum hop count named for backend-eval path queries.
pub(crate) const GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS: usize = 64;
/// Larger hop budgets (128, 256, 512) named as "extended" path tiers.
pub(crate) const GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS: [usize; 3] = [128, 256, 512];
/// Hop budget named for direct (single-hop) path probes.
pub(crate) const GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS: usize = 1;
/// Allowed regression, in percent.
const GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT: f64 = 10.0;
/// Row unit used for metric normalization (presumably "per 1000 rows").
pub(crate) const GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT: f64 = 1000.0;
/// Minimum number of sample runs.
const GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS: usize = 3;
/// Version tag for the conflict-matrix preparation cache.
const CONFLICT_MATRIX_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-prep-v1";
/// Version tag for the conflict-matrix graph preparation cache.
const CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-graph-prep-v1";
/// Version tag for the backend-eval full-projection cache.
const GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION: &str = "backend-eval-full-projection-v5";
5826
/// One named, timed phase of a run.
///
/// Used as the element type of the `phase_timings` lists in
/// `GraphDbBackendEvalReport` and `GraphDbRefreshSummary`.
#[derive(Clone, Serialize, Deserialize)]
pub(crate) struct GraphDbBackendEvalPhaseTiming {
    /// Phase name.
    name: String,
    /// Phase duration (microseconds, per the field name).
    duration_micros: u128,
    /// Free-form detail text for the phase.
    detail: String,
}
5833
/// Serialized form of a cached full graph projection.
///
/// NOTE(review): presumably `version` is compared against
/// `GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION` on load — confirm.
#[derive(Serialize, Deserialize)]
struct GraphDbBackendEvalFullProjectionCache {
    /// Cache format version tag.
    version: String,
    /// Cache key for this entry.
    key: String,
    /// Source watermark recorded with the projection.
    source_watermark: String,
    /// The cached projection itself.
    projection: GraphProjection,
    /// Warnings stored alongside the projection.
    warnings: Vec<String>,
}
5842
/// Counters describing one full-projection cache access.
///
/// `Default` yields a miss with every counter at zero.
#[derive(Clone, Default)]
struct GraphDbBackendEvalFullProjectionCacheStats {
    /// Whether the cache was hit.
    hit: bool,
    /// Size on disk, in bytes.
    disk_bytes: u64,
    /// Size of the JSON payload, in bytes.
    json_bytes: u64,
    /// Number of cache files pruned.
    pruned_files: usize,
    /// Total bytes pruned.
    pruned_bytes: u64,
}
5851
/// Serializable row describing one source file for watermarking.
#[derive(Serialize)]
struct GraphDbBackendEvalRawSourceWatermarkRow {
    /// Path of the source file.
    path: String,
    /// File size in bytes.
    bytes: u64,
    /// Hash of the file content.
    content_hash: String,
}
5858
/// Source watermark for a full projection, with a human-readable detail string.
#[derive(Clone)]
struct GraphDbBackendEvalFullProjectionSourceWatermark {
    /// The watermark value.
    value: String,
    /// Explanatory detail about how the value was obtained.
    detail: String,
}
5864
/// Serializable record of the settings used for a backend-eval run.
///
/// Embedded as `config` in `GraphDbBackendEvalReport`.
/// NOTE(review): field meanings below are read from the field names only.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalConfig {
    // Synthetic dataset sizing.
    high_degree_nodes: usize,
    high_degree_fanout: usize,
    deep_chain_nodes: usize,
    deep_chain_fanout: usize,
    // Query limits.
    depth: usize,
    limit: usize,
    impact_limit: usize,
    // Path-query hop budgets and policy descriptions.
    path_max_hops: usize,
    path_direct_hop_budget: usize,
    path_deep_chain_hop_budget: usize,
    path_extended_hop_budgets: Vec<usize>,
    path_hop_policy: String,
    path_probe_strategy: String,
    path_query_plan_checks: Vec<String>,
    // Full-projection profile.
    full_projection_enabled: bool,
    full_projection_profile: String,
    // Row unit used when normalizing metrics.
    normalization_row_unit: usize,
}
5885
/// Result signature of one backend-eval operation, held as JSON.
///
/// NOTE(review): presumably compared across backends for parity — confirm.
#[derive(Clone)]
struct GraphDbBackendEvalSignature {
    /// Name of the operation that produced the value.
    operation: String,
    /// JSON value representing the operation's result.
    value: serde_json::Value,
}
5891
/// Serializable outcome of a single backend-eval operation.
#[derive(Serialize)]
struct GraphDbBackendEvalOperation {
    /// Operation name.
    name: String,
    /// Whether the backend supports the operation.
    supported: bool,
    /// Status label for the operation.
    status: String,
    /// Duration (microseconds, per the field name).
    duration_micros: u128,
    /// Row count, omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    rows: Option<usize>,
    /// Error text, omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
}
5903
/// Serializable parity verdict for a backend relative to SQLite.
#[derive(Serialize)]
struct GraphDbBackendEvalParity {
    /// Whether the backend's results match SQLite's.
    matches_sqlite: bool,
    /// Diagnostic messages accompanying the verdict.
    diagnostics: Vec<String>,
}
5909
/// Serializable per-backend section of a backend-eval dataset report.
#[derive(Serialize)]
struct GraphDbBackendEvalBackendReport {
    /// Backend name.
    backend: String,
    /// Adapter label.
    adapter: String,
    /// Whether the adapter is read-only.
    read_only: bool,
    /// Description of how the projection is loaded.
    projection_load: String,
    /// Per-operation outcomes.
    operations: Vec<GraphDbBackendEvalOperation>,
    /// Total time (microseconds, per the field name).
    total_micros: u128,
    /// Parity verdict against SQLite.
    parity: GraphDbBackendEvalParity,
    /// Description of the backend's locking behavior.
    lock_behavior: String,
    /// Description of install/portability characteristics.
    install_portability: String,
}
5922
/// Serializable results for one evaluated dataset across backends.
#[derive(Serialize)]
struct GraphDbBackendEvalDataset {
    /// Dataset name.
    name: String,
    /// Number of targets evaluated.
    target_count: usize,
    /// Node count of the dataset.
    nodes: usize,
    /// Edge count of the dataset.
    edges: usize,
    /// One report per backend.
    backends: Vec<GraphDbBackendEvalBackendReport>,
}
5931
/// Serializable promotion decision for one backend.
#[derive(Serialize)]
struct GraphDbBackendPromotionDecision {
    /// Backend name.
    backend: String,
    /// Decision label.
    decision: String,
    /// Reasons supporting the decision.
    reasons: Vec<String>,
    /// Gate the decision was evaluated against.
    gate: GraphDbBackendPromotionGate,
}
5939
/// Serializable description of the backend-eval performance gate.
///
/// Embedded as `performance_gate` in `GraphDbBackendEvalReport`.
/// NOTE(review): field meanings are read from the field names only.
#[derive(Serialize)]
struct GraphDbBackendEvalPerformanceGate {
    // Profiles and fixture the gate refers to.
    baseline_fixture: String,
    ci_profile: String,
    opt_in_real_profile: String,
    full_projection_cache_hit_gate: String,
    // Thresholds.
    allowed_regression_percent: f64,
    minimum_sample_runs: usize,
    // Metric naming and commands for reproducing the measurement.
    normalized_metric_unit: String,
    required_metrics: Vec<String>,
    digest_command: String,
    repeated_sample_command: String,
    // Nested gates.
    hop_cap_promotion: GraphDbHopCapPromotionGate,
    backend_adapter_spike: GraphDbBackendAdapterSpikeGate,
}
5955
/// Serializable gate describing when the default path hop cap may be raised.
///
/// Embedded as `hop_cap_promotion` in `GraphDbBackendEvalPerformanceGate`.
#[derive(Serialize)]
struct GraphDbHopCapPromotionGate {
    /// Gate status label.
    status: String,
    /// Current default hop cap.
    current_default_hops: usize,
    /// Candidate hop tiers under consideration.
    candidate_hop_tiers: Vec<usize>,
    /// Backend required for the promotion.
    required_backend: String,
    /// Workloads that must be measured.
    required_workloads: Vec<String>,
    /// Metrics that must be reported.
    required_metrics: Vec<String>,
    /// Allowed regression, in percent.
    allowed_regression_percent: f64,
    /// Minimum number of sample runs.
    minimum_sample_runs: usize,
    /// Textual decision rule.
    decision_rule: String,
}
5968
/// Serializable gate describing the backend adapter spike evaluation.
///
/// Embedded as `backend_adapter_spike` in `GraphDbBackendEvalPerformanceGate`.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeGate {
    /// Gate status label.
    status: String,
    /// Backends being considered.
    candidate_backends: Vec<GraphDbBackendAdapterSpikeCandidate>,
    /// Workloads that must be measured.
    required_workloads: Vec<String>,
    /// Checks that must pass.
    required_checks: Vec<String>,
    /// Textual decision rule.
    decision_rule: String,
    /// Textual plan for gathering evidence.
    evidence_plan: String,
}
5978
/// Serializable description of one candidate backend in the adapter spike gate.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeCandidate {
    /// Backend name.
    backend: String,
    /// Label of the adapter.
    adapter_label: String,
    /// Description of how the projection is loaded.
    projection_load: String,
    /// Description of the backend's locking behavior.
    lock_behavior: String,
    /// Description of install/portability characteristics.
    install_portability: String,
}
5987
/// Top-level serializable report of a backend-eval run.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalReport {
    /// Root path the run was executed against.
    root: String,
    /// Scope of the run; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Label for the run.
    label: String,
    /// Name of the baseline backend.
    baseline_backend: String,
    /// Names of the candidate backends.
    candidates: Vec<String>,
    /// Targets that were evaluated.
    targets: Vec<String>,
    /// Settings used for the run.
    config: GraphDbBackendEvalConfig,
    /// Timed phases of the run.
    phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
    /// Per-dataset results.
    datasets: Vec<GraphDbBackendEvalDataset>,
    /// Per-backend promotion decisions.
    promotion: Vec<GraphDbBackendPromotionDecision>,
    /// Performance gate description.
    performance_gate: GraphDbBackendEvalPerformanceGate,
    /// Named numeric metrics, serialized in key order (`BTreeMap`).
    metrics: BTreeMap<String, f64>,
    /// Command for producing the metric digest.
    metric_digest_command: String,
    /// Warnings raised during the run.
    warnings: Vec<String>,
}
6006
/// Result of one doctor check.
///
/// When built by `graph_db_doctor_check`, a check fails closed exactly when it
/// has at least one diagnostic, `status` is `"fail_closed"` or `"ok"`
/// accordingly, and `repair_commands` is empty for passing checks.
#[derive(Clone, Debug, Serialize)]
struct GraphDbDoctorCheck {
    /// Check name.
    name: String,
    /// Status label for the check.
    status: String,
    /// Whether this check forces the report to fail closed.
    fail_closed: bool,
    /// Diagnostic messages explaining the failure.
    diagnostics: Vec<String>,
    /// Commands suggested to repair the failure.
    repair_commands: Vec<String>,
}
6015
/// Serializable doctor report aggregating individual checks.
///
/// `status`, `fail_closed` and `repair_commands` are derived from `checks`
/// by `GraphDbDoctorReport::finalize`.
#[derive(Serialize)]
pub(crate) struct GraphDbDoctorReport {
    /// Root path, lossily converted to a string.
    root: String,
    /// Scope; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Backend name.
    backend: String,
    /// Path of the graph database, lossily converted to a string.
    graph_db: String,
    /// Path of the Convex snapshot; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    convex_snapshot: Option<String>,
    /// `"fail_closed"` or `"ok"` after `finalize`.
    status: String,
    /// True when any check fails closed (after `finalize`).
    fail_closed: bool,
    /// Individual checks, in the order they were pushed.
    checks: Vec<GraphDbDoctorCheck>,
    /// Sorted, de-duplicated union of the checks' repair commands.
    repair_commands: Vec<String>,
    /// Required Convex indexes; omitted from output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    required_indexes: Vec<ConvexRequiredIndex>,
}
6032
/// Serializable counters summarizing a drift report.
///
/// Embedded as `summary` in `GraphDbDriftReport`.
/// NOTE(review): presumably each count mirrors the length of the matching
/// list in the report where one exists — confirm at the construction site.
#[derive(Serialize)]
struct GraphDbDriftSummary {
    // Pending changes.
    node_upserts: usize,
    edge_upserts: usize,
    node_tombstones: usize,
    edge_tombstones: usize,
    // Stale rows and metadata.
    stale_nodes: usize,
    stale_edges: usize,
    stale_projection_metadata: usize,
    // Integrity failures.
    duplicate_failures: usize,
    orphan_failures: usize,
    missing_required_indexes: usize,
}
6046
/// Serializable report of drift between the local graph database and a
/// Convex snapshot.
#[derive(Serialize)]
struct GraphDbDriftReport {
    /// Root path.
    root: String,
    /// Scope; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Path of the graph database.
    graph_db: String,
    /// Path of the Convex snapshot.
    convex_snapshot: String,
    /// Status label.
    status: String,
    /// Whether graph reads are allowed given the drift state.
    graph_reads_allowed: bool,
    /// Projection version.
    projection_version: String,
    /// Hash of the local projection, serialized as `null` when absent.
    local_hash: Option<String>,
    /// Hash of the snapshot projection, serialized as `null` when absent.
    snapshot_hash: Option<String>,
    /// Counter summary of the lists below.
    summary: GraphDbDriftSummary,
    // Identifiers of rows in each drift category.
    node_upserts: Vec<String>,
    edge_upserts: Vec<String>,
    node_tombstones: Vec<String>,
    edge_tombstones: Vec<String>,
    stale_nodes: Vec<String>,
    stale_edges: Vec<String>,
    /// Diagnostic messages.
    diagnostics: Vec<String>,
    /// Suggested follow-up commands.
    next_commands: Vec<String>,
    /// Required Convex indexes (always serialized, even when empty).
    required_indexes: Vec<ConvexRequiredIndex>,
    /// Warnings; omitted from output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6072
/// Tombstone row counts, split by row kind.
///
/// As produced by `sqlite_tombstone_counts`, `total` is `nodes + edges`.
#[derive(Clone, Serialize)]
struct GraphDbTombstoneCounts {
    /// Number of node tombstones.
    nodes: usize,
    /// Number of edge tombstones.
    edges: usize,
    /// Combined tombstone count.
    total: usize,
}
6079
/// Row and size counters for a graph database.
///
/// Produced by `sqlite_graph_counts` (live scan) or
/// `sqlite_graph_counts_from_cache` (cached stats row).
#[derive(Clone, Serialize)]
struct GraphDbOperatorCounts {
    /// Number of live node rows.
    nodes: usize,
    /// Number of live edge rows.
    edges: usize,
    /// Tombstone counts.
    tombstones: GraphDbTombstoneCounts,
    /// Database size in bytes; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes: Option<u64>,
    /// Freelist size in bytes; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    freelist_bytes: Option<u64>,
}
6090
/// Compaction recommendation for a graph database.
///
/// Field semantics below describe values produced by
/// `graph_db_compaction_policy`.
#[derive(Clone, Serialize)]
struct GraphDbCompactionPolicy {
    /// `"recommended"` or `"not_needed"`.
    status: String,
    /// Total retained tombstone rows.
    tombstone_scan_rows: usize,
    /// Live node rows plus live edge rows.
    live_rows: usize,
    /// Database size in bytes, serialized as `null` when unknown.
    file_size_bytes: Option<u64>,
    /// Freelist size in bytes, serialized as `null` when unknown.
    freelist_bytes: Option<u64>,
    /// True when the caller confirmed tombstones may be pruned.
    safe_to_prune_tombstones: bool,
    /// True when tombstones exist and pruning was not confirmed.
    requires_convex_reconciliation: bool,
    /// Suggested commands.
    recommendations: Vec<String>,
    /// Human-readable figures backing the status.
    proof: Vec<String>,
}
6103
/// Serializable summary of a graph database refresh.
///
/// NOTE(review): counter meanings are read from the field names only.
#[derive(Serialize)]
pub(crate) struct GraphDbRefreshSummary {
    /// Scope that was refreshed.
    scope: String,
    /// Projection version written by the refresh.
    projection_version: String,
    /// Refresh mode label.
    mode: String,
    /// Source watermark; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    source_watermark: Option<String>,
    // Row-level change counters.
    tombstoned_nodes: usize,
    tombstoned_edges: usize,
    upserted_nodes: usize,
    upserted_edges: usize,
    unchanged_nodes: usize,
    unchanged_edges: usize,
    // Property-level change counters.
    upserted_properties: usize,
    unchanged_properties: usize,
    deleted_properties: usize,
    // Deletions and tombstone pruning.
    deleted_nodes: usize,
    deleted_edges: usize,
    pruned_tombstones: usize,
    /// Database size before the refresh; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes_before: Option<u64>,
    /// Database size after the refresh; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes_after: Option<u64>,
    /// Timed phases; omitted from output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
}
6130
/// Serializable report for an operator-facing graph database command.
#[derive(Serialize)]
struct GraphDbOperatorReport {
    /// Root path.
    root: String,
    /// Scope; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Path of the graph database.
    graph_db: String,
    /// Name of the operation that was run.
    operation: String,
    /// Status label.
    status: String,
    /// Whether the graph database is materialized.
    materialized: bool,
    /// Freshness of the projection.
    freshness: GraphDbFreshnessReport,
    /// Readiness assessment.
    readiness: GraphEffectivenessReadiness,
    /// Row and size counters.
    counts: GraphDbOperatorCounts,
    /// Refresh summary; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    refresh: Option<GraphDbRefreshSummary>,
    /// Compaction recommendation.
    compaction: GraphDbCompactionPolicy,
    /// Read-only recovery details; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    recovery: Option<index::ReadOnlyRecovery>,
    /// Suggested follow-up commands.
    next_commands: Vec<String>,
    /// Warnings; omitted from output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6152
/// Serializable before/after report of a compaction run.
#[derive(Serialize)]
pub(crate) struct GraphDbCompactionReport {
    /// Root path.
    root: String,
    /// Scope; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Path of the graph database.
    graph_db: String,
    /// Whether the compaction was applied.
    applied: bool,
    /// Number of tombstones pruned.
    pruned_tombstones: usize,
    /// Counters captured before the run.
    counts_before: GraphDbOperatorCounts,
    /// Counters captured after the run.
    counts_after: GraphDbOperatorCounts,
    /// Compaction policy evaluated before the run.
    compaction_before: GraphDbCompactionPolicy,
    /// Compaction policy evaluated after the run.
    compaction_after: GraphDbCompactionPolicy,
    /// Bytes reclaimed; signed, so a size increase can be represented.
    reclaimed_bytes: i64,
    /// Suggested follow-up commands.
    next_commands: Vec<String>,
    /// Warnings; omitted from output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6170
/// One path entry in an evidence report's `shortest_paths` list.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidencePath {
    /// Identifier of the node the path leads to.
    to: String,
    /// Kind of that node.
    kind: String,
    /// Label of that node.
    label: String,
    /// The path itself; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<substrate::GraphPath>,
    /// Expansion hint; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    expand: Option<String>,
}
6181
/// Pointer to the test fixture that covers an evidence report's contract.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFixtureCoverage {
    /// Name of the covering test.
    test: String,
    /// Name of the fixture it uses.
    fixture: String,
    /// Assertions the test makes.
    assertions: Vec<String>,
}
6188
/// Evidence packet for a single graph target; round-trips through serde.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidenceReport {
    /// Root path.
    root: String,
    /// Scope; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Backend name.
    backend: String,
    /// Version of the evidence contract.
    contract_version: String,
    /// Target the evidence was gathered for.
    target: String,
    /// Identifier of this packet.
    packet_id: String,
    /// Hash of the projection; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    /// Freshness of the projection.
    freshness: GraphDbFreshnessReport,
    /// Terse form of the target node.
    target_node: SubstrateTerseGraphNode,
    // Related nodes, grouped by role.
    worker_context: Vec<SubstrateTerseGraphNode>,
    source_handles: Vec<SubstrateTerseGraphNode>,
    worker_results: Vec<SubstrateTerseGraphNode>,
    semantic_related: Vec<SubstrateTerseGraphNode>,
    /// Paths from the target to related nodes.
    shortest_paths: Vec<GraphDbEvidencePath>,
    /// Output budget report; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    output_budget: Option<GraphDbOutputBudgetReport>,
    /// Whether the packet was truncated; defaults to `false` when absent on input.
    #[serde(default)]
    truncated: bool,
    /// Cursor for the next page; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
    /// Suggested follow-up commands.
    next_commands: Vec<String>,
    /// Commands that replay this packet.
    replay_commands: Vec<String>,
    /// Commands suggested for repair.
    repair_commands: Vec<String>,
    /// Fixture coverage for the contract.
    fixture_coverage: GraphDbFixtureCoverage,
    /// Warnings; omitted from output when empty and defaulted on input.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6220
/// Borrowed inputs for building an evidence report against any `GraphStore`.
///
/// All references share the lifetime `'a`; `freshness` and `warnings` are owned.
pub(crate) struct GraphDbEvidenceInput<'a, S: GraphStore> {
    /// Root path.
    root: &'a Path,
    /// Optional scope.
    scope: Option<&'a str>,
    /// Backend name.
    backend: &'a str,
    /// Target to gather evidence for.
    target: &'a str,
    /// Traversal depth.
    depth: usize,
    /// Result limit.
    limit: usize,
    /// Optional pagination cursor.
    cursor: Option<&'a str>,
    /// Store to query.
    store: &'a S,
    /// Freshness of the projection being read.
    freshness: GraphDbFreshnessReport,
    /// Warnings accumulated before the report is built.
    warnings: Vec<String>,
}
6233
6234impl GraphDbDoctorReport {
6235 fn new(
6236 root: &Path,
6237 scope: Option<&str>,
6238 backend: &str,
6239 graph_db: &Path,
6240 convex_snapshot: Option<&Path>,
6241 ) -> Self {
6242 Self {
6243 root: root.to_string_lossy().to_string(),
6244 scope: scope.map(str::to_string),
6245 backend: backend.to_string(),
6246 graph_db: graph_db.to_string_lossy().to_string(),
6247 convex_snapshot: convex_snapshot.map(|path| path.to_string_lossy().to_string()),
6248 status: "ok".to_string(),
6249 fail_closed: false,
6250 checks: Vec::new(),
6251 repair_commands: Vec::new(),
6252 required_indexes: Vec::new(),
6253 }
6254 }
6255
6256 fn push_check(&mut self, check: GraphDbDoctorCheck) {
6257 self.checks.push(check);
6258 }
6259
6260 fn finalize(&mut self) {
6261 self.fail_closed = self.checks.iter().any(|check| check.fail_closed);
6262 self.status = if self.fail_closed {
6263 "fail_closed"
6264 } else {
6265 "ok"
6266 }
6267 .to_string();
6268 let mut commands = BTreeSet::new();
6269 for check in &self.checks {
6270 commands.extend(check.repair_commands.iter().cloned());
6271 }
6272 self.repair_commands = commands.into_iter().collect();
6273 }
6274
6275 fn summary(&self) -> String {
6276 self.checks
6277 .iter()
6278 .filter(|check| check.fail_closed)
6279 .flat_map(|check| check.diagnostics.iter())
6280 .take(3)
6281 .cloned()
6282 .collect::<Vec<_>>()
6283 .join("; ")
6284 }
6285}
6286
6287fn graph_db_doctor_check(
6288 name: impl Into<String>,
6289 diagnostics: Vec<String>,
6290 repair_commands: Vec<String>,
6291) -> GraphDbDoctorCheck {
6292 let fail_closed = !diagnostics.is_empty();
6293 GraphDbDoctorCheck {
6294 name: name.into(),
6295 status: if fail_closed { "fail_closed" } else { "ok" }.to_string(),
6296 fail_closed,
6297 diagnostics,
6298 repair_commands: if fail_closed {
6299 repair_commands
6300 } else {
6301 Vec::new()
6302 },
6303 }
6304}
6305
6306pub(crate) fn graph_db_scope_arg(scope: Option<&str>) -> String {
6307 scope
6308 .map(|scope| format!(" --scope {}", shell_quote(scope)))
6309 .unwrap_or_default()
6310}
6311
6312fn graph_db_refresh_command(root: &Path, scope: Option<&str>) -> String {
6313 format!(
6314 "tsift graph-db --path {}{} refresh --json",
6315 shell_quote(root.to_string_lossy().as_ref()),
6316 graph_db_scope_arg(scope)
6317 )
6318}
6319
/// Renders the command used to rebuild the graph database.
///
/// Currently identical to `graph_db_refresh_command`: it delegates to it
/// with the same arguments.
fn graph_db_rebuild_command(root: &Path, scope: Option<&str>) -> String {
    graph_db_refresh_command(root, scope)
}
6323
6324fn graph_db_backup_rebuild_command(root: &Path, scope: Option<&str>, graph_db: &Path) -> String {
6325 let backup = format!("{}.bak", graph_db.to_string_lossy());
6326 format!(
6327 "mv {} {} && {}",
6328 shell_quote(graph_db.to_string_lossy().as_ref()),
6329 shell_quote(&backup),
6330 graph_db_rebuild_command(root, scope)
6331 )
6332}
6333
6334fn convex_refresh_command(root: &Path, scope: Option<&str>) -> String {
6335 format!(
6336 "tsift convex-sync {}{} --remote-snapshot --apply --json",
6337 shell_quote(root.to_string_lossy().as_ref()),
6338 graph_db_scope_arg(scope)
6339 )
6340}
6341
/// Opens the graph database at `graph_db` for reading.
///
/// Thin wrapper that delegates to
/// `substrate::open_graph_read_only_connection_resilient` and returns its
/// result unchanged.
fn open_sqlite_graph_db_readonly(graph_db: &Path) -> Result<substrate::SqliteReadOnlyConnection> {
    substrate::open_graph_read_only_connection_resilient(graph_db)
}
6345
6346fn sqlite_table_exists(conn: &Connection, table: &str) -> Result<bool> {
6347 conn.query_row(
6348 "SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1)",
6349 [table],
6350 |row| row.get::<_, bool>(0),
6351 )
6352 .map_err(Into::into)
6353}
6354
6355fn sqlite_known_table_count(conn: &Connection, table: &str) -> Result<usize> {
6356 let sql = match table {
6357 "graph_nodes" => "SELECT COUNT(*) FROM graph_nodes",
6358 "graph_edges" => "SELECT COUNT(*) FROM graph_edges",
6359 "graph_tombstones" => "SELECT COUNT(*) FROM graph_tombstones",
6360 other => bail!("unsupported graph count table {other}"),
6361 };
6362 conn.query_row(sql, [], |row| row.get::<_, usize>(0))
6363 .map_err(Into::into)
6364}
6365
6366fn sqlite_tombstone_counts(conn: &Connection) -> Result<GraphDbTombstoneCounts> {
6367 if !sqlite_table_exists(conn, "graph_tombstones")? {
6368 return Ok(GraphDbTombstoneCounts {
6369 nodes: 0,
6370 edges: 0,
6371 total: 0,
6372 });
6373 }
6374 let mut stmt =
6375 conn.prepare("SELECT row_kind, COUNT(*) FROM graph_tombstones GROUP BY row_kind")?;
6376 let mut rows = stmt.query([])?;
6377 let mut nodes = 0usize;
6378 let mut edges = 0usize;
6379 while let Some(row) = rows.next()? {
6380 let row_kind: String = row.get(0)?;
6381 let count: usize = row.get(1)?;
6382 match row_kind.as_str() {
6383 "node" => nodes = count,
6384 "edge" => edges = count,
6385 _ => {}
6386 }
6387 }
6388 Ok(GraphDbTombstoneCounts {
6389 nodes,
6390 edges,
6391 total: nodes + edges,
6392 })
6393}
6394
6395fn sqlite_graph_counts_from_cache(
6396 conn: &Connection,
6397 scope: &str,
6398) -> Result<Option<GraphDbOperatorCounts>> {
6399 if !sqlite_table_exists(conn, "graph_operator_stats")? {
6400 return Ok(None);
6401 }
6402 let row = conn
6403 .query_row(
6404 r#"
6405 SELECT nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes
6406 FROM graph_operator_stats
6407 WHERE scope = ?1
6408 "#,
6409 [scope],
6410 |row| {
6411 Ok((
6412 row.get::<_, usize>(0)?,
6413 row.get::<_, usize>(1)?,
6414 row.get::<_, usize>(2)?,
6415 row.get::<_, usize>(3)?,
6416 row.get::<_, Option<i64>>(4)?,
6417 row.get::<_, Option<i64>>(5)?,
6418 ))
6419 },
6420 )
6421 .optional()?;
6422 Ok(row.map(
6423 |(nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes)| {
6424 GraphDbOperatorCounts {
6425 nodes,
6426 edges,
6427 tombstones: GraphDbTombstoneCounts {
6428 nodes: tombstone_nodes,
6429 edges: tombstone_edges,
6430 total: tombstone_nodes + tombstone_edges,
6431 },
6432 file_size_bytes: file_size_bytes
6433 .and_then(|value| u64::try_from(value).ok())
6434 .or_else(|| sqlite_database_size_bytes(conn).ok()),
6435 freelist_bytes: freelist_bytes
6436 .and_then(|value| u64::try_from(value).ok())
6437 .or_else(|| sqlite_database_freelist_bytes(conn).ok()),
6438 }
6439 },
6440 ))
6441}
6442
6443fn sqlite_graph_counts(conn: &Connection, scope: &str) -> Result<GraphDbOperatorCounts> {
6444 if let Some(counts) = sqlite_graph_counts_from_cache(conn, scope)? {
6445 return Ok(counts);
6446 }
6447 let nodes = if sqlite_table_exists(conn, "graph_nodes")? {
6448 sqlite_known_table_count(conn, "graph_nodes")?
6449 } else {
6450 0
6451 };
6452 let edges = if sqlite_table_exists(conn, "graph_edges")? {
6453 sqlite_known_table_count(conn, "graph_edges")?
6454 } else {
6455 0
6456 };
6457 Ok(GraphDbOperatorCounts {
6458 nodes,
6459 edges,
6460 tombstones: sqlite_tombstone_counts(conn)?,
6461 file_size_bytes: sqlite_database_size_bytes(conn).ok(),
6462 freelist_bytes: sqlite_database_freelist_bytes(conn).ok(),
6463 })
6464}
6465
6466fn sqlite_graph_semantic_node_count(conn: &Connection) -> Result<usize> {
6467 if !sqlite_table_exists(conn, "graph_nodes")? {
6468 return Ok(0);
6469 }
6470 let count: i64 = conn.query_row(
6471 "SELECT COUNT(*) FROM graph_nodes WHERE kind IN ('semantic_concept', 'semantic_entity')",
6472 [],
6473 |row| row.get(0),
6474 )?;
6475 Ok(count as usize)
6476}
6477
6478pub(crate) fn graph_db_compaction_policy(
6479 root: &Path,
6480 scope: Option<&str>,
6481 counts: &GraphDbOperatorCounts,
6482 prune_confirmed: bool,
6483) -> GraphDbCompactionPolicy {
6484 let live_rows = counts.nodes + counts.edges;
6485 let tombstone_scan_rows = counts.tombstones.total;
6486 let tombstone_heavy = tombstone_scan_rows > live_rows.max(1);
6487 let freelist_heavy = counts
6488 .file_size_bytes
6489 .zip(counts.freelist_bytes)
6490 .is_some_and(|(file_size, freelist)| freelist > 0 && freelist >= file_size / 20);
6491 let status = if tombstone_heavy || freelist_heavy {
6492 "recommended"
6493 } else {
6494 "not_needed"
6495 }
6496 .to_string();
6497 let mut recommendations = vec![
6498 convex_refresh_command(root, scope),
6499 graph_db_refresh_command(root, scope),
6500 format!(
6501 "tsift graph-db --path {}{} compact --apply --json",
6502 shell_quote(root.to_string_lossy().as_ref()),
6503 graph_db_scope_arg(scope)
6504 ),
6505 ];
6506 if prune_confirmed {
6507 recommendations.push(format!(
6508 "tsift graph-db --path {}{} compact --apply --prune-tombstones --confirmed-convex-reconciled --json",
6509 shell_quote(root.to_string_lossy().as_ref()),
6510 graph_db_scope_arg(scope)
6511 ));
6512 }
6513 let proof = vec![
6514 format!("{live_rows} live graph row(s)"),
6515 format!("{tombstone_scan_rows} retained tombstone row(s) scanned by status/doctor"),
6516 format!(
6517 "graph.db file_size={} byte(s), freelist={} byte(s)",
6518 counts.file_size_bytes.unwrap_or(0),
6519 counts.freelist_bytes.unwrap_or(0)
6520 ),
6521 ];
6522 GraphDbCompactionPolicy {
6523 status,
6524 tombstone_scan_rows,
6525 live_rows,
6526 file_size_bytes: counts.file_size_bytes,
6527 freelist_bytes: counts.freelist_bytes,
6528 safe_to_prune_tombstones: prune_confirmed,
6529 requires_convex_reconciliation: tombstone_scan_rows > 0 && !prune_confirmed,
6530 recommendations,
6531 proof,
6532 }
6533}
6534
6535fn sqlite_database_size_bytes(conn: &Connection) -> Result<u64> {
6536 let page_count: u64 = conn.query_row("PRAGMA page_count", [], |row| row.get(0))?;
6537 let page_size: u64 = conn.query_row("PRAGMA page_size", [], |row| row.get(0))?;
6538 Ok(page_count.saturating_mul(page_size))
6539}
6540
6541fn sqlite_database_freelist_bytes(conn: &Connection) -> Result<u64> {
6542 let freelist_count: u64 = conn.query_row("PRAGMA freelist_count", [], |row| row.get(0))?;
6543 let page_size: u64 = conn.query_row("PRAGMA page_size", [], |row| row.get(0))?;
6544 Ok(freelist_count.saturating_mul(page_size))
6545}
6546
6547fn sqlite_graph_tombstone_retention_diagnostics(
6548 conn: &Connection,
6549 scope: &str,
6550) -> Result<Vec<String>> {
6551 if !sqlite_table_exists(conn, "graph_tombstones")? {
6552 return Ok(Vec::new());
6553 }
6554 let cached = sqlite_graph_counts_from_cache(conn, scope)?;
6555 let counts = match cached.clone() {
6556 Some(counts) => counts,
6557 None => sqlite_graph_counts(conn, scope)?,
6558 };
6559 let live_rows = counts.nodes + counts.edges;
6560 let file_size = counts.file_size_bytes.unwrap_or(0);
6561 let freelist = counts.freelist_bytes.unwrap_or(0);
6562 let stale_live_tombstones = if cached.is_some() {
6563 0
6564 } else {
6565 let mut live_keys = BTreeSet::new();
6566 if sqlite_table_exists(conn, "graph_nodes")? {
6567 let mut stmt = conn.prepare("SELECT id FROM graph_nodes")?;
6568 for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
6569 live_keys.insert(format!("node:{}", row?));
6570 }
6571 }
6572 if sqlite_table_exists(conn, "graph_edges")? {
6573 let mut stmt = conn.prepare("SELECT edge_key FROM graph_edges")?;
6574 for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
6575 live_keys.insert(format!("edge:{}", row?));
6576 }
6577 }
6578 let mut stale_live_tombstones = 0usize;
6579 let mut stmt = conn.prepare("SELECT row_key FROM graph_tombstones ORDER BY row_key")?;
6580 for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
6581 if live_keys.contains(&row?) {
6582 stale_live_tombstones += 1;
6583 }
6584 }
6585 stale_live_tombstones
6586 };
6587
6588 let mut diagnostics = Vec::new();
6589 if stale_live_tombstones > 0 {
6590 diagnostics.push(format!(
6591 "{stale_live_tombstones} tombstone(s) reference rows that are live again; the next graph-db refresh prunes those stale tombstones before inserting new deletion markers"
6592 ));
6593 }
6594 if counts.tombstones.total > live_rows.max(1) {
6595 let source = if cached.is_some() {
6596 "cached refresh stats"
6597 } else {
6598 "live row scan"
6599 };
6600 diagnostics.push(format!(
6601 "tombstone retention exceeds live graph rows: {} tombstone(s) vs {} live row(s) from {}; graph.db file_size={} byte(s), freelist={} byte(s), status/doctor tombstone scans inspect {} extra row(s). Run convex-sync against the remote snapshot before rebuild/compaction if a remote consumer may still need deletion reconciliation.",
6602 counts.tombstones.total,
6603 live_rows,
6604 source,
6605 file_size,
6606 freelist,
6607 counts.tombstones.total
6608 ));
6609 }
6610 Ok(diagnostics)
6611}
6612
6613fn sqlite_graph_freshness_from_conn(
6614 conn: &Connection,
6615 scope: &str,
6616) -> Result<GraphDbFreshnessReport> {
6617 if !sqlite_table_exists(conn, "graph_projection_versions")? {
6618 return Ok(GraphDbFreshnessReport {
6619 status: "missing".to_string(),
6620 fail_closed: true,
6621 projection_version: None,
6622 content_hash: None,
6623 source_watermark: None,
6624 diagnostics: vec![
6625 "graph projection metadata table is missing; refresh graph.db before trusting reads"
6626 .to_string(),
6627 ],
6628 });
6629 }
6630 let version = conn
6631 .query_row(
6632 r#"
6633 SELECT projection_version, content_hash, source_watermark
6634 FROM graph_projection_versions
6635 WHERE scope = ?1
6636 "#,
6637 [scope],
6638 |row| {
6639 Ok((
6640 row.get::<_, String>(0)?,
6641 row.get::<_, Option<String>>(1)?,
6642 row.get::<_, Option<String>>(2)?,
6643 ))
6644 },
6645 )
6646 .optional()?;
6647 let Some((projection_version, content_hash, source_watermark)) = version else {
6648 return Ok(GraphDbFreshnessReport {
6649 status: "missing".to_string(),
6650 fail_closed: true,
6651 projection_version: None,
6652 content_hash: None,
6653 source_watermark: None,
6654 diagnostics: vec![
6655 "graph projection metadata is missing; refresh graph.db before trusting reads"
6656 .to_string(),
6657 ],
6658 });
6659 };
6660
6661 let mut diagnostics = Vec::new();
6662 if projection_version != GRAPH_PROJECTION_VERSION {
6663 diagnostics.push(format!(
6664 "projection version mismatch: expected {} got {}",
6665 GRAPH_PROJECTION_VERSION, projection_version
6666 ));
6667 }
6668 if content_hash.is_none() {
6669 diagnostics.push("projection content hash is missing".to_string());
6670 }
6671 let fail_closed = !diagnostics.is_empty();
6672 Ok(GraphDbFreshnessReport {
6673 status: if fail_closed { "stale" } else { "current" }.to_string(),
6674 fail_closed,
6675 projection_version: Some(projection_version),
6676 content_hash,
6677 source_watermark,
6678 diagnostics,
6679 })
6680}
6681
6682fn graph_db_operator_next_commands(
6683 root: &Path,
6684 scope: Option<&str>,
6685 include_refresh: bool,
6686) -> Vec<String> {
6687 let mut commands = Vec::new();
6688 if include_refresh {
6689 commands.push(graph_db_refresh_command(root, scope));
6690 }
6691 commands.push(format!(
6692 "tsift graph-db --path {}{} doctor --json",
6693 shell_quote(root.to_string_lossy().as_ref()),
6694 graph_db_scope_arg(scope)
6695 ));
6696 commands.push(format!(
6697 "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot <rows.json> drift --json",
6698 shell_quote(root.to_string_lossy().as_ref()),
6699 graph_db_scope_arg(scope)
6700 ));
6701 commands.push(format!(
6702 "tsift convex-sync {}{} --remote-snapshot --apply --json",
6703 shell_quote(root.to_string_lossy().as_ref()),
6704 graph_db_scope_arg(scope)
6705 ));
6706 commands
6707}
6708
6709pub(crate) fn graph_db_read_recovery_diagnostic(recovery: index::ReadOnlyRecovery) -> String {
6710 match recovery {
6711 index::ReadOnlyRecovery::SnapshotFallback => {
6712 "graph.db read recovered through snapshot fallback after a rollback-journal lock on the live database".to_string()
6713 }
6714 index::ReadOnlyRecovery::SnapshotFallbackWal => {
6715 "graph.db read recovered through WAL-aware snapshot fallback after copying live -wal/-shm sidecars".to_string()
6716 }
6717 }
6718}
6719
6720fn sqlite_string_set(conn: &Connection, sql: &str) -> Result<BTreeSet<String>> {
6721 let mut stmt = conn.prepare(sql)?;
6722 let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
6723 let mut values = BTreeSet::new();
6724 for row in rows {
6725 values.insert(row?);
6726 }
6727 Ok(values)
6728}
6729
6730fn sqlite_column_names(conn: &Connection, table: &str) -> Result<BTreeSet<String>> {
6731 let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
6732 let rows = stmt.query_map([], |row| row.get::<_, String>(1))?;
6733 let mut columns = BTreeSet::new();
6734 for row in rows {
6735 columns.insert(row?);
6736 }
6737 Ok(columns)
6738}
6739
6740fn sqlite_graph_schema_diagnostics(conn: &Connection) -> Result<Vec<String>> {
6741 let mut diagnostics = Vec::new();
6742 let user_version: i64 =
6743 conn.pragma_query_value(None, "user_version", |row| row.get::<_, i64>(0))?;
6744 if user_version > SQLITE_GRAPH_SCHEMA_VERSION {
6745 diagnostics.push(format!(
6746 "graph.db schema version {user_version} is newer than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
6747 ));
6748 } else if user_version < SQLITE_GRAPH_SCHEMA_VERSION {
6749 diagnostics.push(format!(
6750 "graph.db schema version {user_version} is older than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
6751 ));
6752 }
6753
6754 let tables = sqlite_string_set(
6755 conn,
6756 "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name",
6757 )?;
6758 let required_tables = [
6759 (
6760 "graph_nodes",
6761 vec![
6762 "id",
6763 "kind",
6764 "label",
6765 "properties_json",
6766 "provenance_json",
6767 "freshness_json",
6768 "row_hash",
6769 "source_watermark",
6770 ],
6771 ),
6772 (
6773 "graph_edges",
6774 vec![
6775 "edge_key",
6776 "from_id",
6777 "to_id",
6778 "kind",
6779 "properties_json",
6780 "provenance_json",
6781 "freshness_json",
6782 "row_hash",
6783 "source_watermark",
6784 ],
6785 ),
6786 (
6787 "graph_projection_versions",
6788 vec![
6789 "scope",
6790 "projection_version",
6791 "content_hash",
6792 "source_watermark",
6793 "observed_at_unix",
6794 ],
6795 ),
6796 (
6797 "graph_tombstones",
6798 vec!["row_key", "row_kind", "deleted_at_unix"],
6799 ),
6800 ("graph_node_properties", vec!["node_id", "key", "value"]),
6801 ("graph_edge_properties", vec!["edge_key", "key", "value"]),
6802 ];
6803 for (table, required_columns) in required_tables {
6804 if !tables.contains(table) {
6805 diagnostics.push(format!("graph.db schema drift: missing table {table}"));
6806 continue;
6807 }
6808 let columns = sqlite_column_names(conn, table)?;
6809 for column in required_columns {
6810 if !columns.contains(column) {
6811 diagnostics.push(format!(
6812 "graph.db schema drift: missing column {table}.{column}"
6813 ));
6814 }
6815 }
6816 }
6817
6818 let indexes = sqlite_string_set(
6819 conn,
6820 "SELECT name FROM sqlite_master WHERE type = 'index' ORDER BY name",
6821 )?;
6822 for index in [
6823 "idx_graph_nodes_kind",
6824 "idx_graph_edges_from_kind",
6825 "idx_graph_edges_to_kind",
6826 "idx_graph_edges_edge_key",
6827 "idx_graph_node_properties_key_value_node",
6828 "idx_graph_edge_properties_key_value_edge",
6829 ] {
6830 if !indexes.contains(index) {
6831 diagnostics.push(format!("graph.db schema drift: missing index {index}"));
6832 }
6833 }
6834
6835 if tables.contains("graph_edges") {
6836 let mut stmt = conn.prepare("PRAGMA foreign_key_list(graph_edges)")?;
6837 let rows = stmt.query_map([], |row| {
6838 Ok((row.get::<_, String>(3)?, row.get::<_, String>(4)?))
6839 })?;
6840 let mut fks = BTreeSet::new();
6841 for row in rows {
6842 fks.insert(row?);
6843 }
6844 for expected in [
6845 ("from_id".to_string(), "id".to_string()),
6846 ("to_id".to_string(), "id".to_string()),
6847 ] {
6848 if !fks.contains(&expected) {
6849 diagnostics.push(format!(
6850 "graph.db schema drift: missing graph_edges foreign key {} -> graph_nodes.{}",
6851 expected.0, expected.1
6852 ));
6853 }
6854 }
6855 }
6856
6857 Ok(diagnostics)
6858}
6859
6860fn sqlite_query_diagnostics(conn: &Connection, sql: &str) -> Result<Vec<String>> {
6861 let mut stmt = conn.prepare(sql)?;
6862 let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
6863 let mut diagnostics = Vec::new();
6864 for row in rows {
6865 diagnostics.push(row?);
6866 }
6867 Ok(diagnostics)
6868}
6869
6870fn sqlite_graph_duplicate_diagnostics(conn: &Connection) -> Result<Vec<String>> {
6871 let mut diagnostics = sqlite_query_diagnostics(
6872 conn,
6873 r#"
6874 SELECT 'duplicate graph_nodes.id ' || id || ' (' || COUNT(*) || ' rows)'
6875 FROM graph_nodes
6876 GROUP BY id
6877 HAVING COUNT(*) > 1
6878 ORDER BY id
6879 "#,
6880 )?;
6881 diagnostics.extend(sqlite_query_diagnostics(
6882 conn,
6883 r#"
6884 SELECT 'duplicate graph_edges key ' || from_id || ' -' || kind || '-> ' || to_id || ' (' || COUNT(*) || ' rows)'
6885 FROM graph_edges
6886 GROUP BY from_id, to_id, kind
6887 HAVING COUNT(*) > 1
6888 ORDER BY from_id, kind, to_id
6889 "#,
6890 )?);
6891 diagnostics.extend(sqlite_query_diagnostics(
6892 conn,
6893 r#"
6894 SELECT 'duplicate graph_edges.edge_key ' || edge_key || ' (' || COUNT(*) || ' rows)'
6895 FROM graph_edges
6896 GROUP BY edge_key
6897 HAVING COUNT(*) > 1
6898 ORDER BY edge_key
6899 "#,
6900 )?);
6901 Ok(diagnostics)
6902}
6903
6904fn sqlite_graph_orphan_diagnostics(conn: &Connection) -> Result<Vec<String>> {
6905 sqlite_query_diagnostics(
6906 conn,
6907 r#"
6908 SELECT 'orphan edge missing from node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
6909 FROM graph_edges e
6910 LEFT JOIN graph_nodes n ON n.id = e.from_id
6911 WHERE n.id IS NULL
6912 UNION ALL
6913 SELECT 'orphan edge missing to node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
6914 FROM graph_edges e
6915 LEFT JOIN graph_nodes n ON n.id = e.to_id
6916 WHERE n.id IS NULL
6917 ORDER BY 1
6918 "#,
6919 )
6920}
6921
6922fn sqlite_graph_json_diagnostics(conn: &Connection) -> Result<Vec<String>> {
6923 let mut diagnostics = Vec::new();
6924 let mut node_stmt = conn.prepare(
6925 "SELECT id, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
6926 )?;
6927 let node_rows = node_stmt.query_map([], |row| {
6928 Ok((
6929 row.get::<_, String>(0)?,
6930 row.get::<_, String>(1)?,
6931 row.get::<_, String>(2)?,
6932 row.get::<_, Option<String>>(3)?,
6933 ))
6934 })?;
6935 for row in node_rows {
6936 let (id, properties_json, provenance_json, freshness_json) = row?;
6937 if let Err(err) = serde_json::from_str::<BTreeMap<String, String>>(&properties_json) {
6938 diagnostics.push(format!(
6939 "graph_nodes {id} properties_json is invalid: {err}"
6940 ));
6941 }
6942 if let Err(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json) {
6943 diagnostics.push(format!(
6944 "graph_nodes {id} provenance_json is invalid: {err}"
6945 ));
6946 }
6947 if let Some(freshness_json) = freshness_json
6948 && let Err(err) = serde_json::from_str::<GraphFreshness>(&freshness_json)
6949 {
6950 diagnostics.push(format!("graph_nodes {id} freshness_json is invalid: {err}"));
6951 }
6952 }
6953
6954 let mut edge_stmt = conn.prepare(
6955 "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
6956 )?;
6957 let edge_rows = edge_stmt.query_map([], |row| {
6958 Ok((
6959 row.get::<_, String>(0)?,
6960 row.get::<_, String>(1)?,
6961 row.get::<_, String>(2)?,
6962 row.get::<_, String>(3)?,
6963 row.get::<_, String>(4)?,
6964 row.get::<_, String>(5)?,
6965 row.get::<_, Option<String>>(6)?,
6966 ))
6967 })?;
6968 for row in edge_rows {
6969 let (edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json) =
6970 row?;
6971 let edge = format!("{edge_key} {from_id} -{kind}-> {to_id}");
6972 if let Err(err) = serde_json::from_str::<BTreeMap<String, String>>(&properties_json) {
6973 diagnostics.push(format!(
6974 "graph_edges {edge} properties_json is invalid: {err}"
6975 ));
6976 }
6977 if let Err(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json) {
6978 diagnostics.push(format!(
6979 "graph_edges {edge} provenance_json is invalid: {err}"
6980 ));
6981 }
6982 if let Some(freshness_json) = freshness_json
6983 && let Err(err) = serde_json::from_str::<GraphFreshness>(&freshness_json)
6984 {
6985 diagnostics.push(format!(
6986 "graph_edges {edge} freshness_json is invalid: {err}"
6987 ));
6988 }
6989 }
6990 Ok(diagnostics)
6991}
6992
6993fn sqlite_graph_projection_metadata_diagnostics(
6994 conn: &Connection,
6995 scope: Option<&str>,
6996) -> Result<Vec<String>> {
6997 let mut diagnostics = Vec::new();
6998 let scope_key = scope.unwrap_or("root");
6999 let version = conn
7000 .query_row(
7001 r#"
7002 SELECT projection_version, content_hash, source_watermark
7003 FROM graph_projection_versions
7004 WHERE scope = ?1
7005 "#,
7006 [scope_key],
7007 |row| {
7008 Ok((
7009 row.get::<_, String>(0)?,
7010 row.get::<_, Option<String>>(1)?,
7011 row.get::<_, Option<String>>(2)?,
7012 ))
7013 },
7014 )
7015 .optional()?;
7016 let Some((projection_version, content_hash, _source_watermark)) = version else {
7017 diagnostics.push(format!(
7018 "graph projection metadata is missing for scope {scope_key}"
7019 ));
7020 return Ok(diagnostics);
7021 };
7022 if projection_version != GRAPH_PROJECTION_VERSION {
7023 diagnostics.push(format!(
7024 "projection version mismatch: expected {GRAPH_PROJECTION_VERSION} got {projection_version}"
7025 ));
7026 }
7027 if content_hash.is_none() {
7028 diagnostics.push("projection content hash is missing".to_string());
7029 }
7030
7031 let meta_id = graph_projection_meta_id(scope);
7032 let meta_properties = conn
7033 .query_row(
7034 "SELECT properties_json FROM graph_nodes WHERE id = ?1 AND kind = ?2",
7035 (&meta_id, GRAPH_PROJECTION_META_KIND),
7036 |row| row.get::<_, String>(0),
7037 )
7038 .optional()?;
7039 let Some(meta_properties) = meta_properties else {
7040 diagnostics.push(format!("projection_meta node {meta_id} is missing"));
7041 return Ok(diagnostics);
7042 };
7043 let properties = serde_json::from_str::<BTreeMap<String, String>>(&meta_properties)
7044 .with_context(|| format!("parsing projection_meta properties for {meta_id}"))?;
7045 if properties.get("projection_version").map(String::as_str) != Some(GRAPH_PROJECTION_VERSION) {
7046 diagnostics.push(format!(
7047 "projection_meta node {meta_id} has stale projection_version"
7048 ));
7049 }
7050 if properties.get("content_hash") != content_hash.as_ref() {
7051 diagnostics.push(format!(
7052 "projection_meta node {meta_id} content_hash does not match graph_projection_versions"
7053 ));
7054 }
7055 Ok(diagnostics)
7056}
7057
7058pub(crate) fn sqlite_convex_rows_from_conn(conn: &Connection) -> Result<ConvexProjectionRows> {
7059 let mut node_stmt = conn.prepare(
7060 "SELECT id, kind, label, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
7061 )?;
7062 let node_rows = node_stmt.query_map([], |row| {
7063 let properties_json: String = row.get(3)?;
7064 let provenance_json: String = row.get(4)?;
7065 let freshness_json: Option<String> = row.get(5)?;
7066 Ok((
7067 row.get::<_, String>(0)?,
7068 row.get::<_, String>(1)?,
7069 row.get::<_, String>(2)?,
7070 properties_json,
7071 provenance_json,
7072 freshness_json,
7073 ))
7074 })?;
7075 let mut nodes = Vec::new();
7076 for row in node_rows {
7077 let (external_id, kind, label, properties_json, provenance_json, freshness_json) = row?;
7078 nodes.push(ConvexNodeRow {
7079 external_id,
7080 kind,
7081 label,
7082 properties: serde_json::from_str(&properties_json)?,
7083 provenance: serde_json::from_str(&provenance_json)?,
7084 freshness: freshness_json
7085 .map(|value| serde_json::from_str(&value))
7086 .transpose()?,
7087 });
7088 }
7089
7090 let mut edge_stmt = conn.prepare(
7091 "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
7092 )?;
7093 let edge_rows = edge_stmt.query_map([], |row| {
7094 let properties_json: String = row.get(4)?;
7095 let provenance_json: String = row.get(5)?;
7096 let freshness_json: Option<String> = row.get(6)?;
7097 Ok((
7098 row.get::<_, String>(0)?,
7099 row.get::<_, String>(1)?,
7100 row.get::<_, String>(2)?,
7101 row.get::<_, String>(3)?,
7102 properties_json,
7103 provenance_json,
7104 freshness_json,
7105 ))
7106 })?;
7107 let mut edges = Vec::new();
7108 for row in edge_rows {
7109 let (
7110 edge_key,
7111 from_external_id,
7112 to_external_id,
7113 kind,
7114 properties_json,
7115 provenance_json,
7116 freshness_json,
7117 ) = row?;
7118 edges.push(ConvexEdgeRow {
7119 edge_key,
7120 from_external_id,
7121 to_external_id,
7122 kind,
7123 properties: serde_json::from_str(&properties_json)?,
7124 provenance: serde_json::from_str(&provenance_json)?,
7125 freshness: freshness_json
7126 .map(|value| serde_json::from_str(&value))
7127 .transpose()?,
7128 });
7129 }
7130 Ok(ConvexProjectionRows { nodes, edges })
7131}
7132
7133fn convex_required_index_label(index: &ConvexRequiredIndex) -> String {
7134 format!("{}.{}({})", index.table, index.name, index.fields.join(","))
7135}
7136
7137fn convex_snapshot_index_value(value: &serde_json::Value) -> Option<&serde_json::Value> {
7138 value
7139 .get("indexes")
7140 .or_else(|| value.get("requiredIndexes"))
7141 .or_else(|| {
7142 value
7143 .get("metadata")
7144 .and_then(|metadata| metadata.get("indexes"))
7145 })
7146}
7147
7148fn convex_snapshot_declared_indexes(
7149 value: &serde_json::Value,
7150) -> Result<Option<Vec<ConvexRequiredIndex>>> {
7151 convex_snapshot_index_value(value)
7152 .map(|indexes| {
7153 serde_json::from_value::<Vec<ConvexRequiredIndex>>(indexes.clone())
7154 .context("parsing Convex snapshot index metadata")
7155 })
7156 .transpose()
7157}
7158
7159fn convex_snapshot_index_diagnostics(value: &serde_json::Value) -> Result<Vec<String>> {
7160 let required = convex_required_indexes();
7161 let Some(declared) = convex_snapshot_declared_indexes(value)? else {
7162 return Ok(vec![format!(
7163 "Convex snapshot index metadata is missing; required indexes not confirmed: {}",
7164 required
7165 .iter()
7166 .map(convex_required_index_label)
7167 .collect::<Vec<_>>()
7168 .join(", ")
7169 )]);
7170 };
7171 let declared = declared.into_iter().collect::<BTreeSet<_>>();
7172 let missing = required
7173 .iter()
7174 .filter(|index| !declared.contains(*index))
7175 .map(convex_required_index_label)
7176 .collect::<Vec<_>>();
7177 if missing.is_empty() {
7178 Ok(Vec::new())
7179 } else {
7180 Ok(vec![format!(
7181 "Convex snapshot is missing required index metadata: {}",
7182 missing.join(", ")
7183 )])
7184 }
7185}
7186
7187pub(crate) fn load_convex_projection_snapshot_value(
7188 snapshot_path: &Path,
7189) -> Result<(ConvexProjectionRows, serde_json::Value)> {
7190 let content = fs::read_to_string(snapshot_path).with_context(|| {
7191 format!(
7192 "reading Convex projection snapshot {}",
7193 snapshot_path.display()
7194 )
7195 })?;
7196 let value = serde_json::from_str::<serde_json::Value>(&content).with_context(|| {
7197 format!(
7198 "parsing Convex projection snapshot {}",
7199 snapshot_path.display()
7200 )
7201 })?;
7202 let rows = serde_json::from_value::<ConvexProjectionRows>(value.clone())
7203 .with_context(|| format!("parsing Convex projection rows {}", snapshot_path.display()))?;
7204 Ok((rows, value))
7205}
7206
7207pub(crate) fn append_sqlite_graph_doctor_checks(
7208 report: &mut GraphDbDoctorReport,
7209 root: &Path,
7210 scope: Option<&str>,
7211 graph_db: &Path,
7212) -> Option<substrate::SqliteReadOnlyConnection> {
7213 let rebuild = graph_db_rebuild_command(root, scope);
7214 let backup_rebuild = graph_db_backup_rebuild_command(root, scope, graph_db);
7215 if !graph_db.exists() {
7216 report.push_check(graph_db_doctor_check(
7217 "sqlite_graph_db_exists",
7218 vec![format!("graph.db is missing at {}", graph_db.display())],
7219 vec![rebuild],
7220 ));
7221 return None;
7222 }
7223 report.push_check(graph_db_doctor_check(
7224 "sqlite_graph_db_exists",
7225 Vec::new(),
7226 vec![rebuild.clone()],
7227 ));
7228
7229 let conn = match open_sqlite_graph_db_readonly(graph_db) {
7230 Ok(conn) => conn,
7231 Err(err) => {
7232 report.push_check(graph_db_doctor_check(
7233 "sqlite_graph_db_open",
7234 vec![err.to_string()],
7235 vec![backup_rebuild],
7236 ));
7237 return None;
7238 }
7239 };
7240 report.push_check(graph_db_doctor_check(
7241 "sqlite_graph_db_open",
7242 Vec::new(),
7243 vec![rebuild.clone()],
7244 ));
7245 if let Some(recovery) = conn.recovery() {
7246 report.push_check(GraphDbDoctorCheck {
7247 name: "sqlite_graph_db_read_recovery".to_string(),
7248 status: "recovered".to_string(),
7249 fail_closed: false,
7250 diagnostics: vec![graph_db_read_recovery_diagnostic(recovery)],
7251 repair_commands: Vec::new(),
7252 });
7253 }
7254
7255 let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
7256 .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
7257 report.push_check(graph_db_doctor_check(
7258 "sqlite_schema",
7259 schema_diagnostics,
7260 vec![backup_rebuild.clone()],
7261 ));
7262
7263 let metadata_diagnostics = sqlite_graph_projection_metadata_diagnostics(conn.conn(), scope)
7264 .unwrap_or_else(|err| {
7265 vec![format!(
7266 "graph projection metadata inspection failed: {err}"
7267 )]
7268 });
7269 report.push_check(graph_db_doctor_check(
7270 "sqlite_projection_metadata",
7271 metadata_diagnostics,
7272 vec![rebuild.clone()],
7273 ));
7274
7275 let duplicate_diagnostics = sqlite_graph_duplicate_diagnostics(conn.conn())
7276 .unwrap_or_else(|err| vec![format!("duplicate id inspection failed: {err}")]);
7277 report.push_check(graph_db_doctor_check(
7278 "sqlite_duplicate_ids",
7279 duplicate_diagnostics,
7280 vec![backup_rebuild.clone()],
7281 ));
7282
7283 let orphan_diagnostics = sqlite_graph_orphan_diagnostics(conn.conn())
7284 .unwrap_or_else(|err| vec![format!("orphan edge inspection failed: {err}")]);
7285 report.push_check(graph_db_doctor_check(
7286 "sqlite_orphan_edges",
7287 orphan_diagnostics,
7288 vec![rebuild.clone()],
7289 ));
7290
7291 let json_diagnostics = sqlite_graph_json_diagnostics(conn.conn())
7292 .unwrap_or_else(|err| vec![format!("graph row JSON inspection failed: {err}")]);
7293 report.push_check(graph_db_doctor_check(
7294 "sqlite_row_json",
7295 json_diagnostics,
7296 vec![backup_rebuild],
7297 ));
7298
7299 let tombstone_diagnostics =
7300 sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
7301 .unwrap_or_else(|err| {
7302 vec![format!(
7303 "graph tombstone retention inspection failed: {err}"
7304 )]
7305 });
7306 report.push_check(GraphDbDoctorCheck {
7307 name: "sqlite_tombstone_retention".to_string(),
7308 status: if tombstone_diagnostics.is_empty() {
7309 "ok".to_string()
7310 } else {
7311 "warning".to_string()
7312 },
7313 fail_closed: false,
7314 diagnostics: tombstone_diagnostics,
7315 repair_commands: Vec::new(),
7316 });
7317 let compaction_check = match sqlite_graph_counts(conn.conn(), scope.unwrap_or("root")) {
7318 Ok(counts) => {
7319 let policy = graph_db_compaction_policy(root, scope, &counts, false);
7320 GraphDbDoctorCheck {
7321 name: "sqlite_compaction_policy".to_string(),
7322 status: policy.status.clone(),
7323 fail_closed: false,
7324 diagnostics: policy.proof,
7325 repair_commands: if policy.status == "recommended" {
7326 policy.recommendations
7327 } else {
7328 Vec::new()
7329 },
7330 }
7331 }
7332 Err(err) => GraphDbDoctorCheck {
7333 name: "sqlite_compaction_policy".to_string(),
7334 status: "warning".to_string(),
7335 fail_closed: false,
7336 diagnostics: vec![format!("graph compaction policy inspection failed: {err}")],
7337 repair_commands: Vec::new(),
7338 },
7339 };
7340 report.push_check(compaction_check);
7341
7342 Some(conn)
7343}
7344
7345pub(crate) fn append_convex_snapshot_doctor_checks(
7346 report: &mut GraphDbDoctorReport,
7347 root: &Path,
7348 scope: Option<&str>,
7349 local_rows: Option<&ConvexProjectionRows>,
7350 snapshot_path: Option<&Path>,
7351) {
7352 let repair = convex_refresh_command(root, scope);
7353 let Some(snapshot_path) = snapshot_path else {
7354 report.push_check(graph_db_doctor_check(
7355 "convex_snapshot_present",
7356 vec!["--backend convex-snapshot requires --convex-snapshot <rows.json>".to_string()],
7357 vec![format!(
7358 "tsift convex-sync {}{} --json > convex-rows.json",
7359 shell_quote(root.to_string_lossy().as_ref()),
7360 graph_db_scope_arg(scope)
7361 )],
7362 ));
7363 return;
7364 };
7365 report.push_check(graph_db_doctor_check(
7366 "convex_snapshot_present",
7367 Vec::new(),
7368 vec![repair.clone()],
7369 ));
7370
7371 let (snapshot, snapshot_value) = match load_convex_projection_snapshot_value(snapshot_path) {
7372 Ok(snapshot) => snapshot,
7373 Err(err) => {
7374 report.push_check(graph_db_doctor_check(
7375 "convex_snapshot_parse",
7376 vec![err.to_string()],
7377 vec![repair],
7378 ));
7379 return;
7380 }
7381 };
7382 report.push_check(graph_db_doctor_check(
7383 "convex_snapshot_parse",
7384 Vec::new(),
7385 vec![repair.clone()],
7386 ));
7387
7388 let row_diagnostics = convex_projection_row_diagnostics(&snapshot);
7389 report.push_check(graph_db_doctor_check(
7390 "convex_snapshot_rows",
7391 row_diagnostics,
7392 vec![repair.clone()],
7393 ));
7394
7395 let index_diagnostics = convex_snapshot_index_diagnostics(&snapshot_value)
7396 .unwrap_or_else(|err| vec![err.to_string()]);
7397 report.required_indexes = convex_required_indexes();
7398 report.push_check(graph_db_doctor_check(
7399 "convex_required_indexes",
7400 index_diagnostics,
7401 vec![
7402 "Add the indexes from examples/convex-graph/schema.ts, then redeploy the Convex app"
7403 .to_string(),
7404 ],
7405 ));
7406
7407 if let Some(local_rows) = local_rows {
7408 let freshness = convex_projection_freshness(local_rows, Some(&snapshot), scope);
7409 report.push_check(graph_db_doctor_check(
7410 "convex_projection_freshness",
7411 freshness.diagnostics,
7412 vec![repair],
7413 ));
7414 } else {
7415 report.push_check(graph_db_doctor_check(
7416 "convex_projection_freshness",
7417 vec![
7418 "local SQLite graph.db could not be read, so Convex freshness cannot be verified"
7419 .to_string(),
7420 ],
7421 vec![graph_db_rebuild_command(root, scope)],
7422 ));
7423 }
7424}
7425
7426fn graph_db_convex_snapshot_doctor_command(
7427 root: &Path,
7428 scope: Option<&str>,
7429 snapshot_path: &Path,
7430) -> String {
7431 format!(
7432 "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} doctor --json",
7433 shell_quote(root.to_string_lossy().as_ref()),
7434 graph_db_scope_arg(scope),
7435 shell_quote(snapshot_path.to_string_lossy().as_ref())
7436 )
7437}
7438
7439fn graph_db_convex_snapshot_read_command(
7440 root: &Path,
7441 scope: Option<&str>,
7442 snapshot_path: &Path,
7443) -> String {
7444 format!(
7445 "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} schema --json",
7446 shell_quote(root.to_string_lossy().as_ref()),
7447 graph_db_scope_arg(scope),
7448 shell_quote(snapshot_path.to_string_lossy().as_ref())
7449 )
7450}
7451
7452fn convex_sync_snapshot_diff_command(
7453 root: &Path,
7454 scope: Option<&str>,
7455 snapshot_path: &Path,
7456) -> String {
7457 format!(
7458 "tsift convex-sync {}{} --snapshot {} --json",
7459 shell_quote(root.to_string_lossy().as_ref()),
7460 graph_db_scope_arg(scope),
7461 shell_quote(snapshot_path.to_string_lossy().as_ref())
7462 )
7463}
7464
/// Borrowed inputs consumed by `graph_db_drift_report`.
pub(crate) struct GraphDbDriftInput<'a> {
    /// Project root; echoed in the report and used to build follow-up commands.
    root: &'a Path,
    /// Optional scope passed to the freshness check and the follow-up commands.
    scope: Option<&'a str>,
    /// Path of the local graph.db, echoed in the report.
    graph_db: &'a Path,
    /// Path of the Convex snapshot file, echoed in the report and the commands.
    snapshot_path: &'a Path,
    /// Rows read from the local database.
    local: &'a ConvexProjectionRows,
    /// Rows parsed from the Convex snapshot.
    snapshot: &'a ConvexProjectionRows,
    /// Raw snapshot JSON, inspected for index metadata.
    snapshot_value: &'a serde_json::Value,
    /// Warnings passed through unchanged into the report.
    warnings: Vec<String>,
}
7475
7476pub(crate) fn graph_db_drift_report(input: GraphDbDriftInput<'_>) -> GraphDbDriftReport {
7477 let GraphDbDriftInput {
7478 root,
7479 scope,
7480 graph_db,
7481 snapshot_path,
7482 local,
7483 snapshot,
7484 snapshot_value,
7485 warnings,
7486 } = input;
7487 let freshness = convex_projection_freshness(local, Some(snapshot), scope);
7488 let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
7489 convex_rows_diff(local, Some(snapshot));
7490 let row_diagnostics = convex_projection_row_diagnostics(snapshot);
7491 let index_diagnostics = convex_snapshot_index_diagnostics(snapshot_value)
7492 .unwrap_or_else(|err| vec![format!("Convex snapshot index metadata failed: {err}")]);
7493 let local_hash = freshness.local_hash.clone();
7494 let snapshot_hash = freshness.snapshot_hash.clone();
7495 let stale_nodes = freshness.stale_nodes.clone();
7496 let stale_edges = freshness.stale_edges.clone();
7497
7498 let duplicate_failures = row_diagnostics
7499 .iter()
7500 .filter(|diagnostic| diagnostic.contains("duplicate"))
7501 .count();
7502 let orphan_failures = row_diagnostics
7503 .iter()
7504 .filter(|diagnostic| diagnostic.contains("references missing"))
7505 .count();
7506 let missing_required_indexes = index_diagnostics.len();
7507 let stale_projection_metadata =
7508 usize::from(local_hash != snapshot_hash || snapshot_hash.is_none());
7509 let hard_failures = duplicate_failures + orphan_failures + missing_required_indexes;
7510 let has_drift = freshness.fail_closed
7511 || !node_upserts.is_empty()
7512 || !edge_upserts.is_empty()
7513 || !node_tombstones.is_empty()
7514 || !edge_tombstones.is_empty();
7515 let status = if hard_failures > 0 {
7516 "fail_closed"
7517 } else if has_drift {
7518 "drift"
7519 } else {
7520 "current"
7521 }
7522 .to_string();
7523
7524 let mut diagnostics = Vec::new();
7525 diagnostics.extend(row_diagnostics);
7526 diagnostics.extend(index_diagnostics);
7527 diagnostics.extend(freshness.diagnostics.clone());
7528 if has_drift {
7529 diagnostics.push(format!(
7530 "projection diff: {} node upsert(s), {} edge upsert(s), {} node tombstone(s), {} edge tombstone(s)",
7531 node_upserts.len(),
7532 edge_upserts.len(),
7533 node_tombstones.len(),
7534 edge_tombstones.len()
7535 ));
7536 }
7537
7538 let mut next_commands = vec![graph_db_convex_snapshot_doctor_command(
7539 root,
7540 scope,
7541 snapshot_path,
7542 )];
7543 if status == "current" {
7544 next_commands.push(graph_db_convex_snapshot_read_command(
7545 root,
7546 scope,
7547 snapshot_path,
7548 ));
7549 } else {
7550 next_commands.push(convex_sync_snapshot_diff_command(
7551 root,
7552 scope,
7553 snapshot_path,
7554 ));
7555 next_commands.push(convex_refresh_command(root, scope));
7556 }
7557
7558 GraphDbDriftReport {
7559 root: root.to_string_lossy().to_string(),
7560 scope: scope.map(str::to_string),
7561 graph_db: graph_db.to_string_lossy().to_string(),
7562 convex_snapshot: snapshot_path.to_string_lossy().to_string(),
7563 status: status.clone(),
7564 graph_reads_allowed: status == "current",
7565 projection_version: GRAPH_PROJECTION_VERSION.to_string(),
7566 local_hash,
7567 snapshot_hash,
7568 summary: GraphDbDriftSummary {
7569 node_upserts: node_upserts.len(),
7570 edge_upserts: edge_upserts.len(),
7571 node_tombstones: node_tombstones.len(),
7572 edge_tombstones: edge_tombstones.len(),
7573 stale_nodes: stale_nodes.len(),
7574 stale_edges: stale_edges.len(),
7575 stale_projection_metadata,
7576 duplicate_failures,
7577 orphan_failures,
7578 missing_required_indexes,
7579 },
7580 node_upserts: node_upserts
7581 .into_iter()
7582 .map(|row| row.external_id)
7583 .collect(),
7584 edge_upserts: edge_upserts.into_iter().map(|row| row.edge_key).collect(),
7585 node_tombstones,
7586 edge_tombstones,
7587 stale_nodes,
7588 stale_edges,
7589 diagnostics,
7590 next_commands,
7591 required_indexes: convex_required_indexes(),
7592 warnings,
7593 }
7594}
7595
7596pub(crate) fn print_graph_db_drift_human(report: &GraphDbDriftReport) {
7597 println!(
7598 "graph-db drift status: {} reads_allowed: {}",
7599 report.status, report.graph_reads_allowed
7600 );
7601 println!("graph_db: {}", report.graph_db);
7602 println!("convex_snapshot: {}", report.convex_snapshot);
7603 println!(
7604 "upserts: {} node(s), {} edge(s)",
7605 report.summary.node_upserts, report.summary.edge_upserts
7606 );
7607 println!(
7608 "tombstones: {} node(s), {} edge(s)",
7609 report.summary.node_tombstones, report.summary.edge_tombstones
7610 );
7611 for diagnostic in &report.diagnostics {
7612 println!("diagnostic: {diagnostic}");
7613 }
7614 for command in &report.next_commands {
7615 println!("next: {command}");
7616 }
7617}
7618
7619pub(crate) fn print_graph_db_doctor_human(report: &GraphDbDoctorReport) {
7620 println!(
7621 "graph-db doctor backend: {} status: {}",
7622 report.backend, report.status
7623 );
7624 println!("graph_db: {}", report.graph_db);
7625 if let Some(snapshot) = &report.convex_snapshot {
7626 println!("convex_snapshot: {snapshot}");
7627 }
7628 for check in &report.checks {
7629 println!("check: {} {}", check.name, check.status);
7630 for diagnostic in &check.diagnostics {
7631 println!(" diagnostic: {diagnostic}");
7632 }
7633 }
7634 for command in &report.repair_commands {
7635 println!("repair: {command}");
7636 }
7637}
7638
7639pub(crate) fn graph_db_operator_report_from_disk(
7640 root: &Path,
7641 scope: Option<&str>,
7642 graph_db: &Path,
7643 operation: &str,
7644 refresh: Option<GraphDbRefreshSummary>,
7645 warnings: Vec<String>,
7646) -> Result<GraphDbOperatorReport> {
7647 if !graph_db.exists() {
7648 let next_commands = graph_db_operator_next_commands(root, scope, true);
7649 let counts = GraphDbOperatorCounts {
7650 nodes: 0,
7651 edges: 0,
7652 tombstones: GraphDbTombstoneCounts {
7653 nodes: 0,
7654 edges: 0,
7655 total: 0,
7656 },
7657 file_size_bytes: None,
7658 freelist_bytes: None,
7659 };
7660 return Ok(GraphDbOperatorReport {
7661 root: root.to_string_lossy().to_string(),
7662 scope: scope.map(str::to_string),
7663 graph_db: graph_db.to_string_lossy().to_string(),
7664 operation: operation.to_string(),
7665 status: "missing".to_string(),
7666 materialized: false,
7667 freshness: GraphDbFreshnessReport {
7668 status: "missing".to_string(),
7669 fail_closed: true,
7670 projection_version: None,
7671 content_hash: None,
7672 source_watermark: None,
7673 diagnostics: vec![
7674 "graph.db is missing; run graph-db refresh before trusting graph reads"
7675 .to_string(),
7676 ],
7677 },
7678 readiness: graph_effectiveness_blocked(
7679 "graph_db_missing",
7680 vec![
7681 "graph.db is missing; materialize the projection before relying on graph effectiveness".to_string(),
7682 ],
7683 next_commands.clone(),
7684 ),
7685 counts: counts.clone(),
7686 refresh,
7687 compaction: graph_db_compaction_policy(root, scope, &counts, false),
7688 recovery: None,
7689 next_commands,
7690 warnings,
7691 });
7692 }
7693
7694 let conn = open_sqlite_graph_db_readonly(graph_db)?;
7695 let recovery = conn.recovery();
7696 let mut warnings = warnings;
7697 if let Some(recovery) = recovery {
7698 warnings.push(graph_db_read_recovery_diagnostic(recovery));
7699 }
7700 let mut freshness = sqlite_graph_freshness_from_conn(conn.conn(), scope.unwrap_or("root"))?;
7701 let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
7702 .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
7703 if !schema_diagnostics.is_empty() {
7704 freshness.diagnostics.extend(schema_diagnostics);
7705 freshness.fail_closed = true;
7706 freshness.status = "stale".to_string();
7707 }
7708 let counts = sqlite_graph_counts(conn.conn(), scope.unwrap_or("root"))?;
7709 let semantic_row_count = sqlite_graph_semantic_node_count(conn.conn()).ok();
7710 warnings.extend(
7711 sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
7712 .unwrap_or_else(|err| {
7713 vec![format!(
7714 "graph tombstone retention inspection failed: {err}"
7715 )]
7716 }),
7717 );
7718 let status = if freshness.fail_closed {
7719 "stale"
7720 } else {
7721 "current"
7722 }
7723 .to_string();
7724
7725 Ok(GraphDbOperatorReport {
7726 root: root.to_string_lossy().to_string(),
7727 scope: scope.map(str::to_string),
7728 graph_db: graph_db.to_string_lossy().to_string(),
7729 operation: operation.to_string(),
7730 status,
7731 materialized: true,
7732 freshness,
7733 readiness: graph_db_semantic_readiness(root, scope, semantic_row_count),
7734 compaction: graph_db_compaction_policy(root, scope, &counts, false),
7735 counts,
7736 refresh,
7737 recovery,
7738 next_commands: graph_db_operator_next_commands(root, scope, false),
7739 warnings,
7740 })
7741}
7742
7743fn print_graph_db_operator_human(report: &GraphDbOperatorReport) {
7744 println!(
7745 "graph-db {} status: {} materialized: {}",
7746 report.operation, report.status, report.materialized
7747 );
7748 println!("graph_db: {}", report.graph_db);
7749 println!(
7750 "projection: version={} hash={} watermark={}",
7751 report
7752 .freshness
7753 .projection_version
7754 .as_deref()
7755 .unwrap_or("<missing>"),
7756 report
7757 .freshness
7758 .content_hash
7759 .as_deref()
7760 .unwrap_or("<missing>"),
7761 report
7762 .freshness
7763 .source_watermark
7764 .as_deref()
7765 .unwrap_or("<missing>")
7766 );
7767 println!(
7768 "rows: {} node(s), {} edge(s), {} tombstone(s)",
7769 report.counts.nodes, report.counts.edges, report.counts.tombstones.total
7770 );
7771 println!(
7772 "readiness: {} reason: {} fail_closed: {}",
7773 report.readiness.status, report.readiness.reason, report.readiness.fail_closed
7774 );
7775 if let Some(file_size) = report.counts.file_size_bytes {
7776 println!(
7777 "storage: {} byte(s), {} free byte(s)",
7778 file_size,
7779 report.counts.freelist_bytes.unwrap_or(0)
7780 );
7781 }
7782 if let Some(refresh) = &report.refresh {
7783 println!(
7784 "refresh: {} tombstoned node(s), {} tombstoned edge(s)",
7785 refresh.tombstoned_nodes, refresh.tombstoned_edges
7786 );
7787 println!(
7788 "delta: {} node upsert(s), {} edge upsert(s), {} property row upsert(s), {} unchanged node(s), {} unchanged edge(s), {} unchanged property row(s), {} deleted property row(s), {} pruned tombstone(s)",
7789 refresh.upserted_nodes,
7790 refresh.upserted_edges,
7791 refresh.upserted_properties,
7792 refresh.unchanged_nodes,
7793 refresh.unchanged_edges,
7794 refresh.unchanged_properties,
7795 refresh.deleted_properties,
7796 refresh.pruned_tombstones
7797 );
7798 }
7799 println!(
7800 "compaction: {} tombstone_scan_rows={} live_rows={}",
7801 report.compaction.status,
7802 report.compaction.tombstone_scan_rows,
7803 report.compaction.live_rows
7804 );
7805 for proof in &report.compaction.proof {
7806 println!("compaction proof: {proof}");
7807 }
7808 if let Some(recovery) = report.recovery {
7809 println!("recovery: {}", graph_db_read_recovery_diagnostic(recovery));
7810 }
7811 for diagnostic in &report.freshness.diagnostics {
7812 println!("diagnostic: {diagnostic}");
7813 }
7814 for diagnostic in &report.readiness.diagnostics {
7815 println!("readiness diagnostic: {diagnostic}");
7816 }
7817 for warning in &report.warnings {
7818 println!("warning: {warning}");
7819 }
7820 for command in &report.readiness.next_commands {
7821 println!("readiness next: {command}");
7822 }
7823 for command in &report.next_commands {
7824 println!("next: {command}");
7825 }
7826}
7827
7828pub(crate) fn print_graph_db_operator_report(
7829 report: &GraphDbOperatorReport,
7830 format: OutputFormat,
7831) -> Result<()> {
7832 if format.json_output {
7833 print_json_or_envelope(
7834 report,
7835 &format,
7836 "graph-db",
7837 &report.operation,
7838 ToolEnvelopeSummary {
7839 text: format!(
7840 "Graph DB {} status {} with {} node(s), {} edge(s), {} tombstone(s)",
7841 report.operation,
7842 report.status,
7843 report.counts.nodes,
7844 report.counts.edges,
7845 report.counts.tombstones.total
7846 ),
7847 metrics: vec![
7848 envelope_metric("operation", &report.operation),
7849 envelope_metric("status", &report.status),
7850 envelope_metric("nodes", report.counts.nodes),
7851 envelope_metric("edges", report.counts.edges),
7852 envelope_metric("tombstones", report.counts.tombstones.total),
7853 envelope_metric("compaction", &report.compaction.status),
7854 envelope_metric("readiness", &report.readiness.status),
7855 ],
7856 },
7857 false,
7858 report.next_commands.clone(),
7859 )
7860 } else {
7861 print_graph_db_operator_human(report);
7862 Ok(())
7863 }
7864}
7865
/// Returns `run` with any trailing parenthesised note removed.
///
/// Everything from the first `" ("` onward is dropped; a string without that
/// marker is returned unchanged.
fn status_run_command_without_notes(run: &str) -> &str {
    match run.find(" (") {
        Some(note_start) => &run[..note_start],
        None => run,
    }
}
7871
7872fn status_summarize_extract_command(run: &str) -> &str {
7873 let run = status_run_command_without_notes(run);
7874 run.split(" && ")
7875 .find(|command| command.contains("summarize --extract"))
7876 .unwrap_or(run)
7877}
7878
7879fn graph_db_status_summarize_command(report: &status::StatusReport) -> String {
7880 report
7881 .recommendations
7882 .run
7883 .as_deref()
7884 .filter(|command| command.contains("summarize --extract"))
7885 .map(status_summarize_extract_command)
7886 .unwrap_or("tsift summarize --extract .")
7887 .to_string()
7888}
7889
7890fn graph_db_semantic_rows_readiness(row_count: usize, source: &str) -> GraphEffectivenessReadiness {
7891 let mut readiness = graph_effectiveness_ready("semantic_rows_available");
7892 readiness.diagnostics.push(format!(
7893 "graph projection has {row_count} semantic_concept/semantic_entity row(s) from {source}; graph semantic rows are available"
7894 ));
7895 readiness
7896}
7897
7898fn graph_db_semantic_readiness(
7899 root: &Path,
7900 scope: Option<&str>,
7901 semantic_row_count: Option<usize>,
7902) -> GraphEffectivenessReadiness {
7903 if let Some(row_count) = semantic_row_count
7904 && row_count > 0
7905 {
7906 return graph_db_semantic_rows_readiness(row_count, "materialized graph projection");
7907 }
7908
7909 let report = match status::check_status(root) {
7910 Ok(report) => report,
7911 Err(err) => {
7912 return graph_effectiveness_blocked(
7913 "status_check_unavailable",
7914 vec![format!(
7915 "semantic readiness could not inspect summary cache after graph-db refresh: {err:#}"
7916 )],
7917 vec![graph_db_refresh_command(root, scope)],
7918 );
7919 }
7920 };
7921
7922 match &report.summaries {
7923 status::SummaryStatus::Available {
7924 cached_files,
7925 total_indexed_files,
7926 coverage_pct,
7927 ..
7928 } => {
7929 let mut readiness = graph_effectiveness_ready("semantic_rows_available");
7930 readiness.diagnostics.push(format!(
7931 "summary cache has {cached_files}/{total_indexed_files} indexed file(s) cached ({coverage_pct}% coverage); graph semantic rows are available"
7932 ));
7933 readiness
7934 }
7935 status::SummaryStatus::None { .. } => {
7936 let summarize = graph_db_status_summarize_command(&report);
7937 let index_command = report
7938 .recommendations
7939 .run
7940 .as_deref()
7941 .filter(|cmd| cmd.contains("index"))
7942 .map(str::to_string);
7943 let mut repair = Vec::new();
7944 if let Some(cmd) = index_command {
7945 repair.push(cmd);
7946 }
7947 repair.push(summarize.clone());
7948 repair.push(graph_db_refresh_command(root, scope));
7949 graph_effectiveness_blocked(
7950 "summary_cache_empty",
7951 vec![format!(
7952 "summary cache empty: graph-db materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
7953 summarize,
7954 root.display(),
7955 graph_db_refresh_command(root, scope)
7956 )],
7957 repair,
7958 )
7959 }
7960 status::SummaryStatus::Unavailable => {
7961 let mut repair: Vec<String> = report
7962 .recommendations
7963 .run
7964 .clone()
7965 .into_iter()
7966 .collect();
7967 let summarize = "tsift summarize --extract .".to_string();
7968 repair.push(summarize);
7969 repair.push(graph_db_refresh_command(root, scope));
7970 graph_effectiveness_blocked(
7971 "summary_cache_unavailable",
7972 vec![
7973 "summary cache unavailable because the source index is missing; build the index, extract summaries, and refresh the graph before relying on semantic graph evidence".to_string(),
7974 ],
7975 repair,
7976 )
7977 }
7978 }
7979}
7980
7981pub(crate) fn graph_db_operator_status_warnings(root: &Path, scope: Option<&str>) -> Vec<String> {
7982 let report = match status::check_status(root) {
7983 Ok(report) => report,
7984 Err(err) => {
7985 return vec![format!(
7986 "status check unavailable after graph-db refresh: {err:#}"
7987 )];
7988 }
7989 };
7990
7991 let summarize_run = if matches!(report.summaries, status::SummaryStatus::None { .. }) {
7992 Some(graph_db_status_summarize_command(&report))
7993 } else {
7994 None
7995 };
7996 let mut warnings = report.reminders;
7997 if matches!(report.summaries, status::SummaryStatus::None { .. }) {
7998 let run = summarize_run.unwrap_or_else(|| "tsift summarize --extract .".to_string());
7999 warnings.push(format!(
8000 "summary cache empty: graph-db refresh materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
8001 run,
8002 root.display(),
8003 graph_db_refresh_command(root, scope)
8004 ));
8005 }
8006 dedupe_preserve_order(warnings)
8007}
8008
8009pub(crate) fn print_graph_db_compaction_human(report: &GraphDbCompactionReport) {
8010 println!(
8011 "graph-db compact applied:{} pruned_tombstones:{} reclaimed:{} byte(s)",
8012 report.applied, report.pruned_tombstones, report.reclaimed_bytes
8013 );
8014 println!("graph_db: {}", report.graph_db);
8015 println!(
8016 "before: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
8017 report.counts_before.nodes,
8018 report.counts_before.edges,
8019 report.counts_before.tombstones.total,
8020 report.counts_before.file_size_bytes.unwrap_or(0),
8021 report.counts_before.freelist_bytes.unwrap_or(0)
8022 );
8023 println!(
8024 "after: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
8025 report.counts_after.nodes,
8026 report.counts_after.edges,
8027 report.counts_after.tombstones.total,
8028 report.counts_after.file_size_bytes.unwrap_or(0),
8029 report.counts_after.freelist_bytes.unwrap_or(0)
8030 );
8031 for proof in &report.compaction_after.proof {
8032 println!("proof: {proof}");
8033 }
8034 for warning in &report.warnings {
8035 println!("warning: {warning}");
8036 }
8037 for command in &report.next_commands {
8038 println!("next: {command}");
8039 }
8040}
8041
8042fn parse_graph_db_property_filters(raw: &[String]) -> Result<Vec<GraphDbPropertyFilter>> {
8043 raw.iter()
8044 .map(|value| {
8045 let (key, filter_value) = value
8046 .split_once('=')
8047 .with_context(|| format!("graph-db --property expects KEY=VALUE, got {value:?}"))?;
8048 let key = key.trim();
8049 let filter_value = filter_value.trim();
8050 if key.is_empty() || filter_value.is_empty() {
8051 bail!("graph-db --property expects non-empty KEY=VALUE, got {value:?}");
8052 }
8053 Ok(GraphDbPropertyFilter {
8054 key: key.to_string(),
8055 value: filter_value.to_string(),
8056 })
8057 })
8058 .collect()
8059}
8060
8061fn graph_db_query_options(
8062 cursor: Option<String>,
8063 limit: Option<usize>,
8064 property_filters: &[String],
8065) -> Result<GraphDbQueryOptions> {
8066 Ok(GraphDbQueryOptions {
8067 cursor,
8068 limit: limit.filter(|limit| *limit > 0),
8069 property_filters: parse_graph_db_property_filters(property_filters)?,
8070 })
8071}
8072
8073fn graph_db_query_options_for_store(options: &GraphDbQueryOptions) -> GraphQueryOptions {
8074 GraphQueryOptions {
8075 cursor: options.cursor.clone(),
8076 limit: options.limit,
8077 property_filters: options
8078 .property_filters
8079 .iter()
8080 .map(|filter| GraphPropertyFilter {
8081 key: filter.key.clone(),
8082 value: filter.value.clone(),
8083 })
8084 .collect(),
8085 }
8086}
8087
8088fn graph_db_page_report_from_store(
8089 page: GraphQueryPage,
8090 property_filters: Vec<GraphDbPropertyFilter>,
8091) -> GraphDbPageReport {
8092 GraphDbPageReport {
8093 cursor: page.cursor,
8094 limit: page.limit,
8095 next_cursor: page.next_cursor,
8096 returned_nodes: page.returned_nodes,
8097 returned_edges: page.returned_edges,
8098 truncated: page.truncated,
8099 property_filters,
8100 diagnostics: page.diagnostics,
8101 }
8102}
8103
8104fn graph_db_neighborhood_ranking_gate(
8105 ranked_neighbor_cap: usize,
8106) -> GraphDbNeighborhoodRankingGate {
8107 GraphDbNeighborhoodRankingGate {
8108 status: "held_default_order_unchanged".to_string(),
8109 ranked_output_default: false,
8110 default_order: "stable_node_id".to_string(),
8111 default_change_gate: "community_search_quality_metrics".to_string(),
8112 required_workloads: metric_digest::COMMUNITY_SEARCH_WORKLOADS
8113 .iter()
8114 .map(|workload| (*workload).to_string())
8115 .collect(),
8116 required_metrics: metric_digest::COMMUNITY_SEARCH_REQUIRED_METRICS
8117 .iter()
8118 .map(|metric| (*metric).to_string())
8119 .collect(),
8120 max_duration_regression_percent: metric_digest::COMMUNITY_MAX_DURATION_REGRESSION_PERCENT,
8121 min_handle_coverage_pct: metric_digest::COMMUNITY_MIN_HANDLE_COVERAGE_PCT,
8122 min_duplicate_name_precision: metric_digest::COMMUNITY_MIN_DUPLICATE_NAME_PRECISION,
8123 min_top_community_stability: metric_digest::COMMUNITY_MIN_TOP_COMMUNITY_STABILITY,
8124 diagnostics: vec![
8125 "ranked_neighbors is additive; neighborhood nodes remain ordered by stable node id for cursor pagination".to_string(),
8126 format!(
8127 "ranked_neighbors is score-capped at {ranked_neighbor_cap} entries so previews stay bounded while cursor pagination remains exhaustive"
8128 ),
8129 "changing the default neighborhood order requires the community-search gate to pass for every required workload".to_string(),
8130 ],
8131 }
8132}
8133
8134fn graph_db_ranked_neighbor_cap(limit: Option<usize>) -> usize {
8135 match limit {
8136 Some(0) | None => GRAPH_DB_RANKED_NEIGHBOR_CAP,
8137 Some(limit) => limit.clamp(1, GRAPH_DB_RANKED_NEIGHBOR_CAP),
8138 }
8139}
8140
8141fn graph_db_ranked_neighbors(
8142 center_id: &str,
8143 nodes: &[SubstrateGraphNode],
8144 edges: &[SubstrateGraphEdge],
8145 cap: usize,
8146) -> Vec<GraphDbRankedNeighbor> {
8147 resolution::ranked_neighbors_capped(center_id, nodes, edges, cap)
8148}
8149
8150fn graph_db_ranked_neighborhood_comparison<S: GraphStore>(
8151 center_id: &str,
8152 depth: usize,
8153 edge_kind: Option<&str>,
8154 limit: Option<usize>,
8155 unranked_nodes: &[SubstrateGraphNode],
8156 unranked_edges: &[SubstrateGraphEdge],
8157 store: &S,
8158) -> Result<Option<GraphDbRankedNeighborhoodComparison>> {
8159 use std::time::Instant;
8160 let max_nodes = match limit {
8161 Some(0) | None => 200,
8162 Some(n) => n.clamp(10, 500),
8163 };
8164 let mut options = RankedNeighborhoodOptions::new(depth, max_nodes)
8165 .with_scoring(NeighborhoodScoring::EdgeKindWeighted);
8166 if let Some(kind) = edge_kind {
8167 options = options.with_edge_kind(kind);
8168 }
8169 let start = Instant::now();
8170 let result = store.ranked_neighborhood(center_id, &options)?;
8171 let latency = start.elapsed().as_micros();
8172 let Some(ranked) = result else {
8173 return Ok(None);
8174 };
8175 let unranked_ids: BTreeSet<_> = unranked_nodes.iter().map(|n| n.id.as_str()).collect();
8176 let ranked_ids: BTreeSet<_> = ranked.nodes.iter().map(|n| n.id.as_str()).collect();
8177 let overlap_count = ranked_ids.intersection(&unranked_ids).count();
8178 let overlap_pct = if unranked_ids.is_empty() || ranked_ids.is_empty() {
8179 0.0
8180 } else {
8181 (overlap_count as f64 / unranked_ids.len().max(ranked_ids.len()) as f64) * 100.0
8182 };
8183 let count_duplicates = |nodes: &[SubstrateGraphNode]| -> usize {
8184 let mut name_count = BTreeMap::<&str, usize>::new();
8185 for n in nodes {
8186 *name_count.entry(&n.label).or_default() += 1;
8187 }
8188 name_count.values().filter(|&&c| c > 1).count()
8189 };
8190 let count_handle_coverage = |nodes: &[SubstrateGraphNode]| -> f64 {
8191 if nodes.is_empty() {
8192 return 100.0;
8193 }
8194 let with_handle = nodes
8195 .iter()
8196 .filter(|n| n.properties.contains_key("handle") || n.properties.contains_key("ref_id"))
8197 .count();
8198 (with_handle as f64 / nodes.len() as f64) * 100.0
8199 };
8200 let useful_density = |nodes: &[SubstrateGraphNode], edges: &[SubstrateGraphEdge]| -> f64 {
8201 if nodes.is_empty() {
8202 return 0.0;
8203 }
8204 let semantic_kinds = [
8205 "semantic_concept",
8206 "semantic_entity",
8207 "symbol",
8208 "file",
8209 "source_handle",
8210 ];
8211 let useful = nodes
8212 .iter()
8213 .filter(|n| semantic_kinds.contains(&n.kind.as_str()))
8214 .count();
8215 let edge_diversity = edges.iter().map(|e| &e.kind).collect::<BTreeSet<_>>().len();
8216 let kind_diversity = nodes.iter().map(|n| &n.kind).collect::<BTreeSet<_>>().len();
8217 (useful as f64 * 0.5 + kind_diversity as f64 * 0.3 + edge_diversity as f64 * 0.2)
8218 / nodes.len() as f64
8219 };
8220 let community_truncation_summary = if ranked.pruned_count > 0 && !ranked.edges.is_empty() {
8221 let edge_pairs: Vec<(String, String)> = ranked
8222 .edges
8223 .iter()
8224 .map(|e| (e.from_id.clone(), e.to_id.clone()))
8225 .collect();
8226 let cr = tsift_graph::detect_communities(&edge_pairs);
8227 let kept_labels: BTreeSet<&str> = ranked.nodes.iter().map(|n| n.label.as_str()).collect();
8228 let mut fully_kept = 0usize;
8229 let mut partially_pruned = 0usize;
8230 let mut fully_pruned = 0usize;
8231 let mut pruned_kinds = BTreeSet::new();
8232 let mut pruned_labels = Vec::new();
8233 for comm in &cr.communities {
8234 let kept_in_comm: Vec<&str> = comm
8235 .members
8236 .iter()
8237 .filter(|m| kept_labels.contains(m.name.as_str()))
8238 .map(|m| m.name.as_str())
8239 .collect();
8240 if kept_in_comm.len() == comm.members.len() {
8241 fully_kept += 1;
8242 } else if kept_in_comm.is_empty() {
8243 fully_pruned += 1;
8244 for m in &comm.members {
8245 if let Some(n) = ranked.nodes.iter().find(|n| n.label == m.name) {
8246 pruned_kinds.insert(n.kind.clone());
8247 }
8248 pruned_labels.push(m.name.clone());
8249 }
8250 } else {
8251 partially_pruned += 1;
8252 }
8253 }
8254 pruned_labels.truncate(5);
8255 Some(CommunityTruncationSummary {
8256 total_communities: cr.communities.len(),
8257 fully_kept,
8258 partially_pruned,
8259 fully_pruned,
8260 pruned_community_kinds: pruned_kinds.into_iter().collect(),
8261 pruned_community_top_labels: pruned_labels,
8262 })
8263 } else {
8264 None
8265 };
8266 Ok(Some(GraphDbRankedNeighborhoodComparison {
8267 traversal_nodes: ranked.nodes.len(),
8268 traversal_edges: ranked.edges.len(),
8269 pruned_count: ranked.pruned_count,
8270 total_discovered: ranked.total_discovered,
8271 latency_micros: latency,
8272 overlap_with_unranked_pct: (overlap_pct * 100.0).round() / 100.0,
8273 useful_hit_density_ranked: (useful_density(&ranked.nodes, &ranked.edges) * 1000.0).round()
8274 / 1000.0,
8275 useful_hit_density_unranked: (useful_density(unranked_nodes, unranked_edges) * 1000.0)
8276 .round()
8277 / 1000.0,
8278 duplicate_name_count_ranked: count_duplicates(&ranked.nodes),
8279 duplicate_name_count_unranked: count_duplicates(unranked_nodes),
8280 handle_coverage_ranked_pct: (count_handle_coverage(&ranked.nodes) * 100.0).round() / 100.0,
8281 handle_coverage_unranked_pct: (count_handle_coverage(unranked_nodes) * 100.0).round()
8282 / 100.0,
8283 community_truncation_summary,
8284 diagnostics: vec![
8285 format!(
8286 "ranked_neighborhood traversed {} node(s), {} edge(s) with {} pruned of {} discovered in {}µs",
8287 ranked.nodes.len(),
8288 ranked.edges.len(),
8289 ranked.pruned_count,
8290 ranked.total_discovered,
8291 latency
8292 ),
8293 format!(
8294 "overlap with unranked BFS: {:.1}% ({} shared of {} unranked, {} ranked)",
8295 overlap_pct,
8296 overlap_count,
8297 unranked_ids.len(),
8298 ranked_ids.len()
8299 ),
8300 "comparison is diagnostic; promotion requires community-search quality gate to pass for every required workload".to_string(),
8301 ],
8302 }))
8303}
8304
8305struct GraphDbBudgetedSubgraph {
8306 nodes: Vec<SubstrateGraphNode>,
8307 edges: Vec<SubstrateGraphEdge>,
8308 report: GraphDbOutputBudgetReport,
8309 truncated: bool,
8310 next_cursor: Option<String>,
8311}
8312
/// Token cap used when no explicit limit is requested.
const GRAPH_DB_OUTPUT_DEFAULT_TOKEN_CAP: usize = 6_000;
/// Smallest token cap a limit can scale down to.
const GRAPH_DB_OUTPUT_MIN_TOKEN_CAP: usize = 1_200;
/// Largest token cap a limit can scale up to.
const GRAPH_DB_OUTPUT_MAX_TOKEN_CAP: usize = 12_000;

/// Derives the output token cap from an optional item limit.
///
/// A missing or zero `limit` selects the default cap. Any other limit is
/// multiplied by 320 (saturating) and bounded to the min/max caps.
fn graph_db_output_token_cap(limit: Option<usize>) -> usize {
    let Some(requested) = limit.filter(|value| *value != 0) else {
        return GRAPH_DB_OUTPUT_DEFAULT_TOKEN_CAP;
    };
    let scaled = requested.saturating_mul(320);
    scaled
        .max(GRAPH_DB_OUTPUT_MIN_TOKEN_CAP)
        .min(GRAPH_DB_OUTPUT_MAX_TOKEN_CAP)
}
8325
/// Returns how many nodes of `kind` the output budget admits.
///
/// Without a limit (absent or zero) each kind has a fixed default quota. With
/// a positive limit the quota is that limit plus a per-kind headroom, using
/// saturating addition.
fn graph_db_node_kind_quota(kind: &str, limit: Option<usize>) -> usize {
    // (quota without a limit, headroom added to an explicit limit)
    let (default_quota, headroom): (usize, usize) = match kind {
        "source_handle" | "semantic_concept" | "semantic_entity" => (10, 4),
        "worker_context" | "worker_result" => (8, 2),
        "file" | "symbol" | "route" => (12, 4),
        _ => (8, 1),
    };
    match limit {
        Some(requested) if requested > 0 => requested.saturating_add(headroom),
        _ => default_quota,
    }
}
8345
/// Returns how many edges of `kind` the output budget admits.
///
/// Without a limit (absent or zero) each kind group has a fixed default
/// quota. With a positive limit, mention edges get three times the limit,
/// relation/call/define edges twice the limit, and every other kind the limit
/// plus two, all with saturating arithmetic.
fn graph_db_edge_kind_quota(kind: &str, limit: Option<usize>) -> usize {
    let requested = limit.filter(|value| *value > 0);
    match (kind, requested) {
        ("mentions" | "mentions_concept" | "mentions_entity", None) => 24,
        ("mentions" | "mentions_concept" | "mentions_entity", Some(base)) => {
            base.saturating_mul(3)
        }
        ("semantic_relation" | "calls" | "defines", None) => 20,
        ("semantic_relation" | "calls" | "defines", Some(base)) => base.saturating_mul(2),
        (_, None) => 16,
        (_, Some(base)) => base.saturating_add(2),
    }
}
8361
8362fn graph_db_estimated_tokens<T: Serialize>(value: &T) -> usize {
8363 serde_json::to_vec(value)
8364 .map(|bytes| bytes.len().div_ceil(4).max(1))
8365 .unwrap_or(1)
8366}
8367
8368fn graph_db_node_search_text(node: &SubstrateGraphNode) -> String {
8369 let mut parts = vec![node.kind.clone(), node.label.clone()];
8370 for key in [
8371 "detail",
8372 "description",
8373 "source_ref",
8374 "path",
8375 "source_file",
8376 "source_symbol",
8377 "text_preview",
8378 ] {
8379 if let Some(value) = node.properties.get(key) {
8380 parts.push(value.clone());
8381 }
8382 }
8383 parts.join(" ")
8384}
8385
8386fn graph_db_semantic_scores_for_query(
8387 query: Option<&str>,
8388 nodes: &[SubstrateGraphNode],
8389) -> BTreeMap<String, f64> {
8390 let Some(query) = query.filter(|value| !value.trim().is_empty()) else {
8391 return BTreeMap::new();
8392 };
8393 let query_embedding = semantic_embedding(query);
8394 nodes
8395 .iter()
8396 .filter(|node| matches!(node.kind.as_str(), "semantic_concept" | "semantic_entity"))
8397 .filter_map(|node| {
8398 let embedding = node
8399 .properties
8400 .get("embedding")
8401 .and_then(|value| parse_semantic_embedding_property(value))?;
8402 Some((
8403 node.id.clone(),
8404 semantic_cosine(&query_embedding, &embedding),
8405 ))
8406 })
8407 .collect()
8408}
8409
8410fn graph_db_depth_by_id(
8411 origin_ids: &[String],
8412 edges: &[SubstrateGraphEdge],
8413) -> BTreeMap<String, usize> {
8414 let mut adjacency = BTreeMap::<String, Vec<String>>::new();
8415 for edge in edges {
8416 adjacency
8417 .entry(edge.from_id.clone())
8418 .or_default()
8419 .push(edge.to_id.clone());
8420 adjacency
8421 .entry(edge.to_id.clone())
8422 .or_default()
8423 .push(edge.from_id.clone());
8424 }
8425
8426 let mut depth_by_id = BTreeMap::<String, usize>::new();
8427 let mut queue = VecDeque::<String>::new();
8428 for origin in origin_ids {
8429 if depth_by_id.insert(origin.clone(), 0).is_none() {
8430 queue.push_back(origin.clone());
8431 }
8432 }
8433 while let Some(current) = queue.pop_front() {
8434 let depth = depth_by_id.get(¤t).copied().unwrap_or(0);
8435 for next in adjacency.get(¤t).into_iter().flatten() {
8436 if depth_by_id.contains_key(next) {
8437 continue;
8438 }
8439 depth_by_id.insert(next.clone(), depth.saturating_add(1));
8440 queue.push_back(next.clone());
8441 }
8442 }
8443 depth_by_id
8444}
8445
8446fn graph_db_source_covered_ids(
8447 nodes: &[SubstrateGraphNode],
8448 edges: &[SubstrateGraphEdge],
8449) -> BTreeSet<String> {
8450 let source_ids = nodes
8451 .iter()
8452 .filter(|node| node.kind == "source_handle")
8453 .map(|node| node.id.as_str())
8454 .collect::<BTreeSet<_>>();
8455 let mut covered = source_ids
8456 .iter()
8457 .map(|id| (*id).to_string())
8458 .collect::<BTreeSet<_>>();
8459 for edge in edges {
8460 if source_ids.contains(edge.from_id.as_str()) {
8461 covered.insert(edge.to_id.clone());
8462 }
8463 if source_ids.contains(edge.to_id.as_str()) {
8464 covered.insert(edge.from_id.clone());
8465 }
8466 }
8467 covered
8468}
8469
8470fn graph_db_recency_score(node: &SubstrateGraphNode) -> i64 {
8471 for key in [
8472 "observed_at_unix",
8473 "completed_at_unix",
8474 "created_at_unix",
8475 "started_at_unix",
8476 ] {
8477 if let Some(value) = node.properties.get(key)
8478 && let Ok(epoch) = value.parse::<i64>()
8479 {
8480 return epoch.div_euclid(86_400).clamp(0, 40_000);
8481 }
8482 }
8483 0
8484}
8485
/// Returns the base usefulness score for a node kind.
///
/// Known kinds map to fixed scores; any other kind scores 40.
fn graph_db_node_kind_score(kind: &str) -> i64 {
    const KIND_SCORES: [(&str, i64); 11] = [
        ("source_handle", 180),
        ("worker_context", 170),
        ("worker_result", 160),
        ("semantic_concept", 150),
        ("semantic_entity", 150),
        ("backlog", 130),
        ("job_packet", 130),
        ("symbol", 120),
        ("file", 110),
        ("route", 105),
        ("session", 90),
    ];

    KIND_SCORES
        .iter()
        .find(|(known, _)| *known == kind)
        .map_or(40, |(_, score)| *score)
}
8500
/// Returns the base usefulness score for an edge kind.
///
/// Known kinds map to fixed scores; any other kind scores 40.
fn graph_db_edge_kind_score(kind: &str) -> i64 {
    const KIND_SCORES: [(&str, i64); 16] = [
        ("mentions_concept", 180),
        ("mentions_entity", 180),
        ("semantic_relation", 170),
        ("mentions", 165),
        ("requests_context", 155),
        ("scopes_context", 155),
        ("scopes_source", 155),
        ("explains_result", 150),
        ("calls", 145),
        ("defines", 130),
        ("handled_by", 130),
        ("defines_route", 130),
        ("contains", 120),
        ("targets", 120),
        ("records_memory_source", 115),
        ("has_vector_handle", 115),
    ];

    KIND_SCORES
        .iter()
        .find(|(known, _)| *known == kind)
        .map_or(40, |(_, score)| *score)
}
8515
8516fn graph_db_node_usefulness_score(
8517 node: &SubstrateGraphNode,
8518 depth_by_id: &BTreeMap<String, usize>,
8519 semantic_scores: &BTreeMap<String, f64>,
8520 source_covered_ids: &BTreeSet<String>,
8521 origin_ids: &[String],
8522) -> i64 {
8523 if origin_ids.iter().any(|origin| origin == &node.id) {
8524 return 1_000_000;
8525 }
8526 let semantic = semantic_scores
8527 .get(&node.id)
8528 .map(|score| (score.max(0.0) * 1_000.0) as i64)
8529 .unwrap_or(0);
8530 let depth_penalty = depth_by_id
8531 .get(&node.id)
8532 .map(|depth| (*depth as i64).saturating_mul(55))
8533 .unwrap_or(180);
8534 let source_coverage = if source_covered_ids.contains(&node.id)
8535 || node.properties.contains_key("source_ref")
8536 || node.properties.contains_key("path")
8537 {
8538 120
8539 } else {
8540 0
8541 };
8542 graph_db_node_kind_score(&node.kind)
8543 + semantic
8544 + source_coverage
8545 + graph_db_recency_score(node).min(80)
8546 - depth_penalty
8547}
8548
8549fn graph_db_edge_usefulness_score(
8550 edge: &SubstrateGraphEdge,
8551 node_score_by_id: &BTreeMap<String, i64>,
8552 depth_by_id: &BTreeMap<String, usize>,
8553) -> i64 {
8554 let endpoint_score = node_score_by_id
8555 .get(&edge.from_id)
8556 .copied()
8557 .unwrap_or_default()
8558 .max(
8559 node_score_by_id
8560 .get(&edge.to_id)
8561 .copied()
8562 .unwrap_or_default(),
8563 );
8564 let depth_penalty = depth_by_id
8565 .get(&edge.from_id)
8566 .into_iter()
8567 .chain(depth_by_id.get(&edge.to_id))
8568 .min()
8569 .map(|depth| (*depth as i64).saturating_mul(35))
8570 .unwrap_or(140);
8571 graph_db_edge_kind_score(&edge.kind) + (endpoint_score / 8) - depth_penalty
8572}
8573
/// Records one dropped item in the budget drop tally.
///
/// The tally is keyed by `(item, kind, reason)`; the matching counter is
/// created at zero when absent and then incremented by one.
fn graph_db_push_drop(
    drops: &mut BTreeMap<(String, String, String), usize>,
    item: &str,
    kind: &str,
    reason: &str,
) {
    let key = (item.to_owned(), kind.to_owned(), reason.to_owned());
    let tally = drops.entry(key).or_insert(0);
    *tally += 1;
}
8584
8585fn graph_db_budget_drop_report(
8586 drops: BTreeMap<(String, String, String), usize>,
8587) -> Vec<GraphDbDroppedByBudget> {
8588 drops
8589 .into_iter()
8590 .map(|((item, kind, reason), dropped)| GraphDbDroppedByBudget {
8591 item,
8592 kind,
8593 reason,
8594 dropped,
8595 })
8596 .collect()
8597}
8598
8599fn graph_db_apply_output_budget(
8600 origin_ids: &[String],
8601 semantic_scores: &BTreeMap<String, f64>,
8602 nodes: Vec<SubstrateGraphNode>,
8603 edges: Vec<SubstrateGraphEdge>,
8604 limit: Option<usize>,
8605) -> GraphDbBudgetedSubgraph {
8606 graph_db_apply_output_budget_with_depths_and_cursor(
8607 origin_ids,
8608 semantic_scores,
8609 nodes,
8610 edges,
8611 limit,
8612 None,
8613 None,
8614 )
8615}
8616
8617fn graph_db_apply_output_budget_with_depths_and_cursor(
8618 origin_ids: &[String],
8619 semantic_scores: &BTreeMap<String, f64>,
8620 nodes: Vec<SubstrateGraphNode>,
8621 edges: Vec<SubstrateGraphEdge>,
8622 limit: Option<usize>,
8623 depth_overrides: Option<&BTreeMap<String, usize>>,
8624 cursor: Option<&str>,
8625) -> GraphDbBudgetedSubgraph {
8626 let max_tokens = graph_db_output_token_cap(limit);
8627 let candidate_nodes = nodes.len();
8628 let candidate_edges = edges.len();
8629 let mut depth_by_id = graph_db_depth_by_id(origin_ids, &edges);
8630 if let Some(depth_overrides) = depth_overrides {
8631 for (id, depth) in depth_overrides {
8632 depth_by_id
8633 .entry(id.clone())
8634 .and_modify(|current| *current = (*current).min(*depth))
8635 .or_insert(*depth);
8636 }
8637 }
8638 let source_covered_ids = graph_db_source_covered_ids(&nodes, &edges);
8639 let node_score_by_id = nodes
8640 .iter()
8641 .map(|node| {
8642 (
8643 node.id.clone(),
8644 graph_db_node_usefulness_score(
8645 node,
8646 &depth_by_id,
8647 semantic_scores,
8648 &source_covered_ids,
8649 origin_ids,
8650 ),
8651 )
8652 })
8653 .collect::<BTreeMap<_, _>>();
8654
8655 let mut node_candidates = nodes.iter().collect::<Vec<_>>();
8656 node_candidates.sort_by(|left, right| {
8657 node_score_by_id
8658 .get(&right.id)
8659 .cmp(&node_score_by_id.get(&left.id))
8660 .then_with(|| left.kind.cmp(&right.kind))
8661 .then_with(|| left.label.cmp(&right.label))
8662 .then_with(|| left.id.cmp(&right.id))
8663 });
8664
8665 let cursor_skip = if let Some(cursor) = cursor {
8666 node_candidates
8667 .iter()
8668 .position(|node| node.id == cursor)
8669 .map(|pos| pos.saturating_add(1))
8670 .unwrap_or(0)
8671 } else {
8672 0
8673 };
8674 if cursor_skip > 0 {
8675 node_candidates = node_candidates.into_iter().skip(cursor_skip).collect();
8676 }
8677
8678 let mut selected_node_ids = BTreeSet::new();
8679 let mut selected_node_counts = BTreeMap::<String, usize>::new();
8680 let mut estimated_tokens = 0usize;
8681 let mut drops = BTreeMap::<(String, String, String), usize>::new();
8682 for node in &node_candidates {
8683 let kind_count = selected_node_counts
8684 .get(&node.kind)
8685 .copied()
8686 .unwrap_or_default();
8687 if !origin_ids.iter().any(|origin| origin == &node.id)
8688 && kind_count >= graph_db_node_kind_quota(&node.kind, limit)
8689 {
8690 graph_db_push_drop(&mut drops, "node", &node.kind, "per_kind_quota");
8691 continue;
8692 }
8693 let tokens = graph_db_estimated_tokens(node);
8694 if !origin_ids.iter().any(|origin| origin == &node.id)
8695 && estimated_tokens.saturating_add(tokens) > max_tokens
8696 {
8697 graph_db_push_drop(&mut drops, "node", &node.kind, "estimated_token_cap");
8698 continue;
8699 }
8700 selected_node_ids.insert(node.id.clone());
8701 *selected_node_counts.entry(node.kind.clone()).or_default() += 1;
8702 estimated_tokens = estimated_tokens.saturating_add(tokens);
8703 }
8704
8705 let has_remaining_candidates = node_candidates
8706 .iter()
8707 .any(|node| !selected_node_ids.contains(&node.id));
8708
8709 let mut selected_nodes = nodes
8710 .into_iter()
8711 .filter(|node| selected_node_ids.contains(&node.id))
8712 .collect::<Vec<_>>();
8713
8714 let mut edge_candidates = edges
8715 .iter()
8716 .filter(|edge| {
8717 selected_node_ids.contains(&edge.from_id) && selected_node_ids.contains(&edge.to_id)
8718 })
8719 .collect::<Vec<_>>();
8720 let edge_score_by_key = edge_candidates
8721 .iter()
8722 .map(|edge| {
8723 (
8724 graph_db_edge_key(edge),
8725 graph_db_edge_usefulness_score(edge, &node_score_by_id, &depth_by_id),
8726 )
8727 })
8728 .collect::<BTreeMap<_, _>>();
8729 edge_candidates.sort_by(|left, right| {
8730 edge_score_by_key
8731 .get(&graph_db_edge_key(right))
8732 .cmp(&edge_score_by_key.get(&graph_db_edge_key(left)))
8733 .then_with(|| left.kind.cmp(&right.kind))
8734 .then_with(|| left.from_id.cmp(&right.from_id))
8735 .then_with(|| left.to_id.cmp(&right.to_id))
8736 });
8737
8738 let endpoint_dropped_edges = edges
8739 .iter()
8740 .filter(|edge| {
8741 !selected_node_ids.contains(&edge.from_id) || !selected_node_ids.contains(&edge.to_id)
8742 })
8743 .count();
8744 if endpoint_dropped_edges > 0 {
8745 drops.insert(
8746 (
8747 "edge".to_string(),
8748 "*".to_string(),
8749 "endpoint_node_dropped".to_string(),
8750 ),
8751 endpoint_dropped_edges,
8752 );
8753 }
8754
8755 let mut selected_edge_ids = BTreeSet::new();
8756 let mut selected_edge_counts = BTreeMap::<String, usize>::new();
8757 for edge in edge_candidates {
8758 let kind_count = selected_edge_counts
8759 .get(&edge.kind)
8760 .copied()
8761 .unwrap_or_default();
8762 if kind_count >= graph_db_edge_kind_quota(&edge.kind, limit) {
8763 graph_db_push_drop(&mut drops, "edge", &edge.kind, "per_kind_quota");
8764 continue;
8765 }
8766 let tokens = graph_db_estimated_tokens(edge);
8767 if estimated_tokens.saturating_add(tokens) > max_tokens {
8768 graph_db_push_drop(&mut drops, "edge", &edge.kind, "estimated_token_cap");
8769 continue;
8770 }
8771 selected_edge_ids.insert(graph_db_edge_key(edge));
8772 *selected_edge_counts.entry(edge.kind.clone()).or_default() += 1;
8773 estimated_tokens = estimated_tokens.saturating_add(tokens);
8774 }
8775
8776 let selected_edges = edges
8777 .into_iter()
8778 .filter(|edge| selected_edge_ids.contains(&graph_db_edge_key(edge)))
8779 .collect::<Vec<_>>();
8780 let dropped_by_budget = graph_db_budget_drop_report(drops);
8781 let truncated = has_remaining_candidates;
8782 let next_cursor = if truncated {
8783 selected_nodes.last().map(|node| node.id.clone())
8784 } else {
8785 None
8786 };
8787 let mut diagnostics = vec![
8788 "budget ranking signals: semantic_match, edge_kind, depth, recency, source_handle_coverage"
8789 .to_string(),
8790 format!(
8791 "selected {} of {} candidate node(s) and {} of {} candidate edge(s) within estimated token cap {}",
8792 selected_nodes.len(),
8793 candidate_nodes,
8794 selected_edges.len(),
8795 candidate_edges,
8796 max_tokens
8797 ),
8798 ];
8799 if cursor.is_some() {
8800 diagnostics.push(format!(
8801 "cursor skipped {} previously returned candidate(s)",
8802 cursor_skip
8803 ));
8804 }
8805 if next_cursor.is_some() {
8806 diagnostics.push(
8807 "result was truncated; pass next_cursor as --cursor for the next page".to_string(),
8808 );
8809 }
8810 selected_nodes.shrink_to_fit();
8811
8812 GraphDbBudgetedSubgraph {
8813 nodes: selected_nodes,
8814 edges: selected_edges,
8815 report: GraphDbOutputBudgetReport {
8816 max_tokens,
8817 estimated_tokens,
8818 selected_nodes: selected_node_ids.len(),
8819 selected_edges: selected_edge_ids.len(),
8820 candidate_nodes,
8821 candidate_edges,
8822 dropped_by_budget,
8823 diagnostics,
8824 },
8825 truncated,
8826 next_cursor,
8827 }
8828}
8829
8830fn graph_db_edge_key(edge: &SubstrateGraphEdge) -> String {
8831 if edge.id.is_empty() {
8832 substrate::ConvexEdgeRow::stable_key(&edge.from_id, &edge.to_id, &edge.kind)
8833 } else {
8834 edge.id.clone()
8835 }
8836}
8837
8838fn graph_db_schema() -> GraphDbSchema {
8839 GraphDbSchema {
8840 contract_versions: vec![
8841 GraphDbSchemaContract {
8842 name: "graph_db_evidence",
8843 version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
8844 description: "graph-db evidence JSON packet including packet_id, projection hash, worker context, source handles, worker results, semantic rows, replay commands, and repair commands",
8845 },
8846 GraphDbSchemaContract {
8847 name: "worker_prompt_packet",
8848 version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
8849 description: "conflict-matrix worker prompt packet with owned scope, scheduler fields, stable graph handles, expected tests, expansion commands, token budget, semantic ranking reasons, worker feedback closure controls, and fail-closed prompt text",
8850 },
8851 GraphDbSchemaContract {
8852 name: "conflict_matrix",
8853 version: CONFLICT_MATRIX_CONTRACT_VERSION,
8854 description: "parallel-dispatch decision report keyed by graph evidence packets, scheduler block fields, hard file/symbol/test/config gates, and soft worker-feedback closure ranking",
8855 },
8856 GraphDbSchemaContract {
8857 name: "context_pack_graph_orchestration",
8858 version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
8859 description: "context-pack graph orchestration summary with projection freshness, evidence packet ids, ownership blocks, and follow-up graph commands",
8860 },
8861 GraphDbSchemaContract {
8862 name: "session_review_follow_up",
8863 version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
8864 description: "session-review next-context follow-up command contract for resumable digest/context-pack commands",
8865 },
8866 GraphDbSchemaContract {
8867 name: "dispatch_trace",
8868 version: DISPATCH_TRACE_CONTRACT_VERSION,
8869 description: "operator review trace linking backlog, job packets, worker results, source handles, semantic rows, scheduler fields, evidence packet ids, worker feedback closure controls, and worker prompt packets",
8870 },
8871 GraphDbSchemaContract {
8872 name: "dependency_dag",
8873 version: DEPENDENCY_DAG_CONTRACT_VERSION,
8874 description: "topological planning DAG for agent-doc backlog targets with replayable dependency edges, topo batches, and cycle diagnostics",
8875 },
8876 ],
8877 node_fields: vec![
8878 GraphDbSchemaField {
8879 name: "id",
8880 value_type: "string",
8881 description: "Stable provider-neutral node id",
8882 },
8883 GraphDbSchemaField {
8884 name: "kind",
8885 value_type: "string",
8886 description: "Application-defined node family such as file, symbol, or backlog",
8887 },
8888 GraphDbSchemaField {
8889 name: "label",
8890 value_type: "string",
8891 description: "Human-readable label",
8892 },
8893 GraphDbSchemaField {
8894 name: "properties",
8895 value_type: "object<string,string>",
8896 description: "Adapter-specific string properties",
8897 },
8898 GraphDbSchemaField {
8899 name: "provenance",
8900 value_type: "array",
8901 description: "Source system and source reference metadata",
8902 },
8903 GraphDbSchemaField {
8904 name: "freshness",
8905 value_type: "object|null",
8906 description: "Optional content hash and observed timestamp",
8907 },
8908 ],
8909 edge_fields: vec![
8910 GraphDbSchemaField {
8911 name: "id",
8912 value_type: "string",
8913 description: "Stable provider-neutral edge id derived from from_id, kind, and to_id",
8914 },
8915 GraphDbSchemaField {
8916 name: "from_id",
8917 value_type: "string",
8918 description: "Source node id",
8919 },
8920 GraphDbSchemaField {
8921 name: "to_id",
8922 value_type: "string",
8923 description: "Target node id",
8924 },
8925 GraphDbSchemaField {
8926 name: "kind",
8927 value_type: "string",
8928 description: "Application-defined edge relation",
8929 },
8930 GraphDbSchemaField {
8931 name: "properties",
8932 value_type: "object<string,string>",
8933 description: "Adapter-specific string properties",
8934 },
8935 GraphDbSchemaField {
8936 name: "provenance",
8937 value_type: "array",
8938 description: "Source system and source reference metadata",
8939 },
8940 GraphDbSchemaField {
8941 name: "freshness",
8942 value_type: "object|null",
8943 description: "Optional content hash and observed timestamp",
8944 },
8945 ],
8946 operations: vec![
8947 GraphDbSchemaOperation {
8948 command: "refresh",
8949 description: "Materialize .tsift/graph.db explicitly with delta upserts/deletes, row hash watermarks, tombstone pruning, projection metadata, row counts, and operator next commands",
8950 },
8951 GraphDbSchemaOperation {
8952 command: "status",
8953 description: "Inspect .tsift/graph.db freshness, projection metadata, row counts, tombstone counts, file-size impact, and operator next commands without refreshing",
8954 },
8955 GraphDbSchemaOperation {
8956 command: "doctor",
8957 description: "Validate graph.db or Convex snapshot health and return fail-closed repair diagnostics plus non-fatal SQLite tombstone-retention warnings",
8958 },
8959 GraphDbSchemaOperation {
8960 command: "drift",
8961 description: "Compare local SQLite projection rows with a Convex snapshot and return upsert, tombstone, metadata, duplicate, orphan, and next-command diagnostics",
8962 },
8963 GraphDbSchemaOperation {
8964 command: "compact [--apply] [--prune-tombstones --confirmed-convex-reconciled]",
8965 description: "Return or apply the post-reconciliation SQLite graph compaction policy, including WAL checkpoint/VACUUM proof and guarded tombstone pruning",
8966 },
8967 GraphDbSchemaOperation {
8968 command: "backend-eval [--candidate duckdb-duckpgq|falkordb|ladybug|kuzu|surrealdb] [--target ID] [--full-projection]",
8969 description: "Benchmark experimental read-only GraphStore backend prototypes against SQLite on bounded real, optional full-project, and synthetic projections across refresh/status/path tiers/evidence/conflict-matrix/dispatch-trace and emit promotion hold/eligibility gates",
8970 },
8971 GraphDbSchemaOperation {
8972 command: "evidence <target> [--depth N] [--limit N]",
8973 description: "Return a bounded versioned graph-db handoff packet for a backlog id or job packet handle, including packet_id, projection hash, worker_context rows, source_handle rows, worker_result rows, semantic_concept/entity rows, shortest paths, replay commands, repair commands, and next commands",
8974 },
8975 GraphDbSchemaOperation {
8976 command: "related <phrase> [--kind concept|entity|all] [--depth N] [--seed-limit N] [--limit N]",
8977 description: "Resolve a natural-language phrase to cached semantic concept/entity seed nodes, then return an incident/outgoing GraphStore neighborhood around those seeds for general knowledge retrieval without changing stable neighborhood pagination defaults",
8978 },
8979 GraphDbSchemaOperation {
8980 command: "dispatch-trace [target...] --path <session> [--format json|html]",
8981 description: "Export a compact graph-backed dispatch trace with evidence packet ids, worker-result feedback closure summaries, graph links, and conflict-matrix worker prompt packets",
8982 },
8983 GraphDbSchemaOperation {
8984 command: "dependency-dag [target...] --path <session>",
8985 description: "Extract a versioned agent-doc dependency DAG from backlog ids, explicit depends-on text, shared file/symbol/test/config evidence, semantic overlap, and worker-result follow-up ids",
8986 },
8987 GraphDbSchemaOperation {
8988 command: "schema",
8989 description: "Return record and operation schemas",
8990 },
8991 GraphDbSchemaOperation {
8992 command: "node <id>",
8993 description: "Return one node by stable id",
8994 },
8995 GraphDbSchemaOperation {
8996 command: "edge <id>",
8997 description: "Return one edge by stable edge id",
8998 },
8999 GraphDbSchemaOperation {
9000 command: "edges [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
9001 description: "Return edge records ordered by stable edge id with SQLite-pushed edge-property filtering and cursor pagination",
9002 },
9003 GraphDbSchemaOperation {
9004 command: "incident <id> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
9005 description: "Return incoming and outgoing edges incident to one node, ordered by stable edge id with optional kind and edge-property filters",
9006 },
9007 GraphDbSchemaOperation {
9008 command: "kind <kind> [--property KEY=VALUE] [--cursor ID] [--limit N]",
9009 description: "Return nodes of one kind ordered by id with SQLite-pushed property filtering/cursor pagination and query-plan diagnostics",
9010 },
9011 GraphDbSchemaOperation {
9012 command: "neighborhood <id> --depth <n> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor ID] [--limit N]",
9013 description: "Return a directed outgoing subgraph around a node using batched SQLite recursive traversal plus pushed filters/paging when available; JSON also includes additive ranked_neighbors while default nodes remain stable-id ordered",
9014 },
9015 GraphDbSchemaOperation {
9016 command: "path <from> <to> [--edge-kind <kind>] [--max-hops N]",
9017 description: "Return the shortest directed path by node id, optionally bounded by hop count",
9018 },
9019 ],
9020 }
9021}
9022
9023pub(crate) fn sqlite_graph_freshness(
9024 store: &SqliteGraphStore,
9025 scope: &str,
9026) -> Result<GraphDbFreshnessReport> {
9027 let version = store.projection_version(scope)?;
9028 let Some(version) = version else {
9029 return Ok(GraphDbFreshnessReport {
9030 status: "missing".to_string(),
9031 fail_closed: true,
9032 projection_version: None,
9033 content_hash: None,
9034 source_watermark: None,
9035 diagnostics: vec![
9036 "graph projection metadata is missing; rebuild the graph before trusting reads"
9037 .to_string(),
9038 ],
9039 });
9040 };
9041 let mut diagnostics = Vec::new();
9042 let fail_closed =
9043 version.projection_version != GRAPH_PROJECTION_VERSION || version.content_hash.is_none();
9044 if version.projection_version != GRAPH_PROJECTION_VERSION {
9045 diagnostics.push(format!(
9046 "projection version mismatch: expected {} got {}",
9047 GRAPH_PROJECTION_VERSION, version.projection_version
9048 ));
9049 }
9050 if version.content_hash.is_none() {
9051 diagnostics.push("projection content hash is missing".to_string());
9052 }
9053 Ok(GraphDbFreshnessReport {
9054 status: if fail_closed { "stale" } else { "current" }.to_string(),
9055 fail_closed,
9056 projection_version: Some(version.projection_version),
9057 content_hash: version.content_hash,
9058 source_watermark: version.source_watermark,
9059 diagnostics,
9060 })
9061}
9062
9063pub(crate) fn convex_graph_freshness(
9064 local: &ConvexProjectionRows,
9065 snapshot: &ConvexProjectionRows,
9066 scope: Option<&str>,
9067) -> GraphDbFreshnessReport {
9068 let freshness = convex_projection_freshness(local, Some(snapshot), scope);
9069 GraphDbFreshnessReport {
9070 status: freshness.status,
9071 fail_closed: freshness.fail_closed,
9072 projection_version: Some(GRAPH_PROJECTION_VERSION.to_string()),
9073 content_hash: freshness.snapshot_hash,
9074 source_watermark: None,
9075 diagnostics: freshness.diagnostics,
9076 }
9077}
9078
9079pub(crate) fn tokensave_graph_freshness(store: &TokensaveDb) -> Result<GraphDbFreshnessReport> {
9080 let (nodes, edges) = store.graph_counts()?;
9081 let files = store.file_count()?;
9082 Ok(GraphDbFreshnessReport {
9083 status: "current".to_string(),
9084 fail_closed: false,
9085 projection_version: Some("tokensave-readonly".to_string()),
9086 content_hash: None,
9087 source_watermark: Some(store.db_path().to_string_lossy().to_string()),
9088 diagnostics: vec![format!(
9089 "tokensave read-only adapter opened {} node(s), {} edge(s), {} file(s)",
9090 nodes, edges, files
9091 )],
9092 })
9093}
9094
9095pub(crate) fn append_tokensave_graph_doctor_checks(report: &mut GraphDbDoctorReport, root: &Path) {
9096 match TokensaveDb::discover(root) {
9097 Ok(Some(store)) => {
9098 report.push_check(GraphDbDoctorCheck {
9099 name: "tokensave_db_open".to_string(),
9100 status: "ok".to_string(),
9101 fail_closed: false,
9102 diagnostics: vec![format!(
9103 "opened tokensave database at {}",
9104 store.db_path().display()
9105 )],
9106 repair_commands: Vec::new(),
9107 });
9108 match (store.node_count(), store.edge_count(), store.file_count()) {
9109 (Ok(nodes), Ok(edges), Ok(files)) => {
9110 report.push_check(GraphDbDoctorCheck {
9111 name: "tokensave_counts".to_string(),
9112 status: "ok".to_string(),
9113 fail_closed: false,
9114 diagnostics: vec![format!(
9115 "tokensave contains {} node(s), {} edge(s), {} file(s)",
9116 nodes, edges, files
9117 )],
9118 repair_commands: Vec::new(),
9119 });
9120 }
9121 (nodes, edges, files) => {
9122 report.push_check(graph_db_doctor_check(
9123 "tokensave_counts",
9124 vec![format!(
9125 "tokensave count inspection failed: nodes={:?} edges={:?} files={:?}",
9126 nodes.err(),
9127 edges.err(),
9128 files.err()
9129 )],
9130 Vec::new(),
9131 ));
9132 }
9133 }
9134 }
9135 Ok(None) => report.push_check(graph_db_doctor_check(
9136 "tokensave_db_exists",
9137 vec![format!(
9138 "tokensave database is missing at {}",
9139 root.join(".tokensave").join("tokensave.db").display()
9140 )],
9141 Vec::new(),
9142 )),
9143 Err(err) => report.push_check(graph_db_doctor_check(
9144 "tokensave_db_open",
9145 vec![err.to_string()],
9146 Vec::new(),
9147 )),
9148 }
9149}
9150
9151pub(crate) fn graph_db_resolve_evidence_target(
9152 store: &impl GraphStore,
9153 target: &str,
9154) -> Result<Option<SubstrateGraphNode>> {
9155 store.resolve_evidence_target(
9156 target,
9157 &[
9158 "backlog",
9159 "job_packet",
9160 "worker_result",
9161 "worker_context",
9162 "source_handle",
9163 ],
9164 )
9165}
9166
9167fn graph_db_reachable_nodes_by_kind(
9168 store: &impl GraphStore,
9169 from_id: &str,
9170 kind: &str,
9171 depth: usize,
9172 limit: usize,
9173) -> Result<Vec<(SubstrateGraphNode, substrate::GraphPath)>> {
9174 store.reachable_nodes_by_kind(from_id, kind, depth, limit)
9175}
9176
9177fn graph_db_evidence_completed_queue_drift_warnings(
9178 store: &impl GraphStore,
9179 target: &SubstrateGraphNode,
9180 worker_results: &[SubstrateGraphNode],
9181) -> Result<Vec<String>> {
9182 let ref_id = target.properties.get("ref_id").map(String::as_str);
9183 let has_completed_result = worker_results.iter().any(|node| {
9184 node.properties.get("status").map(String::as_str) == Some("completed")
9185 && node.properties.get("ref_id").map(String::as_str) == ref_id
9186 });
9187 if !has_completed_result {
9188 return Ok(Vec::new());
9189 }
9190 let active_jobs = store
9191 .nodes_by_kind("job_packet")?
9192 .into_iter()
9193 .filter(|node| {
9194 node.properties.get("ref_id").map(String::as_str) == ref_id
9195 && node.label.starts_with("do #")
9196 })
9197 .collect::<Vec<_>>();
9198 if active_jobs.is_empty() {
9199 return Ok(Vec::new());
9200 }
9201 let repair = match (target.properties.get("path"), ref_id) {
9202 (Some(path), Some(id)) => format!(
9203 "repair with `agent-doc write --commit {} --done {}` or the next `agent-doc finalize --done {}` closeout",
9204 shell_quote(path),
9205 shell_quote(id),
9206 shell_quote(id)
9207 ),
9208 _ => {
9209 "repair by marking the queue item done/reaping it in the agent-doc session".to_string()
9210 }
9211 };
9212 Ok(vec![format!(
9213 "queue-head drift: target {} has {} active queued do packet(s) but already has a completed worker_result; {repair}; do not redispatch or reactivate the completed item",
9214 target.label,
9215 active_jobs.len()
9216 )])
9217}
9218
9219fn graph_db_evidence_next_commands(
9220 root: &Path,
9221 scope: Option<&str>,
9222 target: &SubstrateGraphNode,
9223 worker_context: &[SubstrateGraphNode],
9224 source_handles: &[SubstrateGraphNode],
9225 worker_results: &[SubstrateGraphNode],
9226 semantic_related: &[SubstrateGraphNode],
9227) -> Vec<String> {
9228 let mut commands = BTreeSet::new();
9229 if let Some(expand) = target.properties.get("expand") {
9230 commands.insert(expand.clone());
9231 }
9232 for worker in worker_context {
9233 if let Some(expand) = worker.properties.get("expand") {
9234 commands.insert(expand.clone());
9235 }
9236 }
9237 for source in source_handles {
9238 if let Some(expand) = source.properties.get("expand") {
9239 commands.insert(expand.clone());
9240 }
9241 }
9242 for result in worker_results {
9243 if let Some(expand) = result.properties.get("expand") {
9244 commands.insert(expand.clone());
9245 }
9246 }
9247 for semantic in semantic_related {
9248 if let Some(expand) = semantic.properties.get("expand") {
9249 commands.insert(expand.clone());
9250 }
9251 }
9252 commands.insert(format!(
9253 "tsift graph-db --path {}{} status --json",
9254 shell_quote(root.to_string_lossy().as_ref()),
9255 graph_db_scope_arg(scope)
9256 ));
9257 commands.insert(format!(
9258 "tsift graph-db --path {}{} doctor --json",
9259 shell_quote(root.to_string_lossy().as_ref()),
9260 graph_db_scope_arg(scope)
9261 ));
9262 commands.into_iter().collect()
9263}
9264
9265fn graph_db_repair_commands(root: &Path, scope: Option<&str>) -> Vec<String> {
9266 vec![
9267 format!(
9268 "tsift graph-db --path {}{} refresh --json",
9269 shell_quote(root.to_string_lossy().as_ref()),
9270 graph_db_scope_arg(scope)
9271 ),
9272 format!(
9273 "tsift graph-db --path {}{} doctor --json",
9274 shell_quote(root.to_string_lossy().as_ref()),
9275 graph_db_scope_arg(scope)
9276 ),
9277 ]
9278}
9279
9280fn graph_db_evidence_replay_commands(
9281 root: &Path,
9282 scope: Option<&str>,
9283 target: &str,
9284 depth: usize,
9285 limit: usize,
9286) -> Vec<String> {
9287 vec![
9288 format!(
9289 "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
9290 shell_quote(root.to_string_lossy().as_ref()),
9291 graph_db_scope_arg(scope),
9292 shell_quote(target),
9293 depth,
9294 limit
9295 ),
9296 format!(
9297 "tsift conflict-matrix --path {} {} --json",
9298 shell_quote(root.to_string_lossy().as_ref()),
9299 shell_quote(target)
9300 ),
9301 ]
9302}
9303
9304fn graph_db_evidence_packet_id(
9305 target: &str,
9306 target_node: &SubstrateGraphNode,
9307 freshness: &GraphDbFreshnessReport,
9308) -> String {
9309 stable_handle(
9310 "gevd",
9311 &format!(
9312 "{}:{}:{}:{}",
9313 GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
9314 target,
9315 target_node.id,
9316 freshness.content_hash.as_deref().unwrap_or("no-hash")
9317 ),
9318 )
9319}
9320
9321pub(crate) fn graph_db_evidence_report_from_store<S: GraphStore>(
9322 input: GraphDbEvidenceInput<'_, S>,
9323) -> Result<GraphDbEvidenceReport> {
9324 let GraphDbEvidenceInput {
9325 root,
9326 scope,
9327 backend,
9328 target,
9329 depth,
9330 limit,
9331 cursor,
9332 store,
9333 freshness,
9334 mut warnings,
9335 } = input;
9336 let repair_commands = graph_db_repair_commands(root, scope);
9337 if freshness.fail_closed {
9338 bail!(
9339 "graph database evidence failed closed for {} backend: {}; repair: {}",
9340 backend,
9341 freshness.diagnostics.join("; "),
9342 repair_commands.join("; ")
9343 );
9344 }
9345 let semantic_readiness = graph_db_semantic_readiness(
9346 root,
9347 scope,
9348 graph_store_semantic_node_count(store).ok(),
9349 );
9350 if semantic_readiness.fail_closed {
9351 warnings.push(format!(
9352 "graph evidence semantic readiness blocked: {} — {}",
9353 semantic_readiness.reason,
9354 semantic_readiness.diagnostics.join("; ")
9355 ));
9356 warnings.push(format!(
9357 "repair: {}",
9358 semantic_readiness.next_commands.join("; then ")
9359 ));
9360 }
9361 let target_node = graph_db_resolve_evidence_target(store, target)?
9362 .with_context(|| format!("graph-db evidence target not found: {target}"))?;
9363 let max_rows = if limit == 0 { usize::MAX } else { limit };
9364 let mut reachable = store.reachable_nodes_by_kinds(
9365 &target_node.id,
9366 &[
9367 "worker_context",
9368 "source_handle",
9369 "worker_result",
9370 "semantic_concept",
9371 "semantic_entity",
9372 ],
9373 depth,
9374 max_rows,
9375 )?;
9376 let worker_paths = reachable.remove("worker_context").unwrap_or_default();
9377 let source_paths = reachable.remove("source_handle").unwrap_or_default();
9378 let worker_result_paths = reachable.remove("worker_result").unwrap_or_default();
9379 let mut semantic_paths = reachable.remove("semantic_concept").unwrap_or_default();
9380 semantic_paths.extend(reachable.remove("semantic_entity").unwrap_or_default());
9381 semantic_paths.sort_by(|(left_node, left_path), (right_node, right_path)| {
9382 left_path
9383 .hops
9384 .cmp(&right_path.hops)
9385 .then(left_node.kind.cmp(&right_node.kind))
9386 .then(left_node.label.cmp(&right_node.label))
9387 .then(left_node.id.cmp(&right_node.id))
9388 });
9389 if max_rows != usize::MAX && semantic_paths.len() > max_rows {
9390 semantic_paths.truncate(max_rows);
9391 }
9392
9393 let evidence_nodes = worker_paths
9394 .iter()
9395 .chain(source_paths.iter())
9396 .chain(worker_result_paths.iter())
9397 .chain(semantic_paths.iter())
9398 .map(|(node, _)| node.clone())
9399 .collect::<Vec<_>>();
9400 let evidence_depth_by_id = worker_paths
9401 .iter()
9402 .chain(source_paths.iter())
9403 .chain(worker_result_paths.iter())
9404 .chain(semantic_paths.iter())
9405 .map(|(node, path)| (node.id.clone(), path.hops))
9406 .collect::<BTreeMap<_, _>>();
9407 let target_query = graph_db_node_search_text(&target_node);
9408 let semantic_scores = graph_db_semantic_scores_for_query(Some(&target_query), &evidence_nodes);
9409 let budgeted = graph_db_apply_output_budget_with_depths_and_cursor(
9410 std::slice::from_ref(&target_node.id),
9411 &semantic_scores,
9412 evidence_nodes,
9413 Vec::new(),
9414 Some(limit),
9415 Some(&evidence_depth_by_id),
9416 cursor,
9417 );
9418 let output_budget = budgeted.report;
9419 let truncated = budgeted.truncated;
9420 let next_cursor = budgeted.next_cursor;
9421 let retained_evidence_ids = budgeted
9422 .nodes
9423 .iter()
9424 .map(|node| node.id.as_str())
9425 .collect::<BTreeSet<_>>();
9426 let worker_context = worker_paths
9427 .iter()
9428 .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9429 .map(|(node, _)| node.clone())
9430 .collect::<Vec<_>>();
9431 let source_handles = source_paths
9432 .iter()
9433 .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9434 .map(|(node, _)| node.clone())
9435 .collect::<Vec<_>>();
9436 let worker_results = worker_result_paths
9437 .iter()
9438 .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9439 .map(|(node, _)| node.clone())
9440 .collect::<Vec<_>>();
9441 let semantic_related = semantic_paths
9442 .iter()
9443 .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9444 .map(|(node, _)| node.clone())
9445 .collect::<Vec<_>>();
9446 warnings.extend(graph_db_evidence_completed_queue_drift_warnings(
9447 store,
9448 &target_node,
9449 &worker_results,
9450 )?);
9451 if worker_context.is_empty()
9452 && source_handles.is_empty()
9453 && worker_results.is_empty()
9454 && semantic_related.is_empty()
9455 {
9456 warnings.push(format!(
9457 "graph-db evidence target {} resolved to a {} node but has no projection-linked context rows; add source/file tokens to the backlog text or rerun graph-db refresh after the session document is indexed",
9458 target, target_node.kind
9459 ));
9460 }
9461 let shortest_paths = worker_paths
9462 .iter()
9463 .chain(source_paths.iter())
9464 .chain(worker_result_paths.iter())
9465 .chain(semantic_paths.iter())
9466 .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9467 .map(|(node, path)| GraphDbEvidencePath {
9468 to: node.id.clone(),
9469 kind: node.kind.clone(),
9470 label: node.label.clone(),
9471 path: Some(path.clone()),
9472 expand: node.properties.get("expand").cloned(),
9473 })
9474 .collect::<Vec<_>>();
9475 let next_commands = graph_db_evidence_next_commands(
9476 root,
9477 scope,
9478 &target_node,
9479 &worker_context,
9480 &source_handles,
9481 &worker_results,
9482 &semantic_related,
9483 );
9484 let replay_commands = graph_db_evidence_replay_commands(root, scope, target, depth, limit);
9485 let packet_id = graph_db_evidence_packet_id(target, &target_node, &freshness);
9486 let projection_hash = freshness.content_hash.clone();
9487
9488 Ok(GraphDbEvidenceReport {
9489 root: root.to_string_lossy().to_string(),
9490 scope: scope.map(str::to_string),
9491 backend: backend.to_string(),
9492 contract_version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION.to_string(),
9493 target: target.to_string(),
9494 packet_id,
9495 projection_hash,
9496 freshness,
9497 target_node: target_node.into(),
9498 worker_context: worker_context.into_iter().map(Into::into).collect(),
9499 source_handles: source_handles.into_iter().map(Into::into).collect(),
9500 worker_results: worker_results.into_iter().map(Into::into).collect(),
9501 semantic_related: semantic_related.into_iter().map(Into::into).collect(),
9502 shortest_paths,
9503 output_budget: Some(output_budget),
9504 truncated,
9505 next_cursor,
9506 next_commands,
9507 replay_commands,
9508 repair_commands,
9509 fixture_coverage: GraphDbFixtureCoverage {
9510 test: "graph_db_evidence_packet_covers_backlog_job_worker_context_and_source_handles"
9511 .to_string(),
9512 fixture: "tests/graph_db_conformance.rs::graph_db_project".to_string(),
9513 assertions: vec![
9514 "backlog id and job packet handle resolve to graph nodes".to_string(),
9515 "worker_context rows are reachable from queued work".to_string(),
9516 "source_handle rows are reachable through bounded shortest paths".to_string(),
9517 "worker_result rows are reachable from completed or blocked work".to_string(),
9518 ],
9519 },
9520 warnings,
9521 })
9522}
9523
9524fn print_graph_db_evidence_human(report: &GraphDbEvidenceReport) {
9525 println!(
9526 "graph-db evidence backend: {} target: {} [{}] packet:{}",
9527 report.backend, report.target_node.id, report.target_node.kind, report.packet_id
9528 );
9529 let page_info = if report.truncated {
9530 let cursor = report.next_cursor.as_deref().unwrap_or("?");
9531 format!(" (truncated, next_cursor: {cursor})")
9532 } else {
9533 String::new()
9534 };
9535 println!(
9536 "evidence: {} worker_context row(s), {} source_handle row(s), {} worker_result row(s), {} semantic row(s), {} path(s){page_info}",
9537 report.worker_context.len(),
9538 report.source_handles.len(),
9539 report.worker_results.len(),
9540 report.semantic_related.len(),
9541 report.shortest_paths.len()
9542 );
9543 for path in &report.shortest_paths {
9544 if let Some(graph_path) = &path.path {
9545 println!(
9546 "path: {} hop(s) {}",
9547 graph_path.hops,
9548 graph_path.nodes.join(" -> ")
9549 );
9550 }
9551 }
9552 for command in &report.next_commands {
9553 println!("next: {command}");
9554 }
9555 for warning in &report.warnings {
9556 println!("warning: {warning}");
9557 }
9558}
9559
9560pub(crate) fn print_graph_db_evidence_report(
9561 report: &GraphDbEvidenceReport,
9562 format: OutputFormat,
9563) -> Result<()> {
9564 if format.json_output {
9565 let page_info = if report.truncated {
9566 let cursor = report.next_cursor.as_deref().unwrap_or("?");
9567 format!(" (truncated, next_cursor: {cursor})")
9568 } else {
9569 String::new()
9570 };
9571 print_json_or_envelope(
9572 report,
9573 &format,
9574 "graph-db",
9575 "evidence",
9576 ToolEnvelopeSummary {
9577 text: format!(
9578 "Graph DB evidence for {} returned {} worker context row(s), {} source handle(s), {} worker result row(s), {} semantic row(s), and {} shortest path(s){page_info}",
9579 report.target,
9580 report.worker_context.len(),
9581 report.source_handles.len(),
9582 report.worker_results.len(),
9583 report.semantic_related.len(),
9584 report.shortest_paths.len()
9585 ),
9586 metrics: vec![
9587 envelope_metric("backend", &report.backend),
9588 envelope_metric("worker_context", report.worker_context.len()),
9589 envelope_metric("source_handles", report.source_handles.len()),
9590 envelope_metric("worker_results", report.worker_results.len()),
9591 envelope_metric("semantic_related", report.semantic_related.len()),
9592 envelope_metric("paths", report.shortest_paths.len()),
9593 ],
9594 },
9595 report.truncated,
9596 report.next_commands.clone(),
9597 )
9598 } else {
9599 print_graph_db_evidence_human(report);
9600 Ok(())
9601 }
9602}
9603
9604pub(crate) fn graph_db_report_from_store(
9605 root: &Path,
9606 scope: Option<&str>,
9607 backend: &str,
9608 query: GraphDbQuery,
9609 store: &impl GraphStore,
9610 freshness: GraphDbFreshnessReport,
9611 warnings: Vec<String>,
9612) -> Result<GraphDbReport> {
9613 if freshness.fail_closed {
9614 bail!(
9615 "graph database read failed closed for {} backend: {}",
9616 backend,
9617 freshness.diagnostics.join("; ")
9618 );
9619 }
9620 let mut report = GraphDbReport {
9621 root: root.to_string_lossy().to_string(),
9622 scope: scope.map(str::to_string),
9623 backend: backend.to_string(),
9624 query: format!("{query:?}"),
9625 freshness,
9626 readiness: None,
9627 schema: None,
9628 node: None,
9629 edge: None,
9630 nodes: Vec::new(),
9631 edges: Vec::new(),
9632 ranked_neighbors: Vec::new(),
9633 semantic_related: Vec::new(),
9634 neighborhood_ranking_gate: None,
9635 ranked_neighborhood_comparison: None,
9636 knowledge_retrieval: None,
9637 output_budget: None,
9638 path: None,
9639 page: None,
9640 warnings,
9641 };
9642
9643 match query {
9644 GraphDbQuery::Refresh => {
9645 bail!("graph-db refresh must be handled by the refresh command path");
9646 }
9647 GraphDbQuery::Status => {
9648 bail!("graph-db status must be handled by the status command path");
9649 }
9650 GraphDbQuery::Doctor => {
9651 bail!("graph-db doctor must be handled by the doctor command path");
9652 }
9653 GraphDbQuery::Drift => {
9654 bail!("graph-db drift must be handled by the drift command path");
9655 }
9656 GraphDbQuery::Compact { .. } => {
9657 bail!("graph-db compact must be handled by the compact command path");
9658 }
9659 GraphDbQuery::BackendEval { .. } => {
9660 bail!("graph-db backend-eval must be handled by the benchmark command path");
9661 }
9662 GraphDbQuery::Evidence { .. } => {
9663 bail!("graph-db evidence must be handled by the evidence command path");
9664 }
9665 GraphDbQuery::Related {
9666 query,
9667 kind,
9668 depth,
9669 seed_limit,
9670 limit,
9671 } => {
9672 let semantic =
9673 semantic_related_report_from_store(root, scope, &query, seed_limit, kind, store)?;
9674 let SemanticRelatedReport {
9675 items,
9676 warnings: semantic_warnings,
9677 ..
9678 } = semantic;
9679 let readiness = graph_db_semantic_readiness(
9680 root,
9681 scope,
9682 (!items.is_empty()).then_some(items.len()),
9683 );
9684 report.warnings.extend(semantic_warnings);
9685 let seed_ids = items
9686 .iter()
9687 .map(|item| item.handle.clone())
9688 .collect::<Vec<_>>();
9689 let semantic_scores = items
9690 .iter()
9691 .map(|item| (item.handle.clone(), item.score))
9692 .collect::<BTreeMap<_, _>>();
9693 let subgraph = graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit)?;
9694 let seed_count = seed_ids.len();
9695 let mut diagnostics = subgraph.diagnostics;
9696 let budgeted = graph_db_apply_output_budget(
9697 &seed_ids,
9698 &semantic_scores,
9699 subgraph.nodes,
9700 subgraph.edges,
9701 Some(limit),
9702 );
9703 let budget_report = budgeted.report;
9704 let dropped_by_budget = !budget_report.dropped_by_budget.is_empty();
9705 diagnostics.extend(budget_report.diagnostics.clone());
9706 diagnostics.extend(readiness.diagnostics.clone());
9707
9708 report.readiness = Some(readiness);
9709 report.semantic_related = items;
9710 if let Some(seed_id) = seed_ids.first() {
9711 let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(Some(limit));
9712 report.ranked_neighbors = graph_db_ranked_neighbors(
9713 seed_id,
9714 &budgeted.nodes,
9715 &budgeted.edges,
9716 ranked_neighbor_cap,
9717 );
9718 report.neighborhood_ranking_gate =
9719 Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
9720 }
9721 report.nodes = budgeted.nodes.into_iter().map(Into::into).collect();
9722 report.edges = budgeted.edges.into_iter().map(Into::into).collect();
9723 report.knowledge_retrieval = Some(GraphDbKnowledgeRetrieval {
9724 mode: "semantic_seeded_neighborhood".to_string(),
9725 query,
9726 seed_kind: semantic_related_kind_name(kind).to_string(),
9727 seed_limit,
9728 seed_count,
9729 depth,
9730 limit,
9731 node_count: report.nodes.len(),
9732 edge_count: report.edges.len(),
9733 truncated: subgraph.truncated || dropped_by_budget,
9734 traversal: "incident_plus_outgoing_edges".to_string(),
9735 freshness_boundary:
9736 "semantic rows must come from refreshed summary or tsift-memory graph records"
9737 .to_string(),
9738 privacy_boundary:
9739 "GraphStore stores substrate records only; user consent, deletion policy, persona policy, and LiveKit session state stay in the avatar/agent adapter"
9740 .to_string(),
9741 diagnostics,
9742 });
9743 report.output_budget = Some(budget_report);
9744 }
9745 GraphDbQuery::Schema => {
9746 report.schema = Some(graph_db_schema());
9747 }
9748 GraphDbQuery::Node { id } => {
9749 report.node = store.node(&id)?.map(Into::into);
9750 }
9751 GraphDbQuery::Edge { id } => {
9752 report.edge = store.edge(&id)?.map(Into::into);
9753 }
9754 GraphDbQuery::Edges {
9755 edge_kind,
9756 cursor,
9757 limit,
9758 property_filters,
9759 } => {
9760 let options = graph_db_query_options(cursor, limit, &property_filters)?;
9761 let paged = store.paged_edges(
9762 edge_kind.as_deref(),
9763 graph_db_query_options_for_store(&options),
9764 )?;
9765 report.edges = paged.edges.into_iter().map(Into::into).collect();
9766 report.page = Some(graph_db_page_report_from_store(
9767 paged.page,
9768 options.property_filters,
9769 ));
9770 }
9771 GraphDbQuery::Incident {
9772 id,
9773 edge_kind,
9774 cursor,
9775 limit,
9776 property_filters,
9777 } => {
9778 let options = graph_db_query_options(cursor, limit, &property_filters)?;
9779 let paged = store.paged_incident_edges(
9780 &id,
9781 edge_kind.as_deref(),
9782 graph_db_query_options_for_store(&options),
9783 )?;
9784 report.edges = paged.edges.into_iter().map(Into::into).collect();
9785 report.page = Some(graph_db_page_report_from_store(
9786 paged.page,
9787 options.property_filters,
9788 ));
9789 }
9790 GraphDbQuery::Kind {
9791 kind,
9792 cursor,
9793 limit,
9794 property_filters,
9795 } => {
9796 let options = graph_db_query_options(cursor, limit, &property_filters)?;
9797 let paged =
9798 store.paged_nodes_by_kind(&kind, graph_db_query_options_for_store(&options))?;
9799 report.nodes = paged.nodes.into_iter().map(Into::into).collect();
9800 report.edges = paged.edges.into_iter().map(Into::into).collect();
9801 report.page = Some(graph_db_page_report_from_store(
9802 paged.page,
9803 options.property_filters,
9804 ));
9805 }
9806 GraphDbQuery::Neighborhood {
9807 id,
9808 depth,
9809 edge_kind,
9810 cursor,
9811 limit,
9812 property_filters,
9813 } => {
9814 let options = graph_db_query_options(cursor, limit, &property_filters)?;
9815 if let Some(paged) = store.paged_neighborhood(
9816 &id,
9817 depth,
9818 edge_kind.as_deref(),
9819 graph_db_query_options_for_store(&options),
9820 )? {
9821 let budgeted = graph_db_apply_output_budget(
9822 std::slice::from_ref(&id),
9823 &BTreeMap::new(),
9824 paged.nodes,
9825 paged.edges,
9826 options.limit,
9827 );
9828 let budget_report = budgeted.report;
9829 let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(options.limit);
9830 let ranked_neighbors = graph_db_ranked_neighbors(
9831 &id,
9832 &budgeted.nodes,
9833 &budgeted.edges,
9834 ranked_neighbor_cap,
9835 );
9836 let comparison = graph_db_ranked_neighborhood_comparison(
9837 &id,
9838 depth,
9839 edge_kind.as_deref(),
9840 options.limit,
9841 &budgeted.nodes,
9842 &budgeted.edges,
9843 store,
9844 )?;
9845 report.nodes = budgeted.nodes.into_iter().map(Into::into).collect();
9846 report.edges = budgeted.edges.into_iter().map(Into::into).collect();
9847 report.ranked_neighbors = ranked_neighbors;
9848 report.neighborhood_ranking_gate =
9849 Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
9850 let mut page =
9851 graph_db_page_report_from_store(paged.page, options.property_filters);
9852 page.returned_nodes = report.nodes.len();
9853 page.returned_edges = report.edges.len();
9854 page.truncated |= !budget_report.dropped_by_budget.is_empty();
9855 page.diagnostics.extend(budget_report.diagnostics.clone());
9856 report.page = Some(page);
9857 report.output_budget = Some(budget_report);
9858 if let Some(comparison) = comparison {
9859 report.ranked_neighborhood_comparison = Some(comparison);
9860 }
9861 }
9862 }
9863 GraphDbQuery::Path {
9864 from,
9865 to,
9866 edge_kind,
9867 max_hops,
9868 } => {
9869 report.path =
9870 store.shortest_path_with_max_hops(&from, &to, edge_kind.as_deref(), max_hops)?;
9871 if let Some(max_hops) = max_hops
9872 && report.path.is_none()
9873 {
9874 report.warnings.push(format!(
9875 "no directed path found within --max-hops {}",
9876 max_hops
9877 ));
9878 }
9879 }
9880 GraphDbQuery::Map { .. } => {
9881 bail!("graph-db map must be handled by the map command path");
9882 }
9883 }
9884 Ok(report)
9885}
9886
9887pub(crate) fn print_graph_db_human(report: &GraphDbReport, compact: bool) {
9888 if compact {
9889 println!(
9890 "graph-db backend:{} query:{} nodes:{} edges:{} freshness:{}",
9891 report.backend,
9892 report.query,
9893 report.nodes.len() + usize::from(report.node.is_some()),
9894 report.edges.len() + usize::from(report.edge.is_some()),
9895 report.freshness.status
9896 );
9897 return;
9898 }
9899 println!("graph-db backend: {}", report.backend);
9900 println!("freshness: {}", report.freshness.status);
9901 if let Some(readiness) = &report.readiness {
9902 println!(
9903 "readiness: {} reason: {} fail_closed: {}",
9904 readiness.status, readiness.reason, readiness.fail_closed
9905 );
9906 for diagnostic in &readiness.diagnostics {
9907 println!("readiness diagnostic: {diagnostic}");
9908 }
9909 for command in &readiness.next_commands {
9910 println!("readiness next: {command}");
9911 }
9912 }
9913 if let Some(schema) = &report.schema {
9914 println!(
9915 "schema: {} node fields, {} edge fields, {} operations",
9916 schema.node_fields.len(),
9917 schema.edge_fields.len(),
9918 schema.operations.len()
9919 );
9920 }
9921 if let Some(node) = &report.node {
9922 println!("node: {} [{}] {}", node.id, node.kind, node.label);
9923 }
9924 if let Some(edge) = &report.edge {
9925 let edge_full: SubstrateGraphEdge = edge.into();
9926 println!(
9927 "edge: {} {} -{}-> {}",
9928 graph_db_edge_key(&edge_full),
9929 edge.from_id,
9930 edge.kind,
9931 edge.to_id
9932 );
9933 }
9934 if let Some(knowledge) = &report.knowledge_retrieval {
9935 println!(
9936 "knowledge_retrieval: {} seeds:{} depth:{} traversal:{}",
9937 knowledge.mode, knowledge.seed_count, knowledge.depth, knowledge.traversal
9938 );
9939 }
9940 for item in &report.semantic_related {
9941 println!(
9942 "semantic_seed: {:.3} [{}] {} ({})",
9943 item.score, item.kind, item.label, item.handle
9944 );
9945 }
9946 for node in &report.nodes {
9947 println!("node: {} [{}] {}", node.id, node.kind, node.label);
9948 }
9949 for edge in &report.edges {
9950 let edge_full: SubstrateGraphEdge = edge.into();
9951 println!(
9952 "edge: {} {} -{}-> {}",
9953 graph_db_edge_key(&edge_full),
9954 edge.from_id,
9955 edge.kind,
9956 edge.to_id
9957 );
9958 }
9959 for neighbor in &report.ranked_neighbors {
9960 println!(
9961 "ranked_neighbor: #{} score:{} depth:{} {} [{}] {}",
9962 neighbor.rank,
9963 neighbor.score,
9964 neighbor
9965 .depth
9966 .map(|depth| depth.to_string())
9967 .unwrap_or_else(|| "unknown".to_string()),
9968 neighbor.node_id,
9969 neighbor.kind,
9970 neighbor.label
9971 );
9972 }
9973 if let Some(gate) = &report.neighborhood_ranking_gate {
9974 println!(
9975 "neighborhood_ranking_gate: {} default_order:{} ranked_output_default:{}",
9976 gate.status, gate.default_order, gate.ranked_output_default
9977 );
9978 }
9979 if let Some(path) = &report.path {
9980 println!("path: {} hop(s) {}", path.hops, path.nodes.join(" -> "));
9981 }
9982 if let Some(page) = &report.page {
9983 if let Some(next_cursor) = &page.next_cursor {
9984 println!("next_cursor: {next_cursor}");
9985 }
9986 for diagnostic in &page.diagnostics {
9987 println!("page: {diagnostic}");
9988 }
9989 }
9990 for warning in &report.warnings {
9991 println!("warning: {warning}");
9992 }
9993}
9994
9995pub(crate) fn graph_db_backend_eval_phase_timing(
9996 name: &str,
9997 duration_micros: u128,
9998 detail: &str,
9999) -> GraphDbBackendEvalPhaseTiming {
10000 GraphDbBackendEvalPhaseTiming {
10001 name: name.to_string(),
10002 duration_micros,
10003 detail: detail.to_string(),
10004 }
10005}
10006
10007pub(crate) fn graph_db_backend_eval_timed_phase<T>(
10008 phases: &mut Vec<GraphDbBackendEvalPhaseTiming>,
10009 name: &str,
10010 detail: &str,
10011 run: impl FnOnce() -> Result<T>,
10012) -> Result<T> {
10013 let started = Instant::now();
10014 let result = run();
10015 phases.push(graph_db_backend_eval_phase_timing(
10016 name,
10017 started.elapsed().as_micros(),
10018 detail,
10019 ));
10020 result
10021}
10022
10023pub(crate) fn graph_db_backend_eval_refresh_total_micros(
10024 phases: &[GraphDbBackendEvalPhaseTiming],
10025) -> u128 {
10026 phases
10027 .iter()
10028 .filter(|phase| phase.name != "conflict_matrix_preparation")
10029 .map(|phase| phase.duration_micros)
10030 .sum()
10031}
10032
10033pub(crate) fn graph_db_backend_eval_cached_refresh(
10034 root: &Path,
10035 scope: Option<&str>,
10036 source_watermark: Option<&str>,
10037) -> Result<
10038 Option<(
10039 TraversalGraphBuild,
10040 SqliteProjectionRefresh,
10041 Vec<GraphDbBackendEvalPhaseTiming>,
10042 )>,
10043> {
10044 let Some(source_watermark) = source_watermark else {
10045 return Ok(None);
10046 };
10047 let graph_db = graph_substrate_db_path(root, scope);
10048 if !graph_db.exists() {
10049 return Ok(None);
10050 }
10051
10052 let started = Instant::now();
10053 let store = match SqliteGraphStore::open_read_only_resilient(&graph_db) {
10054 Ok(store) => store,
10055 Err(_) => return Ok(None),
10056 };
10057 if store.has_user_triggers().unwrap_or(true) {
10058 return Ok(None);
10059 }
10060 let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
10061 if freshness.fail_closed || freshness.source_watermark.as_deref() != Some(source_watermark) {
10062 return Ok(None);
10063 }
10064
10065 let phases = vec![
10066 graph_db_backend_eval_phase_timing(
10067 "source_graph_build",
10068 started.elapsed().as_micros(),
10069 "reused current graph.db projection because the source watermark matched; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
10070 ),
10071 graph_db_backend_eval_phase_timing(
10072 "projection_rows",
10073 0,
10074 "reused cached provider-neutral projection rows from graph.db",
10075 ),
10076 graph_db_backend_eval_phase_timing(
10077 "sqlite_open",
10078 0,
10079 "reused existing graph.db projection without opening a write transaction",
10080 ),
10081 ];
10082 let refresh = SqliteProjectionRefresh {
10083 scope: scope.unwrap_or("root").to_string(),
10084 projection_version: freshness
10085 .projection_version
10086 .unwrap_or_else(|| GRAPH_PROJECTION_VERSION.to_string()),
10087 source_watermark: Some(source_watermark.to_string()),
10088 tombstoned_nodes: Vec::new(),
10089 tombstoned_edges: Vec::new(),
10090 upserted_nodes: 0,
10091 upserted_edges: 0,
10092 unchanged_nodes: 0,
10093 unchanged_edges: 0,
10094 upserted_properties: 0,
10095 unchanged_properties: 0,
10096 deleted_properties: 0,
10097 deleted_nodes: 0,
10098 deleted_edges: 0,
10099 pruned_tombstones: 0,
10100 file_size_bytes_before: None,
10101 file_size_bytes_after: None,
10102 phase_timings: Vec::new(),
10103 };
10104 Ok(Some((TraversalGraphBuild::default(), refresh, phases)))
10105}
10106
10107pub(crate) fn graph_db_backend_eval_reused_cached_projection(
10108 phases: &[GraphDbBackendEvalPhaseTiming],
10109) -> bool {
10110 phases.iter().any(|phase| {
10111 phase.name == "source_graph_build"
10112 && phase.detail.contains("reused current graph.db projection")
10113 })
10114}
10115
10116pub(crate) fn graph_db_backend_eval_update_source_watermark(
10117 root: &Path,
10118 path_hint: &Path,
10119 scope: Option<&str>,
10120) -> Result<()> {
10121 let Some(source_watermark) = traversal_source_watermark(root, path_hint, scope, false)? else {
10122 return Ok(());
10123 };
10124 let graph_db = graph_substrate_db_path(root, scope);
10125 let mut store = SqliteGraphStore::open(&graph_db)?;
10126 store.update_projection_source_watermark(scope.unwrap_or("root"), Some(source_watermark))?;
10127 Ok(())
10128}
10129
10130pub(crate) fn graph_db_backend_eval_refresh_with_profile(
10131 root: &Path,
10132 path_hint: &Path,
10133 scope: Option<&str>,
10134) -> Result<(
10135 TraversalGraphBuild,
10136 SqliteProjectionRefresh,
10137 Vec<GraphDbBackendEvalPhaseTiming>,
10138)> {
10139 let source_watermark = traversal_source_watermark(root, path_hint, scope, false)?;
10140 if let Some(cached) =
10141 graph_db_backend_eval_cached_refresh(root, scope, source_watermark.as_deref())?
10142 {
10143 return Ok(cached);
10144 }
10145
10146 let mut phases = Vec::new();
10147 let source_graph_detail = if hinted_markdown_file(root, path_hint).is_some() {
10148 "bounded session projection: index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads; skips global call-edge materialization because full-projection is the complete-call-graph regression guard"
10149 } else {
10150 "index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads when summaries are cached"
10151 };
10152 let source_graph = graph_db_backend_eval_timed_phase(
10153 &mut phases,
10154 "source_graph_build",
10155 source_graph_detail,
10156 || build_traversal_graph_source_with_options(root, path_hint, scope, false),
10157 )?;
10158 let projection = graph_db_backend_eval_timed_phase(
10159 &mut phases,
10160 "projection_rows",
10161 "provider-neutral GraphStore node/edge row construction before SQLite persistence",
10162 || traversal_projection_from_graph(root, scope, &source_graph),
10163 )?;
10164 let graph_db = graph_substrate_db_path(root, scope);
10165 let mut store = graph_db_backend_eval_timed_phase(
10166 &mut phases,
10167 "sqlite_open",
10168 "open the local SQLite graph.db with WAL and busy-timeout settings",
10169 || SqliteGraphStore::open(&graph_db),
10170 )?;
10171 let refreshed_source_watermark = traversal_source_watermark(root, path_hint, scope, false)
10172 .ok()
10173 .flatten();
10174 let refresh = store.replace_projection_with_version(
10175 scope.unwrap_or("root"),
10176 &projection,
10177 Some(GRAPH_PROJECTION_VERSION),
10178 refreshed_source_watermark
10179 .or(source_watermark)
10180 .or_else(|| graph_projection_content_hash(&projection)),
10181 )?;
10182 phases.extend(
10183 refresh
10184 .phase_timings
10185 .iter()
10186 .map(|phase| GraphDbBackendEvalPhaseTiming {
10187 name: phase.name.clone(),
10188 duration_micros: phase.duration_micros,
10189 detail: phase.detail.clone(),
10190 }),
10191 );
10192 Ok((source_graph, refresh, phases))
10193}
10194
/// Returns the backend-eval disk cache directory, `<root>/.tsift/backend-eval-cache`.
fn graph_db_backend_eval_disk_cache_dir(root: &Path) -> PathBuf {
    let mut cache_dir = root.to_path_buf();
    cache_dir.push(".tsift/backend-eval-cache");
    cache_dir
}
10198
10199fn graph_db_backend_eval_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
10200 graph_db_backend_eval_disk_cache_dir(root)
10201 .join(kind)
10202 .join(format!("{key}.json.gz"))
10203}
10204
10205fn graph_db_backend_eval_legacy_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
10206 graph_db_backend_eval_disk_cache_dir(root)
10207 .join(kind)
10208 .join(format!("{key}.json"))
10209}
10210
/// Timing breakdown for one backend-eval disk cache read. All durations are microseconds.
#[derive(Clone, Default)]
struct GraphDbBackendEvalDiskCacheReadProfile {
    /// Time spent reading cache file bytes from disk.
    file_read_micros: u128,
    /// Time spent gzip-decoding the compressed entry.
    gzip_decode_micros: u128,
    /// Time spent deserializing the JSON payload.
    serde_decode_micros: u128,
    /// Set when the value came from the uncompressed legacy `.json` entry.
    legacy: bool,
}
10218
10219fn graph_db_backend_eval_read_disk_cache<T: for<'de> Deserialize<'de>>(
10220 root: &Path,
10221 kind: &str,
10222 key: &str,
10223) -> Option<(T, u64, u64, GraphDbBackendEvalDiskCacheReadProfile)> {
10224 let mut profile = GraphDbBackendEvalDiskCacheReadProfile::default();
10225 let path = graph_db_backend_eval_disk_cache_path(root, kind, key);
10226 let read_started = Instant::now();
10227 let read_result = fs::read(&path);
10228 profile.file_read_micros = read_started.elapsed().as_micros();
10229 if let Ok(bytes) = read_result {
10230 let decode_started = Instant::now();
10231 let mut decoder = GzDecoder::new(bytes.as_slice());
10232 let mut decoded = Vec::new();
10233 let decode_ok = decoder.read_to_end(&mut decoded).is_ok();
10234 profile.gzip_decode_micros = decode_started.elapsed().as_micros();
10235 if decode_ok {
10236 let serde_started = Instant::now();
10237 let parsed: Option<T> = serde_json::from_slice(&decoded).ok();
10238 profile.serde_decode_micros = serde_started.elapsed().as_micros();
10239 if let Some(value) = parsed {
10240 return Some((value, bytes.len() as u64, decoded.len() as u64, profile));
10241 }
10242 }
10243 }
10244
10245 let legacy_path = graph_db_backend_eval_legacy_disk_cache_path(root, kind, key);
10246 let legacy_started = Instant::now();
10247 let bytes = fs::read(legacy_path).ok()?;
10248 profile.file_read_micros = profile
10249 .file_read_micros
10250 .saturating_add(legacy_started.elapsed().as_micros());
10251 let serde_started = Instant::now();
10252 let value = serde_json::from_slice(&bytes).ok()?;
10253 profile.serde_decode_micros = profile
10254 .serde_decode_micros
10255 .saturating_add(serde_started.elapsed().as_micros());
10256 profile.legacy = true;
10257 Some((value, bytes.len() as u64, bytes.len() as u64, profile))
10258}
10259
/// Timing breakdown for one backend-eval disk cache write. All durations are microseconds.
#[derive(Clone, Default)]
struct GraphDbBackendEvalDiskCacheWriteProfile {
    /// Time spent serializing the value to JSON.
    serde_encode_micros: u128,
    /// Time spent gzip-compressing the JSON bytes.
    gzip_encode_micros: u128,
    /// Time spent writing the compressed bytes to disk.
    file_write_micros: u128,
}
10266
10267fn graph_db_backend_eval_write_disk_cache<T: Serialize>(
10268 root: &Path,
10269 kind: &str,
10270 key: &str,
10271 value: &T,
10272) -> Option<(u64, u64, GraphDbBackendEvalDiskCacheWriteProfile)> {
10273 let mut profile = GraphDbBackendEvalDiskCacheWriteProfile::default();
10274 let path = graph_db_backend_eval_disk_cache_path(root, kind, key);
10275 let parent = path.parent()?;
10276 if fs::create_dir_all(parent).is_err() {
10277 return None;
10278 }
10279 let serde_started = Instant::now();
10280 let bytes = serde_json::to_vec(value).ok()?;
10281 profile.serde_encode_micros = serde_started.elapsed().as_micros();
10282 let gzip_started = Instant::now();
10283 let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
10284 if encoder.write_all(&bytes).is_err() {
10285 return None;
10286 }
10287 let encoded = encoder.finish().ok()?;
10288 profile.gzip_encode_micros = gzip_started.elapsed().as_micros();
10289 let write_started = Instant::now();
10290 if fs::write(&path, &encoded).is_err() {
10291 return None;
10292 }
10293 profile.file_write_micros = write_started.elapsed().as_micros();
10294 Some((encoded.len() as u64, bytes.len() as u64, profile))
10295}
10296
10297fn graph_db_backend_eval_prune_disk_cache(root: &Path, kind: &str, keep_key: &str) -> (usize, u64) {
10298 let dir = graph_db_backend_eval_disk_cache_dir(root).join(kind);
10299 let Ok(entries) = fs::read_dir(dir) else {
10300 return (0, 0);
10301 };
10302 let keep_name = format!("{keep_key}.json.gz");
10303 let mut pruned_files = 0usize;
10304 let mut pruned_bytes = 0u64;
10305 for entry in entries.flatten() {
10306 let path = entry.path();
10307 if !path.is_file() {
10308 continue;
10309 }
10310 let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
10311 continue;
10312 };
10313 if name == keep_name {
10314 continue;
10315 }
10316 let is_backend_eval_cache = name.ends_with(".json") || name.ends_with(".json.gz");
10317 if !is_backend_eval_cache {
10318 continue;
10319 }
10320 let bytes = entry.metadata().map(|metadata| metadata.len()).unwrap_or(0);
10321 if fs::remove_file(&path).is_ok() {
10322 pruned_files += 1;
10323 pruned_bytes += bytes;
10324 }
10325 }
10326 (pruned_files, pruned_bytes)
10327}
10328
10329fn graph_db_backend_eval_full_projection_raw_watermark_rows(
10330 root: &Path,
10331 source_root: &Path,
10332) -> Result<Vec<GraphDbBackendEvalRawSourceWatermarkRow>> {
10333 let mut rows = Vec::new();
10334 let mut entries = walk::walk_files(source_root)?;
10335 entries.sort_by(|left, right| left.path.cmp(&right.path));
10336 for entry in entries {
10337 if traversal_path_is_generated_artifact(root, source_root, &entry.path) {
10338 continue;
10339 }
10340 if traversal_path_is_session_markdown(root, source_root, &entry.path) {
10341 continue;
10342 }
10343 let bytes = fs::read(&entry.path)
10344 .with_context(|| format!("reading source input {}", entry.path.display()))?;
10345 rows.push(GraphDbBackendEvalRawSourceWatermarkRow {
10346 path: traversal_watermark_path(root, &entry.path),
10347 bytes: bytes.len() as u64,
10348 content_hash: content_hash(&bytes)?,
10349 });
10350 }
10351 Ok(rows)
10352}
10353
10354fn graph_db_backend_eval_full_projection_source_watermark(
10355 root: &Path,
10356 scope: Option<&str>,
10357) -> Result<GraphDbBackendEvalFullProjectionSourceWatermark> {
10358 let path_hint = root;
10359 let mut detail_parts = Vec::new();
10360 let mut parts = vec![
10361 format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
10362 format!("cache_version:{GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION}"),
10363 "watermark_kind:stable_full_projection_inputs".to_string(),
10364 format!("scope:{}", scope.unwrap_or("root")),
10365 format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
10366 ];
10367
10368 let gate = prepare_agent_doc_index_gate(root, path_hint, scope, "full-projection cache key");
10369 match gate.db_path.as_ref().filter(|db_path| db_path.exists()) {
10370 Some(db_path) => {
10371 let db = index::IndexDb::open_read_only_resilient(db_path)?;
10372 parts.push("index_mode:indexed".to_string());
10373 detail_parts.push("mode=indexed".to_string());
10374 parts.push(format!(
10375 "index_source_root:{}",
10376 traversal_watermark_path(root, &gate.source_root)
10377 ));
10378
10379 let symbols = db
10380 .all_symbols()?
10381 .into_iter()
10382 .filter(|symbol| {
10383 !traversal_path_is_generated_artifact(
10384 root,
10385 &gate.source_root,
10386 Path::new(&symbol.file),
10387 ) && !traversal_path_is_session_markdown(
10388 root,
10389 &gate.source_root,
10390 Path::new(&symbol.file),
10391 )
10392 })
10393 .collect::<Vec<_>>();
10394 let symbols_hash = content_hash(&symbols)?;
10395 detail_parts.push(format!("symbols={symbols_hash}"));
10396 parts.push(format!("index_symbols:{symbols_hash}"));
10397
10398 let edges = db
10399 .all_stored_edges()?
10400 .into_iter()
10401 .filter(|edge| {
10402 !traversal_path_is_generated_artifact(
10403 root,
10404 &gate.source_root,
10405 Path::new(&edge.caller_file),
10406 ) && !traversal_path_is_session_markdown(
10407 root,
10408 &gate.source_root,
10409 Path::new(&edge.caller_file),
10410 )
10411 })
10412 .collect::<Vec<_>>();
10413 let edges_hash = content_hash(&edges)?;
10414 detail_parts.push(format!("call_edges={edges_hash}"));
10415 parts.push(format!("index_call_edges:{edges_hash}"));
10416
10417 let routes = db
10418 .all_routes()?
10419 .into_iter()
10420 .filter(|route| {
10421 !traversal_path_is_generated_artifact(
10422 root,
10423 &gate.source_root,
10424 Path::new(&route.file),
10425 ) && !traversal_path_is_session_markdown(
10426 root,
10427 &gate.source_root,
10428 Path::new(&route.file),
10429 )
10430 })
10431 .collect::<Vec<_>>();
10432 let routes_hash = content_hash(&routes)?;
10433 detail_parts.push(format!("routes={routes_hash}"));
10434 parts.push(format!("index_routes:{routes_hash}"));
10435 }
10436 None => {
10437 parts.push("index_mode:raw_fallback".to_string());
10438 detail_parts.push("mode=raw_fallback".to_string());
10439 parts.push(format!(
10440 "raw_source_root:{}",
10441 traversal_watermark_path(root, &gate.source_root)
10442 ));
10443 let raw_rows =
10444 graph_db_backend_eval_full_projection_raw_watermark_rows(root, &gate.source_root)?;
10445 let raw_hash = content_hash(&raw_rows)?;
10446 detail_parts.push(format!("raw_source_files={raw_hash}"));
10447 parts.push(format!("raw_source_files:{raw_hash}"));
10448 }
10449 }
10450
10451 parts.push("agent_doc_session_markdown:bounded_real_dataset_only".to_string());
10452 detail_parts.push("session_markdown=bounded_real_dataset_only".to_string());
10453 let summaries_start = parts.len();
10454 push_traversal_summaries_watermark_part(root, &mut parts)?;
10455 let summaries_hash = content_hash(&parts[summaries_start..].to_vec())?;
10456 detail_parts.push(format!("summaries={summaries_hash}"));
10457 let value = content_hash(&parts)?;
10458 detail_parts.push(format!("watermark={value}"));
10459 Ok(GraphDbBackendEvalFullProjectionSourceWatermark {
10460 value,
10461 detail: detail_parts.join(" "),
10462 })
10463}
10464
10465fn graph_db_backend_eval_full_projection_cache_key(
10466 root: &Path,
10467 scope: Option<&str>,
10468) -> Result<(String, String, String)> {
10469 let source_watermark = graph_db_backend_eval_full_projection_source_watermark(root, scope)?;
10470 let key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
10471 root,
10472 scope,
10473 &source_watermark.value,
10474 )?;
10475 Ok((source_watermark.value, key, source_watermark.detail))
10476}
10477
10478fn graph_db_backend_eval_full_projection_cache_key_for_watermark(
10479 root: &Path,
10480 scope: Option<&str>,
10481 source_watermark: &str,
10482) -> Result<String> {
10483 content_hash(&serde_json::json!({
10484 "version": GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION,
10485 "root": root.display().to_string(),
10486 "scope": scope.unwrap_or("root"),
10487 "source_watermark": source_watermark,
10488 }))
10489}
10490
/// Loads the full-project graph projection from the disk cache, or rebuilds it,
/// while recording per-phase timings and cache statistics.
///
/// Returns `(projection, warnings, phase timings, cache stats)`.
///
/// * Cache hit: the cached projection and warnings are returned, the cache
///   read phases are reported from the read profile, sibling cache files are
///   pruned, and the two build phases are reported with a duration of `0`.
/// * Cache miss: the source graph and projection rows are built (each timed),
///   the result is written to the disk cache under a key derived from a
///   freshly recomputed watermark, and sibling cache files are pruned.
///
/// # Errors
/// Propagates failures from computing the cache key, building the source
/// graph, building the projection rows, or deriving the write key. When the
/// cache write returns `None`, no write phases or byte counts are recorded and
/// the function still succeeds.
pub(crate) fn graph_db_backend_eval_full_projection_with_profile(
    root: &Path,
    scope: Option<&str>,
) -> Result<(
    GraphProjection,
    Vec<String>,
    Vec<GraphDbBackendEvalPhaseTiming>,
    GraphDbBackendEvalFullProjectionCacheStats,
)> {
    let (source_watermark, key, source_watermark_detail) =
        graph_db_backend_eval_full_projection_cache_key(root, scope)?;
    let lookup_started = Instant::now();
    // A cache entry only counts as a hit when its version, key, and source
    // watermark all match the values computed for this run.
    if let Some((cached, disk_bytes, json_bytes, read_profile)) =
        graph_db_backend_eval_read_disk_cache::<GraphDbBackendEvalFullProjectionCache>(
            root,
            "full_projection",
            &key,
        )
        && cached.version == GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION
        && cached.key == key
        && cached.source_watermark == source_watermark
    {
        // The file-read, gzip, and serde phases are reported separately below,
        // so subtract them to leave only the remaining lookup overhead.
        let lookup_overhead_micros = lookup_started
            .elapsed()
            .as_micros()
            .saturating_sub(read_profile.file_read_micros)
            .saturating_sub(read_profile.gzip_decode_micros)
            .saturating_sub(read_profile.serde_decode_micros);
        let prune_started = Instant::now();
        let (pruned_files, pruned_bytes) =
            graph_db_backend_eval_prune_disk_cache(root, "full_projection", &key);
        let prune_micros = prune_started.elapsed().as_micros();
        let cache_stats = GraphDbBackendEvalFullProjectionCacheStats {
            hit: true,
            disk_bytes,
            json_bytes,
            pruned_files,
            pruned_bytes,
        };
        // Annotate the file-read phase when the entry came from the legacy path.
        let read_detail_suffix = if read_profile.legacy {
            " (legacy uncompressed cache path)"
        } else {
            ""
        };
        return Ok((
            cached.projection,
            cached.warnings,
            vec![
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache_lookup",
                    lookup_overhead_micros,
                    &format!(
                        "watermark/version check overhead around the cache load phases; {source_watermark_detail}"
                    ),
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.file_read",
                    read_profile.file_read_micros,
                    &format!(
                        "read compressed cache bytes from .tsift/backend-eval-cache{read_detail_suffix}"
                    ),
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.gzip_decode",
                    read_profile.gzip_decode_micros,
                    "gunzip the compressed projection cache bytes",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.serde_decode",
                    read_profile.serde_decode_micros,
                    "serde_json deserialize the decoded projection cache payload",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.prune",
                    prune_micros,
                    "prune sibling cache files older than the current key",
                ),
                // The build phases are still emitted on a hit, with zero
                // duration, so hit and miss reports share the same phase names.
                graph_db_backend_eval_phase_timing(
                    "full_projection.source_graph_build",
                    0,
                    "reused cached full-project source graph; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.projection_rows",
                    0,
                    "reused cached provider-neutral full-project projection rows",
                ),
            ],
            cache_stats,
        ));
    }

    // Cache miss: start from default stats and record the full lookup time.
    let mut cache_stats = GraphDbBackendEvalFullProjectionCacheStats::default();
    let mut phases = vec![graph_db_backend_eval_phase_timing(
        "full_projection.cache_lookup",
        lookup_started.elapsed().as_micros(),
        &format!(
            "no full-project projection cache entry matched the source watermark; {source_watermark_detail}"
        ),
    )];
    let full_source = graph_db_backend_eval_timed_phase(
        &mut phases,
        "full_projection.source_graph_build",
        "opt-in full-project source graph build; uses the project root as the path hint so bounded session projections cannot hide full-graph regressions",
        || build_traversal_graph_source_with_options(root, root, scope, false),
    )?;
    let projection = graph_db_backend_eval_timed_phase(
        &mut phases,
        "full_projection.projection_rows",
        "provider-neutral row construction for the opt-in full-project projection dataset",
        || traversal_projection_from_graph(root, scope, &full_source),
    )?;
    let warnings = full_source.warnings;
    // Recompute the watermark after the build; if that fails, fall back to the
    // value computed before the build.
    // NOTE(review): presumably this covers inputs changing during the build — confirm.
    let refreshed_source_watermark =
        graph_db_backend_eval_full_projection_source_watermark(root, scope)
            .map(|watermark| watermark.value)
            .unwrap_or_else(|_| source_watermark.clone());
    // The entry is stored under the key derived from the refreshed watermark,
    // which may differ from the key used for the lookup above.
    let write_key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
        root,
        scope,
        &refreshed_source_watermark,
    )?;
    let cache = GraphDbBackendEvalFullProjectionCache {
        version: GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION.to_string(),
        key: write_key.clone(),
        source_watermark: refreshed_source_watermark,
        projection: projection.clone(),
        warnings: warnings.clone(),
    };
    // Write phases and byte counts are recorded only when the write reports a result.
    if let Some((disk_bytes, json_bytes, write_profile)) =
        graph_db_backend_eval_write_disk_cache(root, "full_projection", &write_key, &cache)
    {
        cache_stats.disk_bytes = disk_bytes;
        cache_stats.json_bytes = json_bytes;
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.serde_encode",
            write_profile.serde_encode_micros,
            "serde_json serialize the projection cache payload before compression",
        ));
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.gzip_encode",
            write_profile.gzip_encode_micros,
            "gzip-compress the serialized projection cache payload",
        ));
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.file_write",
            write_profile.file_write_micros,
            "write the compressed projection cache bytes to .tsift/backend-eval-cache",
        ));
    }
    // Pruning runs whether or not the write above reported a result.
    let prune_started = Instant::now();
    let (pruned_files, pruned_bytes) =
        graph_db_backend_eval_prune_disk_cache(root, "full_projection", &write_key);
    phases.push(graph_db_backend_eval_phase_timing(
        "full_projection.cache.prune",
        prune_started.elapsed().as_micros(),
        "prune sibling cache files older than the current key",
    ));
    cache_stats.pruned_files = pruned_files;
    cache_stats.pruned_bytes = pruned_bytes;
    Ok((projection, warnings, phases, cache_stats))
}
10653
10654fn graph_db_backend_eval_timed(
10655 name: &str,
10656 run: impl FnOnce() -> Result<(Option<usize>, serde_json::Value)>,
10657) -> (
10658 GraphDbBackendEvalOperation,
10659 Option<GraphDbBackendEvalSignature>,
10660) {
10661 let started = Instant::now();
10662 match run() {
10663 Ok((rows, value)) => (
10664 GraphDbBackendEvalOperation {
10665 name: name.to_string(),
10666 supported: true,
10667 status: "ok".to_string(),
10668 duration_micros: started.elapsed().as_micros(),
10669 rows,
10670 error: None,
10671 },
10672 Some(GraphDbBackendEvalSignature {
10673 operation: name.to_string(),
10674 value,
10675 }),
10676 ),
10677 Err(err) => (
10678 GraphDbBackendEvalOperation {
10679 name: name.to_string(),
10680 supported: false,
10681 status: "error".to_string(),
10682 duration_micros: started.elapsed().as_micros(),
10683 rows: None,
10684 error: Some(format!("{err:#}")),
10685 },
10686 None,
10687 ),
10688 }
10689}
10690
10691fn graph_db_backend_eval_parity(
10692 sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
10693 candidate_signatures: &[GraphDbBackendEvalSignature],
10694) -> GraphDbBackendEvalParity {
10695 let Some(sqlite_signatures) = sqlite_signatures else {
10696 return GraphDbBackendEvalParity {
10697 matches_sqlite: true,
10698 diagnostics: Vec::new(),
10699 };
10700 };
10701 let sqlite = sqlite_signatures
10702 .iter()
10703 .map(|signature| (signature.operation.as_str(), &signature.value))
10704 .collect::<BTreeMap<_, _>>();
10705 let candidate = candidate_signatures
10706 .iter()
10707 .map(|signature| (signature.operation.as_str(), &signature.value))
10708 .collect::<BTreeMap<_, _>>();
10709 let mut diagnostics = Vec::new();
10710 for (operation, sqlite_value) in sqlite {
10711 match candidate.get(operation) {
10712 Some(candidate_value) if *candidate_value == sqlite_value => {}
10713 Some(_) => diagnostics.push(format!("{operation} output differed from SQLite")),
10714 None => diagnostics.push(format!(
10715 "{operation} did not complete for candidate backend"
10716 )),
10717 }
10718 }
10719 GraphDbBackendEvalParity {
10720 matches_sqlite: diagnostics.is_empty(),
10721 diagnostics,
10722 }
10723}
10724
10725pub(crate) fn graph_db_backend_eval_targets(
10726 store: &impl GraphStore,
10727 requested: &[String],
10728) -> Result<Vec<String>> {
10729 let requested = requested
10730 .iter()
10731 .filter_map(|target| normalize_conflict_target(target))
10732 .collect::<Vec<_>>();
10733 if !requested.is_empty() {
10734 return Ok(requested);
10735 }
10736
10737 for kind in ["backlog", "job_packet"] {
10738 let nodes = store.nodes_by_kind(kind)?;
10739 if let Some(node) = nodes.first() {
10740 if let Some(ref_id) = node.properties.get("ref_id") {
10741 return Ok(vec![ref_id.clone()]);
10742 }
10743 return Ok(vec![node.id.clone()]);
10744 }
10745 }
10746 Ok(Vec::new())
10747}
10748
10749fn graph_db_backend_eval_path_targets(
10750 store: &impl GraphStore,
10751 max_hops: usize,
10752) -> Result<Option<(String, String, usize)>> {
10753 let synthetic_from = "gsym-synthetic-0000";
10754 let synthetic_to = format!("gsym-synthetic-{max_hops:04}");
10755 if store.node(synthetic_from)?.is_some() && store.node(&synthetic_to)?.is_some() {
10756 let outgoing = store.outgoing_edges(synthetic_from, None)?;
10757 if outgoing.len() > 1
10758 && let Some(edge) = outgoing.first()
10759 {
10760 return Ok(Some((
10761 edge.from_id.clone(),
10762 edge.to_id.clone(),
10763 GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
10764 )));
10765 }
10766 return Ok(Some((synthetic_from.to_string(), synthetic_to, max_hops)));
10767 }
10768
10769 Ok(store.sample_edge(None)?.map(|edge| {
10770 (
10771 edge.from_id,
10772 edge.to_id,
10773 GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
10774 )
10775 }))
10776}
10777
10778fn graph_db_backend_eval_path_operation<S: GraphStore>(
10779 store: &S,
10780 configured_max_hops: usize,
10781) -> (
10782 GraphDbBackendEvalOperation,
10783 Option<GraphDbBackendEvalSignature>,
10784) {
10785 let operation_name = if configured_max_hops == GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
10786 "path_max_hops".to_string()
10787 } else {
10788 format!("path_max_hops_{configured_max_hops}")
10789 };
10790 graph_db_backend_eval_timed(&operation_name, || {
10791 let (from, to, effective_max_hops) =
10792 graph_db_backend_eval_path_targets(store, configured_max_hops)?
10793 .context("backend-eval path probe requires at least one traversable edge")?;
10794 let path = store.shortest_path_with_max_hops(&from, &to, None, Some(effective_max_hops))?;
10795 let warning = if configured_max_hops > GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
10796 Some(format!(
10797 "{configured_max_hops}-hop tier is measured only; keep user-facing defaults at {} until repeated samples and SQLite query-plan checks pass",
10798 GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS
10799 ))
10800 } else if path.is_none() && effective_max_hops == configured_max_hops {
10801 Some(format!(
10802 "path probe truncated at {configured_max_hops} hops before a route was found"
10803 ))
10804 } else {
10805 None
10806 };
10807 Ok((
10808 path.as_ref().map(|path| path.nodes.len()),
10809 serde_json::json!({
10810 "from": from,
10811 "to": to,
10812 "configured_max_hops": configured_max_hops,
10813 "effective_max_hops": effective_max_hops,
10814 "hops": path.as_ref().map(|path| path.hops),
10815 "nodes": path.as_ref().map(|path| &path.nodes),
10816 "found": path.is_some(),
10817 "warning": warning,
10818 }),
10819 ))
10820 })
10821}
10822
10823fn graph_db_backend_eval_neighborhood_operation<S: GraphStore>(
10824 store: &S,
10825 depth: usize,
10826 limit: usize,
10827) -> (
10828 GraphDbBackendEvalOperation,
10829 Option<GraphDbBackendEvalSignature>,
10830) {
10831 graph_db_backend_eval_timed("neighborhood", || {
10832 let edge = match store.sample_edge(Some("calls"))? {
10833 Some(edge) => edge,
10834 None => store.sample_edge(None)?.context(
10835 "backend-eval neighborhood probe requires at least one traversable edge",
10836 )?,
10837 };
10838 let page = store
10839 .paged_neighborhood(
10840 &edge.from_id,
10841 depth,
10842 Some(&edge.kind),
10843 GraphQueryOptions {
10844 limit: Some(limit.max(1)),
10845 ..GraphQueryOptions::default()
10846 },
10847 )?
10848 .with_context(|| {
10849 format!(
10850 "backend-eval neighborhood target not found: {}",
10851 edge.from_id
10852 )
10853 })?;
10854 Ok((
10855 Some(page.nodes.len() + page.edges.len()),
10856 serde_json::json!({
10857 "center": edge.from_id,
10858 "kind": edge.kind,
10859 "depth": depth,
10860 "limit": limit.max(1),
10861 "node_ids": page.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10862 "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10863 "truncated": page.page.truncated,
10864 }),
10865 ))
10866 })
10867}
10868
10869fn graph_db_backend_eval_related_operation<S: GraphStore>(
10870 root: &Path,
10871 scope: Option<&str>,
10872 store: &S,
10873 depth: usize,
10874 limit: usize,
10875) -> (
10876 GraphDbBackendEvalOperation,
10877 Option<GraphDbBackendEvalSignature>,
10878) {
10879 graph_db_backend_eval_timed("related", || {
10880 let query = "backend evaluation";
10881 let semantic = semantic_related_report_from_store(
10882 root,
10883 scope,
10884 query,
10885 3,
10886 SemanticRelatedKind::All,
10887 store,
10888 )?;
10889 let seed_ids = semantic
10890 .items
10891 .iter()
10892 .map(|item| item.handle.clone())
10893 .collect::<Vec<_>>();
10894 let subgraph =
10895 graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit.max(1))?;
10896 Ok((
10897 Some(subgraph.nodes.len() + subgraph.edges.len()),
10898 serde_json::json!({
10899 "query": query,
10900 "seed_ids": seed_ids,
10901 "node_ids": subgraph.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10902 "edge_ids": subgraph.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10903 "truncated": subgraph.truncated,
10904 "warnings": semantic.warnings,
10905 "diagnostics": subgraph.diagnostics,
10906 }),
10907 ))
10908 })
10909}
10910
10911fn graph_db_backend_eval_evidence_signature(report: &GraphDbEvidenceReport) -> serde_json::Value {
10912 serde_json::json!({
10913 "target": report.target,
10914 "target_node_id": report.target_node.id,
10915 "target_kind": report.target_node.kind,
10916 "worker_context": report.worker_context.iter().map(|node| &node.id).collect::<Vec<_>>(),
10917 "source_handles": report.source_handles.iter().map(|node| &node.id).collect::<Vec<_>>(),
10918 "worker_results": report.worker_results.iter().map(|node| &node.id).collect::<Vec<_>>(),
10919 "semantic_related": report.semantic_related.iter().map(|node| &node.id).collect::<Vec<_>>(),
10920 "path_count": report.shortest_paths.len(),
10921 })
10922}
10923
10924fn graph_db_backend_eval_target_resolution_signature(
10925 resolved: &[(String, SubstrateGraphNode)],
10926) -> serde_json::Value {
10927 serde_json::json!({
10928 "targets": resolved.iter().map(|(target, node)| {
10929 serde_json::json!({
10930 "target": target,
10931 "target_node_id": node.id,
10932 "target_kind": node.kind,
10933 "target_label": node.label,
10934 })
10935 }).collect::<Vec<_>>(),
10936 })
10937}
10938
10939fn graph_db_backend_eval_conflict_signature(report: &ConflictMatrixReport) -> serde_json::Value {
10940 serde_json::json!({
10941 "targets": report.targets,
10942 "can_parallel": report.can_parallel,
10943 "fail_closed": report.fail_closed,
10944 "cross_target_parallel_safe": report.cross_target_parallel_safe,
10945 "per_target_fail_closed": report.per_target_fail_closed.iter().map(|target| &target.target).collect::<Vec<_>>(),
10946 "candidates": report.candidates.iter().map(|candidate| {
10947 serde_json::json!({
10948 "target": candidate.target,
10949 "risk": conflict_risk_label(candidate.risk),
10950 "owned_files": candidate.owned_files,
10951 "owned_symbols": candidate.owned_symbols,
10952 "source_handles": candidate.source_handles.iter().map(|handle| &handle.handle).collect::<Vec<_>>(),
10953 "previously_completed": candidate.previously_completed,
10954 "parallel_safe": candidate.parallel_safe,
10955 })
10956 }).collect::<Vec<_>>(),
10957 "conflicts": report.conflicts.iter().map(|pair| {
10958 serde_json::json!({
10959 "left": pair.left,
10960 "right": pair.right,
10961 "risk": conflict_risk_label(pair.risk),
10962 })
10963 }).collect::<Vec<_>>(),
10964 })
10965}
10966
10967fn graph_db_backend_eval_dispatch_signature(report: &DispatchTraceReport) -> serde_json::Value {
10968 serde_json::json!({
10969 "targets": report.targets,
10970 "node_ids": report.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10971 "edge_keys": report.edges.iter().map(|e| graph_db_edge_key(&SubstrateGraphEdge::from(e))).collect::<Vec<_>>(),
10972 "evidence_packet_ids": report.evidence_packet_ids,
10973 "worker_prompt_targets": report.worker_prompt_packets.iter().map(|packet| &packet.target).collect::<Vec<_>>(),
10974 "truncated": report.truncated,
10975 })
10976}
10977
10978fn graph_db_backend_eval_edge_scan_probe(
10979 store: &impl GraphStore,
10980) -> Result<(SubstrateGraphEdge, Vec<GraphPropertyFilter>)> {
10981 if let Some((edge, filter)) = store.sample_edge_with_property()? {
10982 return Ok((edge, vec![filter]));
10983 }
10984 let edge = store
10985 .sample_edge(None)?
10986 .context("backend-eval edge scan requires at least one edge")?;
10987 Ok((edge, Vec::new()))
10988}
10989
/// Runs the backend-eval operation suite against `store` and assembles the
/// per-backend report.
///
/// Operations are recorded in this order: the supplied `refresh_operation`,
/// `status`, `edge_lookup`, `edge_property_scan`, `incident_edges`,
/// `neighborhood`, `related`, one path probe per hop tier,
/// `evidence_target_resolution`, `evidence`, `conflict_matrix`, and
/// `dispatch_trace`. Each probe goes through `graph_db_backend_eval_timed`, so
/// a failing probe is recorded as an error operation without a signature
/// instead of aborting the suite.
///
/// Returns the backend report (with `total_micros` summed over all operations
/// and parity computed against `sqlite_signatures`) together with the
/// signatures collected for this backend.
#[allow(clippy::too_many_arguments)]
fn graph_db_backend_eval_report_for_store<S: GraphStore>(
    backend: &str,
    adapter: &str,
    read_only: bool,
    root: &Path,
    path: &Path,
    scope: Option<&str>,
    targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
    store: &S,
    freshness: GraphDbFreshnessReport,
    refresh_operation: GraphDbBackendEvalOperation,
    refresh_signature: Option<GraphDbBackendEvalSignature>,
    sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
    extra_warnings: Vec<String>,
    prepared: &ConflictMatrixPreparedInputs,
    projection_load: &str,
    lock_behavior: &str,
    install_portability: &str,
) -> (
    GraphDbBackendEvalBackendReport,
    Vec<GraphDbBackendEvalSignature>,
) {
    // The caller-measured refresh step seeds both result lists.
    let mut operations = vec![refresh_operation];
    let mut signatures = refresh_signature.into_iter().collect::<Vec<_>>();

    // status: node/edge counts plus the freshness status.
    let (operation, signature) = graph_db_backend_eval_timed("status", || {
        let (nodes, edges) = store.graph_counts()?;
        Ok((
            Some(nodes + edges),
            serde_json::json!({
                "freshness": freshness.status,
                "nodes": nodes,
                "edges": edges,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // edge_lookup: sample an edge, then fetch it back by its edge key.
    let (operation, signature) = graph_db_backend_eval_timed("edge_lookup", || {
        let edge = store
            .sample_edge(None)?
            .context("backend-eval edge lookup requires at least one edge")?;
        let edge_id = graph_db_edge_key(&edge);
        let found = store
            .edge(&edge_id)?
            .with_context(|| format!("backend-eval edge lookup missed {edge_id}"))?;
        Ok((
            Some(1),
            serde_json::json!({
                "edge_id": edge_id,
                "from_id": found.from_id,
                "to_id": found.to_id,
                "kind": found.kind,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // edge_property_scan: page edges of the probe edge's kind, filtered by the
    // probe's property filters (which may be empty).
    let (operation, signature) = graph_db_backend_eval_timed("edge_property_scan", || {
        let (edge, filters) = graph_db_backend_eval_edge_scan_probe(store)?;
        let page = store.paged_edges(
            Some(&edge.kind),
            GraphQueryOptions {
                limit: Some(limit.max(1)),
                property_filters: filters.clone(),
                ..GraphQueryOptions::default()
            },
        )?;
        Ok((
            Some(page.edges.len()),
            serde_json::json!({
                "kind": edge.kind,
                "filters": filters.iter().map(|filter| format!("{}={}", filter.key, filter.value)).collect::<Vec<_>>(),
                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
                "truncated": page.page.truncated,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // incident_edges: page the edges of the sampled edge's kind that touch its
    // source node.
    let (operation, signature) = graph_db_backend_eval_timed("incident_edges", || {
        let edge = store
            .sample_edge(None)?
            .context("backend-eval incident edge scan requires at least one edge")?;
        let page = store.paged_incident_edges(
            &edge.from_id,
            Some(&edge.kind),
            GraphQueryOptions {
                limit: Some(limit.max(1)),
                ..GraphQueryOptions::default()
            },
        )?;
        Ok((
            Some(page.edges.len()),
            serde_json::json!({
                "node_id": edge.from_id,
                "kind": edge.kind,
                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
                "truncated": page.page.truncated,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    let (operation, signature) = graph_db_backend_eval_neighborhood_operation(store, depth, limit);
    operations.push(operation);
    signatures.extend(signature);

    let (operation, signature) =
        graph_db_backend_eval_related_operation(root, scope, store, depth, limit);
    operations.push(operation);
    signatures.extend(signature);

    // Path probes: the default hop tier first, then each extended tier.
    for configured_max_hops in std::iter::once(GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS)
        .chain(GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS)
    {
        let (operation, signature) =
            graph_db_backend_eval_path_operation(store, configured_max_hops);
        operations.push(operation);
        signatures.extend(signature);
    }

    // evidence_target_resolution: every target must resolve to a node; the
    // first unresolved target fails the whole operation.
    let (operation, signature) = graph_db_backend_eval_timed("evidence_target_resolution", || {
        let resolved = targets
            .iter()
            .map(|target| {
                let node = graph_db_resolve_evidence_target(store, target)?
                    .with_context(|| format!("backend-eval target not found: {target}"))?;
                Ok((target.clone(), node))
            })
            .collect::<Result<Vec<_>>>()?;
        let signature = graph_db_backend_eval_target_resolution_signature(&resolved);
        Ok((Some(resolved.len()), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    // Hand-off slots: `evidence` fills `evidence_for_report` for
    // `conflict_matrix`, which in turn fills `graph_snapshot_for_trace` (and
    // `conflict_for_trace` below) for `dispatch_trace`.
    let mut evidence_for_report = None;
    let mut graph_snapshot_for_trace = None;
    let (operation, signature) = graph_db_backend_eval_timed("evidence", || {
        let resolved_targets =
            resolve_conflict_matrix_targets(store, targets, &prepared.context_pack)?;
        let evidence = collect_conflict_matrix_evidence_packets(
            root,
            scope,
            backend,
            &resolved_targets,
            depth,
            limit,
            store,
            freshness.clone(),
        )?;
        // Only the first packet's report feeds the signature; rows are summed
        // across all packets.
        let report = &evidence
            .first()
            .context("backend-eval evidence requires at least one target")?
            .report;
        let rows = evidence
            .iter()
            .map(|entry| {
                entry.report.worker_context.len()
                    + entry.report.source_handles.len()
                    + entry.report.worker_results.len()
                    + entry.report.semantic_related.len()
            })
            .sum();
        let signature = graph_db_backend_eval_evidence_signature(report);
        evidence_for_report = Some((resolved_targets, evidence));
        Ok((Some(rows), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    let mut conflict_for_trace = None;
    let (operation, signature) = graph_db_backend_eval_timed("conflict_matrix", || {
        // Reuse the evidence gathered above when that probe succeeded;
        // otherwise prepare the graph inputs from scratch.
        let graph_prepared = if let Some((targets, evidence)) = evidence_for_report.take() {
            let graph =
                conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
            let shared_preparation =
                conflict_matrix_shared_preparation_summary(&graph, &evidence, "memory_reuse");
            ConflictMatrixGraphPreparedInputs {
                targets,
                graph,
                evidence,
                shared_preparation,
            }
        } else {
            prepare_conflict_matrix_graph_orchestration(
                root,
                scope,
                backend,
                targets,
                prepared,
                depth,
                limit,
                store,
                freshness.clone(),
            )?
        };
        let report = build_conflict_matrix_report_from_prepared_graph(
            root,
            path,
            scope,
            depth,
            limit,
            impact_limit,
            freshness.clone(),
            extra_warnings.clone(),
            prepared,
            &graph_prepared,
        )?;
        let signature = graph_db_backend_eval_conflict_signature(&report);
        let rows = report.candidates.len() + report.conflicts.len();
        // Stash the report and graph snapshot for the dispatch-trace probe.
        conflict_for_trace = Some(report);
        graph_snapshot_for_trace = Some(graph_prepared.graph);
        Ok((Some(rows), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    // dispatch_trace: fails with a descriptive error when the conflict-matrix
    // probe did not leave a report and graph snapshot behind.
    let (operation, signature) = graph_db_backend_eval_timed("dispatch_trace", || {
        let conflict = conflict_for_trace
            .take()
            .context("backend-eval dispatch-trace requires a completed conflict-matrix report")?;
        let graph = graph_snapshot_for_trace
            .take()
            .context("backend-eval dispatch-trace requires conflict-matrix graph preparation")?;
        let report = build_dispatch_trace_report_from_conflict_snapshot(
            root,
            scope,
            conflict,
            graph.nodes,
            graph.edges,
            depth,
            limit,
            Vec::new(),
        )?;
        Ok((
            Some(report.nodes.len() + report.edges.len()),
            graph_db_backend_eval_dispatch_signature(&report),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // The total includes the refresh operation's duration.
    let total_micros = operations
        .iter()
        .map(|operation| operation.duration_micros)
        .sum();
    let parity = graph_db_backend_eval_parity(sqlite_signatures, &signatures);
    (
        GraphDbBackendEvalBackendReport {
            backend: backend.to_string(),
            adapter: adapter.to_string(),
            read_only,
            projection_load: projection_load.to_string(),
            operations,
            total_micros,
            parity,
            lock_behavior: lock_behavior.to_string(),
            install_portability: install_portability.to_string(),
        },
        signatures,
    )
}
11262
11263pub(crate) fn graph_db_backend_eval_refresh_operation(
11264 duration_micros: u128,
11265 rows: usize,
11266 value: serde_json::Value,
11267) -> (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature) {
11268 (
11269 GraphDbBackendEvalOperation {
11270 name: "refresh".to_string(),
11271 supported: true,
11272 status: "ok".to_string(),
11273 duration_micros,
11274 rows: Some(rows),
11275 error: None,
11276 },
11277 GraphDbBackendEvalSignature {
11278 operation: "refresh".to_string(),
11279 value,
11280 },
11281 )
11282}
11283
11284pub(crate) fn graph_db_backend_eval_synthetic_projection(
11285 nodes: usize,
11286 fanout: usize,
11287) -> GraphProjection {
11288 let nodes = nodes.max(12);
11289 let symbol_count = nodes.saturating_sub(9).max(1);
11290 let source = GraphProvenance::new("backend-eval", "synthetic");
11291 let mut projection_nodes = vec![
11292 SubstrateGraphNode::new(
11293 "projection:tsift-traversal:synthetic",
11294 GRAPH_PROJECTION_META_KIND,
11295 "synthetic projection",
11296 )
11297 .with_property("projection_version", GRAPH_PROJECTION_VERSION)
11298 .with_property(
11299 "content_hash",
11300 format!("synthetic-{nodes}-{fanout}-{symbol_count}"),
11301 )
11302 .with_provenance(source.clone()),
11303 SubstrateGraphNode::new("gses-synthetic", "session", "synthetic session")
11304 .with_property("ref_id", "synthetic-session"),
11305 SubstrateGraphNode::new("gbak-synthetic", "backlog", "#synthetic")
11306 .with_property("ref_id", "synthetic")
11307 .with_property("path", "tasks/software/synthetic.md")
11308 .with_property("line", "1")
11309 .with_property(
11310 "expand",
11311 "tsift source-read tasks/software/synthetic.md --start 1 --lines 40",
11312 ),
11313 SubstrateGraphNode::new("gjob-synthetic", "job_packet", "do #synthetic")
11314 .with_property("ref_id", "synthetic"),
11315 SubstrateGraphNode::new("gwctx-synthetic", "worker_context", "synthetic context")
11316 .with_property("target", "synthetic")
11317 .with_property("summary", "Synthetic worker owns synthetic.rs")
11318 .with_property(
11319 "expand",
11320 "tsift source-read synthetic.rs --start 1 --lines 80",
11321 ),
11322 SubstrateGraphNode::new("gsrc-synthetic", "source_handle", "synthetic.rs:1-80")
11323 .with_property("file", "synthetic.rs")
11324 .with_property("start", "1")
11325 .with_property("end", "80")
11326 .with_property(
11327 "expand",
11328 "tsift source-read synthetic.rs --start 1 --lines 80",
11329 ),
11330 SubstrateGraphNode::new("gfil-synthetic", "file", "synthetic.rs")
11331 .with_property("path", "synthetic.rs"),
11332 SubstrateGraphNode::new("gsem-synthetic", "semantic_concept", "backend evaluation")
11333 .with_property("handle", "gsem-synthetic")
11334 .with_property("label", "backend evaluation")
11335 .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
11336 .with_property(
11337 "embedding",
11338 semantic_embedding_property("backend evaluation"),
11339 ),
11340 SubstrateGraphNode::new("gwres-synthetic", "worker_result", "completed #synthetic")
11341 .with_property("ref_id", "synthetic")
11342 .with_property("status", "completed")
11343 .with_property("touched_files", "synthetic.rs")
11344 .with_property("expected_tests", "cargo test --test graph_db_conformance"),
11345 ];
11346 for idx in 0..symbol_count {
11347 projection_nodes.push(
11348 SubstrateGraphNode::new(
11349 format!("gsym-synthetic-{idx:04}"),
11350 "symbol",
11351 format!("synthetic_symbol_{idx:04}"),
11352 )
11353 .with_property("ref_id", format!("synthetic_symbol_{idx:04}"))
11354 .with_property("path", "synthetic.rs")
11355 .with_property("line", (idx + 1).to_string()),
11356 );
11357 }
11358
11359 let mut projection_edges = vec![
11360 SubstrateGraphEdge::new("gses-synthetic", "gbak-synthetic", "contains"),
11361 SubstrateGraphEdge::new("gses-synthetic", "gjob-synthetic", "queues"),
11362 SubstrateGraphEdge::new("gbak-synthetic", "gwctx-synthetic", "has_context"),
11363 SubstrateGraphEdge::new("gjob-synthetic", "gwctx-synthetic", "has_context"),
11364 SubstrateGraphEdge::new("gwctx-synthetic", "gsrc-synthetic", "uses_source"),
11365 SubstrateGraphEdge::new("gbak-synthetic", "gwres-synthetic", "has_worker_result"),
11366 SubstrateGraphEdge::new("gbak-synthetic", "gsem-synthetic", "mentions_concept"),
11367 SubstrateGraphEdge::new("gsrc-synthetic", "gfil-synthetic", "reads_file"),
11368 SubstrateGraphEdge::new("gfil-synthetic", "gsym-synthetic-0000", "defines"),
11369 ];
11370 for idx in 0..symbol_count {
11371 let from = format!("gsym-synthetic-{idx:04}");
11372 for offset in 1..=fanout.max(1).min(symbol_count) {
11373 let to_idx = (idx + offset) % symbol_count;
11374 if to_idx != idx {
11375 projection_edges.push(SubstrateGraphEdge::new(
11376 from.clone(),
11377 format!("gsym-synthetic-{to_idx:04}"),
11378 "calls",
11379 ));
11380 }
11381 }
11382 }
11383
11384 GraphProjection {
11385 nodes: projection_nodes,
11386 edges: projection_edges
11387 .into_iter()
11388 .map(|edge| {
11389 edge.with_property("dataset", "synthetic")
11390 .with_provenance(source.clone())
11391 })
11392 .collect(),
11393 }
11394}
11395
11396pub(crate) fn graph_db_backend_eval_promotion(
11397 datasets: &[GraphDbBackendEvalDataset],
11398 candidates: &[GraphDbExperimentalBackend],
11399) -> Vec<GraphDbBackendPromotionDecision> {
11400 let mut decisions = Vec::new();
11401 for candidate in candidates {
11402 let mut reasons = Vec::new();
11403 let mut faster_everywhere = true;
11404 let mut parity_everywhere = true;
11405 for dataset in datasets {
11406 let Some(sqlite_report) = dataset
11407 .backends
11408 .iter()
11409 .find(|backend| backend.backend == "sqlite")
11410 else {
11411 parity_everywhere = false;
11412 faster_everywhere = false;
11413 reasons.push(format!(
11414 "{} dataset is missing SQLite baseline",
11415 dataset.name
11416 ));
11417 continue;
11418 };
11419 let sqlite_total = sqlite_report.total_micros;
11420 let Some(candidate_report) = dataset
11421 .backends
11422 .iter()
11423 .find(|backend| backend.backend == candidate.name())
11424 else {
11425 parity_everywhere = false;
11426 reasons.push(format!("{} dataset did not run", dataset.name));
11427 continue;
11428 };
11429 if !candidate_report.parity.matches_sqlite {
11430 parity_everywhere = false;
11431 reasons.push(format!("{} parity differed from SQLite", dataset.name));
11432 }
11433 if candidate_report.total_micros >= sqlite_total {
11434 faster_everywhere = false;
11435 reasons.push(format!(
11436 "{} total {}us did not beat SQLite {}us",
11437 dataset.name, candidate_report.total_micros, sqlite_total
11438 ));
11439 }
11440 let sqlite_operations = sqlite_report
11441 .operations
11442 .iter()
11443 .map(|operation| (operation.name.as_str(), operation.duration_micros))
11444 .collect::<BTreeMap<_, _>>();
11445 for operation in &candidate_report.operations {
11446 if let Some(sqlite_duration) = sqlite_operations.get(operation.name.as_str())
11447 && operation.duration_micros >= *sqlite_duration
11448 {
11449 faster_everywhere = false;
11450 reasons.push(format!(
11451 "{} {} operation {}us did not beat SQLite {}us",
11452 dataset.name, operation.name, operation.duration_micros, sqlite_duration
11453 ));
11454 }
11455 }
11456 if candidate_report
11457 .operations
11458 .iter()
11459 .any(|operation| operation.status != "ok")
11460 {
11461 parity_everywhere = false;
11462 reasons.push(format!("{} has failed benchmark operations", dataset.name));
11463 }
11464 }
11465 let decision = if let Some(reason) = candidate.prototype_hold_reason() {
11466 reasons.push(reason.to_string());
11467 reasons.push(
11468 "current bounded prototype timings are benchmark evidence, not a backend switch approval"
11469 .to_string(),
11470 );
11471 "hold"
11472 } else if parity_everywhere && faster_everywhere {
11473 reasons.push(
11474 "prototype gate passed; production promotion still requires the real engine adapter to preserve SQLite's bundled install and multi-process lock behavior"
11475 .to_string(),
11476 );
11477 "eligible"
11478 } else {
11479 reasons.push(
11480 "production promotion requires SQLite parity plus lower total time for every measured operation on every dataset without worse lock behavior or install portability"
11481 .to_string(),
11482 );
11483 "hold"
11484 };
11485 decisions.push(GraphDbBackendPromotionDecision {
11486 backend: candidate.name().to_string(),
11487 decision: decision.to_string(),
11488 reasons: dedupe_preserve_order(reasons),
11489 gate: candidate.promotion_gate(),
11490 });
11491 }
11492 decisions
11493}
11494
11495pub(crate) fn graph_db_backend_eval_metrics(
11496 datasets: &[GraphDbBackendEvalDataset],
11497) -> BTreeMap<String, f64> {
11498 let mut metrics = BTreeMap::new();
11499 for dataset in datasets {
11500 let graph_rows = graph_db_backend_eval_graph_rows(dataset);
11501 metrics.insert(format!("{}.nodes", dataset.name), dataset.nodes as f64);
11502 metrics.insert(format!("{}.edges", dataset.name), dataset.edges as f64);
11503 metrics.insert(format!("{}.graph_rows", dataset.name), graph_rows as f64);
11504 for backend in &dataset.backends {
11505 let prefix = format!("{}.{}", dataset.name, backend.backend.replace('-', "_"));
11506 metrics.insert(
11507 format!("{prefix}.total_duration_micros"),
11508 backend.total_micros as f64,
11509 );
11510 append_graph_db_backend_eval_normalized_duration_metric(
11511 &mut metrics,
11512 &format!("{prefix}.total_duration_micros_per_1k_graph_rows"),
11513 backend.total_micros,
11514 graph_rows,
11515 );
11516 for operation in &backend.operations {
11517 metrics.insert(
11518 format!("{prefix}.{}.duration_micros", operation.name),
11519 operation.duration_micros as f64,
11520 );
11521 append_graph_db_backend_eval_normalized_duration_metric(
11522 &mut metrics,
11523 &format!(
11524 "{prefix}.{}.duration_micros_per_1k_graph_rows",
11525 operation.name
11526 ),
11527 operation.duration_micros,
11528 graph_rows,
11529 );
11530 if let Some(rows) = operation.rows {
11531 metrics.insert(format!("{prefix}.{}.rows", operation.name), rows as f64);
11532 }
11533 }
11534 }
11535 }
11536 metrics
11537}
11538
11539pub(crate) fn graph_db_backend_eval_graph_rows(dataset: &GraphDbBackendEvalDataset) -> usize {
11540 dataset.nodes + dataset.edges
11541}
11542
11543pub(crate) fn append_graph_db_backend_eval_normalized_duration_metric(
11544 metrics: &mut BTreeMap<String, f64>,
11545 key: &str,
11546 duration_micros: u128,
11547 graph_rows: usize,
11548) {
11549 if graph_rows == 0 {
11550 return;
11551 }
11552 metrics.insert(
11553 key.to_string(),
11554 duration_micros as f64 / graph_rows as f64 * GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT,
11555 );
11556}
11557
11558pub(crate) fn append_graph_db_backend_eval_phase_metrics(
11559 metrics: &mut BTreeMap<String, f64>,
11560 dataset: &str,
11561 graph_rows: usize,
11562 phases: &[GraphDbBackendEvalPhaseTiming],
11563) {
11564 for phase in phases {
11565 metrics.insert(
11566 format!("{dataset}.refresh_phase.{}.duration_micros", phase.name),
11567 phase.duration_micros as f64,
11568 );
11569 append_graph_db_backend_eval_normalized_duration_metric(
11570 metrics,
11571 &format!(
11572 "{dataset}.refresh_phase.{}.duration_micros_per_1k_graph_rows",
11573 phase.name
11574 ),
11575 phase.duration_micros,
11576 graph_rows,
11577 );
11578 }
11579}
11580
11581fn graph_db_backend_eval_base_command(
11582 root: &Path,
11583 scope: Option<&str>,
11584 full_projection: bool,
11585) -> String {
11586 let full_projection_arg = if full_projection {
11587 " --full-projection"
11588 } else {
11589 ""
11590 };
11591 format!(
11592 "tsift graph-db --path {}{} --json backend-eval{}",
11593 shell_quote(root.to_string_lossy().as_ref()),
11594 graph_db_scope_arg(scope),
11595 full_projection_arg
11596 )
11597}
11598
11599pub(crate) fn graph_db_backend_eval_metric_digest_command(
11600 root: &Path,
11601 scope: Option<&str>,
11602 full_projection: bool,
11603) -> String {
11604 format!(
11605 "{} | tsift metric-digest --baseline fixtures/graph-db-performance-history.json",
11606 graph_db_backend_eval_base_command(root, scope, full_projection)
11607 )
11608}
11609
11610fn graph_db_backend_eval_repeated_sample_command(
11611 root: &Path,
11612 scope: Option<&str>,
11613 full_projection: bool,
11614) -> String {
11615 format!(
11616 "for sample in 1 2 3; do {}; done | tsift metric-digest --baseline fixtures/graph-db-performance-history.json",
11617 graph_db_backend_eval_base_command(root, scope, full_projection)
11618 )
11619}
11620
11621fn graph_db_backend_eval_hop_cap_promotion_gate() -> GraphDbHopCapPromotionGate {
11622 let mut required_metrics = Vec::new();
11623 for workload in perf_gate::HOP_CAP_REQUIRED_WORKLOADS {
11624 required_metrics.push(format!("{workload}.sqlite.path_max_hops.duration_micros"));
11625 required_metrics.push(format!("{workload}.sqlite.path_max_hops.rows"));
11626 for hops in perf_gate::HOP_CAP_CANDIDATE_TIERS {
11627 required_metrics.push(format!(
11628 "{workload}.sqlite.path_max_hops_{hops}.duration_micros"
11629 ));
11630 required_metrics.push(format!("{workload}.sqlite.path_max_hops_{hops}.rows"));
11631 }
11632 }
11633 GraphDbHopCapPromotionGate {
11634 status: "hold_64_default_until_gate_passes".to_string(),
11635 current_default_hops: perf_gate::HOP_CAP_CURRENT_DEFAULT,
11636 candidate_hop_tiers: perf_gate::HOP_CAP_CANDIDATE_TIERS.to_vec(),
11637 required_backend: perf_gate::BASELINE_BACKEND.to_string(),
11638 required_workloads: perf_gate::HOP_CAP_REQUIRED_WORKLOADS
11639 .iter()
11640 .map(|workload| (*workload).to_string())
11641 .collect(),
11642 required_metrics,
11643 allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
11644 minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
11645 decision_rule:
11646 "keep 64 as the user-facing default until each candidate tier has repeated real, full_projection, and synthetic_deep_chain SQLite samples within the latency-regression budget and returning useful path rows; full_projection samples are binding only after a cold populate leg proves a cache-hit leg"
11647 .to_string(),
11648 }
11649}
11650
11651fn graph_db_backend_eval_backend_adapter_spike_gate() -> GraphDbBackendAdapterSpikeGate {
11652 let candidate_backends = [
11653 GraphDbExperimentalBackend::Falkordb,
11654 GraphDbExperimentalBackend::Kuzu,
11655 GraphDbExperimentalBackend::Surrealdb,
11656 ]
11657 .into_iter()
11658 .map(|backend| GraphDbBackendAdapterSpikeCandidate {
11659 backend: backend.name().to_string(),
11660 adapter_label: backend.adapter_label().to_string(),
11661 projection_load: backend.projection_load().to_string(),
11662 lock_behavior: backend.lock_behavior().to_string(),
11663 install_portability: backend.install_portability().to_string(),
11664 })
11665 .collect();
11666
11667 GraphDbBackendAdapterSpikeGate {
11668 status: "hold_real_optional_adapter_required".to_string(),
11669 candidate_backends,
11670 required_workloads: perf_gate::GATE_WORKLOAD_PREFIXES
11671 .iter()
11672 .map(|workload| (*workload).to_string())
11673 .collect(),
11674 required_checks: vec![
11675 "real_optional_adapter_behind_graphstore_without_default_build_dependency".to_string(),
11676 "projection_load_writes_provider_neutral_rows_without_sqlite_row_replay".to_string(),
11677 "freshness_and_full_parity_match_sqlite_on_every_graphstore_operation".to_string(),
11678 "lock_semantics_match_or_beat_sqlite_for_writer_and_read_only_workflows".to_string(),
11679 "install_portability_preserves_cargo_build_install_without_external_service_or_native_toolchain"
11680 .to_string(),
11681 "full_projection_cache_hit_sample_before_backend_or_hop_cap_changes".to_string(),
11682 "beats_sqlite_on_every_required_workload_and_metric_in_backend_eval".to_string(),
11683 ],
11684 decision_rule:
11685 "do not promote a read-only prototype; FalkorDB, Kuzu, or SurrealDB can only advance after a real optional adapter proves projection writes/load, lock semantics, install portability, full parity, and faster-than-SQLite results across every required workload"
11686 .to_string(),
11687 evidence_plan: "plans/gback-evidence.md".to_string(),
11688 }
11689}
11690
11691pub(crate) fn graph_db_backend_eval_performance_gate(
11692 root: &Path,
11693 scope: Option<&str>,
11694 full_projection: bool,
11695) -> GraphDbBackendEvalPerformanceGate {
11696 let mut required_metrics = vec![
11697 "real.sqlite.refresh.duration_micros".to_string(),
11698 "real.sqlite.refresh.duration_micros_per_1k_graph_rows".to_string(),
11699 "real.sqlite.edge_lookup.duration_micros_per_1k_graph_rows".to_string(),
11700 "real.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows".to_string(),
11701 "real.sqlite.incident_edges.duration_micros_per_1k_graph_rows".to_string(),
11702 "real.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11703 "real.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows".to_string(),
11704 "real.sqlite.evidence.duration_micros_per_1k_graph_rows".to_string(),
11705 "real.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11706 "real.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows".to_string(),
11707 "real.refresh_phase.sqlite_delta_write.duration_micros".to_string(),
11708 "real.refresh_phase.sqlite_property_row_staging.duration_micros".to_string(),
11709 "real.refresh_phase.sqlite_edge_property_row_staging.duration_micros".to_string(),
11710 "real.sqlite.conflict_matrix.duration_micros".to_string(),
11711 "real.sqlite.dispatch_trace.duration_micros".to_string(),
11712 "real.sqlite.path_max_hops.duration_micros".to_string(),
11713 "real.sqlite.path_max_hops_128.duration_micros".to_string(),
11714 "real.sqlite.path_max_hops_256.duration_micros".to_string(),
11715 "real.sqlite.path_max_hops_512.duration_micros".to_string(),
11716 "real.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows".to_string(),
11717 "real.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows".to_string(),
11718 "real.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows".to_string(),
11719 "synthetic_high_degree.sqlite.total_duration_micros".to_string(),
11720 "synthetic_high_degree.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11721 "synthetic_high_degree.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11722 "synthetic_high_degree.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows"
11723 .to_string(),
11724 "synthetic_high_degree.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows"
11725 .to_string(),
11726 "synthetic_deep_chain.sqlite.incident_edges.duration_micros_per_1k_graph_rows".to_string(),
11727 "synthetic_deep_chain.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11728 "synthetic_deep_chain.sqlite.path_max_hops.duration_micros".to_string(),
11729 "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros".to_string(),
11730 "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros".to_string(),
11731 "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros".to_string(),
11732 "synthetic_deep_chain.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows"
11733 .to_string(),
11734 "synthetic_deep_chain.sqlite.path_max_hops.duration_micros_per_1k_graph_rows".to_string(),
11735 "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows"
11736 .to_string(),
11737 "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows"
11738 .to_string(),
11739 "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows"
11740 .to_string(),
11741 ];
11742 if full_projection {
11743 required_metrics.extend([
11744 "full_projection.cache.hit".to_string(),
11745 "full_projection.cache.disk_bytes".to_string(),
11746 "full_projection.cache.compression_ratio".to_string(),
11747 "full_projection.refresh_phase.cache_lookup.duration_micros".to_string(),
11748 "full_projection.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11749 "full_projection.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows"
11750 .to_string(),
11751 "full_projection.refresh_phase.projection_rows.duration_micros_per_1k_graph_rows"
11752 .to_string(),
11753 "full_projection.sqlite.sqlite_delta_write.duration_micros".to_string(),
11754 "full_projection.sqlite.sqlite_node_staging.duration_micros".to_string(),
11755 "full_projection.sqlite.post_write_reads.duration_micros".to_string(),
11756 "full_projection.sqlite.neighborhood.duration_micros".to_string(),
11757 "full_projection.sqlite.evidence_target_resolution.duration_micros".to_string(),
11758 "full_projection.sqlite.evidence.duration_micros".to_string(),
11759 "full_projection.sqlite.path_max_hops.duration_micros".to_string(),
11760 "full_projection.sqlite.path_max_hops_128.duration_micros".to_string(),
11761 "full_projection.sqlite.path_max_hops_256.duration_micros".to_string(),
11762 "full_projection.sqlite.path_max_hops_512.duration_micros".to_string(),
11763 "full_projection.sqlite.conflict_matrix.duration_micros".to_string(),
11764 "full_projection.sqlite.dispatch_trace.duration_micros".to_string(),
11765 ]);
11766 }
11767 GraphDbBackendEvalPerformanceGate {
11768 baseline_fixture: "fixtures/graph-db-performance-history.json".to_string(),
11769 ci_profile: "synthetic_high_degree + synthetic_deep_chain metrics are CI-safe and bounded"
11770 .to_string(),
11771 opt_in_real_profile:
11772 "pass --full-projection to add the full-project dataset when checking for large projection regressions"
11773 .to_string(),
11774 full_projection_cache_hit_gate: if full_projection {
11775 "binding full_projection performance evidence requires a cold populate leg followed by cache-leg samples with full_projection.cache.hit=1; cache-miss samples are diagnostics, not backend or hop-cap promotion proof"
11776 .to_string()
11777 } else {
11778 "not evaluated until --full-projection is enabled".to_string()
11779 },
11780 allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
11781 minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
11782 normalized_metric_unit: "duration_micros_per_1k_graph_rows".to_string(),
11783 required_metrics,
11784 digest_command: graph_db_backend_eval_metric_digest_command(root, scope, full_projection),
11785 repeated_sample_command: graph_db_backend_eval_repeated_sample_command(
11786 root,
11787 scope,
11788 full_projection,
11789 ),
11790 hop_cap_promotion: graph_db_backend_eval_hop_cap_promotion_gate(),
11791 backend_adapter_spike: graph_db_backend_eval_backend_adapter_spike_gate(),
11792 }
11793}
11794
/// Turns an arbitrary label into a single, safe path component.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character
/// (including path separators) becomes `_`.
///
/// A result that would be empty or consist only of dots (`"."`, `".."`, ...)
/// is not a real directory name: joining it onto a path either adds no level
/// or climbs to the parent. Such results are rewritten so the caller always
/// gets a distinct, non-traversing component: the empty string becomes `"_"`
/// and each dot of an all-dot value becomes `_`.
#[cfg(feature = "backend-surrealdb")]
fn graph_db_backend_eval_path_segment(value: &str) -> String {
    let segment: String = value
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
                ch
            } else {
                '_'
            }
        })
        .collect();

    if segment.is_empty() {
        // An empty component would silently drop one directory level.
        return "_".to_string();
    }
    if segment.chars().all(|ch| ch == '.') {
        // "." and ".." would resolve to the current or parent directory.
        return "_".repeat(segment.len());
    }
    segment
}
11808
/// Returns the on-disk SurrealKV store location for one dataset.
///
/// The path is
/// `<root>/.tsift/backend-eval-cache/surrealdb/<scope>/<dataset>/surrealkv`,
/// where the scope defaults to `root` and both the scope and the dataset name
/// are passed through `graph_db_backend_eval_path_segment`.
#[cfg(feature = "backend-surrealdb")]
fn graph_db_backend_eval_surrealdb_store_path(
    root: &Path,
    scope: Option<&str>,
    dataset: &str,
) -> PathBuf {
    let scope_segment = graph_db_backend_eval_path_segment(scope.unwrap_or("root"));
    let dataset_segment = graph_db_backend_eval_path_segment(dataset);

    let mut store_path = root.join(".tsift/backend-eval-cache/surrealdb");
    store_path.push(scope_segment);
    store_path.push(dataset_segment);
    store_path.push("surrealkv");
    store_path
}
11820
/// Borrowed inputs for a `graph-db backend-eval` run.
///
/// Every field is a shared reference or a `bool`, so the struct is cheap to
/// copy and can be printed for diagnostics.
#[derive(Debug, Clone, Copy)]
pub(crate) struct GraphDbBackendEvalOptions<'a> {
    /// Path the evaluation was invoked with.
    path: &'a Path,
    /// Optional scope label; `None` when no scope was given.
    scope: Option<&'a str>,
    /// Names of the candidate backends to evaluate.
    candidates: &'a [String],
    /// Targets the evaluation should exercise.
    targets: &'a [String],
    /// Whether the full-projection dataset is included.
    full_projection: bool,
}
11828
11829#[allow(clippy::too_many_arguments)]
11830pub(crate) fn graph_db_backend_eval_dataset(
11831 name: &str,
11832 root: &Path,
11833 path: &Path,
11834 scope: Option<&str>,
11835 targets: &[String],
11836 depth: usize,
11837 limit: usize,
11838 impact_limit: usize,
11839 candidates: &[GraphDbExperimentalBackend],
11840 sqlite_store: &SqliteGraphStore,
11841 sqlite_freshness: GraphDbFreshnessReport,
11842 sqlite_refresh: (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature),
11843 sqlite_rows: ConvexProjectionRows,
11844 extra_warnings: Vec<String>,
11845 prepared: &ConflictMatrixPreparedInputs,
11846) -> Result<GraphDbBackendEvalDataset> {
11847 let (nodes, edges) = sqlite_store.graph_counts()?;
11848 let (sqlite_operation, sqlite_signature) = sqlite_refresh;
11849 let (sqlite_report, sqlite_signatures) = graph_db_backend_eval_report_for_store(
11850 "sqlite",
11851 "SQLite GraphStore correctness baseline",
11852 false,
11853 root,
11854 path,
11855 scope,
11856 targets,
11857 depth,
11858 limit,
11859 impact_limit,
11860 sqlite_store,
11861 sqlite_freshness,
11862 sqlite_operation,
11863 Some(sqlite_signature),
11864 None,
11865 extra_warnings.clone(),
11866 prepared,
11867 "SQLite refresh writes provider-neutral projection rows into graph.db transactionally",
11868 "SQLite WAL correctness store; refresh uses one transactional writer and read-only queries use snapshot recovery",
11869 "bundled rusqlite baseline; no external service or runtime required",
11870 );
11871
11872 let mut backends = vec![sqlite_report];
11873 for candidate in candidates {
11874 #[cfg(feature = "backend-surrealdb")]
11875 if *candidate == GraphDbExperimentalBackend::Surrealdb {
11876 let started = Instant::now();
11877 let store_path = graph_db_backend_eval_surrealdb_store_path(root, scope, name);
11878 let (store, warm_start) =
11879 SurrealdbGraphStore::open_or_refresh(&store_path, &sqlite_rows)?;
11880 let (candidate_nodes, candidate_edges) = store.graph_counts()?;
11881 let rows = candidate_nodes + candidate_edges;
11882 let mut refresh_meta = serde_json::json!({
11883 "nodes": candidate_nodes,
11884 "edges": candidate_edges,
11885 });
11886 if warm_start == tsift_surrealdb::WarmStartOutcome::CacheHit {
11887 refresh_meta["warm_start"] = serde_json::json!("cache_hit");
11888 }
11889 let refresh = graph_db_backend_eval_refresh_operation(
11890 started.elapsed().as_micros(),
11891 rows,
11892 refresh_meta,
11893 );
11894 let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
11895 let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
11896 candidate.name(),
11897 "SurrealDB SurrealKV optional adapter spike",
11898 false,
11899 root,
11900 path,
11901 scope,
11902 targets,
11903 depth,
11904 limit,
11905 impact_limit,
11906 &store,
11907 freshness,
11908 refresh.0,
11909 Some(refresh.1),
11910 Some(&sqlite_signatures),
11911 extra_warnings.clone(),
11912 prepared,
11913 "provider-neutral rows written into an embedded/file-backed SurrealDB SurrealKV store through the optional tsift-surrealdb adapter; warm-start reuses existing store when row hash matches",
11914 "embedded/file-backed writer through SurrealDB SurrealKV rewrites backend-eval rows before read-only measurements; promotion still requires multi-process/read-only contention samples",
11915 "feature-gated optional tsift-surrealdb crate; default cargo build/install does not pull SurrealDB into the dependency graph",
11916 );
11917 backends.push(candidate_report);
11918 continue;
11919 }
11920 let started = Instant::now();
11921 let store = ExperimentalReadOnlyGraphStore::from_rows(*candidate, &sqlite_rows)?;
11922 let (candidate_nodes, candidate_edges) = store.graph_counts()?;
11923 let rows = candidate_nodes + candidate_edges;
11924 let refresh = graph_db_backend_eval_refresh_operation(
11925 started.elapsed().as_micros(),
11926 rows,
11927 serde_json::json!({
11928 "nodes": candidate_nodes,
11929 "edges": candidate_edges,
11930 }),
11931 );
11932 let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
11933 let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
11934 candidate.name(),
11935 candidate.adapter_label(),
11936 true,
11937 root,
11938 path,
11939 scope,
11940 targets,
11941 depth,
11942 limit,
11943 impact_limit,
11944 &store,
11945 freshness,
11946 refresh.0,
11947 Some(refresh.1),
11948 Some(&sqlite_signatures),
11949 extra_warnings.clone(),
11950 prepared,
11951 candidate.projection_load(),
11952 candidate.lock_behavior(),
11953 candidate.install_portability(),
11954 );
11955 backends.push(candidate_report);
11956 }
11957
11958 Ok(GraphDbBackendEvalDataset {
11959 name: name.to_string(),
11960 target_count: targets.len(),
11961 nodes,
11962 edges,
11963 backends,
11964 })
11965}
11966
11967pub(crate) fn print_graph_db_backend_eval_human(report: &GraphDbBackendEvalReport) {
11968 println!(
11969 "graph-db backend-eval baseline:{} candidates:{}",
11970 report.baseline_backend,
11971 report.candidates.join(", ")
11972 );
11973 for phase in &report.phase_timings {
11974 println!(
11975 "phase:{} {}us {}",
11976 phase.name, phase.duration_micros, phase.detail
11977 );
11978 }
11979 for dataset in &report.datasets {
11980 println!(
11981 "dataset:{} targets:{} rows:{}",
11982 dataset.name,
11983 dataset.target_count,
11984 dataset.nodes + dataset.edges
11985 );
11986 for backend in &dataset.backends {
11987 println!(
11988 " backend:{} total:{}us parity:{}",
11989 backend.backend, backend.total_micros, backend.parity.matches_sqlite
11990 );
11991 println!(" projection-load: {}", backend.projection_load);
11992 println!(" lock-behavior: {}", backend.lock_behavior);
11993 println!(" install-portability: {}", backend.install_portability);
11994 for operation in &backend.operations {
11995 println!(
11996 " {} {} {}us",
11997 operation.name, operation.status, operation.duration_micros
11998 );
11999 }
12000 for diagnostic in &backend.parity.diagnostics {
12001 println!(" parity: {diagnostic}");
12002 }
12003 }
12004 }
12005 for decision in &report.promotion {
12006 println!("promotion {}: {}", decision.backend, decision.decision);
12007 println!(" gate: {}", decision.gate.status);
12008 for reason in &decision.reasons {
12009 println!(" reason: {reason}");
12010 }
12011 for check in &decision.gate.required_checks {
12012 println!(" check: {check}");
12013 }
12014 }
12015 println!("metric-digest: {}", report.metric_digest_command);
12016 println!(
12017 "repeat-samples: {}",
12018 report.performance_gate.repeated_sample_command
12019 );
12020}
12021
12022fn traversal_expand_command(root: &Path, handle: &str) -> String {
12023 format!(
12024 "tsift traverse {} --path {} --depth 1 --limit 50",
12025 shell_quote(handle),
12026 shell_quote(root.to_string_lossy().as_ref())
12027 )
12028}
12029
12030fn traversal_file_node(root: &Path, file: &str) -> TraversalNode {
12031 let display = relativize(file, root);
12032 let handle = stable_handle("gfil", &format!("file:{display}"));
12033 TraversalNode {
12034 handle: handle.clone(),
12035 kind: "file".to_string(),
12036 label: display.clone(),
12037 ref_id: Some(display.clone()),
12038 path: Some(display),
12039 line: None,
12040 detail: None,
12041 properties: BTreeMap::new(),
12042 expand: traversal_expand_command(root, &handle),
12043 }
12044}
12045
12046fn traversal_raw_source_file_node(root: &Path, file: &str) -> TraversalNode {
12047 let mut node = traversal_file_node(root, file);
12048 if let Some(path) = node.path.clone() {
12049 node.detail = Some("raw source fallback; graph evidence unavailable".to_string());
12050 node.expand = source_read_command(root, &path, 1, 80);
12051 }
12052 node
12053}
12054
12055fn traversal_symbol_node(root: &Path, symbol: &index::StoredSymbol) -> TraversalNode {
12056 let file = relativize(&symbol.file, root);
12057 let key = format!("symbol:{file}:{}:{}", symbol.line, symbol.name);
12058 let handle = stable_handle("gsym", &key);
12059 TraversalNode {
12060 handle: handle.clone(),
12061 kind: "symbol".to_string(),
12062 label: symbol.name.clone(),
12063 ref_id: Some(symbol.name.clone()),
12064 path: Some(file),
12065 line: Some(symbol.line),
12066 detail: Some(format!("{} {}", symbol.language, symbol.kind)),
12067 properties: BTreeMap::new(),
12068 expand: traversal_expand_command(root, &handle),
12069 }
12070}
12071
12072fn traversal_ast_span_expand_command(
12073 root: &Path,
12074 file: &str,
12075 symbol: &index::StoredSymbol,
12076 span: &AstSpanPreview,
12077) -> String {
12078 if symbol.language == "markdown" {
12079 markdown_ast_command(root, file, Some(&span.handle))
12080 } else {
12081 let line_count = span
12082 .end_line
12083 .saturating_sub(span.start_line)
12084 .saturating_add(1)
12085 .max(1);
12086 source_read_command(root, file, span.start_line, line_count)
12087 }
12088}
12089
12090fn traversal_ast_span_node(
12091 root: &Path,
12092 symbol: &index::StoredSymbol,
12093 source: &[u8],
12094 symbols: &[index::StoredSymbol],
12095) -> Option<(TraversalNode, TraversalAstSpanIndexEntry)> {
12096 let span = stored_symbol_ast_span(symbol, source, symbols, usize::MAX)?;
12097 let file = relativize(&symbol.file, root);
12098 let mut properties = BTreeMap::new();
12099 properties.insert("layer".to_string(), "ast_navigation".to_string());
12100 properties.insert("language".to_string(), symbol.language.clone());
12101 properties.insert("symbol_kind".to_string(), symbol.kind.clone());
12102 properties.insert("node_kind".to_string(), span.node_kind.clone());
12103 properties.insert("start_byte".to_string(), span.start_byte.to_string());
12104 properties.insert("end_byte".to_string(), span.end_byte.to_string());
12105 properties.insert("end_line".to_string(), span.end_line.to_string());
12106 if let Some(body_start_byte) = span.body_start_byte {
12107 properties.insert("body_start_byte".to_string(), body_start_byte.to_string());
12108 }
12109 if let Some(body_end_byte) = span.body_end_byte {
12110 properties.insert("body_end_byte".to_string(), body_end_byte.to_string());
12111 }
12112 if let Some(body_start_line) = span.body_start_line {
12113 properties.insert("body_start_line".to_string(), body_start_line.to_string());
12114 }
12115 if let Some(body_end_line) = span.body_end_line {
12116 properties.insert("body_end_line".to_string(), body_end_line.to_string());
12117 }
12118 if let Some(parent_handle) = &span.parent_handle {
12119 properties.insert("parent_handle".to_string(), parent_handle.clone());
12120 }
12121 if !span.child_handles.is_empty() {
12122 properties.insert("child_handles".to_string(), span.child_handles.join(","));
12123 }
12124 if let Some(parent_module) = &symbol.parent_module {
12125 properties.insert("parent_module".to_string(), parent_module.clone());
12126 }
12127 if let Some(markdown) = &span.markdown {
12128 properties.insert(
12129 "markdown_block_kind".to_string(),
12130 markdown_ast_block_kind(&symbol.kind),
12131 );
12132 if let Some(heading_level) = markdown.heading_level {
12133 properties.insert("heading_level".to_string(), heading_level.to_string());
12134 }
12135 if !markdown.section_path.is_empty() {
12136 properties.insert(
12137 "section_path".to_string(),
12138 markdown.section_path.join(" > "),
12139 );
12140 }
12141 if let Some(section_handle) = &markdown.section_handle {
12142 properties.insert("section_handle".to_string(), section_handle.clone());
12143 }
12144 if let Some(list_depth) = markdown.list_depth {
12145 properties.insert("list_depth".to_string(), list_depth.to_string());
12146 }
12147 if let Some(fence_language) = &markdown.fence_language {
12148 properties.insert("fence_language".to_string(), fence_language.clone());
12149 }
12150 }
12151
12152 let line = i64::try_from(span.start_line).unwrap_or(i64::MAX);
12153 let node = TraversalNode {
12154 handle: span.handle.clone(),
12155 kind: "ast_span".to_string(),
12156 label: symbol.name.clone(),
12157 ref_id: Some(symbol.name.clone()),
12158 path: Some(file.clone()),
12159 line: Some(line),
12160 detail: Some(format!("{} {} AST span", symbol.language, symbol.kind)),
12161 properties,
12162 expand: traversal_ast_span_expand_command(root, &file, symbol, &span),
12163 };
12164 let entry = TraversalAstSpanIndexEntry {
12165 handle: span.handle,
12166 symbol_handle: String::new(),
12167 file_handle: None,
12168 file,
12169 name: symbol.name.clone(),
12170 kind: symbol.kind.clone(),
12171 language: symbol.language.clone(),
12172 node_kind: span.node_kind,
12173 start_byte: span.start_byte,
12174 end_byte: span.end_byte,
12175 parent_module: symbol.parent_module.clone(),
12176 markdown: span.markdown,
12177 };
12178 Some((node, entry))
12179}
12180
12181fn traversal_unresolved_symbol_node(root: &Path, name: &str) -> TraversalNode {
12182 let handle = stable_handle("gsym", &format!("symbol:{name}"));
12183 TraversalNode {
12184 handle: handle.clone(),
12185 kind: "symbol".to_string(),
12186 label: name.to_string(),
12187 ref_id: Some(name.to_string()),
12188 path: None,
12189 line: None,
12190 detail: Some("unresolved call target".to_string()),
12191 properties: BTreeMap::new(),
12192 expand: traversal_expand_command(root, &handle),
12193 }
12194}
12195
12196fn traversal_route_node(root: &Path, route: &index::StoredRoute) -> TraversalNode {
12197 let file = relativize(&route.file, root);
12198 let method = route.method.as_deref().unwrap_or("any");
12199 let key = format!(
12200 "route:{file}:{}:{}:{}",
12201 route.line, method, route.route_path
12202 );
12203 let handle = stable_handle("grte", &key);
12204 TraversalNode {
12205 handle: handle.clone(),
12206 kind: "route".to_string(),
12207 label: format!("{} {}", method.to_uppercase(), route.route_path),
12208 ref_id: Some(route.route_path.clone()),
12209 path: Some(file),
12210 line: Some(route.line),
12211 detail: Some(format!(
12212 "{} route handled by {}",
12213 route.framework, route.handler_name
12214 )),
12215 properties: BTreeMap::new(),
12216 expand: traversal_expand_command(root, &handle),
12217 }
12218}
12219
12220fn traversal_cargo_workspace_node(
12221 root: &Path,
12222 workspace: &multiplicity::CargoWorkspaceInfo,
12223) -> TraversalNode {
12224 let manifest = relativize_pathbuf(&workspace.manifest_path, root)
12225 .to_string_lossy()
12226 .replace('\\', "/");
12227 let workspace_root = relativize_pathbuf(&workspace.workspace_root, root)
12228 .to_string_lossy()
12229 .replace('\\', "/");
12230 let handle = stable_handle("gcwk", &format!("cargo-workspace:{manifest}"));
12231 let mut properties = BTreeMap::new();
12232 properties.insert("layer".to_string(), "cargo_workspace".to_string());
12233 properties.insert("workspace_root".to_string(), workspace_root.clone());
12234 properties.insert("members".to_string(), workspace.members.join(","));
12235 properties.insert(
12236 "default_members".to_string(),
12237 workspace.default_members.join(","),
12238 );
12239 TraversalNode {
12240 handle: handle.clone(),
12241 kind: "cargo_workspace".to_string(),
12242 label: if workspace_root.is_empty() {
12243 "root cargo workspace".to_string()
12244 } else {
12245 workspace_root
12246 },
12247 ref_id: Some(workspace.id.clone()),
12248 path: Some(manifest),
12249 line: None,
12250 detail: Some("Cargo workspace manifest".to_string()),
12251 properties,
12252 expand: traversal_expand_command(root, &handle),
12253 }
12254}
12255
12256fn traversal_cargo_package_node(
12257 root: &Path,
12258 package: &multiplicity::CargoPackageInfo,
12259) -> TraversalNode {
12260 let manifest = relativize_pathbuf(&package.manifest_path, root)
12261 .to_string_lossy()
12262 .replace('\\', "/");
12263 let package_root = relativize_pathbuf(&package.package_root, root)
12264 .to_string_lossy()
12265 .replace('\\', "/");
12266 let workspace_root = relativize_pathbuf(&package.workspace_root, root)
12267 .to_string_lossy()
12268 .replace('\\', "/");
12269 let handle = stable_handle(
12270 "gcpk",
12271 &format!("cargo-package:{manifest}:{}", package.name),
12272 );
12273 let mut properties = BTreeMap::new();
12274 properties.insert("layer".to_string(), "cargo_package".to_string());
12275 properties.insert("package_name".to_string(), package.name.clone());
12276 properties.insert(
12277 "normalized_name".to_string(),
12278 package.normalized_name.clone(),
12279 );
12280 properties.insert("package_root".to_string(), package_root.clone());
12281 properties.insert("workspace_root".to_string(), workspace_root);
12282 properties.insert("features".to_string(), package.features.join(","));
12283 properties.insert("targets".to_string(), package.targets.join(","));
12284 properties.insert(
12285 "dependencies".to_string(),
12286 package
12287 .dependencies
12288 .iter()
12289 .map(|dependency| format!("{}:{}", dependency.kind, dependency.name))
12290 .collect::<Vec<_>>()
12291 .join(","),
12292 );
12293 TraversalNode {
12294 handle: handle.clone(),
12295 kind: "cargo_package".to_string(),
12296 label: package.name.clone(),
12297 ref_id: Some(package.scope_id.clone()),
12298 path: Some(manifest),
12299 line: None,
12300 detail: Some(format!(
12301 "Cargo package in {}",
12302 if package_root.is_empty() {
12303 "."
12304 } else {
12305 package_root.as_str()
12306 }
12307 )),
12308 properties,
12309 expand: traversal_expand_command(root, &handle),
12310 }
12311}
12312
12313fn traversal_session_node(
12314 root: &Path,
12315 markdown_path: &Path,
12316 session_id: Option<&str>,
12317) -> TraversalNode {
12318 let display = relativize_pathbuf(markdown_path, root)
12319 .to_string_lossy()
12320 .replace('\\', "/");
12321 let handle = stable_handle("gses", &format!("session:{display}"));
12322 TraversalNode {
12323 handle: handle.clone(),
12324 kind: "session".to_string(),
12325 label: session_id.unwrap_or(&display).to_string(),
12326 ref_id: session_id.map(str::to_string),
12327 path: Some(display),
12328 line: None,
12329 detail: Some("agent-doc session artifact".to_string()),
12330 properties: BTreeMap::new(),
12331 expand: traversal_expand_command(root, &handle),
12332 }
12333}
12334
12335fn traversal_backlog_node(
12336 root: &Path,
12337 markdown_path: &Path,
12338 id: &str,
12339 text: &str,
12340 line: i64,
12341) -> TraversalNode {
12342 let display = relativize_pathbuf(markdown_path, root)
12343 .to_string_lossy()
12344 .replace('\\', "/");
12345 let handle = stable_handle("gbak", &format!("backlog:{display}:#{id}"));
12346 TraversalNode {
12347 handle: handle.clone(),
12348 kind: "backlog".to_string(),
12349 label: format!("#{id}"),
12350 ref_id: Some(id.to_string()),
12351 path: Some(display),
12352 line: Some(line),
12353 detail: Some(text.to_string()),
12354 properties: BTreeMap::new(),
12355 expand: traversal_expand_command(root, &handle),
12356 }
12357}
12358
12359fn traversal_job_packet_node(
12360 root: &Path,
12361 markdown_path: &Path,
12362 label: &str,
12363 ref_id: Option<&str>,
12364 detail: &str,
12365 line: i64,
12366) -> TraversalNode {
12367 let display = relativize_pathbuf(markdown_path, root)
12368 .to_string_lossy()
12369 .replace('\\', "/");
12370 let handle = stable_handle("gjob", &format!("job:{display}:{line}:{label}"));
12371 TraversalNode {
12372 handle: handle.clone(),
12373 kind: "job_packet".to_string(),
12374 label: label.to_string(),
12375 ref_id: ref_id.map(str::to_string),
12376 path: Some(display),
12377 line: Some(line),
12378 detail: Some(detail.to_string()),
12379 properties: BTreeMap::new(),
12380 expand: traversal_expand_command(root, &handle),
12381 }
12382}
12383
/// One worker outcome extracted from a single line of session Markdown.
#[derive(Clone, Debug)]
struct ParsedWorkerResult {
    /// Identifier of the work item the result refers to.
    id: String,
    /// Outcome keyword for the item (for example `completed` or `blocked`).
    status: String,
    /// Paths of files associated with the result line.
    touched_files: Vec<String>,
    /// Test commands associated with the result line.
    tests: Vec<String>,
    /// Identifiers of other work items referenced as follow-ups.
    follow_up_ids: Vec<String>,
}
12392
12393fn traversal_worker_result_node(
12394 root: &Path,
12395 markdown_path: &Path,
12396 parsed: &ParsedWorkerResult,
12397 line_text: &str,
12398 line: i64,
12399) -> TraversalNode {
12400 let display = relativize_pathbuf(markdown_path, root)
12401 .to_string_lossy()
12402 .replace('\\', "/");
12403 let handle = stable_handle(
12404 "wres",
12405 &format!(
12406 "worker-result:{display}:{}:{}:{}",
12407 parsed.id, parsed.status, line
12408 ),
12409 );
12410 let mut properties = BTreeMap::new();
12411 properties.insert("status".to_string(), parsed.status.clone());
12412 if !parsed.touched_files.is_empty() {
12413 properties.insert("touched_files".to_string(), parsed.touched_files.join(","));
12414 }
12415 if !parsed.tests.is_empty() {
12416 properties.insert("expected_tests".to_string(), parsed.tests.join(" && "));
12417 }
12418 if !parsed.follow_up_ids.is_empty() {
12419 properties.insert("follow_up_ids".to_string(), parsed.follow_up_ids.join(","));
12420 }
12421 TraversalNode {
12422 handle: handle.clone(),
12423 kind: "worker_result".to_string(),
12424 label: format!("{} #{}", parsed.status, parsed.id),
12425 ref_id: Some(parsed.id.clone()),
12426 path: Some(display),
12427 line: Some(line),
12428 detail: Some(line_text.trim().to_string()),
12429 properties,
12430 expand: traversal_expand_command(root, &handle),
12431 }
12432}
12433
/// Splits `input` into lowercase search tokens.
///
/// A token is a maximal run of ASCII letters/digits at least three bytes long;
/// every other character (including `_` and `-`) acts as a separator.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for word in input.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if word.len() >= 3 {
            tokens.insert(word.to_ascii_lowercase());
        }
    }
    tokens
}
12443
12444fn traversal_ast_span_contains(
12445 parent: &TraversalAstSpanIndexEntry,
12446 child: &TraversalAstSpanIndexEntry,
12447) -> bool {
12448 parent.handle != child.handle
12449 && parent.file == child.file
12450 && parent.start_byte <= child.start_byte
12451 && parent.end_byte >= child.end_byte
12452}
12453
12454fn traversal_ast_parent_handle<'a>(
12455 entry: &TraversalAstSpanIndexEntry,
12456 entries: &'a [TraversalAstSpanIndexEntry],
12457) -> Option<&'a str> {
12458 entries
12459 .iter()
12460 .filter(|candidate| traversal_ast_span_contains(candidate, entry))
12461 .min_by_key(|candidate| {
12462 (
12463 candidate.end_byte.saturating_sub(candidate.start_byte),
12464 candidate.start_byte,
12465 candidate.end_byte,
12466 candidate.kind.as_str(),
12467 candidate.name.as_str(),
12468 candidate.node_kind.as_str(),
12469 )
12470 })
12471 .map(|candidate| candidate.handle.as_str())
12472}
12473
12474fn traversal_ast_enclosing_module_handle<'a>(
12475 entry: &TraversalAstSpanIndexEntry,
12476 entries_by_handle: &'a BTreeMap<String, TraversalAstSpanIndexEntry>,
12477 parent_by_handle: &BTreeMap<String, String>,
12478) -> Option<&'a str> {
12479 let mut current = parent_by_handle.get(&entry.handle);
12480 while let Some(handle) = current {
12481 let Some(parent) = entries_by_handle.get(handle) else {
12482 break;
12483 };
12484 if matches!(parent.kind.as_str(), "module" | "mod")
12485 || entry
12486 .parent_module
12487 .as_deref()
12488 .is_some_and(|module| module == parent.name)
12489 {
12490 return Some(parent.handle.as_str());
12491 }
12492 current = parent_by_handle.get(&parent.handle);
12493 }
12494 None
12495}
12496
12497fn link_ast_navigation_edges(
12498 graph: &mut TraversalGraphBuild,
12499 entries: &[TraversalAstSpanIndexEntry],
12500) {
12501 let mut entries_by_file = BTreeMap::<String, Vec<TraversalAstSpanIndexEntry>>::new();
12502 let entries_by_handle = entries
12503 .iter()
12504 .map(|entry| (entry.handle.clone(), entry.clone()))
12505 .collect::<BTreeMap<_, _>>();
12506 let mut parent_by_handle = BTreeMap::<String, String>::new();
12507 let mut children_by_parent = BTreeMap::<Option<String>, Vec<TraversalAstSpanIndexEntry>>::new();
12508
12509 for entry in entries {
12510 entries_by_file
12511 .entry(entry.file.clone())
12512 .or_default()
12513 .push(entry.clone());
12514 }
12515
12516 for file_entries in entries_by_file.values() {
12517 for entry in file_entries {
12518 let parent = traversal_ast_parent_handle(entry, file_entries).map(str::to_string);
12519 if let Some(parent) = &parent {
12520 parent_by_handle.insert(entry.handle.clone(), parent.clone());
12521 }
12522 let sibling_key = parent.clone().or_else(|| entry.file_handle.clone());
12523 children_by_parent
12524 .entry(sibling_key)
12525 .or_default()
12526 .push(entry.clone());
12527 }
12528 }
12529
12530 for entry in entries {
12531 let parent = parent_by_handle.get(&entry.handle);
12532 if let Some(parent) = parent {
12533 graph.add_edge(
12534 parent,
12535 &entry.handle,
12536 "contains",
12537 Some("AST parent contains child span".to_string()),
12538 1,
12539 );
12540 graph.add_edge(
12541 parent,
12542 &entry.handle,
12543 "child",
12544 Some("AST child span".to_string()),
12545 1,
12546 );
12547 graph.add_edge(
12548 &entry.handle,
12549 parent,
12550 "parent",
12551 Some("AST parent span".to_string()),
12552 1,
12553 );
12554 } else if let Some(file_handle) = &entry.file_handle {
12555 graph.add_edge(
12556 file_handle,
12557 &entry.handle,
12558 "contains",
12559 Some("file contains top-level AST span".to_string()),
12560 1,
12561 );
12562 }
12563
12564 if let Some(module_handle) =
12565 traversal_ast_enclosing_module_handle(entry, &entries_by_handle, &parent_by_handle)
12566 {
12567 graph.add_edge(
12568 &entry.handle,
12569 module_handle,
12570 "enclosing_module",
12571 Some("nearest enclosing module AST span".to_string()),
12572 1,
12573 );
12574 }
12575
12576 if entry.language == "markdown"
12577 && let Some(markdown) = &entry.markdown
12578 && let Some(section_handle) = &markdown.section_handle
12579 && section_handle != &entry.handle
12580 {
12581 graph.add_edge(
12582 section_handle,
12583 &entry.handle,
12584 "contains_markdown_block",
12585 Some("Markdown section contains block".to_string()),
12586 1,
12587 );
12588 graph.add_edge(
12589 &entry.handle,
12590 section_handle,
12591 "enclosing_section",
12592 Some("Markdown enclosing section".to_string()),
12593 1,
12594 );
12595 }
12596 }
12597
12598 for siblings in children_by_parent.values_mut() {
12599 siblings.sort_by(|left, right| {
12600 left.start_byte
12601 .cmp(&right.start_byte)
12602 .then(left.end_byte.cmp(&right.end_byte))
12603 .then(left.kind.cmp(&right.kind))
12604 .then(left.name.cmp(&right.name))
12605 .then(left.node_kind.cmp(&right.node_kind))
12606 .then(left.handle.cmp(&right.handle))
12607 });
12608 for pair in siblings.windows(2) {
12609 let previous = &pair[0];
12610 let next = &pair[1];
12611 graph.add_edge(
12612 &previous.handle,
12613 &next.handle,
12614 "next_sibling",
12615 Some("next AST sibling span".to_string()),
12616 1,
12617 );
12618 graph.add_edge(
12619 &next.handle,
12620 &previous.handle,
12621 "previous_sibling",
12622 Some("previous AST sibling span".to_string()),
12623 1,
12624 );
12625 }
12626 }
12627}
12628
12629fn traversal_markdown_embedded_symbol_node(
12630 root: &Path,
12631 entry: &TraversalAstSpanIndexEntry,
12632 markdown: &MarkdownSpanMetadata,
12633 embedded: &MarkdownEmbeddedSymbol,
12634) -> TraversalNode {
12635 let mut properties = BTreeMap::new();
12636 properties.insert("layer".to_string(), "embedded_code".to_string());
12637 properties.insert("embedded".to_string(), "true".to_string());
12638 properties.insert("language".to_string(), embedded.language.clone());
12639 properties.insert("symbol_kind".to_string(), embedded.kind.clone());
12640 properties.insert("node_kind".to_string(), embedded.node_kind.clone());
12641 properties.insert("start_byte".to_string(), embedded.start_byte.to_string());
12642 properties.insert("end_byte".to_string(), embedded.end_byte.to_string());
12643 properties.insert("end_line".to_string(), embedded.end_line.to_string());
12644 properties.insert("markdown_block_handle".to_string(), entry.handle.clone());
12645 properties.insert(
12646 "markdown_block_kind".to_string(),
12647 markdown_ast_block_kind(&entry.kind),
12648 );
12649 if let Some(body_start_byte) = embedded.body_start_byte {
12650 properties.insert("body_start_byte".to_string(), body_start_byte.to_string());
12651 }
12652 if let Some(body_end_byte) = embedded.body_end_byte {
12653 properties.insert("body_end_byte".to_string(), body_end_byte.to_string());
12654 }
12655 if let Some(body_start_line) = embedded.body_start_line {
12656 properties.insert("body_start_line".to_string(), body_start_line.to_string());
12657 }
12658 if let Some(body_end_line) = embedded.body_end_line {
12659 properties.insert("body_end_line".to_string(), body_end_line.to_string());
12660 }
12661 if let Some(fence_language) = &markdown.fence_language {
12662 properties.insert("fence_language".to_string(), fence_language.clone());
12663 }
12664 if !markdown.section_path.is_empty() {
12665 properties.insert(
12666 "section_path".to_string(),
12667 markdown.section_path.join(" > "),
12668 );
12669 }
12670 if let Some(section_handle) = &markdown.section_handle {
12671 properties.insert("section_handle".to_string(), section_handle.clone());
12672 }
12673 let line_count = embedded
12674 .end_line
12675 .saturating_sub(embedded.start_line)
12676 .saturating_add(1)
12677 .max(1);
12678 TraversalNode {
12679 handle: embedded.handle.clone(),
12680 kind: "ast_span".to_string(),
12681 label: embedded.name.clone(),
12682 ref_id: Some(embedded.name.clone()),
12683 path: Some(entry.file.clone()),
12684 line: Some(i64::try_from(embedded.start_line).unwrap_or(i64::MAX)),
12685 detail: Some(format!(
12686 "{} {} embedded in Markdown fence",
12687 embedded.language, embedded.kind
12688 )),
12689 properties,
12690 expand: source_read_command(root, &entry.file, embedded.start_line, line_count),
12691 }
12692}
12693
12694fn link_markdown_embedded_code_edges(
12695 graph: &mut TraversalGraphBuild,
12696 root: &Path,
12697 entries: &[TraversalAstSpanIndexEntry],
12698) {
12699 for entry in entries {
12700 let Some(markdown) = &entry.markdown else {
12701 continue;
12702 };
12703 for embedded in &markdown.embedded_symbols {
12704 let node = traversal_markdown_embedded_symbol_node(root, entry, markdown, embedded);
12705 graph.add_node(node);
12706 graph.add_edge(
12707 &entry.handle,
12708 &embedded.handle,
12709 "contains",
12710 Some("Markdown fence contains embedded AST symbol".to_string()),
12711 1,
12712 );
12713 graph.add_edge(
12714 &entry.handle,
12715 &embedded.handle,
12716 "child",
12717 Some("embedded code symbol".to_string()),
12718 1,
12719 );
12720 graph.add_edge(
12721 &entry.handle,
12722 &embedded.handle,
12723 "contains_embedded_symbol",
12724 Some("Markdown fence contains embedded code symbol".to_string()),
12725 1,
12726 );
12727 graph.add_edge(
12728 &embedded.handle,
12729 &entry.handle,
12730 "parent",
12731 Some("Markdown fence parent span".to_string()),
12732 1,
12733 );
12734 graph.add_edge(
12735 &embedded.handle,
12736 &entry.handle,
12737 "embedded_in_fence",
12738 Some("embedded code symbol belongs to Markdown fence".to_string()),
12739 1,
12740 );
12741 if let Some(section_handle) = &markdown.section_handle
12742 && section_handle != &entry.handle
12743 {
12744 graph.add_edge(
12745 section_handle,
12746 &embedded.handle,
12747 "contains_embedded_code",
12748 Some("Markdown section contains embedded code symbol".to_string()),
12749 1,
12750 );
12751 graph.add_edge(
12752 &embedded.handle,
12753 section_handle,
12754 "enclosing_section",
12755 Some("Markdown enclosing section".to_string()),
12756 1,
12757 );
12758 }
12759 }
12760 }
12761}
12762
12763fn traversal_node_tokens(node: &TraversalNode) -> BTreeSet<String> {
12764 let mut tokens = traversal_tokens(&node.label);
12765 if let Some(ref_id) = &node.ref_id {
12766 tokens.extend(traversal_tokens(ref_id));
12767 }
12768 if let Some(path) = &node.path {
12769 tokens.extend(traversal_tokens(path));
12770 }
12771 if let Some(detail) = &node.detail {
12772 tokens.extend(traversal_tokens(detail));
12773 }
12774 tokens
12775}
12776
/// Returns the value of the first `agent_doc_session:` line that has a non-empty value.
///
/// Lines are trimmed before matching, and the value is trimmed before it is returned.
fn parse_agent_doc_session_id(content: &str) -> Option<String> {
    for line in content.lines() {
        let Some(value) = line.trim().strip_prefix("agent_doc_session:") else {
            continue;
        };
        let value = value.trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}
12787
/// Parses a checklist line such as `- [ ] [#id] text` into `(id, text)`.
///
/// The trimmed line must start with `- [`. The id is the trimmed text between the
/// first `[#` and the next `]`; the text is everything after that `]`, trimmed.
/// Returns `None` when the markers are missing or the id is empty.
fn parse_backlog_line(line: &str) -> Option<(String, String)> {
    let item = line.trim();
    if !item.starts_with("- [") {
        return None;
    }
    let (_, after_marker) = item.split_once("[#")?;
    let (raw_id, remainder) = after_marker.split_once(']')?;
    let id = raw_id.trim();
    if id.is_empty() {
        return None;
    }
    Some((id.to_string(), remainder.trim().to_string()))
}
12804
/// Extracts the argument of a `dispatch <x>` or `preset <x>` line.
///
/// The line is trimmed first; the argument is the trimmed remainder after the
/// keyword. Returns `None` when neither keyword yields a non-empty argument.
fn parse_queue_dispatch_line(line: &str) -> Option<String> {
    let trimmed = line.trim();
    for keyword in ["dispatch ", "preset "] {
        if let Some(argument) = trimmed.strip_prefix(keyword) {
            let argument = argument.trim();
            if !argument.is_empty() {
                return Some(argument.to_string());
            }
        }
    }
    None
}
12815
/// Extracts the id from a queue line of the form `- do [#id] ...`.
///
/// Returns `None` when the trimmed line lacks the `- do [#` prefix, has no closing
/// `]`, or the id is empty after trimming.
fn parse_queue_do_line(line: &str) -> Option<String> {
    let after_prefix = line.trim().strip_prefix("- do [#")?;
    let (raw_id, _) = after_prefix.split_once(']')?;
    let id = raw_id.trim();
    if id.is_empty() {
        None
    } else {
        Some(id.to_string())
    }
}
12823
/// Returns the trimmed, non-empty contents of backtick-delimited spans in `input`.
///
/// The text is split on every backtick and each odd-numbered piece is taken as a
/// span, so text after an unpaired final backtick is also returned.
fn markdown_code_spans(input: &str) -> Vec<String> {
    let mut spans = Vec::new();
    for segment in input.split('`').skip(1).step_by(2) {
        let code = segment.trim();
        if !code.is_empty() {
            spans.push(code.to_string());
        }
    }
    spans
}
12833
/// Records `entry_index` under every token in `tokens` in the inverted `index`.
fn push_traversal_token_index(
    index: &mut HashMap<String, Vec<usize>>,
    tokens: &BTreeSet<String>,
    entry_index: usize,
) {
    tokens.iter().for_each(|token| {
        index.entry(token.clone()).or_default().push(entry_index);
    });
}
12843
12844impl<'a> TraversalCodeLookup<'a> {
12845 fn new(
12846 symbols: &'a [TraversalSymbolIndexEntry],
12847 files: &'a [TraversalFileIndexEntry],
12848 routes: &'a [TraversalRouteIndexEntry],
12849 multiplicities: &'a [TraversalMultiplicityIndexEntry],
12850 ) -> Self {
12851 let mut symbol_index = HashMap::new();
12852 for (idx, entry) in symbols.iter().enumerate() {
12853 push_traversal_token_index(&mut symbol_index, &entry.tokens, idx);
12854 }
12855 let mut file_index = HashMap::new();
12856 let mut file_path_index = HashMap::new();
12857 for (idx, entry) in files.iter().enumerate() {
12858 push_traversal_token_index(&mut file_index, &entry.tokens, idx);
12859 if let Some(path) = entry.node.path.as_ref() {
12860 file_path_index.insert(path.clone(), path.clone());
12861 }
12862 }
12863 let mut route_index = HashMap::new();
12864 for (idx, entry) in routes.iter().enumerate() {
12865 push_traversal_token_index(&mut route_index, &entry.tokens, idx);
12866 }
12867 let mut multiplicity_index = HashMap::new();
12868 for (idx, entry) in multiplicities.iter().enumerate() {
12869 push_traversal_token_index(&mut multiplicity_index, &entry.tokens, idx);
12870 }
12871 Self {
12872 symbols,
12873 files,
12874 routes,
12875 multiplicities,
12876 symbol_index,
12877 file_index,
12878 route_index,
12879 multiplicity_index,
12880 file_path_index,
12881 }
12882 }
12883
12884 fn touched_files_for_line(&self, line: &str) -> Vec<String> {
12885 let mut touched_files = BTreeSet::new();
12886 for candidate in markdown_code_spans(line)
12887 .into_iter()
12888 .chain(line.split_whitespace().map(str::to_string))
12889 {
12890 for path in traversal_path_candidates(&candidate) {
12891 if let Some(file) = self.file_path_index.get(&path) {
12892 touched_files.insert(file.clone());
12893 }
12894 }
12895 }
12896 touched_files.into_iter().collect()
12897 }
12898}
12899
/// Expands a raw word or code span into the path spellings worth looking up.
///
/// Surrounding quotes, brackets and sentence punctuation are stripped first. The
/// result then contains, without duplicates and in this order:
/// 1. the fully stripped text, followed by the same text with each trailing
///    `:digits` suffix removed in turn (so `file:12:5` yields `file:12` and `file`);
/// 2. the same expansion for the text with leading dots preserved, so dotfile paths
///    such as `.github/ci.yml` survive the punctuation stripping;
/// 3. the same expansion with a leading `./` removed, so `./src/main.rs` also
///    yields `src/main.rs`.
///
/// Returns an empty vector when nothing remains after stripping.
fn traversal_path_candidates(candidate: &str) -> Vec<String> {
    fn is_wrapper(ch: char) -> bool {
        matches!(
            ch,
            '`' | '"' | '\'' | ',' | ';' | '.' | '!' | '?' | '(' | ')' | '[' | ']' | '{' | '}'
        )
    }

    let trimmed = candidate.trim_matches(is_wrapper);
    if trimmed.is_empty() {
        return Vec::new();
    }
    // A leading '.' is meaningful in paths, so keep it in the alternative spelling.
    let dotted = candidate
        .trim_end_matches(is_wrapper)
        .trim_start_matches(|ch: char| ch != '.' && is_wrapper(ch));
    let bases = [Some(trimmed), Some(dotted), dotted.strip_prefix("./")];

    let mut candidates: Vec<String> = Vec::new();
    for base in bases.iter().flatten() {
        let mut current: &str = base;
        loop {
            if !current.is_empty() && !candidates.iter().any(|known| known == current) {
                candidates.push(current.to_string());
            }
            // Peel one `:line` / `:column` style suffix per iteration.
            match current.rsplit_once(':') {
                Some((path, suffix))
                    if !path.is_empty() && suffix.chars().all(|ch| ch.is_ascii_digit()) =>
                {
                    current = path;
                }
                _ => break,
            }
        }
    }
    candidates
}
12919
12920fn parse_worker_result_line(
12921 line: &str,
12922 lookup: &TraversalCodeLookup<'_>,
12923) -> Vec<ParsedWorkerResult> {
12924 if line.trim_start().starts_with("- [") {
12925 return Vec::new();
12926 }
12927 let lower = line.to_ascii_lowercase();
12928 let status =
12929 if lower.contains("completed") || lower.contains("code-complete") || lower.contains("done")
12930 {
12931 "completed"
12932 } else if lower.contains("blocked") || lower.contains("externally blocked") {
12933 "blocked"
12934 } else {
12935 return Vec::new();
12936 };
12937 let result_prefix_end = ["follow-up", "follow up", "next:"]
12938 .iter()
12939 .filter_map(|marker| lower.find(marker))
12940 .min()
12941 .unwrap_or(line.len());
12942 let ids = extract_conflict_target_refs(&line[..result_prefix_end]);
12943 if ids.is_empty() {
12944 return Vec::new();
12945 }
12946 let result_ids = ids.iter().cloned().collect::<BTreeSet<_>>();
12947 let all_ids = extract_conflict_target_refs(line);
12948
12949 let touched_files = lookup.touched_files_for_line(line);
12950 let tests = markdown_code_spans(line)
12951 .into_iter()
12952 .filter(|span| span.to_ascii_lowercase().contains("test"))
12953 .collect::<Vec<_>>();
12954
12955 ids.iter()
12956 .map(|id| ParsedWorkerResult {
12957 id: id.clone(),
12958 status: status.to_string(),
12959 touched_files: touched_files.clone(),
12960 tests: tests.clone(),
12961 follow_up_ids: all_ids
12962 .iter()
12963 .filter(|other| *other != id && !result_ids.contains(*other))
12964 .cloned()
12965 .collect(),
12966 })
12967 .collect()
12968}
12969
/// Resolves `path_hint` (relative to `root` unless absolute) and returns it only
/// when it has the `md` extension and exists as a regular file.
fn hinted_markdown_file(root: &Path, path_hint: &Path) -> Option<PathBuf> {
    let resolved = match path_hint.is_absolute() {
        true => path_hint.to_path_buf(),
        false => root.join(path_hint),
    };
    let has_md_extension = resolved.extension().and_then(|ext| ext.to_str()) == Some("md");
    if has_md_extension && resolved.is_file() {
        Some(resolved)
    } else {
        None
    }
}
12981
12982fn traversal_markdown_content_looks_like_session(content: &str) -> bool {
12983 parse_agent_doc_session_id(content).is_some()
12984 || content.contains("<!-- agent:exchange")
12985 || content.contains("<!-- agent:backlog")
12986 || content.contains("## Backlog")
12987}
12988
12989fn traversal_path_is_session_markdown(root: &Path, source_root: &Path, path: &Path) -> bool {
12990 let candidate = if path.is_absolute() {
12991 path.to_path_buf()
12992 } else {
12993 source_root.join(path)
12994 };
12995 if !candidate.starts_with(source_root) && !candidate.starts_with(root) {
12996 return false;
12997 }
12998 if !matches!(
12999 candidate.extension().and_then(|ext| ext.to_str()),
13000 Some("md" | "mdx")
13001 ) {
13002 return false;
13003 }
13004 fs::read_to_string(&candidate)
13005 .map(|content| traversal_markdown_content_looks_like_session(&content))
13006 .unwrap_or(false)
13007}
13008
13009fn markdown_files_for_traversal(root: &Path, path_hint: &Path) -> Result<Vec<PathBuf>> {
13010 if let Some(hinted_path) = hinted_markdown_file(root, path_hint) {
13011 return Ok(vec![hinted_path]);
13012 }
13013 let mut files = Vec::new();
13014 let walker = ignore::WalkBuilder::new(root)
13015 .hidden(true)
13016 .git_ignore(true)
13017 .git_global(true)
13018 .git_exclude(true)
13019 .build();
13020 for result in walker {
13021 let entry =
13022 result.with_context(|| format!("walking markdown files under {}", root.display()))?;
13023 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
13024 continue;
13025 }
13026 if traversal_path_is_generated_artifact(root, root, entry.path()) {
13027 continue;
13028 }
13029 if entry.path().extension().and_then(|ext| ext.to_str()) == Some("md") {
13030 files.push(entry.path().to_path_buf());
13031 }
13032 }
13033 files.sort();
13034 Ok(files)
13035}
13036
/// Renders `path` for use in a watermark: relative to `root` when it lies under
/// `root` (otherwise unchanged), lossily converted to text, with forward slashes.
fn traversal_watermark_path(root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
13043
13044fn push_traversal_metadata_watermark_part(
13045 root: &Path,
13046 path: &Path,
13047 label: &str,
13048 parts: &mut Vec<String>,
13049) {
13050 let display = traversal_watermark_path(root, path);
13051 match fs::metadata(path) {
13052 Ok(metadata) => {
13053 let (secs, nanos) = metadata
13054 .modified()
13055 .ok()
13056 .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
13057 .map(|duration| (duration.as_secs(), duration.subsec_nanos()))
13058 .unwrap_or((0, 0));
13059 parts.push(format!(
13060 "{label}:{display}:len={}:mtime={secs}.{nanos}",
13061 metadata.len()
13062 ));
13063 }
13064 Err(_) => parts.push(format!("{label}:{display}:missing")),
13065 }
13066}
13067
13068#[derive(Serialize)]
13069struct TraversalSummaryWatermarkRow<'a> {
13070 symbol_name: &'a str,
13071 file_path: &'a str,
13072 entities: &'a Option<Vec<summarize::Entity>>,
13073 relationships: &'a Option<Vec<summarize::Relationship>>,
13074 concept_labels: &'a Option<Vec<String>>,
13075}
13076
13077fn push_traversal_summaries_watermark_part(root: &Path, parts: &mut Vec<String>) -> Result<()> {
13078 let summaries_db = root.join(".tsift/summaries.db");
13079 if !summaries_db.exists() {
13080 parts.push("summaries_db:absent".to_string());
13081 return Ok(());
13082 }
13083
13084 match summarize::SummaryDb::open_read_only_resilient(&summaries_db)
13085 .and_then(|summary_db| summary_db.all())
13086 {
13087 Ok(summaries) => {
13088 let rows = summaries
13089 .iter()
13090 .map(|summary| TraversalSummaryWatermarkRow {
13091 symbol_name: &summary.symbol_name,
13092 file_path: &summary.file_path,
13093 entities: &summary.entities,
13094 relationships: &summary.relationships,
13095 concept_labels: &summary.concept_labels,
13096 })
13097 .collect::<Vec<_>>();
13098 parts.push(format!(
13099 "summaries_db:rows={}:semantic_hash={}",
13100 rows.len(),
13101 content_hash(&rows)?
13102 ));
13103 }
13104 Err(_) => {
13105 push_traversal_metadata_watermark_part(
13106 root,
13107 &summaries_db,
13108 "summaries_db_unreadable",
13109 parts,
13110 );
13111 }
13112 }
13113 Ok(())
13114}
13115
/// Test-only shim that forwards to `resolution::relative_path_is_generated_artifact`.
#[cfg(test)]
fn traversal_relative_path_is_generated_artifact(relative: &str) -> bool {
    resolution::relative_path_is_generated_artifact(relative)
}
13120
13121fn traversal_path_is_generated_artifact(root: &Path, source_root: &Path, path: &Path) -> bool {
13122 resolution::path_is_generated_artifact(root, source_root, path)
13123}
13124
13125fn traversal_index_snapshot_part_is_generated(root: &Path, source_root: &Path, part: &str) -> bool {
13126 resolution::index_snapshot_part_is_generated(root, source_root, part)
13127}
13128
13129pub(crate) fn traversal_source_watermark(
13130 root: &Path,
13131 path_hint: &Path,
13132 scope: Option<&str>,
13133 session_only: bool,
13134) -> Result<Option<String>> {
13135 let mut parts = vec![
13136 format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
13137 format!("scope:{}", scope.unwrap_or("root")),
13138 format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
13139 format!("session_only:{session_only}"),
13140 ];
13141
13142 if !session_only || hinted_markdown_file(root, path_hint).is_none() {
13143 let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
13144 Ok(targets) => targets,
13145 Err(_) => return Ok(None),
13146 };
13147 let Some(target) = targets.into_iter().next() else {
13148 return Ok(None);
13149 };
13150 let db = match index::IndexDb::open_read_only_resilient(&target.db_path) {
13151 Ok(db) => db,
13152 Err(_) => return Ok(None),
13153 };
13154 parts.push(format!("index_label:{}", target.label));
13155 parts.push(format!(
13156 "index_scope:{}",
13157 target.scope_name.as_deref().unwrap_or("root")
13158 ));
13159 parts.push(format!(
13160 "index_source_root:{}",
13161 traversal_watermark_path(root, &target.source_root)
13162 ));
13163 let mut snapshot_rows = 0usize;
13164 for part in db.source_snapshot_parts()? {
13165 if traversal_index_snapshot_part_is_generated(root, &target.source_root, &part) {
13166 continue;
13167 }
13168 snapshot_rows += 1;
13169 parts.push(format!("index_snapshot:{part}"));
13170 }
13171 parts.push(format!("index_snapshot_rows:{snapshot_rows}"));
13172 }
13173
13174 let markdown_files = markdown_files_for_traversal(root, path_hint)?;
13175 parts.push(format!("markdown_count:{}", markdown_files.len()));
13176 for markdown_path in markdown_files {
13177 push_traversal_metadata_watermark_part(root, &markdown_path, "markdown", &mut parts);
13178 }
13179
13180 push_traversal_summaries_watermark_part(root, &mut parts)?;
13181
13182 Ok(Some(content_hash(&parts)?))
13183}
13184
13185fn ranked_symbol_matches<'a>(
13186 query_tokens: &BTreeSet<String>,
13187 entries: &'a [TraversalSymbolIndexEntry],
13188 index: &HashMap<String, Vec<usize>>,
13189) -> Vec<(usize, &'a TraversalSymbolIndexEntry)> {
13190 let mut scores = BTreeMap::<usize, usize>::new();
13191 for token in query_tokens {
13192 if let Some(indices) = index.get(token) {
13193 for idx in indices {
13194 *scores.entry(*idx).or_default() += 1;
13195 }
13196 }
13197 }
13198 let mut matches = scores
13199 .into_iter()
13200 .map(|(idx, score)| (score, &entries[idx]))
13201 .collect::<Vec<_>>();
13202 matches.sort_by(|(left_score, left), (right_score, right)| {
13203 right_score
13204 .cmp(left_score)
13205 .then_with(|| left.node.label.cmp(&right.node.label))
13206 .then_with(|| left.handle.cmp(&right.handle))
13207 });
13208 matches
13209}
13210
13211fn ranked_file_matches<'a>(
13212 query_tokens: &BTreeSet<String>,
13213 entries: &'a [TraversalFileIndexEntry],
13214 index: &HashMap<String, Vec<usize>>,
13215) -> Vec<(usize, &'a TraversalFileIndexEntry)> {
13216 let mut scores = BTreeMap::<usize, usize>::new();
13217 for token in query_tokens {
13218 if let Some(indices) = index.get(token) {
13219 for idx in indices {
13220 *scores.entry(*idx).or_default() += 1;
13221 }
13222 }
13223 }
13224 let mut matches = scores
13225 .into_iter()
13226 .map(|(idx, score)| (score, &entries[idx]))
13227 .collect::<Vec<_>>();
13228 matches.sort_by(|(left_score, left), (right_score, right)| {
13229 right_score
13230 .cmp(left_score)
13231 .then_with(|| left.node.label.cmp(&right.node.label))
13232 .then_with(|| left.handle.cmp(&right.handle))
13233 });
13234 matches
13235}
13236
13237fn ranked_route_matches<'a>(
13238 query_tokens: &BTreeSet<String>,
13239 entries: &'a [TraversalRouteIndexEntry],
13240 index: &HashMap<String, Vec<usize>>,
13241) -> Vec<(usize, &'a TraversalRouteIndexEntry)> {
13242 let mut scores = BTreeMap::<usize, usize>::new();
13243 for token in query_tokens {
13244 if let Some(indices) = index.get(token) {
13245 for idx in indices {
13246 *scores.entry(*idx).or_default() += 1;
13247 }
13248 }
13249 }
13250 let mut matches = scores
13251 .into_iter()
13252 .map(|(idx, score)| (score, &entries[idx]))
13253 .collect::<Vec<_>>();
13254 matches.sort_by(|(left_score, left), (right_score, right)| {
13255 right_score
13256 .cmp(left_score)
13257 .then_with(|| left.node.label.cmp(&right.node.label))
13258 .then_with(|| left.handle.cmp(&right.handle))
13259 });
13260 matches
13261}
13262
13263fn ranked_multiplicity_matches<'a>(
13264 query_tokens: &BTreeSet<String>,
13265 entries: &'a [TraversalMultiplicityIndexEntry],
13266 index: &HashMap<String, Vec<usize>>,
13267) -> Vec<(usize, &'a TraversalMultiplicityIndexEntry)> {
13268 let mut scores = BTreeMap::<usize, usize>::new();
13269 for token in query_tokens {
13270 if let Some(indices) = index.get(token) {
13271 for idx in indices {
13272 *scores.entry(*idx).or_default() += 1;
13273 }
13274 }
13275 }
13276 let mut matches = scores
13277 .into_iter()
13278 .map(|(idx, score)| (score, &entries[idx]))
13279 .collect::<Vec<_>>();
13280 matches.sort_by(|(left_score, left), (right_score, right)| {
13281 right_score
13282 .cmp(left_score)
13283 .then_with(|| left.node.kind.cmp(&right.node.kind))
13284 .then_with(|| left.node.label.cmp(&right.node.label))
13285 .then_with(|| left.handle.cmp(&right.handle))
13286 });
13287 matches
13288}
13289
13290fn link_backlog_to_code_nodes(
13291 graph: &mut TraversalGraphBuild,
13292 backlog: &TraversalNode,
13293 text: &str,
13294 lookup: &TraversalCodeLookup<'_>,
13295 limit: usize,
13296) {
13297 let mut query_tokens = traversal_tokens(text);
13298 if let Some(ref_id) = &backlog.ref_id {
13299 query_tokens.extend(traversal_tokens(ref_id));
13300 }
13301 if query_tokens.is_empty() {
13302 return;
13303 }
13304
13305 for (score, entry) in ranked_symbol_matches(&query_tokens, lookup.symbols, &lookup.symbol_index)
13306 .into_iter()
13307 .take(limit)
13308 {
13309 graph.add_edge(
13310 &backlog.handle,
13311 &entry.handle,
13312 "mentions",
13313 Some("backlog text matches symbol tokens".to_string()),
13314 score,
13315 );
13316 }
13317
13318 for (score, entry) in ranked_file_matches(&query_tokens, lookup.files, &lookup.file_index)
13319 .into_iter()
13320 .take(limit.min(5))
13321 {
13322 graph.add_edge(
13323 &backlog.handle,
13324 &entry.handle,
13325 "mentions",
13326 Some("backlog text matches file tokens".to_string()),
13327 score,
13328 );
13329 }
13330
13331 for (score, entry) in ranked_route_matches(&query_tokens, lookup.routes, &lookup.route_index)
13332 .into_iter()
13333 .take(limit.min(5))
13334 {
13335 graph.add_edge(
13336 &backlog.handle,
13337 &entry.handle,
13338 "mentions",
13339 Some("backlog text matches route tokens".to_string()),
13340 score,
13341 );
13342 }
13343
13344 for (score, entry) in ranked_multiplicity_matches(
13345 &query_tokens,
13346 lookup.multiplicities,
13347 &lookup.multiplicity_index,
13348 )
13349 .into_iter()
13350 .take(limit.min(5))
13351 {
13352 graph.add_edge(
13353 &backlog.handle,
13354 &entry.handle,
13355 "mentions",
13356 Some("backlog text matches multiplicity tokens".to_string()),
13357 score,
13358 );
13359 }
13360}
13361
/// Projects agent-doc session markdown files into the traversal graph.
///
/// For every markdown file returned by `markdown_files_for_traversal` whose content
/// passes `traversal_markdown_content_looks_like_session`, this adds a session node
/// and then makes three passes over the file's lines:
///
/// 1. backlog lines → backlog nodes, `contains` edges from the session, and
///    `mentions` edges to matching code nodes;
/// 2. lines inside an `<!-- agent:queue ... -->` block → job-packet nodes for
///    dispatch and `do` entries, with `targets` edges to known backlog items;
/// 3. worker-result lines → result nodes linked to the session, to the backlog
///    item and queued job with the same id, and to matching code nodes.
///
/// Unreadable files are not fatal: a warning is pushed onto `graph.warnings` and
/// the file is skipped.
///
/// # Errors
/// Only fails when listing the markdown files fails.
fn load_agent_doc_traversal_nodes(
    root: &Path,
    path_hint: &Path,
    graph: &mut TraversalGraphBuild,
    lookup: &TraversalCodeLookup<'_>,
) -> Result<()> {
    for markdown_path in markdown_files_for_traversal(root, path_hint)? {
        let content = match fs::read_to_string(&markdown_path) {
            Ok(content) => content,
            Err(err) => {
                // Best effort: record why the file was skipped and keep going.
                graph.warnings.push(format!(
                    "session artifact unavailable: {}: {err}",
                    markdown_path.display()
                ));
                continue;
            }
        };
        if !traversal_markdown_content_looks_like_session(&content) {
            continue;
        }

        let session_id = parse_agent_doc_session_id(&content);
        let session = traversal_session_node(root, &markdown_path, session_id.as_deref());
        graph.add_node(session.clone());
        let lines = content.lines().collect::<Vec<_>>();

        // Pass 1: backlog items. `backlog_by_id` lets the later passes resolve ids
        // back to the node created here (a repeated id keeps the last occurrence).
        let mut backlog_by_id = BTreeMap::<String, TraversalNode>::new();
        for (idx, line) in lines.iter().enumerate() {
            let Some((id, text)) = parse_backlog_line(line) else {
                continue;
            };
            // `idx + 1` converts the zero-based enumerate index to a 1-based line number.
            let backlog = traversal_backlog_node(root, &markdown_path, &id, &text, idx as i64 + 1);
            graph.add_node(backlog.clone());
            backlog_by_id.insert(id.clone(), backlog.clone());
            graph.add_edge(
                &session.handle,
                &backlog.handle,
                "contains",
                Some("session backlog item".to_string()),
                1,
            );
            link_backlog_to_code_nodes(graph, &backlog, &text, lookup, 8);
        }

        // Pass 2: queued work. Only lines between the opening and closing
        // `agent:queue` markers are considered.
        let mut in_queue = false;
        let mut job_by_id = BTreeMap::<String, TraversalNode>::new();
        for (idx, line) in lines.iter().enumerate() {
            let trimmed = line.trim();
            if trimmed.starts_with("<!-- agent:queue") {
                in_queue = true;
                continue;
            }
            if trimmed.starts_with("<!-- /agent:queue") {
                in_queue = false;
                continue;
            }
            if !in_queue {
                continue;
            }
            if let Some(dispatch) = parse_queue_dispatch_line(line) {
                // The reference id is the dispatch text without a leading `#`, if any.
                let dispatch_ref = dispatch.strip_prefix('#').unwrap_or(dispatch.as_str());
                let node = traversal_job_packet_node(
                    root,
                    &markdown_path,
                    &format!("dispatch {dispatch}"),
                    Some(dispatch_ref),
                    "agent-doc dispatch preset",
                    idx as i64 + 1,
                );
                graph.add_node(node.clone());
                graph.add_edge(
                    &session.handle,
                    &node.handle,
                    "contains",
                    Some("session queued dispatch".to_string()),
                    1,
                );
                // Dispatch entries are not registered in `job_by_id`.
                continue;
            }
            if let Some(id) = parse_queue_do_line(line) {
                // Reuse the backlog item's detail when the id is known; otherwise
                // fall back to a generic description.
                let detail = backlog_by_id
                    .get(&id)
                    .and_then(|node| node.detail.clone())
                    .unwrap_or_else(|| "queued backlog item".to_string());
                let node = traversal_job_packet_node(
                    root,
                    &markdown_path,
                    &format!("do #{id}"),
                    Some(&id),
                    &detail,
                    idx as i64 + 1,
                );
                graph.add_node(node.clone());
                graph.add_edge(
                    &session.handle,
                    &node.handle,
                    "contains",
                    Some("session queued job packet".to_string()),
                    1,
                );
                if let Some(backlog) = backlog_by_id.get(&id) {
                    graph.add_edge(
                        &node.handle,
                        &backlog.handle,
                        "targets",
                        Some("queued backlog item".to_string()),
                        1,
                    );
                }
                job_by_id.insert(id, node);
            }
        }

        // Pass 3: worker results. `seen_results` drops repeated (id, status) pairs
        // parsed from the same line.
        let mut seen_results = BTreeSet::<(String, String, i64)>::new();
        for (idx, line) in lines.iter().enumerate() {
            for parsed in parse_worker_result_line(line, lookup) {
                let line_no = idx as i64 + 1;
                if !seen_results.insert((parsed.id.clone(), parsed.status.clone(), line_no)) {
                    continue;
                }
                let result =
                    traversal_worker_result_node(root, &markdown_path, &parsed, line, line_no);
                graph.add_node(result.clone());
                graph.add_edge(
                    &session.handle,
                    &result.handle,
                    "contains",
                    Some("session worker result".to_string()),
                    1,
                );
                if let Some(backlog) = backlog_by_id.get(&parsed.id) {
                    graph.add_edge(
                        &backlog.handle,
                        &result.handle,
                        "has_result",
                        Some(format!("worker result {}", parsed.status)),
                        1,
                    );
                }
                if let Some(job) = job_by_id.get(&parsed.id) {
                    graph.add_edge(
                        &job.handle,
                        &result.handle,
                        "has_result",
                        Some(format!("queued worker result {}", parsed.status)),
                        1,
                    );
                }
                // Touched files are appended to the matched text so their path
                // tokens take part in code-node matching as well.
                let mut result_text = line.to_string();
                if !parsed.touched_files.is_empty() {
                    result_text.push(' ');
                    result_text.push_str(&parsed.touched_files.join(" "));
                }
                link_backlog_to_code_nodes(graph, &result, &result_text, lookup, 8);
            }
        }
    }
    Ok(())
}
13520
/// Outcome of checking (and, if needed, refreshing) the code index before a
/// graph packet is built.
#[derive(Debug, Clone)]
struct AgentDocIndexGate {
    /// Index database to read from; `None` means callers fall back to raw
    /// source file nodes.
    db_path: Option<PathBuf>,
    /// Source root the index (or the raw-file fallback) covers.
    source_root: PathBuf,
    /// Human-readable notes about refreshes or fallbacks; empty when the index
    /// was already fresh.
    diagnostics: Vec<String>,
}
13527
/// Key of the in-process index gate cache: one entry per distinct combination
/// of the four arguments passed to `prepare_agent_doc_index_gate_cached`.
#[derive(Clone, Hash, PartialEq, Eq)]
struct AgentDocIndexGateCacheKey {
    /// Project root the gate was prepared for.
    root: PathBuf,
    /// Path hint used to resolve the index target.
    path_hint: PathBuf,
    /// Explicit scope name, if one was requested.
    scope: Option<String>,
    /// Label of the packet being prepared; it is embedded in the cached
    /// diagnostics, so it is part of the key.
    packet_label: String,
}
13535
13536fn agent_doc_index_gate_cache() -> &'static std::sync::Mutex<
13537 std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>,
13538> {
13539 static CACHE: std::sync::OnceLock<
13540 std::sync::Mutex<std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>>,
13541 > = std::sync::OnceLock::new();
13542 CACHE.get_or_init(|| std::sync::Mutex::new(std::collections::HashMap::new()))
13543}
13544
13545fn prepare_agent_doc_index_gate_cached(
13546 root: &Path,
13547 path_hint: &Path,
13548 scope: Option<&str>,
13549 packet_label: &str,
13550) -> (AgentDocIndexGate, String) {
13551 let key = AgentDocIndexGateCacheKey {
13552 root: root.to_path_buf(),
13553 path_hint: path_hint.to_path_buf(),
13554 scope: scope.map(str::to_string),
13555 packet_label: packet_label.to_string(),
13556 };
13557 if let Ok(cache) = agent_doc_index_gate_cache().lock()
13558 && let Some(cached) = cache.get(&key)
13559 {
13560 return (
13561 cached.clone(),
13562 "reused from in-process index gate cache by root/path_hint/scope key".to_string(),
13563 );
13564 }
13565 let gate = prepare_agent_doc_index_gate(root, path_hint, scope, packet_label);
13566 if let Ok(mut cache) = agent_doc_index_gate_cache().lock() {
13567 cache.insert(key, gate.clone());
13568 }
13569 (
13570 gate,
13571 "fresh inspection/refresh — cache miss on this preparation key".to_string(),
13572 )
13573}
13574
13575fn index_reason_for_state(state: SearchIndexState) -> Option<RebuildSearchReason> {
13576 match state {
13577 SearchIndexState::Fresh => None,
13578 SearchIndexState::Missing => Some(RebuildSearchReason::Missing),
13579 SearchIndexState::Stale { stale_files } => Some(RebuildSearchReason::Stale { stale_files }),
13580 }
13581}
13582
13583fn index_reason_detail(target: &SearchIndexTarget, reason: RebuildSearchReason) -> String {
13584 rebuild_search_target_detail(&RebuildSearchTarget {
13585 label: target.label.clone(),
13586 reason,
13587 reindex_cmd: target.reindex_cmd.clone(),
13588 })
13589}
13590
13591fn index_refresh_diagnostic(
13592 target: &SearchIndexTarget,
13593 reason: RebuildSearchReason,
13594 summary: &index::IndexSummary,
13595 packet_label: &str,
13596) -> String {
13597 let changed = summary.new + summary.modified + summary.deleted;
13598 format!(
13599 "index refreshed: {}; updated {} changed file{} before {}",
13600 index_reason_detail(target, reason),
13601 changed,
13602 if changed == 1 { "" } else { "s" },
13603 packet_label
13604 )
13605}
13606
13607fn index_refresh_fallback_diagnostic(
13608 target: &SearchIndexTarget,
13609 reason: RebuildSearchReason,
13610 err: &anyhow::Error,
13611 packet_label: &str,
13612) -> String {
13613 format!(
13614 "{}; could not refresh before {}: {err:#}; falling back to raw source file nodes",
13615 index_reason_detail(target, reason),
13616 packet_label
13617 )
13618}
13619
13620fn graph_fallback_source_root(root: &Path, path_hint: &Path, scope: Option<&str>) -> PathBuf {
13621 if let Some(scope_name) = scope
13622 && let Ok(Some(scope)) = config::Config::find_submodule(root, scope_name)
13623 {
13624 return scope.source_root;
13625 }
13626 if let Some(scope_name) = scope
13627 && let Ok(Some(package)) = multiplicity::find_cargo_package(root, scope_name)
13628 {
13629 return package.package_root;
13630 }
13631 if let Ok(Some(scope)) = config::Config::infer_submodule_from_path(root, path_hint) {
13632 return scope.source_root;
13633 }
13634 if let Ok(Some(package)) = multiplicity::infer_cargo_package_from_path(root, path_hint) {
13635 return package.package_root;
13636 }
13637 if let Ok(Some(scope)) = infer_agent_doc_task_submodule(root, path_hint) {
13638 return scope.source_root;
13639 }
13640 root.to_path_buf()
13641}
13642
13643fn prepare_agent_doc_index_gate(
13644 root: &Path,
13645 path_hint: &Path,
13646 scope: Option<&str>,
13647 packet_label: &str,
13648) -> AgentDocIndexGate {
13649 let fallback_source_root = graph_fallback_source_root(root, path_hint, scope);
13650 let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
13651 Ok(targets) => targets,
13652 Err(err) => {
13653 return AgentDocIndexGate {
13654 db_path: None,
13655 source_root: fallback_source_root,
13656 diagnostics: vec![format!(
13657 "code index unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
13658 )],
13659 };
13660 }
13661 };
13662 let Some(target) = targets.into_iter().next() else {
13663 return AgentDocIndexGate {
13664 db_path: None,
13665 source_root: fallback_source_root,
13666 diagnostics: vec![format!(
13667 "code index unavailable before {packet_label}: no index target resolved; falling back to raw source file nodes"
13668 )],
13669 };
13670 };
13671
13672 let state = match inspect_search_index(&target) {
13673 Ok(state) => state,
13674 Err(err) => {
13675 return AgentDocIndexGate {
13676 db_path: None,
13677 source_root: target.source_root,
13678 diagnostics: vec![format!(
13679 "code index freshness unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
13680 )],
13681 };
13682 }
13683 };
13684
13685 let Some(reason) = index_reason_for_state(state) else {
13686 return AgentDocIndexGate {
13687 db_path: Some(target.db_path),
13688 source_root: target.source_root,
13689 diagnostics: Vec::new(),
13690 };
13691 };
13692
13693 match apply_search_index_update(root, &target) {
13694 Ok(summary) => {
13695 index::inspect_scope_invalidate_all();
13701 let diagnostics = vec![index_refresh_diagnostic(
13702 &target,
13703 reason,
13704 &summary,
13705 packet_label,
13706 )];
13707 AgentDocIndexGate {
13708 db_path: Some(target.db_path),
13709 source_root: target.source_root,
13710 diagnostics,
13711 }
13712 }
13713 Err(err) => {
13714 let diagnostics = vec![index_refresh_fallback_diagnostic(
13715 &target,
13716 reason,
13717 &err,
13718 packet_label,
13719 )];
13720 AgentDocIndexGate {
13721 db_path: None,
13722 source_root: target.source_root,
13723 diagnostics,
13724 }
13725 }
13726 }
13727}
13728
13729fn add_raw_source_file_nodes(
13730 root: &Path,
13731 source_root: &Path,
13732 graph: &mut TraversalGraphBuild,
13733 file_entries: &mut Vec<TraversalFileIndexEntry>,
13734) -> Result<()> {
13735 let mut entries = walk::walk_files(source_root)?;
13736 entries.sort_by(|left, right| left.path.cmp(&right.path));
13737 for entry in entries {
13738 let file = entry.path.to_string_lossy();
13739 let node = traversal_raw_source_file_node(root, file.as_ref());
13740 let entry = TraversalFileIndexEntry {
13741 handle: node.handle.clone(),
13742 tokens: traversal_node_tokens(&node),
13743 node: node.clone(),
13744 };
13745 graph.add_node(node);
13746 file_entries.push(entry);
13747 }
13748 Ok(())
13749}
13750
/// Reports whether the `/`-separated relative `path` lies inside `scope_root`.
///
/// An empty `scope_root` contains everything. Otherwise `path` must equal
/// `scope_root` or continue with a `/` right after it, so `crates/ab` is not
/// inside `crates/a`.
///
/// Implemented with `strip_prefix` so no temporary string is allocated; the
/// caller invokes this once per (file, package) pair.
fn relative_path_inside_scope(path: &str, scope_root: &str) -> bool {
    if scope_root.is_empty() {
        return true;
    }
    path.strip_prefix(scope_root)
        .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
}
13757
/// Resolves the on-disk location of an indexed symbol's file.
///
/// Absolute paths are returned as-is. A relative path is resolved against
/// `source_root` when that candidate exists on disk, and against `root`
/// otherwise (without checking that the latter exists).
fn traversal_symbol_source_path(root: &Path, source_root: &Path, file: &str) -> PathBuf {
    let recorded = Path::new(file);
    if recorded.is_absolute() {
        return PathBuf::from(recorded);
    }
    Some(source_root.join(recorded))
        .filter(|candidate| candidate.exists())
        .unwrap_or_else(|| root.join(recorded))
}
13770
/// Extracts the leading path segment of a Rust import written on a single line.
///
/// Recognises `use …` and `extern crate …`, optionally preceded by a visibility
/// qualifier (`pub`, `pub(crate)`, `pub(super)`, `pub(in path)`), and tolerates a
/// leading `::` on the imported path. The returned name is whatever precedes the
/// first `:`, `;`, space or tab — e.g. `serde` for `use serde::Serialize;`.
///
/// This is a line-based heuristic, not a parser: names such as `crate` or `std`
/// are returned too, and callers are expected to ignore names they do not know.
/// Returns `None` for lines that are not imports or that have no leading name.
fn cargo_import_alias_from_line(line: &str) -> Option<String> {
    /// Drops a leading `pub` / `pub(...)` qualifier, if there is one.
    fn strip_visibility(statement: &str) -> &str {
        let Some(after_pub) = statement.strip_prefix("pub") else {
            return statement;
        };
        let after_pub_trimmed = after_pub.trim_start();
        if let Some(restricted) = after_pub_trimmed.strip_prefix('(') {
            // `pub(crate)`, `pub(super)`, `pub(in some::path)`
            return match restricted.split_once(')') {
                Some((_, tail)) => tail.trim_start(),
                None => statement,
            };
        }
        // A bare `pub` must be followed by whitespace; otherwise this is just an
        // identifier that happens to start with "pub" (e.g. `publish`).
        if after_pub.starts_with(char::is_whitespace) {
            after_pub_trimmed
        } else {
            statement
        }
    }

    let statement = strip_visibility(line.trim());
    let imported = statement
        .strip_prefix("use ")
        .or_else(|| statement.strip_prefix("extern crate "))?
        .trim_start();
    // `use ::foo::bar` names the crate `foo` explicitly.
    let imported = imported.strip_prefix("::").unwrap_or(imported);
    let alias = imported
        .split([':', ';', ' ', '\t'])
        .next()
        .unwrap_or_default()
        .trim();
    (!alias.is_empty()).then(|| alias.to_string())
}
13784
13785fn cargo_import_aliases(package: &multiplicity::CargoPackageInfo) -> Result<BTreeSet<String>> {
13786 let mut aliases = BTreeSet::new();
13787 for entry in walk::walk_files(&package.package_root)? {
13788 if entry.path.extension().and_then(|ext| ext.to_str()) != Some("rs") {
13789 continue;
13790 }
13791 let content = fs::read_to_string(&entry.path)
13792 .with_context(|| format!("reading Rust source {}", entry.path.display()))?;
13793 aliases.extend(content.lines().filter_map(cargo_import_alias_from_line));
13794 }
13795 Ok(aliases)
13796}
13797
/// Adds Cargo workspace and package nodes ("multiplicity" nodes) to the graph
/// and links them to each other and to already-known file nodes.
///
/// Edges created:
/// * workspace → package: `contains_package`
/// * package → file: `owns_file`, for every entry of `file_handle_by_path` whose
///   path lies inside the package root
/// * package → package: `declares_dependency` for declared dependencies, and
///   `uses_crate` plus `imports` for names found in the package's Rust sources
///
/// Dependency and import edges are only added when the name resolves to exactly
/// one package node. Every workspace and package node is also appended to
/// `multiplicity_entries` for later token lookup.
///
/// # Errors
/// Fails when the Cargo inventory cannot be discovered or a package's sources
/// cannot be scanned for imports.
fn load_multiplicity_traversal_nodes(
    root: &Path,
    source_root: &Path,
    graph: &mut TraversalGraphBuild,
    file_handle_by_path: &HashMap<String, String>,
    multiplicity_entries: &mut Vec<TraversalMultiplicityIndexEntry>,
) -> Result<()> {
    let inventory = multiplicity::discover_cargo_inventory(source_root)?;

    // Workspaces first, so packages can be attached to them below.
    let mut workspace_handle_by_root = BTreeMap::<String, String>::new();
    for workspace in &inventory.workspaces {
        let node = traversal_cargo_workspace_node(root, workspace);
        workspace_handle_by_root.insert(workspace.relative_root.clone(), node.handle.clone());
        multiplicity_entries.push(TraversalMultiplicityIndexEntry {
            handle: node.handle.clone(),
            tokens: traversal_node_tokens(&node),
            node: node.clone(),
        });
        graph.add_node(node);
    }

    // Each package is registered under both its declared and its normalized name.
    // When the two are identical the same handle is pushed twice, which makes the
    // `handles.len() == 1` checks below fail for that name.
    // NOTE(review): confirm that skipping such packages is intended.
    let mut package_handle_by_name = BTreeMap::<String, Vec<String>>::new();
    let mut package_nodes = Vec::new();
    for package in &inventory.packages {
        let node = traversal_cargo_package_node(root, package);
        package_handle_by_name
            .entry(package.name.clone())
            .or_default()
            .push(node.handle.clone());
        package_handle_by_name
            .entry(package.normalized_name.clone())
            .or_default()
            .push(node.handle.clone());
        multiplicity_entries.push(TraversalMultiplicityIndexEntry {
            handle: node.handle.clone(),
            tokens: traversal_node_tokens(&node),
            node: node.clone(),
        });
        graph.add_node(node.clone());
        package_nodes.push((package, node));
    }

    // Edges are added in a second pass, once every package node exists.
    for (package, node) in &package_nodes {
        if let Some(workspace_handle) =
            workspace_handle_by_root.get(&package.relative_workspace_root)
        {
            graph.add_edge(
                workspace_handle,
                &node.handle,
                "contains_package",
                Some("Cargo workspace member package".to_string()),
                1,
            );
        }
        // Package root relative to `root`, with `/` separators, so it can be
        // compared against the keys of `file_handle_by_path`.
        let package_root = relativize_pathbuf(&package.package_root, root)
            .to_string_lossy()
            .replace('\\', "/");
        // `file_handle_by_path` is a `HashMap`, so the order in which `owns_file`
        // edges are added is unspecified.
        for (file, handle) in file_handle_by_path {
            if relative_path_inside_scope(file, &package_root) {
                graph.add_edge(
                    &node.handle,
                    handle,
                    "owns_file",
                    Some("Cargo package owns source file".to_string()),
                    1,
                );
            }
        }
        for dependency in &package.dependencies {
            // Ambiguous names (more than one handle) are skipped rather than guessed.
            if let Some(handles) = package_handle_by_name.get(&dependency.name)
                && handles.len() == 1
            {
                graph.add_edge(
                    &node.handle,
                    &handles[0],
                    "declares_dependency",
                    Some(format!("{} Cargo dependency", dependency.kind)),
                    1,
                );
            }
        }
        for alias in cargo_import_aliases(package)? {
            // Same ambiguity rule as above; a package importing itself is ignored.
            if let Some(handles) = package_handle_by_name.get(&alias)
                && handles.len() == 1
                && handles[0] != node.handle
            {
                graph.add_edge(
                    &node.handle,
                    &handles[0],
                    "uses_crate",
                    Some("Rust use/extern crate reference".to_string()),
                    1,
                );
                graph.add_edge(
                    &node.handle,
                    &handles[0],
                    "imports",
                    Some("Rust use/extern crate import".to_string()),
                    1,
                );
            }
        }
    }

    Ok(())
}
13903
/// Builds the in-memory traversal graph from its sources.
///
/// Unless this is a session-only request for a hinted markdown file, the code
/// side of the graph is built first:
/// * from the index database when the gate reports a usable one — file nodes,
///   symbol nodes, AST span nodes, stored call edges and route nodes, with
///   generated artifacts filtered out; or
/// * from a raw walk of the source root when no usable database is available;
///
/// followed by Cargo workspace/package nodes. Agent-doc session nodes are then
/// added and linked to the code nodes collected so far.
///
/// Gate diagnostics end up in `graph.warnings`.
///
/// # Errors
/// Propagates index database, file walk, Cargo inventory and markdown listing
/// failures.
fn build_traversal_graph_source_with_options(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    session_only: bool,
) -> Result<TraversalGraphBuild> {
    let mut graph = TraversalGraphBuild::default();
    // Lookup entries collected while building; they back the token matching done
    // for agent-doc nodes at the end.
    let mut symbol_entries = Vec::new();
    let mut file_entries = Vec::new();
    let mut route_entries = Vec::new();
    let mut multiplicity_entries = Vec::new();
    let mut file_handle_by_path = HashMap::<String, String>::new();
    // When the hint resolves to a markdown file, stored call edges are left out
    // of the projection (see the `!bounded_session_projection` check below).
    let bounded_session_projection = hinted_markdown_file(root, path_hint).is_some();
    // NOTE(review): `hinted_markdown_file` is evaluated a second time here; the
    // condition looks equivalent to `!session_only || !bounded_session_projection`.
    if !session_only || hinted_markdown_file(root, path_hint).is_none() {
        let (gate, _cache_detail) =
            prepare_agent_doc_index_gate_cached(root, path_hint, scope, "graph traversal packet");
        graph.warnings.extend(gate.diagnostics);
        let gate_source_root = gate.source_root.clone();

        match gate.db_path {
            Some(db_path) if db_path.exists() => {
                let db = index::IndexDb::open_read_only_resilient(&db_path)?;

                // File nodes.
                let file_paths = db.file_paths()?;
                for file in file_paths {
                    if traversal_path_is_generated_artifact(
                        root,
                        &gate_source_root,
                        Path::new(&file),
                    ) {
                        continue;
                    }
                    let node = traversal_file_node(root, &file);
                    let entry = TraversalFileIndexEntry {
                        handle: node.handle.clone(),
                        tokens: traversal_node_tokens(&node),
                        node: node.clone(),
                    };
                    if let Some(path) = entry.node.path.as_ref() {
                        file_handle_by_path.insert(path.clone(), entry.handle.clone());
                    }
                    graph.add_node(node);
                    file_entries.push(entry);
                }

                // Symbol nodes and their AST spans. Two kinds of lookup are kept:
                // exact ones keyed by "file:line:name", and first-seen ones keyed by
                // bare name, used to resolve callees and route handlers.
                let symbols = db.all_symbols()?;
                let mut symbol_by_file_name_line = HashMap::new();
                let mut span_by_file_name_line = HashMap::new();
                let mut first_symbol_by_name = BTreeMap::<String, String>::new();
                let mut first_span_by_name = BTreeMap::<String, String>::new();
                let mut ast_entries = Vec::<TraversalAstSpanIndexEntry>::new();
                // Per-file source cache; `None` records a failed read so it is not retried.
                let mut source_by_file = HashMap::<String, Option<Vec<u8>>>::new();
                for symbol in symbols.iter().filter(|symbol| {
                    !traversal_path_is_generated_artifact(
                        root,
                        &gate_source_root,
                        Path::new(&symbol.file),
                    )
                }) {
                    let node = traversal_symbol_node(root, symbol);
                    let file = relativize(&symbol.file, root);
                    symbol_by_file_name_line.insert(
                        format!("{file}:{}:{}", symbol.line, symbol.name),
                        node.handle.clone(),
                    );
                    first_symbol_by_name
                        .entry(symbol.name.clone())
                        .or_insert_with(|| node.handle.clone());
                    let entry = TraversalSymbolIndexEntry {
                        handle: node.handle.clone(),
                        tokens: traversal_node_tokens(&node),
                        node: node.clone(),
                    };
                    graph.add_node(node.clone());
                    if let Some(file_handle) = file_handle_by_path.get(&file) {
                        graph.add_edge(
                            file_handle,
                            &node.handle,
                            "defines",
                            Some("file defines symbol".to_string()),
                            1,
                        );
                    }
                    if !source_by_file.contains_key(&symbol.file) {
                        let source_path =
                            traversal_symbol_source_path(root, &gate_source_root, &symbol.file);
                        source_by_file.insert(symbol.file.clone(), fs::read(source_path).ok());
                    }
                    if let Some(Some(source)) = source_by_file.get(&symbol.file)
                        && let Some((ast_node, mut ast_entry)) =
                            traversal_ast_span_node(root, symbol, source, &symbols)
                    {
                        ast_entry.symbol_handle = node.handle.clone();
                        ast_entry.file_handle = file_handle_by_path.get(&file).cloned();
                        span_by_file_name_line.insert(
                            format!("{file}:{}:{}", symbol.line, symbol.name),
                            ast_node.handle.clone(),
                        );
                        first_span_by_name
                            .entry(symbol.name.clone())
                            .or_insert_with(|| ast_node.handle.clone());
                        graph.add_node(ast_node.clone());
                        // Symbol and span are linked in both directions.
                        graph.add_edge(
                            &node.handle,
                            &ast_node.handle,
                            "has_ast_span",
                            Some("symbol projects to indexed AST span".to_string()),
                            1,
                        );
                        graph.add_edge(
                            &ast_node.handle,
                            &node.handle,
                            "represents_symbol",
                            Some("AST span represents indexed symbol".to_string()),
                            1,
                        );
                        ast_entries.push(ast_entry);
                    }
                    symbol_entries.push(entry);
                }
                link_ast_navigation_edges(&mut graph, &ast_entries);
                link_markdown_embedded_code_edges(&mut graph, root, &ast_entries);

                // Stored call edges.
                if !bounded_session_projection {
                    for edge in db.all_stored_edges()? {
                        if traversal_path_is_generated_artifact(
                            root,
                            &gate_source_root,
                            Path::new(&edge.caller_file),
                        ) {
                            continue;
                        }
                        let caller_file = relativize(&edge.caller_file, root);
                        let caller_key =
                            format!("{caller_file}:{}:{}", edge.caller_line, edge.caller_name);
                        // Edges whose caller is not a known symbol are dropped.
                        let Some(caller_handle) =
                            symbol_by_file_name_line.get(&caller_key).cloned()
                        else {
                            continue;
                        };
                        // Callees are resolved by bare name to the first symbol with
                        // that name; unknown callees get an unresolved placeholder node.
                        let callee_handle = if let Some(handle) =
                            first_symbol_by_name.get(&edge.callee_name)
                        {
                            handle.clone()
                        } else {
                            let node = traversal_unresolved_symbol_node(root, &edge.callee_name);
                            let handle = node.handle.clone();
                            graph.add_node(node);
                            handle
                        };
                        graph.add_edge(
                            &caller_handle,
                            &callee_handle,
                            "calls",
                            Some(format!("call site {}:{}", caller_file, edge.call_site_line)),
                            1,
                        );
                        // Mirror the call between AST spans when both sides have one.
                        if let Some(caller_span) = span_by_file_name_line.get(&caller_key)
                            && let Some(callee_span) = first_span_by_name.get(&edge.callee_name)
                        {
                            graph.add_edge(
                                caller_span,
                                callee_span,
                                "calls",
                                Some(format!(
                                    "AST call site {}:{}",
                                    caller_file, edge.call_site_line
                                )),
                                1,
                            );
                        }
                    }
                }

                // Route nodes.
                for route in db.all_routes()? {
                    if traversal_path_is_generated_artifact(
                        root,
                        &gate_source_root,
                        Path::new(&route.file),
                    ) {
                        continue;
                    }
                    let node = traversal_route_node(root, &route);
                    let entry = TraversalRouteIndexEntry {
                        handle: node.handle.clone(),
                        tokens: traversal_node_tokens(&node),
                        node: node.clone(),
                    };
                    graph.add_node(node.clone());
                    if let Some(path) = node.path.as_ref()
                        && let Some(file_handle) = file_handle_by_path.get(path)
                    {
                        graph.add_edge(
                            file_handle,
                            &node.handle,
                            "defines_route",
                            Some("file declares route".to_string()),
                            1,
                        );
                    }
                    // Handlers are resolved by bare name, like callees above.
                    let handler_handle =
                        if let Some(handle) = first_symbol_by_name.get(&route.handler_name) {
                            handle.clone()
                        } else {
                            let node = traversal_unresolved_symbol_node(root, &route.handler_name);
                            let handle = node.handle.clone();
                            graph.add_node(node);
                            handle
                        };
                    graph.add_edge(
                        &entry.handle,
                        &handler_handle,
                        "handled_by",
                        Some("route handler reference".to_string()),
                        1,
                    );
                    if let Some(handler_span) = first_span_by_name.get(&route.handler_name) {
                        graph.add_edge(
                            &entry.handle,
                            handler_span,
                            "handled_by",
                            Some("route handler AST span".to_string()),
                            1,
                        );
                        graph.add_edge(
                            handler_span,
                            &entry.handle,
                            "handles_route",
                            Some("AST span handles route".to_string()),
                            1,
                        );
                    }
                    route_entries.push(entry);
                }
            }
            _ => {
                // No usable index database: fall back to bare file nodes from a
                // walk of the source root. No symbols, spans, calls or routes.
                add_raw_source_file_nodes(root, &gate_source_root, &mut graph, &mut file_entries)
                    .with_context(|| {
                        format!(
                            "loading raw source fallback nodes from {}",
                            gate_source_root.display()
                        )
                    })?;
                for entry in &file_entries {
                    if let Some(path) = entry.node.path.as_ref() {
                        file_handle_by_path.insert(path.clone(), entry.handle.clone());
                    }
                }
            }
        }
        load_multiplicity_traversal_nodes(
            root,
            &gate_source_root,
            &mut graph,
            &file_handle_by_path,
            &mut multiplicity_entries,
        )?;
    }

    // Session nodes come last so they can be linked against every code node
    // collected above (the lookup is empty when the code side was skipped).
    let code_lookup = TraversalCodeLookup::new(
        &symbol_entries,
        &file_entries,
        &route_entries,
        &multiplicity_entries,
    );
    load_agent_doc_traversal_nodes(root, path_hint, &mut graph, &code_lookup)?;
    Ok(graph)
}
14171
/// Test-only shorthand for `build_traversal_graph_source_with_options` with
/// `session_only` set to `false`.
#[cfg(test)]
fn build_traversal_graph_source(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<TraversalGraphBuild> {
    build_traversal_graph_source_with_options(root, path_hint, scope, false)
}
14180
14181pub(crate) fn write_traversal_graph_store_with_options(
14182 root: &Path,
14183 path_hint: &Path,
14184 scope: Option<&str>,
14185 session_only: bool,
14186) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
14187 let source_graph =
14188 build_traversal_graph_source_with_options(root, path_hint, scope, session_only)?;
14189 let projection = traversal_projection_from_graph(root, scope, &source_graph)?;
14190 let graph_db = graph_substrate_db_path(root, scope);
14191 let mut store = SqliteGraphStore::open(&graph_db)?;
14192 let source_watermark = traversal_source_watermark(root, path_hint, scope, session_only)
14193 .ok()
14194 .flatten()
14195 .or_else(|| graph_projection_content_hash(&projection));
14196 let refresh = store.replace_projection_with_version(
14197 scope.unwrap_or("root"),
14198 &projection,
14199 Some(GRAPH_PROJECTION_VERSION),
14200 source_watermark,
14201 )?;
14202 Ok((source_graph, refresh))
14203}
14204
/// Shorthand for `write_traversal_graph_store_with_options` with `session_only`
/// set to `false`.
pub(crate) fn write_traversal_graph_store(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
    write_traversal_graph_store_with_options(root, path_hint, scope, false)
}
14212
14213fn refresh_traversal_graph_store_with_options(
14214 root: &Path,
14215 path_hint: &Path,
14216 scope: Option<&str>,
14217 session_only: bool,
14218) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
14219 let (source_graph, refresh) =
14220 write_traversal_graph_store_with_options(root, path_hint, scope, session_only)?;
14221 let graph_db = graph_substrate_db_path(root, scope);
14222 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
14223 let mut graph = traversal_graph_from_store(root, &store)?;
14224 graph.warnings = source_graph.warnings;
14225 Ok((graph, refresh))
14226}
14227
14228fn refresh_traversal_graph_store(
14229 root: &Path,
14230 path_hint: &Path,
14231 scope: Option<&str>,
14232) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
14233 refresh_traversal_graph_store_with_options(root, path_hint, scope, false)
14234}
14235
14236pub(crate) fn build_traversal_graph(
14237 root: &Path,
14238 path_hint: &Path,
14239 scope: Option<&str>,
14240) -> Result<TraversalGraphBuild> {
14241 let (graph, _refresh) = refresh_traversal_graph_store(root, path_hint, scope)?;
14242 Ok(graph)
14243}
14244
/// Returns the tie-break priority of a node kind when resolving a query;
/// lower values win. Kinds not in the table all share the lowest priority.
fn traversal_query_kind_priority(kind: &str) -> usize {
    // Position in this table is the priority, so order is significant.
    const RANKED_KINDS: [&str; 12] = [
        "backlog",
        "job_packet",
        "worker_result",
        "symbol",
        "ast_span",
        "file",
        "route",
        "cargo_package",
        "cargo_workspace",
        "session",
        "semantic_concept",
        "semantic_entity",
    ];
    RANKED_KINDS
        .iter()
        .position(|candidate| *candidate == kind)
        .unwrap_or(RANKED_KINDS.len())
}
14262
14263fn traversal_node_match_rank(node: &TraversalNode, query: &str) -> Option<(usize, usize, String)> {
14264 let trimmed = query.trim();
14265 if trimmed.is_empty() {
14266 return None;
14267 }
14268 let kind_priority = traversal_query_kind_priority(&node.kind);
14269 if node.handle == trimmed {
14270 return Some((0, kind_priority, node.handle.clone()));
14271 }
14272 if node.path.as_deref() == Some(trimmed) {
14273 let path_priority = if node.kind == "file" {
14274 0
14275 } else {
14276 kind_priority.saturating_add(1)
14277 };
14278 return Some((1, path_priority, node.handle.clone()));
14279 }
14280 let normalized_backlog = trimmed.trim_start_matches('#');
14281 if node.ref_id.as_deref() == Some(trimmed) || node.ref_id.as_deref() == Some(normalized_backlog)
14282 {
14283 return Some((2, kind_priority, node.handle.clone()));
14284 }
14285 if node.label == trimmed || (node.kind == "symbol" && node.label == normalized_backlog) {
14286 return Some((3, kind_priority, node.handle.clone()));
14287 }
14288 None
14289}
14290
14291fn resolve_traversal_node<'a>(
14292 graph: &'a TraversalGraphBuild,
14293 query: &str,
14294) -> Option<&'a TraversalNode> {
14295 graph
14296 .nodes
14297 .values()
14298 .filter_map(|node| traversal_node_match_rank(node, query).map(|rank| (rank, node)))
14299 .min_by(|(left_rank, _), (right_rank, _)| left_rank.cmp(right_rank))
14300 .map(|(_, node)| node)
14301}
14302
14303fn traversal_adjacency(edges: &[TraversalEdge]) -> BTreeMap<String, Vec<String>> {
14304 let mut adj = BTreeMap::<String, BTreeSet<String>>::new();
14305 for edge in edges {
14306 adj.entry(edge.from.clone())
14307 .or_default()
14308 .insert(edge.to.clone());
14309 adj.entry(edge.to.clone())
14310 .or_default()
14311 .insert(edge.from.clone());
14312 }
14313 adj.into_iter()
14314 .map(|(node, neighbors)| (node, neighbors.into_iter().collect()))
14315 .collect()
14316}
14317
14318fn traversal_shortest_handles(
14319 edges: &[TraversalEdge],
14320 from: &str,
14321 to: &str,
14322) -> Option<Vec<String>> {
14323 if from == to {
14324 return Some(vec![from.to_string()]);
14325 }
14326 let adj = traversal_adjacency(edges);
14327 if !adj.contains_key(from) || !adj.contains_key(to) {
14328 return None;
14329 }
14330 let mut visited = BTreeSet::new();
14331 let mut queue = VecDeque::new();
14332 let mut parent = BTreeMap::<String, String>::new();
14333 visited.insert(from.to_string());
14334 queue.push_back(from.to_string());
14335 while let Some(current) = queue.pop_front() {
14336 if let Some(neighbors) = adj.get(¤t) {
14337 for neighbor in neighbors {
14338 if visited.insert(neighbor.clone()) {
14339 parent.insert(neighbor.clone(), current.clone());
14340 if neighbor == to {
14341 let mut path = vec![to.to_string()];
14342 let mut cursor = to.to_string();
14343 while let Some(prev) = parent.get(&cursor) {
14344 path.push(prev.clone());
14345 cursor = prev.clone();
14346 }
14347 path.reverse();
14348 return Some(path);
14349 }
14350 queue.push_back(neighbor.clone());
14351 }
14352 }
14353 }
14354 }
14355 None
14356}
14357
14358fn traversal_scored_neighbors(edges: &[TraversalEdge], current: &str) -> Vec<String> {
14359 let mut best_score_by_neighbor = BTreeMap::<String, usize>::new();
14360 for edge in edges {
14361 let neighbor = if edge.from == current {
14362 edge.to.as_str()
14363 } else if edge.to == current {
14364 edge.from.as_str()
14365 } else {
14366 continue;
14367 };
14368 let score = traversal_relation_score(edge, current);
14369 best_score_by_neighbor
14370 .entry(neighbor.to_string())
14371 .and_modify(|best| *best = (*best).max(score))
14372 .or_insert(score);
14373 }
14374 let mut ranked = best_score_by_neighbor.into_iter().collect::<Vec<_>>();
14375 ranked.sort_by(|(left_handle, left_score), (right_handle, right_score)| {
14376 right_score
14377 .cmp(left_score)
14378 .then_with(|| left_handle.cmp(right_handle))
14379 });
14380 ranked.into_iter().map(|(handle, _)| handle).collect()
14381}
14382
14383fn traversal_neighborhood_handles(
14384 edges: &[TraversalEdge],
14385 origin: &str,
14386 depth: usize,
14387 limit: usize,
14388) -> BTreeSet<String> {
14389 let mut seen = BTreeSet::new();
14390 let mut queue = VecDeque::new();
14391 seen.insert(origin.to_string());
14392 queue.push_back((origin.to_string(), 0usize));
14393 while let Some((current, current_depth)) = queue.pop_front() {
14394 if current_depth >= depth {
14395 continue;
14396 }
14397 for neighbor in traversal_scored_neighbors(edges, ¤t) {
14398 if limit > 0 && seen.len() >= limit {
14399 return seen;
14400 }
14401 if seen.insert(neighbor.clone()) {
14402 queue.push_back((neighbor, current_depth + 1));
14403 }
14404 }
14405 }
14406 seen
14407}
14408
14409fn traversal_edges_between(
14410 handles: &BTreeSet<String>,
14411 edges: &[TraversalEdge],
14412) -> Vec<TraversalEdge> {
14413 edges
14414 .iter()
14415 .filter(|edge| handles.contains(&edge.from) && handles.contains(&edge.to))
14416 .cloned()
14417 .collect()
14418}
14419
14420fn traversal_path_edges(path: &[String], edges: &[TraversalEdge]) -> Vec<TraversalEdge> {
14421 let mut result = Vec::new();
14422 for pair in path.windows(2) {
14423 if let Some(edge) = edges.iter().find(|edge| {
14424 (edge.from == pair[0] && edge.to == pair[1])
14425 || (edge.from == pair[1] && edge.to == pair[0])
14426 }) {
14427 result.push(edge.clone());
14428 }
14429 }
14430 result
14431}
14432
14433fn sorted_traversal_nodes<'a>(
14434 nodes: impl IntoIterator<Item = &'a TraversalNode>,
14435) -> Vec<TraversalNode> {
14436 let mut nodes = nodes.into_iter().cloned().collect::<Vec<_>>();
14437 nodes.sort_by(|left, right| {
14438 left.kind
14439 .cmp(&right.kind)
14440 .then_with(|| left.label.cmp(&right.label))
14441 .then_with(|| left.path.cmp(&right.path))
14442 .then_with(|| left.handle.cmp(&right.handle))
14443 });
14444 nodes
14445}
14446
14447fn traversal_relation_score(edge: &TraversalEdge, origin: &str) -> usize {
14448 let base = match edge.relation.as_str() {
14449 "mentions" => 100,
14450 "contains" => 80,
14451 "parent" | "child" | "has_ast_span" | "represents_symbol" => 78,
14452 "contains_embedded_symbol" | "embedded_in_fence" => 77,
14453 "contains_markdown_block"
14454 | "contains_embedded_code"
14455 | "enclosing_module"
14456 | "enclosing_section" => 76,
14457 "calls" => {
14458 if edge.from == origin {
14459 70
14460 } else {
14461 65
14462 }
14463 }
14464 "handled_by" | "handles_route" => 68,
14465 "defines_route" => 62,
14466 "imports" => 62,
14467 "previous_sibling" | "next_sibling" => 54,
14468 "mentions_concept" | "mentions_entity" => 66,
14469 "semantic_relation" => 64,
14470 "tagged_concept" | "related_concept" => 58,
14471 "defines" => {
14472 if edge.from == origin {
14473 60
14474 } else {
14475 55
14476 }
14477 }
14478 _ => 10,
14479 };
14480 base + edge.weight
14481}
14482
14483fn traversal_recommendation_reason(edge: &TraversalEdge, origin: &str) -> String {
14484 match edge.relation.as_str() {
14485 "mentions" => "matched from backlog/session text".to_string(),
14486 "contains" => "contained in the selected session artifact".to_string(),
14487 "has_ast_span" => "indexed AST span for the selected symbol".to_string(),
14488 "represents_symbol" => "indexed symbol represented by the selected AST span".to_string(),
14489 "parent" => "parent AST span".to_string(),
14490 "child" => "child AST span".to_string(),
14491 "previous_sibling" => "previous AST sibling".to_string(),
14492 "next_sibling" => "next AST sibling".to_string(),
14493 "contains_markdown_block" => "Markdown section block".to_string(),
14494 "contains_embedded_symbol" => "embedded code symbol in Markdown fence".to_string(),
14495 "embedded_in_fence" => "Markdown fence containing the embedded symbol".to_string(),
14496 "contains_embedded_code" => "embedded code symbol in Markdown section".to_string(),
14497 "enclosing_module" => "nearest enclosing module".to_string(),
14498 "enclosing_section" => "nearest enclosing Markdown section".to_string(),
14499 "defines" if edge.from == origin => "symbol defined in selected file".to_string(),
14500 "defines" => "file that defines the selected symbol".to_string(),
14501 "defines_route" if edge.from == origin => "route declared in selected file".to_string(),
14502 "defines_route" => "file that declares the selected route".to_string(),
14503 "handled_by" if edge.from == origin => "handler for the selected route".to_string(),
14504 "handled_by" => "route handled by the selected symbol".to_string(),
14505 "handles_route" => "route handled by the selected AST span".to_string(),
14506 "imports" => "import dependency from the selected package".to_string(),
14507 "mentions_concept" => "cached summary concept for the selected source".to_string(),
14508 "mentions_entity" => "cached summary entity for the selected source".to_string(),
14509 "semantic_relation" => "LLM-extracted semantic relationship".to_string(),
14510 "tagged_concept" => "concept label attached to the selected entity".to_string(),
14511 "related_concept" => "co-occurring cached summary concept".to_string(),
14512 "calls" if edge.from == origin => "callee from the selected symbol".to_string(),
14513 "calls" => "caller of the selected symbol".to_string(),
14514 other => format!("connected by {other}"),
14515 }
14516}
14517
14518fn traversal_recommendations(
14519 graph: &TraversalGraphBuild,
14520 origin: Option<&str>,
14521 shortest_path: Option<&[String]>,
14522 limit: usize,
14523) -> Vec<TraversalRecommendation> {
14524 let Some(origin) = origin else {
14525 return Vec::new();
14526 };
14527 let mut recommendations = Vec::new();
14528 let mut seen = BTreeSet::new();
14529
14530 if let Some(path) = shortest_path
14531 && path.len() > 1
14532 && path.first().is_some_and(|handle| handle == origin)
14533 && let Some(next) = graph.nodes.get(&path[1])
14534 {
14535 seen.insert(next.handle.clone());
14536 recommendations.push(TraversalRecommendation {
14537 handle: next.handle.clone(),
14538 kind: next.kind.clone(),
14539 label: next.label.clone(),
14540 reason: "next hop on shortest path".to_string(),
14541 score: 1_000,
14542 expand: next.expand.clone(),
14543 });
14544 }
14545
14546 let mut candidates = graph
14547 .edges
14548 .iter()
14549 .filter_map(|edge| {
14550 let neighbor = if edge.from == origin {
14551 edge.to.as_str()
14552 } else if edge.to == origin {
14553 edge.from.as_str()
14554 } else {
14555 return None;
14556 };
14557 let node = graph.nodes.get(neighbor)?;
14558 Some((traversal_relation_score(edge, origin), edge, node))
14559 })
14560 .collect::<Vec<_>>();
14561 candidates.sort_by(|(left_score, _, left), (right_score, _, right)| {
14562 right_score
14563 .cmp(left_score)
14564 .then_with(|| left.kind.cmp(&right.kind))
14565 .then_with(|| left.label.cmp(&right.label))
14566 .then_with(|| left.handle.cmp(&right.handle))
14567 });
14568
14569 let max = if limit == 0 { usize::MAX } else { limit };
14570 for (score, edge, node) in candidates {
14571 if recommendations.len() >= max {
14572 break;
14573 }
14574 if seen.insert(node.handle.clone()) {
14575 recommendations.push(TraversalRecommendation {
14576 handle: node.handle.clone(),
14577 kind: node.kind.clone(),
14578 label: node.label.clone(),
14579 reason: traversal_recommendation_reason(edge, origin),
14580 score,
14581 expand: node.expand.clone(),
14582 });
14583 }
14584 }
14585
14586 recommendations
14587}
14588
14589fn exploration_budget_for_counts(nodes: usize, edges: usize) -> ExplorationBudget {
14590 let scale = nodes.saturating_add(edges);
14591 if scale <= 80 {
14592 ExplorationBudget {
14593 project_size: "small".to_string(),
14594 max_source_windows: 8,
14595 lines_per_window: 96,
14596 relationship_limit: 40,
14597 }
14598 } else if scale <= 800 {
14599 ExplorationBudget {
14600 project_size: "medium".to_string(),
14601 max_source_windows: 6,
14602 lines_per_window: 80,
14603 relationship_limit: 32,
14604 }
14605 } else {
14606 ExplorationBudget {
14607 project_size: "large".to_string(),
14608 max_source_windows: 4,
14609 lines_per_window: 64,
14610 relationship_limit: 24,
14611 }
14612 }
14613}
14614
14615fn exploration_node_label(node: &TraversalNode) -> String {
14616 format!("{}:{}", node.kind, node.label)
14617}
14618
14619fn exploration_source_window_for_node(
14620 root: &Path,
14621 node: &TraversalNode,
14622 budget: &ExplorationBudget,
14623) -> Option<ExplorationSourceWindow> {
14624 let file = node.path.as_ref()?;
14625 let anchor = node
14626 .line
14627 .and_then(|line| usize::try_from(line).ok())
14628 .and_then(|line| line.checked_add(1))
14629 .unwrap_or(1);
14630 let context_before = budget.lines_per_window / 3;
14631 let start = anchor.saturating_sub(context_before).max(1);
14632 let end = start
14633 .saturating_add(budget.lines_per_window)
14634 .saturating_sub(1);
14635 let handle = stable_handle("xwin", &format!("{file}:{start}:{end}:{}", node.handle));
14636 Some(ExplorationSourceWindow {
14637 handle,
14638 file: file.clone(),
14639 start,
14640 end,
14641 reason: format!("cluster around {}", exploration_node_label(node)),
14642 expand: source_read_command(root, file, start, budget.lines_per_window),
14643 })
14644}
14645
14646fn build_exploration_packet(
14647 root: &Path,
14648 totals: &TraversalTotals,
14649 selected_nodes: &[TraversalNode],
14650 selected_edges: &[TraversalEdge],
14651) -> ExplorationPacket {
14652 let budget = exploration_budget_for_counts(totals.nodes, totals.edges);
14653 let node_by_handle = selected_nodes
14654 .iter()
14655 .map(|node| (node.handle.as_str(), node))
14656 .collect::<BTreeMap<_, _>>();
14657 let relationship_map = selected_edges
14658 .iter()
14659 .take(budget.relationship_limit)
14660 .filter_map(|edge| {
14661 let from = node_by_handle.get(edge.from.as_str())?;
14662 let to = node_by_handle.get(edge.to.as_str())?;
14663 Some(ExplorationRelation {
14664 from: exploration_node_label(from),
14665 relation: edge.relation.clone(),
14666 to: exploration_node_label(to),
14667 label: edge.label.clone(),
14668 })
14669 })
14670 .collect::<Vec<_>>();
14671
14672 let mut seen_windows = BTreeSet::new();
14673 let mut source_windows = Vec::new();
14674 for node in selected_nodes {
14675 if source_windows.len() >= budget.max_source_windows {
14676 break;
14677 }
14678 let Some(window) = exploration_source_window_for_node(root, node, &budget) else {
14679 continue;
14680 };
14681 let key = (window.file.clone(), window.start, window.end);
14682 if seen_windows.insert(key) {
14683 source_windows.push(window);
14684 }
14685 }
14686
14687 ExplorationPacket {
14688 budget,
14689 relationship_map,
14690 source_windows,
14691 worker_context: Vec::new(),
14692 no_reread_guidance:
14693 "Use the source_windows expand commands for line-numbered context; avoid whole-file reads unless the needed line is outside every listed window."
14694 .to_string(),
14695 }
14696}
14697
14698pub(crate) fn traversal_report(
14699 root: &Path,
14700 scope: Option<&str>,
14701 graph: TraversalGraphBuild,
14702 query: Option<&str>,
14703 target: Option<&str>,
14704 depth: usize,
14705 limit: usize,
14706) -> Result<TraversalReport> {
14707 let totals = TraversalTotals {
14708 nodes: graph.nodes.len(),
14709 edges: graph.edges.len(),
14710 };
14711 let origin_node = query.and_then(|value| resolve_traversal_node(&graph, value));
14712 let target_node = target.and_then(|value| resolve_traversal_node(&graph, value));
14713 if let Some(query) = query
14714 && origin_node.is_none()
14715 {
14716 bail!("traversal node not found: {}", query);
14717 }
14718 if let Some(target) = target
14719 && target_node.is_none()
14720 {
14721 bail!("traversal target not found: {}", target);
14722 }
14723
14724 let (mode, selected_nodes, selected_edges, shortest_path) =
14725 if let (Some(origin), Some(target)) = (origin_node, target_node) {
14726 if let Some(handles) =
14727 traversal_shortest_handles(&graph.edges, &origin.handle, &target.handle)
14728 {
14729 let handle_set = handles.iter().cloned().collect::<BTreeSet<_>>();
14730 let nodes = handles
14731 .iter()
14732 .filter_map(|handle| graph.nodes.get(handle).cloned())
14733 .collect::<Vec<_>>();
14734 let edges = traversal_path_edges(&handles, &graph.edges);
14735 let path = TraversalPathReport {
14736 from: origin.clone(),
14737 to: target.clone(),
14738 hops: handles.len().saturating_sub(1),
14739 nodes: nodes.clone(),
14740 edges: edges.clone(),
14741 };
14742 (
14743 "path".to_string(),
14744 nodes,
14745 traversal_edges_between(&handle_set, &graph.edges),
14746 Some(path),
14747 )
14748 } else {
14749 (
14750 "path".to_string(),
14751 vec![origin.clone(), target.clone()],
14752 Vec::new(),
14753 None,
14754 )
14755 }
14756 } else if let Some(origin) = origin_node {
14757 let handles =
14758 traversal_neighborhood_handles(&graph.edges, &origin.handle, depth, limit);
14759 let nodes =
14760 sorted_traversal_nodes(handles.iter().filter_map(|handle| graph.nodes.get(handle)));
14761 let edges = traversal_edges_between(&handles, &graph.edges);
14762 ("neighborhood".to_string(), nodes, edges, None)
14763 } else {
14764 let mut nodes = sorted_traversal_nodes(graph.nodes.values());
14765 let truncated_nodes = limit > 0 && nodes.len() > limit;
14766 if truncated_nodes {
14767 nodes.truncate(limit);
14768 }
14769 let handles = nodes
14770 .iter()
14771 .map(|node| node.handle.clone())
14772 .collect::<BTreeSet<_>>();
14773 let mut edges = traversal_edges_between(&handles, &graph.edges);
14774 let truncated_edges = limit > 0 && edges.len() > limit;
14775 if truncated_edges {
14776 edges.truncate(limit);
14777 }
14778 ("export".to_string(), nodes, edges, None)
14779 };
14780
14781 let shortest_handles = shortest_path.as_ref().map(|path| {
14782 path.nodes
14783 .iter()
14784 .map(|node| node.handle.clone())
14785 .collect::<Vec<_>>()
14786 });
14787 let recommendations = traversal_recommendations(
14788 &graph,
14789 origin_node.map(|node| node.handle.as_str()),
14790 shortest_handles.as_deref(),
14791 if limit == 0 { 10 } else { limit.min(10) },
14792 );
14793 let exploration = build_exploration_packet(root, &totals, &selected_nodes, &selected_edges);
14794 let truncated = selected_nodes.len() < totals.nodes || selected_edges.len() < totals.edges;
14795
14796 Ok(TraversalReport {
14797 root: root.to_string_lossy().to_string(),
14798 scope: scope.map(str::to_string),
14799 mode,
14800 totals,
14801 query: query.map(str::to_string),
14802 target: target.map(str::to_string),
14803 nodes: selected_nodes,
14804 edges: selected_edges,
14805 shortest_path,
14806 recommendations,
14807 exploration,
14808 truncated,
14809 warnings: graph.warnings,
14810 })
14811}
14812
/// Escapes text for safe inclusion in HTML element content or a quoted
/// attribute value.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with their character references in a
/// single pass, so an ampersand introduced by one replacement is never
/// escaped a second time.
fn html_escape(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
14821
14822pub(crate) fn traversal_report_html(report: &TraversalReport) -> Result<String> {
14823 let json = serde_json::to_string(report)?.replace("</", "<\\/");
14824 let mut html = String::new();
14825 html.push_str(
14826 "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift traversal graph</title>",
14827 );
14828 html.push_str(
14829 r#"<style>
14830:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#ffffff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e;--semantic:#9a3412}
14831@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf;--semantic:#fb923c}}
14832*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.toolbar{display:flex;gap:8px;align-items:center}.toolbar input{min-width:220px;border:1px solid var(--line);border-radius:6px;background:var(--panel);color:var(--text);padding:8px 10px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 320px;gap:14px;min-height:650px}.graph-panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.graph-panel{position:relative}.legend{position:absolute;left:12px;top:12px;display:flex;flex-wrap:wrap;gap:6px;max-width:calc(100% - 24px)}.legend span{font-size:12px;background:color-mix(in srgb,var(--panel) 86%,transparent);border:1px solid var(--line);border-radius:999px;padding:4px 8px}.side{padding:14px;overflow:auto}.side h2{font-size:15px;margin:0 0 8px}.selected{border-top:1px solid var(--line);margin-top:12px;padding-top:12px}.list{display:grid;gap:8px}.row{border:1px solid 
var(--line);border-radius:6px;padding:8px;cursor:pointer}.row:hover{border-color:var(--accent)}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted)}svg{width:100%;height:650px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.edge.semantic{stroke:var(--semantic);stroke-width:1.8}.node{stroke:var(--panel);stroke-width:2;cursor:pointer}.node.semantic{stroke:var(--semantic);stroke-width:2.5}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text);pointer-events:none}.hidden{display:none}@media(max-width:900px){.top{display:block}.toolbar{margin-top:12px}.layout{grid-template-columns:1fr}.side{max-height:360px}svg{height:560px}}
14833</style>"#,
14834 );
14835 html.push_str("</head><body>");
14836 html.push_str("<div class=\"page\">");
14837 html.push_str(&format!(
14838 "<header class=\"top\"><div><h1>tsift traversal graph</h1><div class=\"meta\">mode <code>{}</code> | nodes <code>{}</code>/<code>{}</code> | edges <code>{}</code>/<code>{}</code></div></div><div class=\"toolbar\"><input id=\"filter\" type=\"search\" placeholder=\"Filter nodes\"></div></header>",
14839 html_escape(&report.mode),
14840 report.nodes.len(),
14841 report.totals.nodes,
14842 report.edges.len(),
14843 report.totals.edges
14844 ));
14845 html.push_str(
14846 r#"<main class="layout"><section class="graph-panel"><div id="legend" class="legend"></div><svg id="graph-canvas" role="img" aria-label="Traversal graph"></svg></section><aside class="side"><h2>Nodes</h2><div id="node-list" class="list"></div><div id="selected" class="selected"></div></aside></main>"#,
14847 );
14848 html.push_str("<script id=\"graph-data\" type=\"application/json\">");
14849 html.push_str(&json);
14850 html.push_str(
14851 r##"</script><script>
14852const report = JSON.parse(document.getElementById("graph-data").textContent);
14853const svg = document.getElementById("graph-canvas");
14854const list = document.getElementById("node-list");
14855const selected = document.getElementById("selected");
14856const filter = document.getElementById("filter");
14857const legend = document.getElementById("legend");
14858const nodes = report.nodes.map((node, index) => ({...node, index}));
14859const nodeByHandle = new Map(nodes.map(node => [node.handle, node]));
14860const edges = report.edges.filter(edge => nodeByHandle.has(edge.from) && nodeByHandle.has(edge.to));
14861const colorByKind = new Map([
14862 ["file", "#2563eb"], ["symbol", "#16a34a"], ["route", "#7c3aed"],
14863 ["session", "#0891b2"], ["backlog", "#dc2626"], ["job_packet", "#ea580c"],
14864 ["semantic_concept", "#9a3412"], ["semantic_entity", "#b45309"],
14865 ["source_handle", "#64748b"], ["worker_context", "#475569"], ["worker_result", "#15803d"]
14866]);
14867function color(kind){ return colorByKind.get(kind) || "#6b7280"; }
14868function isSemantic(edge){ return edge.relation.includes("concept") || edge.relation.includes("entity") || edge.relation.includes("semantic"); }
14869function text(value){ return value == null ? "" : String(value); }
14870function matches(node, query){
14871 if (!query) return true;
14872 const haystack = [node.kind,node.label,node.handle,node.ref_id,node.path,node.detail].map(text).join(" ").toLowerCase();
14873 return haystack.includes(query);
14874}
14875function layout(){
14876 const rect = svg.getBoundingClientRect();
14877 const width = rect.width || 900;
14878 const height = rect.height || 650;
14879 const cx = width / 2;
14880 const cy = height / 2;
14881 const kinds = [...new Set(nodes.map(node => node.kind))].sort();
14882 const counts = new Map();
14883 for (const node of nodes) counts.set(node.kind, (counts.get(node.kind) || 0) + 1);
14884 const offsets = new Map();
14885 for (const node of nodes) {
14886 const group = kinds.indexOf(node.kind);
14887 const index = offsets.get(node.kind) || 0;
14888 offsets.set(node.kind, index + 1);
14889 const groupCount = counts.get(node.kind) || 1;
14890 const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
14891 const angle = (Math.PI * 2 * index / Math.max(groupCount, 1)) + (group * 0.47);
14892 node.x = cx + Math.cos(angle) * ring;
14893 node.y = cy + Math.sin(angle) * ring;
14894 }
14895}
14896function draw(){
14897 const query = filter.value.trim().toLowerCase();
14898 const visible = new Set(nodes.filter(node => matches(node, query)).map(node => node.handle));
14899 svg.innerHTML = "";
14900 for (const edge of edges) {
14901 if (!visible.has(edge.from) || !visible.has(edge.to)) continue;
14902 const from = nodeByHandle.get(edge.from);
14903 const to = nodeByHandle.get(edge.to);
14904 const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
14905 line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
14906 line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
14907 line.setAttribute("class", "edge" + (isSemantic(edge) ? " semantic" : ""));
14908 line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.relation + (edge.label ? ": " + edge.label : "");
14909 svg.appendChild(line);
14910 }
14911 for (const node of nodes) {
14912 if (!visible.has(node.handle)) continue;
14913 const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
14914 circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
14915 circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
14916 circle.setAttribute("fill", color(node.kind));
14917 circle.setAttribute("class", "node" + (node.kind.startsWith("semantic_") ? " semantic" : ""));
14918 circle.addEventListener("click", () => selectNode(node));
14919 circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
14920 svg.appendChild(circle);
14921 const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
14922 label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
14923 label.setAttribute("class", "node-label");
14924 label.textContent = node.label.length > 34 ? node.label.slice(0, 31) + "..." : node.label;
14925 svg.appendChild(label);
14926 }
14927 renderList(query);
14928}
14929function renderLegend(){
14930 const kinds = [...new Set(nodes.map(node => node.kind))].sort();
14931 legend.innerHTML = kinds.map(kind => `<span><b style="color:${color(kind)}">●</b> ${kind}</span>`).join("");
14932}
14933function renderList(query){
14934 const rows = nodes.filter(node => matches(node, query)).slice(0, 120);
14935 list.innerHTML = rows.map(node => `<div class="row" data-handle="${node.handle}"><div class="kind">${node.kind}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${node.handle}</div></div>`).join("");
14936 for (const row of list.querySelectorAll(".row")) {
14937 row.addEventListener("click", () => selectNode(nodeByHandle.get(row.dataset.handle)));
14938 }
14939}
14940function selectNode(node){
14941 const adjacent = edges.filter(edge => edge.from === node.handle || edge.to === node.handle).slice(0, 20);
14942 selected.innerHTML = `<h2>${escapeHtml(node.label)}</h2><div class="kind">${node.kind}</div><p class="handle">${node.handle}</p>${node.path ? `<p>${escapeHtml(node.path)}${node.line != null ? ":" + node.line : ""}</p>` : ""}${node.detail ? `<p>${escapeHtml(node.detail)}</p>` : ""}<p><code>${escapeHtml(node.expand)}</code></p><h2>Edges</h2><div class="list">${adjacent.map(edge => `<div class="row"><div class="kind">${edge.relation}</div><div>${escapeHtml(edge.from)} -> ${escapeHtml(edge.to)}</div>${edge.label ? `<div>${escapeHtml(edge.label)}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No visible edges.</div>"}</div>`;
14943}
14944function escapeHtml(value){
14945 return text(value).replace(/[&<>"']/g, ch => ({"&":"&","<":"<",">":">","\"":""","'":"'"}[ch]));
14946}
14947filter.addEventListener("input", draw);
14948window.addEventListener("resize", () => { layout(); draw(); });
14949renderLegend();
14950layout();
14951draw();
14952if (nodes.length) selectNode(nodes[0]);
14953</script></div></body></html>"##,
14954 );
14955 Ok(html)
14956}
14957
14958fn semantic_related_report_from_store(
14959 root: &Path,
14960 scope: Option<&str>,
14961 query: &str,
14962 limit: usize,
14963 kind: SemanticRelatedKind,
14964 store: &impl GraphStore,
14965) -> Result<SemanticRelatedReport> {
14966 if query.trim().is_empty() {
14967 bail!("semantic query cannot be empty");
14968 }
14969
14970 let query_embedding = semantic_embedding(query);
14971 let node_kinds: &[&str] = match kind {
14972 SemanticRelatedKind::Concept => &["semantic_concept"],
14973 SemanticRelatedKind::Entity => &["semantic_entity"],
14974 SemanticRelatedKind::All => &["semantic_concept", "semantic_entity"],
14975 };
14976
14977 let mut items = Vec::new();
14978 for node_kind in node_kinds {
14979 for node in store.nodes_by_kind(node_kind)? {
14980 let Some(embedding) = node
14981 .properties
14982 .get("embedding")
14983 .and_then(|value| parse_semantic_embedding_property(value))
14984 else {
14985 continue;
14986 };
14987 let score = semantic_cosine(&query_embedding, &embedding);
14988 items.push(SemanticRelatedItem {
14989 handle: node
14990 .properties
14991 .get("handle")
14992 .cloned()
14993 .unwrap_or_else(|| node.id.clone()),
14994 kind: node.kind,
14995 label: node.label,
14996 score,
14997 file_path: node
14998 .properties
14999 .get("source_file")
15000 .or_else(|| node.properties.get("path"))
15001 .cloned(),
15002 source_symbol: node.properties.get("source_symbol").cloned(),
15003 detail: node
15004 .properties
15005 .get("description")
15006 .or_else(|| node.properties.get("detail"))
15007 .cloned(),
15008 expand: node
15009 .properties
15010 .get("expand")
15011 .cloned()
15012 .unwrap_or_else(|| traversal_expand_command(root, &node.id)),
15013 });
15014 }
15015 }
15016
15017 items.sort_by(|left, right| {
15018 right
15019 .score
15020 .partial_cmp(&left.score)
15021 .unwrap_or(Ordering::Equal)
15022 .then_with(|| left.kind.cmp(&right.kind))
15023 .then_with(|| left.label.cmp(&right.label))
15024 .then_with(|| left.handle.cmp(&right.handle))
15025 });
15026 if limit > 0 && items.len() > limit {
15027 items.truncate(limit);
15028 }
15029
15030 let mut warnings = Vec::new();
15031 if items.is_empty() {
15032 warnings.push(
15033 "no semantic graph rows found; run `tsift summarize --extract <path>` first"
15034 .to_string(),
15035 );
15036 }
15037
15038 Ok(SemanticRelatedReport {
15039 root: root.to_string_lossy().to_string(),
15040 scope: scope.map(str::to_string),
15041 query: query.to_string(),
15042 embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
15043 count: items.len(),
15044 items,
15045 warnings,
15046 })
15047}
15048
15049fn graph_store_semantic_node_count(store: &impl GraphStore) -> Result<usize> {
15050 Ok(store.nodes_by_kind("semantic_concept")?.len()
15051 + store.nodes_by_kind("semantic_entity")?.len())
15052}
15053
15054fn graph_db_semantic_edge_scan_cap(limit: usize) -> usize {
15055 if limit == 0 {
15056 return 0;
15057 }
15058 limit.saturating_mul(4).clamp(
15059 GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP,
15060 GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP,
15061 )
15062}
15063
/// Computes how many nodes seed expansion may discover.
///
/// A `limit` of zero returns `usize::MAX`. Otherwise the cap is the largest of
/// three times the limit (saturating), the limit itself, and `seed_count`.
fn graph_db_semantic_node_discovery_cap(seed_count: usize, limit: usize) -> usize {
    match limit {
        0 => usize::MAX,
        requested => {
            let widened = requested.saturating_mul(3);
            widened.max(requested).max(seed_count)
        }
    }
}
15070
15071fn graph_db_semantic_edge_other_id<'a>(
15072 edge: &'a SubstrateGraphEdge,
15073 current_id: &str,
15074) -> Option<&'a str> {
15075 if edge.from_id == current_id {
15076 Some(edge.to_id.as_str())
15077 } else if edge.to_id == current_id {
15078 Some(edge.from_id.as_str())
15079 } else {
15080 None
15081 }
15082}
15083
15084fn graph_db_semantic_edge_score(edge: &SubstrateGraphEdge, current_id: &str) -> i64 {
15085 let mut score = resolution::edge_kind_rank_score(&edge.kind).saturating_mul(10);
15086 score += if edge.from_id == current_id { 8 } else { 4 };
15087 score += match edge.kind.as_str() {
15088 "mentions_concept" | "mentions_entity" | "tagged_concept" | "tagged_entity"
15089 | "related_concept" => 30,
15090 "semantic_relation" => 28,
15091 "calls" => 24,
15092 "mentions" => 22,
15093 "requests_context" | "scopes_context" | "scopes_source" | "explains_result" => 18,
15094 "defines" | "contains" | "belongs_to" => 12,
15095 _ => 0,
15096 };
15097 score
15098}
15099
15100fn graph_db_semantic_seeded_neighborhood(
15101 store: &impl GraphStore,
15102 seed_ids: &[String],
15103 depth: usize,
15104 limit: usize,
15105) -> Result<GraphDbSemanticSeededSubgraph> {
15106 let seed_rank = seed_ids
15107 .iter()
15108 .enumerate()
15109 .map(|(idx, seed)| (seed.clone(), idx))
15110 .collect::<BTreeMap<_, _>>();
15111 let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
15112 let mut edges = BTreeMap::<String, SubstrateGraphEdge>::new();
15113 let mut node_score_by_id = BTreeMap::<String, i64>::new();
15114 let mut queue = VecDeque::<(String, usize)>::new();
15115 let mut seen_at_depth = BTreeMap::<String, usize>::new();
15116 let edge_scan_cap = graph_db_semantic_edge_scan_cap(limit);
15117 let node_discovery_cap = graph_db_semantic_node_discovery_cap(seed_ids.len(), limit);
15118 let mut skipped_by_edge_cap = 0usize;
15119 let mut skipped_by_node_cap = 0usize;
15120 let mut diagnostics = vec![
15121 "semantic-seeded retrieval uses phrase similarity to pick graph seeds".to_string(),
15122 "seed expansion traverses both outgoing and incident edges so code, markdown, conversation, and memory adapters can link into semantic rows without reversing their edge direction".to_string(),
15123 format!(
15124 "seed expansion ranks incident/outgoing edges before caps; per-node edge scan cap={} node discovery cap={}",
15125 if edge_scan_cap == 0 {
15126 "unbounded".to_string()
15127 } else {
15128 edge_scan_cap.to_string()
15129 },
15130 if node_discovery_cap == usize::MAX {
15131 "unbounded".to_string()
15132 } else {
15133 node_discovery_cap.to_string()
15134 }
15135 ),
15136 ];
15137
15138 for (idx, seed_id) in seed_ids.iter().enumerate() {
15139 if let Some(node) = store.node(seed_id)? {
15140 nodes.entry(seed_id.clone()).or_insert(node);
15141 node_score_by_id
15142 .entry(seed_id.clone())
15143 .or_insert(1_000_000i64.saturating_sub(idx as i64));
15144 queue.push_back((seed_id.clone(), 0));
15145 seen_at_depth.entry(seed_id.clone()).or_insert(0);
15146 } else {
15147 diagnostics.push(format!(
15148 "semantic seed {seed_id} was not present in the graph store"
15149 ));
15150 }
15151 }
15152
15153 while let Some((current_id, current_depth)) = queue.pop_front() {
15154 if current_depth >= depth {
15155 continue;
15156 }
15157
15158 let mut expansion_edges_by_key = BTreeMap::<String, SubstrateGraphEdge>::new();
15159 for edge in store.outgoing_edges(¤t_id, None)? {
15160 expansion_edges_by_key
15161 .entry(graph_db_edge_key(&edge))
15162 .or_insert(edge);
15163 }
15164 for edge in store.incident_edges(¤t_id, None)? {
15165 expansion_edges_by_key
15166 .entry(graph_db_edge_key(&edge))
15167 .or_insert(edge);
15168 }
15169 let mut expansion_edges = expansion_edges_by_key.into_values().collect::<Vec<_>>();
15170 expansion_edges.sort_by(|left, right| {
15171 graph_db_semantic_edge_score(right, ¤t_id)
15172 .cmp(&graph_db_semantic_edge_score(left, ¤t_id))
15173 .then_with(|| graph_db_edge_key(left).cmp(&graph_db_edge_key(right)))
15174 });
15175 if edge_scan_cap > 0 && expansion_edges.len() > edge_scan_cap {
15176 skipped_by_edge_cap += expansion_edges.len() - edge_scan_cap;
15177 expansion_edges.truncate(edge_scan_cap);
15178 }
15179
15180 for edge in expansion_edges {
15181 let Some(other_id) = graph_db_semantic_edge_other_id(&edge, ¤t_id) else {
15182 continue;
15183 };
15184 let other_known = nodes.contains_key(other_id);
15185 if !other_known && nodes.len() >= node_discovery_cap {
15186 skipped_by_node_cap += 1;
15187 continue;
15188 }
15189 let other_id = other_id.to_string();
15190 let edge_score = graph_db_semantic_edge_score(&edge, ¤t_id)
15191 .saturating_add((depth.saturating_sub(current_depth) as i64).saturating_mul(5));
15192 node_score_by_id
15193 .entry(other_id.clone())
15194 .and_modify(|score| *score = (*score).max(edge_score))
15195 .or_insert(edge_score);
15196 let edge_key = graph_db_edge_key(&edge);
15197 edges.entry(edge_key).or_insert_with(|| edge.clone());
15198 if let std::collections::btree_map::Entry::Vacant(entry) = nodes.entry(other_id.clone())
15199 && let Some(node) = store.node(&other_id)?
15200 {
15201 entry.insert(node);
15202 }
15203 if !nodes.contains_key(&other_id) {
15204 continue;
15205 }
15206 let next_depth = current_depth + 1;
15207 let should_queue = seen_at_depth
15208 .get(&other_id)
15209 .is_none_or(|seen_depth| next_depth < *seen_depth);
15210 if should_queue {
15211 seen_at_depth.insert(other_id.clone(), next_depth);
15212 queue.push_back((other_id, next_depth));
15213 }
15214 }
15215 }
15216
15217 if skipped_by_edge_cap > 0 {
15218 diagnostics.push(format!(
15219 "semantic-seeded expansion skipped {skipped_by_edge_cap} lower-scoring incident/outgoing edge(s) after per-node caps"
15220 ));
15221 }
15222 if skipped_by_node_cap > 0 {
15223 diagnostics.push(format!(
15224 "semantic-seeded expansion skipped {skipped_by_node_cap} lower-scoring node discovery edge(s) after the discovery cap"
15225 ));
15226 }
15227
15228 let mut nodes = nodes.into_values().collect::<Vec<_>>();
15229 nodes.sort_by(|left, right| {
15230 seed_rank
15231 .get(&left.id)
15232 .copied()
15233 .unwrap_or(usize::MAX)
15234 .cmp(&seed_rank.get(&right.id).copied().unwrap_or(usize::MAX))
15235 .then_with(|| {
15236 node_score_by_id
15237 .get(&right.id)
15238 .copied()
15239 .unwrap_or_default()
15240 .cmp(&node_score_by_id.get(&left.id).copied().unwrap_or_default())
15241 })
15242 .then(left.id.cmp(&right.id))
15243 });
15244
15245 let before_limit = nodes.len();
15246 let truncated = limit > 0 && nodes.len() > limit;
15247 if truncated {
15248 nodes.truncate(limit);
15249 diagnostics.push(format!(
15250 "semantic-seeded neighborhood truncated from {before_limit} to {limit} node(s)"
15251 ));
15252 }
15253
15254 let node_ids = nodes
15255 .iter()
15256 .map(|node| node.id.as_str())
15257 .collect::<BTreeSet<_>>();
15258 let mut edges = edges
15259 .into_values()
15260 .filter(|edge| {
15261 node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
15262 })
15263 .collect::<Vec<_>>();
15264 edges.sort_by_key(graph_db_edge_key);
15265
15266 Ok(GraphDbSemanticSeededSubgraph {
15267 nodes,
15268 edges,
15269 truncated,
15270 diagnostics,
15271 })
15272}
15273
15274#[allow(clippy::too_many_arguments)]
15275fn cmd_semantic_related(
15276 query: &str,
15277 path: &Path,
15278 scope: Option<&str>,
15279 limit: usize,
15280 kind: SemanticRelatedKind,
15281 json_output: bool,
15282 compact: bool,
15283 pretty: bool,
15284 terse: bool,
15285 schema: bool,
15286) -> Result<()> {
15287 let root = lint::resolve_project_root_or_canonical_path(path)?;
15288 write_traversal_graph_store(&root, path, scope)?;
15289 let graph_db = graph_substrate_db_path(&root, scope);
15290 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
15291 let mut report = semantic_related_report_from_store(&root, scope, query, limit, kind, &store)?;
15292 if let Some(recovery) = store.read_only_recovery() {
15293 report
15294 .warnings
15295 .push(graph_db_read_recovery_diagnostic(recovery));
15296 }
15297
15298 if json_output {
15299 println!("{}", to_json_schema(&report, pretty, terse, false, schema)?);
15300 } else if compact {
15301 for item in &report.items {
15302 println!(
15303 "{:.3}\t{}\t{}\t{}",
15304 item.score, item.kind, item.label, item.handle
15305 );
15306 }
15307 for warning in &report.warnings {
15308 eprintln!("warning: {warning}");
15309 }
15310 } else {
15311 println!(
15312 "Related semantic graph rows for {:?} ({})",
15313 report.query, report.embedding_model
15314 );
15315 for item in &report.items {
15316 println!(
15317 " {:.3} [{}] {} ({})",
15318 item.score, item.kind, item.label, item.handle
15319 );
15320 if let Some(detail) = &item.detail {
15321 println!(" {}", detail);
15322 }
15323 if let Some(file_path) = &item.file_path {
15324 println!(" file: {}", file_path);
15325 }
15326 println!(" expand: {}", item.expand);
15327 }
15328 for warning in &report.warnings {
15329 eprintln!("warning: {warning}");
15330 }
15331 }
15332
15333 Ok(())
15334}
15335
15336#[derive(Serialize)]
15337struct SourceLinePreview {
15338 line: usize,
15339 text: String,
15340}
15341
15342#[derive(Serialize)]
15343pub(crate) struct SourceRangePreview {
15344 start: usize,
15345 end: usize,
15346 total_lines: usize,
15347 truncated_before: bool,
15348 truncated_after: bool,
15349}
15350
15351#[derive(Serialize)]
15352struct SourceExpandCommands {
15353 #[serde(skip_serializing_if = "Option::is_none")]
15354 before: Option<String>,
15355 #[serde(skip_serializing_if = "Option::is_none")]
15356 after: Option<String>,
15357 #[serde(skip_serializing_if = "Option::is_none")]
15358 body: Option<String>,
15359 file: String,
15360 #[serde(skip_serializing_if = "Option::is_none")]
15361 markdown_ast: Option<String>,
15362}
15363
15364#[derive(Serialize)]
15365struct SourceSymbolRef {
15366 handle: String,
15367 name: String,
15368 kind: String,
15369 language: String,
15370 file: String,
15371 line: usize,
15372 #[serde(skip_serializing_if = "Option::is_none")]
15373 end_line: Option<usize>,
15374 #[serde(skip_serializing_if = "Option::is_none")]
15375 signature: Option<String>,
15376 #[serde(skip_serializing_if = "Option::is_none")]
15377 span: Option<AstSpanPreview>,
15378 expand: String,
15379}
15380
15381#[derive(Serialize)]
15382struct SourceSummaryRef {
15383 handle: String,
15384 symbol_name: String,
15385 file_path: String,
15386 summary: String,
15387 expand: String,
15388}
15389
15390#[derive(Serialize)]
15391struct SourceReadReport {
15392 handle: String,
15393 root: String,
15394 file: String,
15395 range: SourceRangePreview,
15396 preview: Vec<SourceLinePreview>,
15397 symbols: Vec<SourceSymbolRef>,
15398 summaries: Vec<SourceSummaryRef>,
15399 #[serde(skip_serializing_if = "Option::is_none")]
15400 markdown: Option<SourceReadMarkdownProjection>,
15401 expand: SourceExpandCommands,
15402 #[serde(skip_serializing_if = "Vec::is_empty", default)]
15403 warnings: Vec<String>,
15404}
15405
15406#[derive(Serialize)]
15407struct SymbolReadTarget {
15408 handle: String,
15409 name: String,
15410 kind: String,
15411 language: String,
15412 file: String,
15413 line: usize,
15414 #[serde(skip_serializing_if = "Option::is_none")]
15415 end_line: Option<usize>,
15416 #[serde(skip_serializing_if = "Option::is_none")]
15417 signature: Option<String>,
15418 #[serde(skip_serializing_if = "Option::is_none")]
15419 parent_module: Option<String>,
15420 #[serde(skip_serializing_if = "Option::is_none")]
15421 visibility: Option<String>,
15422 #[serde(skip_serializing_if = "Option::is_none")]
15423 span: Option<AstSpanPreview>,
15424}
15425
15426#[derive(Serialize)]
15427struct SymbolReadExpandCommands {
15428 source_window: String,
15429 #[serde(skip_serializing_if = "Option::is_none")]
15430 body: Option<String>,
15431 file: String,
15432 explain: String,
15433 callers: String,
15434 callees: String,
15435 #[serde(skip_serializing_if = "Option::is_none")]
15436 markdown_ast: Option<String>,
15437}
15438
15439#[derive(Serialize)]
15440struct SymbolReadReport {
15441 handle: String,
15442 root: String,
15443 query: String,
15444 symbol: SymbolReadTarget,
15445 range: SourceRangePreview,
15446 body: Vec<SourceLinePreview>,
15447 child_symbols: Vec<SourceSymbolRef>,
15448 summaries: Vec<SourceSummaryRef>,
15449 expand: SymbolReadExpandCommands,
15450 #[serde(skip_serializing_if = "Vec::is_empty", default)]
15451 warnings: Vec<String>,
15452}
15453
/// A markdown AST node as produced by parsing, addressed by byte offsets.
///
/// The body offsets are optional; which node kinds have a body is decided by
/// the parser that fills this in.
#[derive(Clone)]
pub(crate) struct MarkdownAstRawNode {
    handle: String,
    span_handle: String,
    name: String,
    kind: String,
    block_kind: String,
    node_kind: String,
    start_byte: usize,
    end_byte: usize,
    body_start_byte: Option<usize>,
    body_end_byte: Option<usize>,
}
15467
15468#[derive(Clone)]
15469pub(crate) struct MarkdownAstProjection {
15470 source_hash: String,
15471 nodes: Vec<MarkdownAstRawNode>,
15472 parse_duration_micros: u128,
15473 cache_hit: bool,
15474}
15475
15476#[derive(Clone)]
15477struct MarkdownAstCacheEntry {
15478 source_hash: String,
15479 nodes: Vec<MarkdownAstRawNode>,
15480 parse_duration_micros: u128,
15481}
15482
15483static MARKDOWN_AST_CACHE: OnceLock<Mutex<HashMap<String, MarkdownAstCacheEntry>>> =
15484 OnceLock::new();
15485
15486#[derive(Serialize, Clone)]
15487struct MarkdownAstNodeMetadata {
15488 #[serde(skip_serializing_if = "Option::is_none")]
15489 heading_level: Option<usize>,
15490 #[serde(skip_serializing_if = "Vec::is_empty", default)]
15491 section_path: Vec<String>,
15492 #[serde(skip_serializing_if = "Option::is_none")]
15493 section_handle: Option<String>,
15494 #[serde(skip_serializing_if = "Option::is_none")]
15495 list_depth: Option<usize>,
15496 #[serde(skip_serializing_if = "Option::is_none")]
15497 list_marker: Option<String>,
15498 #[serde(skip_serializing_if = "Option::is_none")]
15499 list_order: Option<usize>,
15500 #[serde(skip_serializing_if = "Option::is_none")]
15501 fence_language: Option<String>,
15502 #[serde(skip_serializing_if = "Option::is_none")]
15503 fence_marker: Option<String>,
15504 #[serde(skip_serializing_if = "Vec::is_empty", default)]
15505 embedded_symbols: Vec<MarkdownEmbeddedSymbol>,
15506}
15507
15508#[derive(Serialize, Clone)]
15509struct MarkdownAstNodeExpand {
15510 source_window: String,
15511 source_body: String,
15512 symbol_read: String,
15513 edit_intents: String,
15514}
15515
15516#[derive(Serialize, Clone)]
15517struct MarkdownAstCacheReport {
15518 source_hash: String,
15519 cache_hit: bool,
15520 parse_duration_micros: u128,
15521 node_count: usize,
15522 section_count: usize,
15523 list_item_count: usize,
15524 code_block_count: usize,
15525}
15526
15527#[derive(Serialize, Clone)]
15528struct MarkdownAstPhaseTiming {
15529 name: String,
15530 duration_micros: u128,
15531 detail: String,
15532}
15533
15534#[derive(Serialize, Clone)]
15535struct MarkdownAstOutlineEntry {
15536 handle: String,
15537 span_handle: String,
15538 name: String,
15539 kind: String,
15540 block_kind: String,
15541 line: usize,
15542 end_line: usize,
15543 #[serde(skip_serializing_if = "Vec::is_empty", default)]
15544 section_path: Vec<String>,
15545 child_count: usize,
15546 expand: String,
15547}
15548
15549#[derive(Serialize, Clone)]
15550struct MarkdownAstProjectionPreview {
15551 mode: String,
15552 total_nodes: usize,
15553 returned_nodes: usize,
15554 omitted_nodes: usize,
15555 selected_node: Option<String>,
15556 cache: MarkdownAstCacheReport,
15557 outline: Vec<MarkdownAstOutlineEntry>,
15558 phase_timings: Vec<MarkdownAstPhaseTiming>,
15559}
15560
15561#[derive(Serialize)]
15562struct SourceReadMarkdownProjection {
15563 handle: String,
15564 mode: String,
15565 total_nodes: usize,
15566 visible_nodes: usize,
15567 outline: Vec<MarkdownAstOutlineEntry>,
15568 expand: String,
15569}
15570
15571#[derive(Serialize, Clone)]
15572struct SourceByteRangePreview {
15573 start: usize,
15574 end: usize,
15575}
15576
15577#[derive(Serialize, Clone)]
15578struct MarkdownAstNode {
15579 handle: String,
15580 span_handle: String,
15581 name: String,
15582 kind: String,
15583 block_kind: String,
15584 node_kind: String,
15585 line: usize,
15586 end_line: usize,
15587 byte_span: SourceByteRangePreview,
15588 #[serde(skip_serializing_if = "Option::is_none")]
15589 body_byte_span: Option<SourceByteRangePreview>,
15590 parent_handle: Option<String>,
15591 #[serde(skip_serializing_if = "Vec::is_empty", default)]
15592 child_handles: Vec<String>,
15593 metadata: MarkdownAstNodeMetadata,
15594 expand: MarkdownAstNodeExpand,
15595}
15596
15597#[derive(Serialize)]
15598struct MarkdownAstExpandCommands {
15599 file: String,
15600 source_read: String,
15601 edit_intents: String,
15602}
15603
15604#[derive(Serialize)]
15605struct MarkdownAstReport {
15606 handle: String,
15607 root: String,
15608 file: String,
15609 range: SourceRangePreview,
15610 projection: MarkdownAstProjectionPreview,
15611 nodes: Vec<MarkdownAstNode>,
15612 expand: MarkdownAstExpandCommands,
15613 #[serde(skip_serializing_if = "Vec::is_empty", default)]
15614 warnings: Vec<String>,
15615}
15616
15617pub(crate) fn resolve_source_file(root: &Path, file: &Path) -> Result<PathBuf> {
15618 let candidate = if file.is_absolute() {
15619 file.to_path_buf()
15620 } else {
15621 root.join(file)
15622 };
15623 let canonical = candidate
15624 .canonicalize()
15625 .with_context(|| format!("canonicalizing source file {}", candidate.display()))?;
15626 if !canonical.is_file() {
15627 bail!("source file is not a regular file: {}", canonical.display());
15628 }
15629 let canonical_root = root
15630 .canonicalize()
15631 .with_context(|| format!("canonicalizing project root {}", root.display()))?;
15632 if !canonical.starts_with(&canonical_root) {
15633 bail!(
15634 "source file {} is outside project root {}",
15635 canonical.display(),
15636 canonical_root.display()
15637 );
15638 }
15639 Ok(canonical)
15640}
15641
15642pub(crate) fn source_read_command(root: &Path, file: &str, start: usize, lines: usize) -> String {
15643 format!(
15644 "tsift source-read {} --path {} --start {} --lines {} --budget normal",
15645 shell_quote(file),
15646 shell_quote(&root.to_string_lossy()),
15647 start,
15648 lines
15649 )
15650}
15651
15652pub(crate) fn source_symbol_read_command(root: &Path, symbol: &str, file: &str) -> String {
15653 format!(
15654 "tsift --envelope symbol-read {} --path {} --file {} --budget normal",
15655 shell_quote(symbol),
15656 shell_quote(&root.to_string_lossy()),
15657 shell_quote(file)
15658 )
15659}
15660
15661fn source_symbol_expand_command(root: &Path, symbol: &str) -> String {
15662 format!(
15663 "tsift --envelope explain {} --path {} --budget normal",
15664 shell_quote(symbol),
15665 shell_quote(&root.to_string_lossy())
15666 )
15667}
15668
15669fn source_symbol_graph_command(root: &Path, symbol: &str, relation: &str) -> String {
15670 format!(
15671 "tsift graph {} --path {} --{} --json",
15672 shell_quote(symbol),
15673 shell_quote(&root.to_string_lossy()),
15674 relation
15675 )
15676}
15677
15678fn source_summary_expand_command(root: &Path, symbol: &str) -> String {
15679 format!(
15680 "tsift summarize {} --path {} --json",
15681 shell_quote(symbol),
15682 shell_quote(&root.to_string_lossy())
15683 )
15684}
15685
15686pub(crate) fn markdown_ast_command(root: &Path, file: &str, node: Option<&str>) -> String {
15687 let mut command = format!(
15688 "tsift --envelope markdown-ast {} --path {} --budget normal",
15689 shell_quote(file),
15690 shell_quote(&root.to_string_lossy())
15691 );
15692 if let Some(node) = node {
15693 command.push_str(" --node ");
15694 command.push_str(&shell_quote(node));
15695 }
15696 command
15697}
15698
15699fn markdown_edit_intents_command(root: &Path) -> String {
15700 format!(
15701 "tsift --envelope edit-intents --path {} --budget normal",
15702 shell_quote(&root.to_string_lossy())
15703 )
15704}
15705
15706pub(crate) fn source_symbol_line(symbol: &index::StoredSymbol) -> usize {
15707 usize::try_from(symbol.line)
15708 .ok()
15709 .and_then(|line| line.checked_add(1))
15710 .unwrap_or(1)
15711}
15712
15713fn source_symbol_end_line(symbol: &index::StoredSymbol) -> Option<usize> {
15714 symbol
15715 .end_line
15716 .and_then(|line| usize::try_from(line).ok())
15717 .and_then(|line| line.checked_add(1))
15718}
15719
/// Converts an optional stored byte offset to `usize`.
///
/// Returns `None` when the value is absent or does not fit in `usize`
/// (for example when it is negative).
fn symbol_span_byte(value: Option<i64>) -> Option<usize> {
    let stored = value?;
    usize::try_from(stored).ok()
}
15723
/// Returns the 1-based line number containing byte offset `byte`.
///
/// The result is one plus the number of `\n` bytes before the offset. Offsets
/// past the end of `source` are clamped to its length.
fn source_line_for_byte(source: &[u8], byte: usize) -> usize {
    let cutoff = byte.min(source.len());
    let mut line = 1usize;
    for value in &source[..cutoff] {
        if *value == b'\n' {
            line = line.saturating_add(1);
        }
    }
    line
}
15732
15733fn source_line_for_end_byte(source: &[u8], end_byte: usize) -> usize {
15734 source_line_for_byte(source, end_byte.saturating_sub(1))
15735}
15736
15737fn ast_span_handle(
15738 file: &str,
15739 name: &str,
15740 kind: &str,
15741 start_byte: usize,
15742 end_byte: usize,
15743) -> String {
15744 stable_handle(
15745 "span",
15746 &format!("{file}:{kind}:{name}:{start_byte}:{end_byte}"),
15747 )
15748}
15749
15750pub(crate) fn stored_symbol_span_bounds(symbol: &index::StoredSymbol) -> Option<(usize, usize)> {
15751 Some((
15752 symbol_span_byte(symbol.start_byte)?,
15753 symbol_span_byte(symbol.end_byte)?,
15754 ))
15755}
15756
15757pub(crate) fn symbol_hit_span_bounds(symbol: &index::SymbolHit) -> Option<(usize, usize)> {
15758 Some((
15759 symbol_span_byte(symbol.start_byte)?,
15760 symbol_span_byte(symbol.end_byte)?,
15761 ))
15762}
15763
15764pub(crate) fn stored_symbol_span_handle(symbol: &index::StoredSymbol) -> Option<String> {
15765 let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
15766 Some(ast_span_handle(
15767 &symbol.file,
15768 &symbol.name,
15769 &symbol.kind,
15770 start_byte,
15771 end_byte,
15772 ))
15773}
15774
15775fn same_stored_symbol_span(left: &index::StoredSymbol, right: &index::StoredSymbol) -> bool {
15776 left.file == right.file
15777 && left.name == right.name
15778 && left.kind == right.kind
15779 && stored_symbol_span_bounds(left) == stored_symbol_span_bounds(right)
15780}
15781
15782fn stored_symbol_parent_span_handle(
15783 symbol: &index::StoredSymbol,
15784 symbols: &[index::StoredSymbol],
15785) -> Option<String> {
15786 let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
15787 symbols
15788 .iter()
15789 .filter(|candidate| {
15790 if candidate.file != symbol.file || same_stored_symbol_span(candidate, symbol) {
15791 return false;
15792 }
15793 let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
15794 else {
15795 return false;
15796 };
15797 candidate_start <= start_byte && candidate_end >= end_byte
15798 })
15799 .min_by_key(|candidate| {
15800 stored_symbol_span_bounds(candidate)
15801 .map(|(start, end)| end.saturating_sub(start))
15802 .unwrap_or(usize::MAX)
15803 })
15804 .and_then(stored_symbol_span_handle)
15805}
15806
15807fn stored_symbol_child_span_handles(
15808 symbol: &index::StoredSymbol,
15809 symbols: &[index::StoredSymbol],
15810 limit: usize,
15811) -> Vec<String> {
15812 let Some((start_byte, end_byte)) = stored_symbol_span_bounds(symbol) else {
15813 return Vec::new();
15814 };
15815 symbols
15816 .iter()
15817 .filter(|candidate| {
15818 if candidate.file != symbol.file || same_stored_symbol_span(candidate, symbol) {
15819 return false;
15820 }
15821 let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
15822 else {
15823 return false;
15824 };
15825 candidate_start >= start_byte && candidate_end <= end_byte
15826 })
15827 .take(limit)
15828 .filter_map(stored_symbol_span_handle)
15829 .collect()
15830}
15831
/// Derives a heading level from the line that starts at `start_byte`.
///
/// The line runs to the next `\n` or the end of `source`. After leading
/// whitespace is skipped, the leading `#` characters are counted; a count of
/// 1 to 6 is returned as the level, anything else gives `None`. A line that
/// is not valid UTF-8 is treated as empty.
fn markdown_heading_level(source: &[u8], start_byte: usize) -> Option<usize> {
    let begin = start_byte.min(source.len());
    let rest = &source[begin..];
    let line_len = rest
        .iter()
        .position(|value| *value == b'\n')
        .unwrap_or(rest.len());
    let text = std::str::from_utf8(&rest[..line_len]).unwrap_or("");
    let hashes = text
        .trim_start()
        .chars()
        .take_while(|ch| *ch == '#')
        .count();
    match hashes {
        1..=6 => Some(hashes),
        _ => None,
    }
}
15844
/// Estimates list nesting depth from the text before `start_byte` on its line.
///
/// Each tab before the offset counts as 4 columns and each space as 1; other
/// bytes count as 0. The depth is the column total divided by two, rounded
/// down.
fn markdown_list_depth(source: &[u8], start_byte: usize) -> usize {
    let marker_at = start_byte.min(source.len());
    let before_marker = &source[..marker_at];
    let line_start = match before_marker.iter().rposition(|value| *value == b'\n') {
        Some(newline_at) => newline_at + 1,
        None => 0,
    };

    let mut columns = 0usize;
    for byte in &before_marker[line_start..] {
        columns += match byte {
            b'\t' => 4,
            b' ' => 1,
            _ => 0,
        };
    }
    columns / 2
}
15862
15863fn markdown_enclosing_heading_symbols<'a>(
15864 file: &str,
15865 start_byte: usize,
15866 end_byte: usize,
15867 symbols: &'a [index::StoredSymbol],
15868) -> Vec<&'a index::StoredSymbol> {
15869 let mut headings = symbols
15870 .iter()
15871 .filter(|candidate| candidate.file == file && candidate.kind == "heading")
15872 .filter(|candidate| {
15873 let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
15874 else {
15875 return false;
15876 };
15877 candidate_start <= start_byte && candidate_end >= end_byte
15878 })
15879 .collect::<Vec<_>>();
15880 headings.sort_by(|left, right| {
15881 stored_symbol_span_bounds(left)
15882 .map(|(start, _)| start)
15883 .unwrap_or(usize::MAX)
15884 .cmp(
15885 &stored_symbol_span_bounds(right)
15886 .map(|(start, _)| start)
15887 .unwrap_or(usize::MAX),
15888 )
15889 .then(left.name.cmp(&right.name))
15890 });
15891 headings
15892}
15893
15894fn markdown_stored_symbol_metadata(
15895 symbol: &index::StoredSymbol,
15896 source: &[u8],
15897 symbols: &[index::StoredSymbol],
15898) -> Option<MarkdownSpanMetadata> {
15899 if symbol.language != "markdown" {
15900 return None;
15901 }
15902 let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
15903 let section_symbols =
15904 markdown_enclosing_heading_symbols(&symbol.file, start_byte, end_byte, symbols);
15905 let section_path = section_symbols
15906 .iter()
15907 .map(|heading| heading.name.clone())
15908 .collect::<Vec<_>>();
15909 let section_handle = section_symbols
15910 .last()
15911 .and_then(|heading| stored_symbol_span_handle(heading));
15912 let heading_level = (symbol.kind == "heading")
15913 .then(|| markdown_heading_level(source, start_byte))
15914 .flatten();
15915 let list_depth = (symbol.kind == "list_item").then(|| markdown_list_depth(source, start_byte));
15916 let fence_language = (symbol.kind == "code_block").then(|| symbol.name.clone());
15917 let embedded_symbols = if symbol.kind == "code_block" {
15918 markdown_embedded_symbols(
15919 &symbol.file,
15920 source,
15921 symbol_span_byte(symbol.body_start_byte),
15922 symbol_span_byte(symbol.body_end_byte),
15923 fence_language.as_deref(),
15924 )
15925 } else {
15926 Vec::new()
15927 };
15928
15929 (heading_level.is_some()
15930 || !section_path.is_empty()
15931 || section_handle.is_some()
15932 || list_depth.is_some()
15933 || fence_language.is_some()
15934 || !embedded_symbols.is_empty())
15935 .then_some(MarkdownSpanMetadata {
15936 heading_level,
15937 section_path,
15938 section_handle,
15939 list_depth,
15940 fence_language,
15941 embedded_symbols,
15942 })
15943}
15944
15945fn markdown_symbol_hit_metadata(
15946 symbol: &index::SymbolHit,
15947 source: &[u8],
15948 start_byte: usize,
15949) -> Option<MarkdownSpanMetadata> {
15950 if symbol.language != "markdown" {
15951 return None;
15952 }
15953 let heading_level = (symbol.kind == "heading")
15954 .then(|| markdown_heading_level(source, start_byte))
15955 .flatten();
15956 let list_depth = (symbol.kind == "list_item").then(|| markdown_list_depth(source, start_byte));
15957 let fence_language = (symbol.kind == "code_block").then(|| symbol.name.clone());
15958 let embedded_symbols = if symbol.kind == "code_block" {
15959 markdown_embedded_symbols(
15960 &symbol.file,
15961 source,
15962 symbol_span_byte(symbol.body_start_byte),
15963 symbol_span_byte(symbol.body_end_byte),
15964 fence_language.as_deref(),
15965 )
15966 } else {
15967 Vec::new()
15968 };
15969 (heading_level.is_some()
15970 || list_depth.is_some()
15971 || fence_language.is_some()
15972 || !embedded_symbols.is_empty())
15973 .then_some(MarkdownSpanMetadata {
15974 heading_level,
15975 section_path: Vec::new(),
15976 section_handle: None,
15977 list_depth,
15978 fence_language,
15979 embedded_symbols,
15980 })
15981}
15982
/// Tells whether `path` has an `md` or `mdx` extension (ASCII case-insensitive).
///
/// Paths without an extension, or with one that is not valid UTF-8, are not
/// markdown.
fn is_markdown_path(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    extension.eq_ignore_ascii_case("md") || extension.eq_ignore_ascii_case("mdx")
}
15989
/// Maps a markdown symbol kind to its block kind name.
///
/// `heading` becomes `section` and `code_block` becomes `fenced_code_block`;
/// every other kind, including `list_item`, is returned unchanged.
fn markdown_ast_block_kind(kind: &str) -> String {
    let block_kind = if kind == "heading" {
        "section"
    } else if kind == "code_block" {
        "fenced_code_block"
    } else {
        kind
    };
    String::from(block_kind)
}
15999
/// Normalizes a fence info string to a lowercase language key.
///
/// Only the first whitespace-separated token is used. Leading `language-` and
/// then `lang-` prefixes are stripped, surrounding backticks and quotes are
/// trimmed, and the rest is ASCII-lowercased. Returns `None` when nothing is
/// left.
fn markdown_embedded_language_key(language: &str) -> Option<String> {
    let first_token = language.split_whitespace().next()?;
    let unprefixed = first_token
        .trim_start_matches("language-")
        .trim_start_matches("lang-");
    let bare = unprefixed.trim_matches(|ch| matches!(ch, '`' | '"' | '\''));
    if bare.is_empty() {
        None
    } else {
        Some(bare.to_ascii_lowercase())
    }
}
16012
16013fn markdown_embedded_lang(language: &str) -> Option<graph::Lang> {
16014 let key = markdown_embedded_language_key(language)?;
16015 let extension = match key.as_str() {
16016 "rust" => "rs",
16017 "python" => "py",
16018 "typescript" => "ts",
16019 "javascript" => "js",
16020 "kotlin" => "kt",
16021 "shell" | "sh" | "zsh" => "bash",
16022 other => other,
16023 };
16024 let lang = graph::Lang::from_extension(extension)?;
16025 (lang.name() != "markdown").then_some(lang)
16026}
16027
16028fn markdown_embedded_ast_span_handle(
16029 file: &str,
16030 language: &str,
16031 name: &str,
16032 kind: &str,
16033 start_byte: usize,
16034 end_byte: usize,
16035) -> String {
16036 stable_handle(
16037 "span",
16038 &format!("{file}:embedded:{language}:{kind}:{name}:{start_byte}:{end_byte}"),
16039 )
16040}
16041
16042fn markdown_embedded_symbols(
16043 file: &str,
16044 source: &[u8],
16045 body_start_byte: Option<usize>,
16046 body_end_byte: Option<usize>,
16047 fence_language: Option<&str>,
16048) -> Vec<MarkdownEmbeddedSymbol> {
16049 let Some(fence_language) = fence_language else {
16050 return Vec::new();
16051 };
16052 let Some(lang) = markdown_embedded_lang(fence_language) else {
16053 return Vec::new();
16054 };
16055 let Some((body_start_byte, body_end_byte)) = body_start_byte.zip(body_end_byte) else {
16056 return Vec::new();
16057 };
16058 let Some(body) = source.get(body_start_byte.min(source.len())..body_end_byte.min(source.len()))
16059 else {
16060 return Vec::new();
16061 };
16062 if body.is_empty() {
16063 return Vec::new();
16064 }
16065
16066 let Ok(symbols) = lang.extract_symbols(body) else {
16067 return Vec::new();
16068 };
16069 let language = lang.name().to_string();
16070 symbols
16071 .into_iter()
16072 .map(|symbol| {
16073 let start_byte = body_start_byte.saturating_add(symbol.start_byte);
16074 let end_byte = body_start_byte.saturating_add(symbol.end_byte);
16075 let body_start = symbol
16076 .body_start_byte
16077 .map(|byte| body_start_byte.saturating_add(byte));
16078 let body_end = symbol
16079 .body_end_byte
16080 .map(|byte| body_start_byte.saturating_add(byte));
16081 let start_line = source_line_for_byte(source, start_byte);
16082 let end_line = source_line_for_end_byte(source, end_byte).max(start_line);
16083 MarkdownEmbeddedSymbol {
16084 handle: markdown_embedded_ast_span_handle(
16085 file,
16086 &language,
16087 &symbol.name,
16088 &symbol.kind,
16089 start_byte,
16090 end_byte,
16091 ),
16092 name: symbol.name,
16093 kind: symbol.kind,
16094 language: language.clone(),
16095 node_kind: symbol.node_kind,
16096 start_byte,
16097 end_byte,
16098 start_line,
16099 end_line,
16100 body_start_byte: body_start,
16101 body_end_byte: body_end,
16102 body_start_line: body_start.map(|byte| source_line_for_byte(source, byte)),
16103 body_end_line: body_end.map(|byte| source_line_for_end_byte(source, byte)),
16104 }
16105 })
16106 .collect()
16107}
16108
/// Returns the text of the line that contains `start_byte`, without its
/// trailing newline.
///
/// `start_byte` is clamped to the source length. When it points at a newline
/// byte, the line ending at that newline is returned. A line that is not
/// valid UTF-8 yields an empty string.
fn markdown_source_line(source: &[u8], start_byte: usize) -> &str {
    let pivot = source.len().min(start_byte);
    let (before, after) = source.split_at(pivot);
    let begin = match before.iter().rposition(|&byte| byte == b'\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    let finish = match after.iter().position(|&byte| byte == b'\n') {
        Some(offset) => pivot + offset,
        None => source.len(),
    };
    match std::str::from_utf8(&source[begin..finish]) {
        Ok(line) => line,
        Err(_) => "",
    }
}
16123
16124fn markdown_list_attributes(source: &[u8], start_byte: usize) -> (Option<String>, Option<usize>) {
16125 let line = markdown_source_line(source, start_byte);
16126 let trimmed = line.trim_start();
16127 for marker in ["-", "*", "+"] {
16128 if trimmed
16129 .strip_prefix(marker)
16130 .and_then(|rest| rest.strip_prefix(' '))
16131 .is_some()
16132 {
16133 return (Some(marker.to_string()), None);
16134 }
16135 }
16136
16137 let digit_end = trimmed
16138 .find(|ch: char| !ch.is_ascii_digit())
16139 .unwrap_or(trimmed.len());
16140 let (digits, rest) = trimmed.split_at(digit_end);
16141 if !digits.is_empty() {
16142 for marker in [".", ")"] {
16143 if rest
16144 .strip_prefix(marker)
16145 .and_then(|value| value.strip_prefix(' '))
16146 .is_some()
16147 {
16148 return (
16149 Some(format!("{digits}{marker}")),
16150 digits.parse::<usize>().ok(),
16151 );
16152 }
16153 }
16154 }
16155 (None, None)
16156}
16157
16158fn markdown_fence_marker(source: &[u8], start_byte: usize) -> Option<String> {
16159 let line = markdown_source_line(source, start_byte);
16160 let trimmed = line.trim_start();
16161 ["```", "~~~"]
16162 .into_iter()
16163 .find(|marker| trimmed.starts_with(marker))
16164 .map(str::to_string)
16165}
16166
16167fn markdown_ast_extract_raw_nodes(file: &str, source: &[u8]) -> Result<Vec<MarkdownAstRawNode>> {
16168 let mut nodes = graph::Lang::Markdown
16169 .extract_symbols(source)
16170 .context("extracting Markdown AST nodes")?
16171 .into_iter()
16172 .map(|symbol| {
16173 let body_start_byte = symbol.body_start_byte;
16174 let body_end_byte = symbol.body_end_byte;
16175 let span_handle = ast_span_handle(
16176 file,
16177 &symbol.name,
16178 &symbol.kind,
16179 symbol.start_byte,
16180 symbol.end_byte,
16181 );
16182 MarkdownAstRawNode {
16183 handle: stable_handle(
16184 "mdast",
16185 &format!(
16186 "{}:{}:{}:{}:{}",
16187 file, symbol.kind, symbol.name, symbol.start_byte, symbol.end_byte
16188 ),
16189 ),
16190 span_handle,
16191 name: symbol.name,
16192 kind: symbol.kind.clone(),
16193 block_kind: markdown_ast_block_kind(&symbol.kind),
16194 node_kind: symbol.node_kind,
16195 start_byte: symbol.start_byte,
16196 end_byte: symbol.end_byte,
16197 body_start_byte,
16198 body_end_byte,
16199 }
16200 })
16201 .collect::<Vec<_>>();
16202 nodes.sort_by(|left, right| {
16203 left.start_byte
16204 .cmp(&right.start_byte)
16205 .then(left.end_byte.cmp(&right.end_byte))
16206 .then(left.kind.cmp(&right.kind))
16207 .then(left.name.cmp(&right.name))
16208 });
16209 Ok(nodes)
16210}
16211
16212pub(crate) fn markdown_ast_projection(file: &str, source: &[u8]) -> Result<MarkdownAstProjection> {
16213 let source_hash = blake3::hash(source).to_hex().to_string();
16214 let cache_key = format!("{file}:{source_hash}");
16215 let cache = MARKDOWN_AST_CACHE.get_or_init(|| Mutex::new(HashMap::new()));
16216 if let Some(entry) = cache
16217 .lock()
16218 .expect("markdown ast cache poisoned")
16219 .get(&cache_key)
16220 {
16221 return Ok(MarkdownAstProjection {
16222 source_hash: entry.source_hash.clone(),
16223 nodes: entry.nodes.clone(),
16224 parse_duration_micros: entry.parse_duration_micros,
16225 cache_hit: true,
16226 });
16227 }
16228
16229 let started = Instant::now();
16230 let nodes = markdown_ast_extract_raw_nodes(file, source)?;
16231 let parse_duration_micros = started.elapsed().as_micros();
16232 cache.lock().expect("markdown ast cache poisoned").insert(
16233 cache_key,
16234 MarkdownAstCacheEntry {
16235 source_hash: source_hash.clone(),
16236 nodes: nodes.clone(),
16237 parse_duration_micros,
16238 },
16239 );
16240 Ok(MarkdownAstProjection {
16241 source_hash,
16242 nodes,
16243 parse_duration_micros,
16244 cache_hit: false,
16245 })
16246}
16247
16248fn markdown_ast_cache_report(projection: &MarkdownAstProjection) -> MarkdownAstCacheReport {
16249 MarkdownAstCacheReport {
16250 source_hash: projection.source_hash.clone(),
16251 cache_hit: projection.cache_hit,
16252 parse_duration_micros: projection.parse_duration_micros,
16253 node_count: projection.nodes.len(),
16254 section_count: projection
16255 .nodes
16256 .iter()
16257 .filter(|node| node.kind == "heading")
16258 .count(),
16259 list_item_count: projection
16260 .nodes
16261 .iter()
16262 .filter(|node| node.kind == "list_item")
16263 .count(),
16264 code_block_count: projection
16265 .nodes
16266 .iter()
16267 .filter(|node| node.kind == "code_block")
16268 .count(),
16269 }
16270}
16271
16272fn markdown_ast_node_direct_child_count(
16273 node: &MarkdownAstRawNode,
16274 nodes: &[MarkdownAstRawNode],
16275) -> usize {
16276 nodes
16277 .iter()
16278 .filter(|candidate| {
16279 markdown_ast_parent_handle(candidate, nodes).as_deref() == Some(&node.handle)
16280 })
16281 .count()
16282}
16283
16284fn markdown_ast_outline_entry(
16285 root: &Path,
16286 file: &str,
16287 source: &[u8],
16288 nodes: &[MarkdownAstRawNode],
16289 node: &MarkdownAstRawNode,
16290 max_bytes: usize,
16291) -> MarkdownAstOutlineEntry {
16292 let line = source_line_for_byte(source, node.start_byte);
16293 let end_line = source_line_for_end_byte(source, node.end_byte).max(line);
16294 MarkdownAstOutlineEntry {
16295 handle: node.handle.clone(),
16296 span_handle: node.span_handle.clone(),
16297 name: truncate_for_budget(&node.name, max_bytes),
16298 kind: node.kind.clone(),
16299 block_kind: node.block_kind.clone(),
16300 line,
16301 end_line,
16302 section_path: markdown_ast_node_metadata(file, node, source, nodes).section_path,
16303 child_count: markdown_ast_node_direct_child_count(node, nodes),
16304 expand: markdown_ast_command(root, file, Some(&node.handle)),
16305 }
16306}
16307
16308fn markdown_ast_outline_entries(
16309 root: &Path,
16310 file: &str,
16311 source: &[u8],
16312 nodes: &[MarkdownAstRawNode],
16313 limit: usize,
16314 max_bytes: usize,
16315) -> Vec<MarkdownAstOutlineEntry> {
16316 let mut headings = nodes
16317 .iter()
16318 .filter(|node| node.kind == "heading")
16319 .collect::<Vec<_>>();
16320 let mut blocks = nodes
16321 .iter()
16322 .filter(|node| node.kind != "heading")
16323 .collect::<Vec<_>>();
16324 headings.sort_by_key(|node| (node.start_byte, node.end_byte));
16325 blocks.sort_by_key(|node| (node.start_byte, node.end_byte));
16326 headings
16327 .into_iter()
16328 .chain(blocks)
16329 .take(limit)
16330 .map(|node| markdown_ast_outline_entry(root, file, source, nodes, node, max_bytes))
16331 .collect()
16332}
16333
16334fn markdown_ast_node_intersects_lines(
16335 source: &[u8],
16336 node: &MarkdownAstRawNode,
16337 start: usize,
16338 end: usize,
16339) -> bool {
16340 let line = source_line_for_byte(source, node.start_byte);
16341 let end_line = source_line_for_end_byte(source, node.end_byte).max(line);
16342 line <= end && end_line >= start
16343}
16344
16345fn source_read_markdown_projection(
16346 root: &Path,
16347 file: &str,
16348 source: &[u8],
16349 start: usize,
16350 end: usize,
16351 budget: ResponseBudget,
16352) -> Result<SourceReadMarkdownProjection> {
16353 let projection = markdown_ast_projection(file, source)?;
16354 let visible_nodes = projection
16355 .nodes
16356 .iter()
16357 .filter(|node| markdown_ast_node_intersects_lines(source, node, start, end))
16358 .collect::<Vec<_>>();
16359 let mut outline_nodes = visible_nodes.clone();
16360 outline_nodes.sort_by_key(|node| {
16361 (
16362 node.kind != "heading",
16363 node.start_byte,
16364 node.end_byte,
16365 node.name.as_str(),
16366 )
16367 });
16368 let outline = outline_nodes
16369 .into_iter()
16370 .take(budget.preview_items())
16371 .map(|node| {
16372 markdown_ast_outline_entry(
16373 root,
16374 file,
16375 source,
16376 &projection.nodes,
16377 node,
16378 budget.preview_bytes(),
16379 )
16380 })
16381 .collect::<Vec<_>>();
16382 Ok(SourceReadMarkdownProjection {
16383 handle: stable_handle(
16384 "mdproj",
16385 &format!("{file}:{start}:{end}:{}", projection.source_hash),
16386 ),
16387 mode: "window_outline".to_string(),
16388 total_nodes: projection.nodes.len(),
16389 visible_nodes: visible_nodes.len(),
16390 outline,
16391 expand: markdown_ast_command(root, file, None),
16392 })
16393}
16394
16395fn markdown_ast_contains(parent: &MarkdownAstRawNode, child: &MarkdownAstRawNode) -> bool {
16396 if parent.handle == child.handle {
16397 return false;
16398 }
16399 parent.start_byte <= child.start_byte && parent.end_byte >= child.end_byte
16400}
16401
16402fn markdown_ast_parent_handle(
16403 node: &MarkdownAstRawNode,
16404 nodes: &[MarkdownAstRawNode],
16405) -> Option<String> {
16406 nodes
16407 .iter()
16408 .filter(|candidate| markdown_ast_contains(candidate, node))
16409 .min_by_key(|candidate| {
16410 (
16411 candidate.end_byte.saturating_sub(candidate.start_byte),
16412 candidate.start_byte,
16413 )
16414 })
16415 .map(|candidate| candidate.handle.clone())
16416}
16417
16418fn markdown_ast_child_handles(
16419 node: &MarkdownAstRawNode,
16420 nodes: &[MarkdownAstRawNode],
16421 limit: usize,
16422) -> Vec<String> {
16423 nodes
16424 .iter()
16425 .filter(|candidate| {
16426 markdown_ast_parent_handle(candidate, nodes).as_deref() == Some(&node.handle)
16427 })
16428 .take(limit)
16429 .map(|candidate| candidate.handle.clone())
16430 .collect()
16431}
16432
16433fn markdown_ast_section_nodes<'a>(
16434 node: &MarkdownAstRawNode,
16435 nodes: &'a [MarkdownAstRawNode],
16436) -> Vec<&'a MarkdownAstRawNode> {
16437 let mut headings = nodes
16438 .iter()
16439 .filter(|candidate| candidate.kind == "heading")
16440 .filter(|candidate| {
16441 candidate.start_byte <= node.start_byte && candidate.end_byte >= node.end_byte
16442 })
16443 .collect::<Vec<_>>();
16444 headings.sort_by(|left, right| {
16445 left.start_byte
16446 .cmp(&right.start_byte)
16447 .then(left.end_byte.cmp(&right.end_byte))
16448 .then(left.name.cmp(&right.name))
16449 });
16450 headings
16451}
16452
16453fn markdown_ast_node_metadata(
16454 file: &str,
16455 node: &MarkdownAstRawNode,
16456 source: &[u8],
16457 nodes: &[MarkdownAstRawNode],
16458) -> MarkdownAstNodeMetadata {
16459 let section_nodes = markdown_ast_section_nodes(node, nodes);
16460 let section_path = section_nodes
16461 .iter()
16462 .map(|heading| heading.name.clone())
16463 .collect::<Vec<_>>();
16464 let section_handle = section_nodes.last().map(|heading| heading.handle.clone());
16465 let heading_level = (node.kind == "heading")
16466 .then(|| markdown_heading_level(source, node.start_byte))
16467 .flatten();
16468 let (list_marker, list_order) = if node.kind == "list_item" {
16469 markdown_list_attributes(source, node.start_byte)
16470 } else {
16471 (None, None)
16472 };
16473 let fence_language = (node.kind == "code_block").then(|| node.name.clone());
16474 let embedded_symbols = if node.kind == "code_block" {
16475 markdown_embedded_symbols(
16476 file,
16477 source,
16478 node.body_start_byte,
16479 node.body_end_byte,
16480 fence_language.as_deref(),
16481 )
16482 } else {
16483 Vec::new()
16484 };
16485 MarkdownAstNodeMetadata {
16486 heading_level,
16487 section_path,
16488 section_handle,
16489 list_depth: (node.kind == "list_item")
16490 .then(|| markdown_list_depth(source, node.start_byte)),
16491 list_marker,
16492 list_order,
16493 fence_language,
16494 fence_marker: (node.kind == "code_block")
16495 .then(|| markdown_fence_marker(source, node.start_byte))
16496 .flatten(),
16497 embedded_symbols,
16498 }
16499}
16500
16501fn markdown_ast_node_expand(
16502 root: &Path,
16503 file: &str,
16504 node: &MarkdownAstRawNode,
16505 source: &[u8],
16506) -> MarkdownAstNodeExpand {
16507 let start_line = source_line_for_byte(source, node.start_byte);
16508 let end_line = source_line_for_end_byte(source, node.end_byte).max(start_line);
16509 let line_count = end_line.saturating_sub(start_line).saturating_add(1).max(1);
16510 let body_start_line = node
16511 .body_start_byte
16512 .map(|byte| source_line_for_byte(source, byte))
16513 .unwrap_or(start_line);
16514 let body_end_line = node
16515 .body_end_byte
16516 .map(|byte| source_line_for_end_byte(source, byte))
16517 .unwrap_or(end_line)
16518 .max(body_start_line);
16519 let body_line_count = body_end_line
16520 .saturating_sub(body_start_line)
16521 .saturating_add(1)
16522 .max(1);
16523 MarkdownAstNodeExpand {
16524 source_window: source_read_command(root, file, start_line, line_count),
16525 source_body: source_read_command(root, file, body_start_line, body_line_count),
16526 symbol_read: source_symbol_read_command(root, &node.name, file),
16527 edit_intents: markdown_edit_intents_command(root),
16528 }
16529}
16530
16531fn markdown_ast_node(
16532 root: &Path,
16533 file: &str,
16534 node: &MarkdownAstRawNode,
16535 source: &[u8],
16536 nodes: &[MarkdownAstRawNode],
16537 child_limit: usize,
16538) -> MarkdownAstNode {
16539 let line = source_line_for_byte(source, node.start_byte);
16540 let end_line = source_line_for_end_byte(source, node.end_byte).max(line);
16541 let body_byte_span = node
16542 .body_start_byte
16543 .zip(node.body_end_byte)
16544 .map(|(start, end)| SourceByteRangePreview { start, end });
16545 MarkdownAstNode {
16546 handle: node.handle.clone(),
16547 span_handle: node.span_handle.clone(),
16548 name: node.name.clone(),
16549 kind: node.kind.clone(),
16550 block_kind: node.block_kind.clone(),
16551 node_kind: node.node_kind.clone(),
16552 line,
16553 end_line,
16554 byte_span: SourceByteRangePreview {
16555 start: node.start_byte,
16556 end: node.end_byte,
16557 },
16558 body_byte_span,
16559 parent_handle: markdown_ast_parent_handle(node, nodes),
16560 child_handles: markdown_ast_child_handles(node, nodes, child_limit),
16561 metadata: markdown_ast_node_metadata(file, node, source, nodes),
16562 expand: markdown_ast_node_expand(root, file, node, source),
16563 }
16564}
16565
16566pub(crate) fn stored_symbol_ast_span(
16567 symbol: &index::StoredSymbol,
16568 source: &[u8],
16569 symbols: &[index::StoredSymbol],
16570 child_limit: usize,
16571) -> Option<AstSpanPreview> {
16572 let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
16573 let node_kind = symbol.node_kind.clone()?;
16574 let body_start_byte = symbol_span_byte(symbol.body_start_byte);
16575 let body_end_byte = symbol_span_byte(symbol.body_end_byte);
16576 Some(AstSpanPreview {
16577 handle: ast_span_handle(
16578 &symbol.file,
16579 &symbol.name,
16580 &symbol.kind,
16581 start_byte,
16582 end_byte,
16583 ),
16584 node_kind,
16585 start_byte,
16586 end_byte,
16587 start_line: source_line_for_byte(source, start_byte),
16588 end_line: source_line_for_end_byte(source, end_byte),
16589 body_start_byte,
16590 body_end_byte,
16591 body_start_line: body_start_byte.map(|byte| source_line_for_byte(source, byte)),
16592 body_end_line: body_end_byte.map(|byte| source_line_for_end_byte(source, byte)),
16593 parent_handle: stored_symbol_parent_span_handle(symbol, symbols),
16594 child_handles: stored_symbol_child_span_handles(symbol, symbols, child_limit),
16595 markdown: markdown_stored_symbol_metadata(symbol, source, symbols),
16596 })
16597}
16598
16599pub(crate) fn symbol_hit_ast_span(symbol: &index::SymbolHit, source: &[u8]) -> Option<AstSpanPreview> {
16600 let (start_byte, end_byte) = symbol_hit_span_bounds(symbol)?;
16601 let node_kind = symbol.node_kind.clone()?;
16602 let body_start_byte = symbol_span_byte(symbol.body_start_byte);
16603 let body_end_byte = symbol_span_byte(symbol.body_end_byte);
16604 Some(AstSpanPreview {
16605 handle: ast_span_handle(
16606 &symbol.file,
16607 &symbol.name,
16608 &symbol.kind,
16609 start_byte,
16610 end_byte,
16611 ),
16612 node_kind,
16613 start_byte,
16614 end_byte,
16615 start_line: source_line_for_byte(source, start_byte),
16616 end_line: source_line_for_end_byte(source, end_byte),
16617 body_start_byte,
16618 body_end_byte,
16619 body_start_line: body_start_byte.map(|byte| source_line_for_byte(source, byte)),
16620 body_end_line: body_end_byte.map(|byte| source_line_for_end_byte(source, byte)),
16621 parent_handle: None,
16622 child_handles: Vec::new(),
16623 markdown: markdown_symbol_hit_metadata(symbol, source, start_byte),
16624 })
16625}
16626
16627pub(crate) fn symbol_hit_line(symbol: &index::SymbolHit) -> usize {
16628 usize::try_from(symbol.line)
16629 .ok()
16630 .and_then(|line| line.checked_add(1))
16631 .unwrap_or(1)
16632}
16633
16634pub(crate) fn symbol_hit_end_line(symbol: &index::SymbolHit) -> Option<usize> {
16635 symbol
16636 .end_line
16637 .and_then(|line| usize::try_from(line).ok())
16638 .and_then(|line| line.checked_add(1))
16639}
16640
16641fn source_symbol_intersects(symbol: &index::StoredSymbol, start: usize, end: usize) -> bool {
16642 if end == 0 {
16643 return false;
16644 }
16645 let symbol_start = source_symbol_line(symbol);
16646 let symbol_end = source_symbol_end_line(symbol).unwrap_or(symbol_start);
16647 symbol_start <= end && symbol_end >= start
16648}
16649
16650#[allow(clippy::too_many_arguments)]
16651fn load_source_symbols(
16652 root: &Path,
16653 file_abs: &Path,
16654 file_display: &str,
16655 source: &[u8],
16656 scope: Option<&str>,
16657 start: usize,
16658 end: usize,
16659 limit: usize,
16660 max_bytes: usize,
16661 warnings: &mut Vec<String>,
16662) -> Vec<SourceSymbolRef> {
16663 let db_path = match resolve_query_db_path(root, file_abs, scope) {
16664 Ok(path) => path,
16665 Err(err) => {
16666 warnings.push(format!("index refs unavailable: {err:#}"));
16667 return Vec::new();
16668 }
16669 };
16670 if !db_path.exists() {
16671 warnings.push(format!(
16672 "index refs unavailable: no index found at {}",
16673 db_path.display()
16674 ));
16675 return Vec::new();
16676 }
16677
16678 let db = match index::IndexDb::open_read_only_resilient(&db_path) {
16679 Ok(db) => db,
16680 Err(err) => {
16681 warnings.push(format!("index refs unavailable: {err:#}"));
16682 return Vec::new();
16683 }
16684 };
16685
16686 let file_key = file_abs.to_string_lossy().to_string();
16687 let symbols = match db.symbols_for_file(&file_key) {
16688 Ok(symbols) => symbols,
16689 Err(err) => {
16690 warnings.push(format!("symbol refs unavailable: {err:#}"));
16691 return Vec::new();
16692 }
16693 };
16694
16695 symbols
16696 .iter()
16697 .filter(|symbol| source_symbol_intersects(symbol, start, end))
16698 .take(limit)
16699 .map(|symbol| {
16700 let line = source_symbol_line(symbol);
16701 let end_line = source_symbol_end_line(symbol);
16702 let handle = stable_handle(
16703 "ssym",
16704 &format!("{}:{}:{}", file_display, symbol.name, line),
16705 );
16706 SourceSymbolRef {
16707 handle,
16708 name: truncate_for_budget(&symbol.name, max_bytes),
16709 kind: symbol.kind.clone(),
16710 language: symbol.language.clone(),
16711 file: file_display.to_string(),
16712 line,
16713 end_line,
16714 signature: symbol
16715 .signature
16716 .clone()
16717 .map(|signature| truncate_for_budget(&signature, max_bytes)),
16718 span: stored_symbol_ast_span(symbol, source, &symbols, limit),
16719 expand: source_symbol_read_command(root, &symbol.name, file_display),
16720 }
16721 })
16722 .collect()
16723}
16724
16725fn load_source_summaries(
16726 root: &Path,
16727 file_display: &str,
16728 limit: usize,
16729 max_bytes: usize,
16730 warnings: &mut Vec<String>,
16731) -> Vec<SourceSummaryRef> {
16732 let db_path = root.join(".tsift/summaries.db");
16733 if !db_path.exists() {
16734 return Vec::new();
16735 }
16736 let db = match summarize::SummaryDb::open_read_only_resilient(&db_path) {
16737 Ok(db) => db,
16738 Err(err) => {
16739 warnings.push(format!("summary refs unavailable: {err:#}"));
16740 return Vec::new();
16741 }
16742 };
16743 let summaries = match db.get_by_file(file_display) {
16744 Ok(summaries) => summaries,
16745 Err(err) => {
16746 warnings.push(format!("summary refs unavailable: {err:#}"));
16747 return Vec::new();
16748 }
16749 };
16750
16751 summaries
16752 .into_iter()
16753 .take(limit)
16754 .map(|summary| SourceSummaryRef {
16755 handle: stable_handle(
16756 "sum",
16757 &format!(
16758 "{}:{}:{}",
16759 summary.file_path, summary.symbol_name, summary.id
16760 ),
16761 ),
16762 symbol_name: truncate_for_budget(&summary.symbol_name, max_bytes),
16763 file_path: summary.file_path,
16764 summary: truncate_for_budget(&summary.summary, max_bytes),
16765 expand: source_summary_expand_command(root, &summary.symbol_name),
16766 })
16767 .collect()
16768}
16769
16770fn cmd_markdown_ast(
16771 file: &Path,
16772 path: &Path,
16773 node: Option<&str>,
16774 format: OutputFormat,
16775 absolute: bool,
16776 budget: ResponseBudget,
16777) -> Result<()> {
16778 let root = lint::resolve_project_root_or_canonical_path(path)?;
16779 let file_abs = resolve_source_file(&root, file)?;
16780 if !is_markdown_path(&file_abs) {
16781 bail!(
16782 "markdown-ast only supports Markdown files (.md/.mdx): {}",
16783 file_abs.display()
16784 );
16785 }
16786 let file_display = if absolute {
16787 file_abs.to_string_lossy().to_string()
16788 } else {
16789 relativize_pathbuf(&file_abs, &root)
16790 .to_string_lossy()
16791 .to_string()
16792 };
16793 let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
16794 let text = String::from_utf8_lossy(&source);
16795 let total_lines = text.lines().count();
16796 let projection = markdown_ast_projection(&file_display, &source)?;
16797 let raw_nodes = &projection.nodes;
16798 let max_items = budget.preview_items();
16799 let max_bytes = budget.preview_bytes();
16800
16801 let selected_nodes = if let Some(handle) = node {
16802 let matches = raw_nodes
16803 .iter()
16804 .filter(|candidate| candidate.handle == handle || candidate.span_handle == handle)
16805 .collect::<Vec<_>>();
16806 if matches.is_empty() {
16807 bail!("Markdown AST node handle {handle:?} was not found in {file_display}");
16808 }
16809 matches
16810 } else {
16811 raw_nodes.iter().take(max_items).collect::<Vec<_>>()
16812 };
16813 let nodes = selected_nodes
16814 .into_iter()
16815 .map(|raw| {
16816 let mut node =
16817 markdown_ast_node(&root, &file_display, raw, &source, raw_nodes, max_items);
16818 node.name = truncate_for_budget(&node.name, max_bytes);
16819 node
16820 })
16821 .collect::<Vec<_>>();
16822 let outline_started = Instant::now();
16823 let outline = markdown_ast_outline_entries(
16824 &root,
16825 &file_display,
16826 &source,
16827 raw_nodes,
16828 max_items,
16829 max_bytes,
16830 );
16831 let outline_duration_micros = outline_started.elapsed().as_micros();
16832 let projection_preview = MarkdownAstProjectionPreview {
16833 mode: if node.is_some() {
16834 "selected_node".to_string()
16835 } else {
16836 "outline_first".to_string()
16837 },
16838 total_nodes: raw_nodes.len(),
16839 returned_nodes: nodes.len(),
16840 omitted_nodes: raw_nodes.len().saturating_sub(nodes.len()),
16841 selected_node: node.map(str::to_string),
16842 cache: markdown_ast_cache_report(&projection),
16843 outline,
16844 phase_timings: vec![
16845 MarkdownAstPhaseTiming {
16846 name: "parse_extract".to_string(),
16847 duration_micros: projection.parse_duration_micros,
16848 detail: if projection.cache_hit {
16849 "reused cached tree-sitter Markdown symbol extraction".to_string()
16850 } else {
16851 "tree-sitter Markdown symbol extraction".to_string()
16852 },
16853 },
16854 MarkdownAstPhaseTiming {
16855 name: "outline_projection".to_string(),
16856 duration_micros: outline_duration_micros,
16857 detail: "outline-first section/block preview construction".to_string(),
16858 },
16859 ],
16860 };
16861 let report = MarkdownAstReport {
16862 handle: stable_handle("mdastrep", &file_display),
16863 root: root.to_string_lossy().to_string(),
16864 file: file_display.clone(),
16865 range: SourceRangePreview {
16866 start: 1,
16867 end: total_lines,
16868 total_lines,
16869 truncated_before: false,
16870 truncated_after: false,
16871 },
16872 projection: projection_preview,
16873 nodes,
16874 expand: MarkdownAstExpandCommands {
16875 file: markdown_ast_command(&root, &file_display, None),
16876 source_read: source_read_command(&root, &file_display, 1, total_lines.max(1)),
16877 edit_intents: markdown_edit_intents_command(&root),
16878 },
16879 warnings: Vec::new(),
16880 };
16881
16882 if format.json_output {
16883 let truncated = node.is_none() && raw_nodes.len() > report.nodes.len();
16884 let mut follow_up = vec![
16885 report.expand.file.clone(),
16886 report.expand.source_read.clone(),
16887 report.expand.edit_intents.clone(),
16888 ];
16889 follow_up.extend(
16890 report
16891 .nodes
16892 .iter()
16893 .map(|node| node.expand.source_window.clone()),
16894 );
16895 print_json_or_envelope(
16896 &report,
16897 &format,
16898 "markdown-ast",
16899 "ast",
16900 ToolEnvelopeSummary {
16901 text: format!("markdown ast {} nodes:{}", report.file, report.nodes.len()),
16902 metrics: vec![
16903 envelope_metric("nodes", report.nodes.len()),
16904 envelope_metric("total_nodes", report.projection.total_nodes),
16905 envelope_metric(
16906 "parse_duration_micros",
16907 report.projection.cache.parse_duration_micros,
16908 ),
16909 envelope_metric("total_lines", report.range.total_lines),
16910 ],
16911 },
16912 truncated,
16913 follow_up,
16914 )?;
16915 } else if format.compact {
16916 println!(
16917 "markdown-ast {} nodes:{} handle:{}",
16918 report.file,
16919 report.nodes.len(),
16920 report.handle
16921 );
16922 for node in &report.nodes {
16923 println!(
16924 " {} {} {}:{}-{}",
16925 node.handle, node.kind, node.name, node.line, node.end_line
16926 );
16927 }
16928 if node.is_none() && raw_nodes.len() > report.nodes.len() {
16929 println!("expand: {}", report.expand.file);
16930 }
16931 } else {
16932 println!(
16933 "Markdown AST `{}` nodes {} of {} ({})",
16934 report.file,
16935 report.nodes.len(),
16936 raw_nodes.len(),
16937 report.handle
16938 );
16939 for node in &report.nodes {
16940 println!(
16941 " {} `{}` {}:{}-{} — {}",
16942 node.handle,
16943 node.name,
16944 node.kind,
16945 node.line,
16946 node.end_line,
16947 node.expand.source_window
16948 );
16949 }
16950 if node.is_none() && raw_nodes.len() > report.nodes.len() {
16951 println!();
16952 println!("Expand:");
16953 println!(" file: {}", report.expand.file);
16954 }
16955 }
16956
16957 Ok(())
16958}
16959
16960#[allow(clippy::too_many_arguments)]
16961fn cmd_source_read(
16962 file: &Path,
16963 path: &Path,
16964 start: usize,
16965 lines: usize,
16966 end: Option<usize>,
16967 scope: Option<&str>,
16968 format: OutputFormat,
16969 absolute: bool,
16970 budget: ResponseBudget,
16971) -> Result<()> {
16972 if start == 0 {
16973 bail!("--start is 1-based and must be greater than zero");
16974 }
16975 if lines == 0 {
16976 bail!("--lines must be greater than zero");
16977 }
16978 if let Some(end) = end
16979 && end < start
16980 {
16981 bail!("--end must be greater than or equal to --start");
16982 }
16983
16984 let root = lint::resolve_project_root_or_canonical_path(path)?;
16985 let file_abs = resolve_source_file(&root, file)?;
16986 let file_display = if absolute {
16987 file_abs.to_string_lossy().to_string()
16988 } else {
16989 relativize_pathbuf(&file_abs, &root)
16990 .to_string_lossy()
16991 .to_string()
16992 };
16993
16994 let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
16995 let text = String::from_utf8_lossy(&source);
16996 let all_lines: Vec<&str> = text.lines().collect();
16997 let total_lines = all_lines.len();
16998 if total_lines > 0 && start > total_lines {
16999 bail!(
17000 "--start {} is beyond end of {} ({} lines)",
17001 start,
17002 file_display,
17003 total_lines
17004 );
17005 }
17006 let requested_end = end.unwrap_or_else(|| start.saturating_add(lines).saturating_sub(1));
17007 let end_line = requested_end.min(total_lines);
17008 let max_bytes = budget.preview_bytes();
17009 let token_cap = budget.body_token_cap();
17010 let (preview, preview_end, body_truncated) = if total_lines == 0 {
17011 (Vec::new(), end_line, false)
17012 } else {
17013 let capped = build_token_capped_preview(&all_lines, start, end_line, max_bytes, token_cap);
17014 (capped.preview, capped.capped_end, capped.was_capped)
17015 };
17016 let effective_end = if body_truncated { preview_end } else { end_line };
17017
17018 let mut warnings = Vec::new();
17019 if body_truncated {
17020 warnings.push(format!(
17021 "body preview capped at ~{token_cap} tokens at line {preview_end} of {end_line}"
17022 ));
17023 }
17024 let max_items = budget.preview_items();
17025 let symbols = load_source_symbols(
17026 &root,
17027 &file_abs,
17028 &file_display,
17029 &source,
17030 scope,
17031 start,
17032 effective_end,
17033 max_items,
17034 max_bytes,
17035 &mut warnings,
17036 );
17037 let summaries =
17038 load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
17039 let markdown = if is_markdown_path(&file_abs) {
17040 match source_read_markdown_projection(
17041 &root,
17042 &file_display,
17043 &source,
17044 start,
17045 effective_end,
17046 budget,
17047 ) {
17048 Ok(markdown) => Some(markdown),
17049 Err(err) => {
17050 warnings.push(format!("markdown projection unavailable: {err:#}"));
17051 None
17052 }
17053 }
17054 } else {
17055 None
17056 };
17057
17058 let effective_lines = effective_end.saturating_sub(start).saturating_add(1).max(1);
17059 let expand = SourceExpandCommands {
17060 before: (start > 1).then(|| {
17061 let before_start = start.saturating_sub(lines).max(1);
17062 source_read_command(&root, &file_display, before_start, start - before_start)
17063 }),
17064 after: (effective_end < total_lines)
17065 .then(|| source_read_command(&root, &file_display, effective_end + 1, lines)),
17066 body: body_truncated.then(|| {
17067 let remaining = end_line.saturating_sub(effective_end);
17068 source_read_command(&root, &file_display, effective_end + 1, remaining)
17069 }),
17070 file: source_read_command(&root, &file_display, 1, total_lines.max(effective_lines)),
17071 markdown_ast: is_markdown_path(&file_abs)
17072 .then(|| markdown_ast_command(&root, &file_display, None)),
17073 };
17074
17075 let report = SourceReadReport {
17076 handle: stable_handle("swin", &format!("{file_display}:{start}:{effective_end}")),
17077 root: root.to_string_lossy().to_string(),
17078 file: file_display,
17079 range: SourceRangePreview {
17080 start,
17081 end: effective_end,
17082 total_lines,
17083 truncated_before: start > 1,
17084 truncated_after: effective_end < total_lines,
17085 },
17086 preview,
17087 symbols,
17088 summaries,
17089 markdown,
17090 expand,
17091 warnings,
17092 };
17093
17094 if format.json_output {
17095 let truncated = report.range.truncated_before || report.range.truncated_after;
17096 let follow_up = [
17097 report.expand.before.clone(),
17098 report.expand.after.clone(),
17099 report.expand.body.clone(),
17100 Some(report.expand.file.clone()),
17101 report.expand.markdown_ast.clone(),
17102 ]
17103 .into_iter()
17104 .flatten()
17105 .collect::<Vec<_>>();
17106 print_json_or_envelope(
17107 &report,
17108 &format,
17109 "source-read",
17110 "window",
17111 ToolEnvelopeSummary {
17112 text: format!(
17113 "source window {}:{}-{}",
17114 report.file, report.range.start, report.range.end
17115 ),
17116 metrics: vec![
17117 envelope_metric("lines", report.preview.len()),
17118 envelope_metric("symbols", report.symbols.len()),
17119 envelope_metric("summaries", report.summaries.len()),
17120 envelope_metric(
17121 "markdown_nodes",
17122 report
17123 .markdown
17124 .as_ref()
17125 .map_or(0, |markdown| markdown.visible_nodes),
17126 ),
17127 ],
17128 },
17129 truncated,
17130 follow_up,
17131 )?;
17132 } else if format.compact {
17133 println!(
17134 "source {}:{}-{} / {} handle:{}",
17135 report.file,
17136 report.range.start,
17137 report.range.end,
17138 report.range.total_lines,
17139 report.handle
17140 );
17141 for line in &report.preview {
17142 println!("{:>5} {}", line.line, line.text);
17143 }
17144 if !report.symbols.is_empty() {
17145 println!("syms[{}]:", report.symbols.len());
17146 for symbol in &report.symbols {
17147 println!(" {} {}:{}", symbol.name, symbol.file, symbol.line);
17148 }
17149 }
17150 if report.range.truncated_before || report.range.truncated_after {
17151 println!("expand: {}", report.expand.file);
17152 }
17153 } else {
17154 println!(
17155 "Source window `{}` lines {}-{} of {} ({})",
17156 report.file,
17157 report.range.start,
17158 report.range.end,
17159 report.range.total_lines,
17160 report.handle
17161 );
17162 for line in &report.preview {
17163 println!("{:>5} | {}", line.line, line.text);
17164 }
17165 if !report.symbols.is_empty() {
17166 println!();
17167 println!("Symbol refs:");
17168 for symbol in &report.symbols {
17169 println!(
17170 " {} `{}` {}:{} — {}",
17171 symbol.handle, symbol.name, symbol.file, symbol.line, symbol.expand
17172 );
17173 }
17174 }
17175 if !report.summaries.is_empty() {
17176 println!();
17177 println!("Summary refs:");
17178 for summary in &report.summaries {
17179 println!(
17180 " {} `{}` — {}",
17181 summary.handle, summary.symbol_name, summary.expand
17182 );
17183 }
17184 }
17185 if report.range.truncated_before || report.range.truncated_after {
17186 println!();
17187 println!("Expand:");
17188 if let Some(before) = &report.expand.before {
17189 println!(" before: {}", before);
17190 }
17191 if let Some(after) = &report.expand.after {
17192 println!(" after: {}", after);
17193 }
17194 println!(" file: {}", report.expand.file);
17195 }
17196 for warning in &report.warnings {
17197 eprintln!("warning: {warning}");
17198 }
17199 }
17200
17201 Ok(())
17202}
17203
/// Looks up `symbol` in the symbol index and prints a budget-capped preview of its body.
///
/// The first index hit is used, or — when `file_hint` is given — the first hit whose
/// resolved file equals the resolved hint. The selected file is read from disk and the
/// preview is capped twice: by a line budget (`budget.preview_items() * 16`) and by
/// `build_token_capped_preview` using `budget.preview_bytes()` / `budget.body_token_cap()`.
///
/// Output mode is chosen from `format`: JSON/envelope, compact, or the default human
/// layout. Warnings are written to stderr only in the human layout; in JSON mode they
/// travel inside the report.
///
/// # Errors
///
/// Fails when the project root or a source file cannot be resolved, when no index exists
/// at the resolved database path, when the index cannot be opened or queried, when no hit
/// matches, when the source file cannot be read, or when JSON output fails.
#[allow(clippy::too_many_arguments)]
fn cmd_symbol_read(
    symbol: &str,
    file_hint: Option<&Path>,
    path: &Path,
    scope: Option<&str>,
    format: OutputFormat,
    absolute: bool,
    budget: ResponseBudget,
) -> Result<()> {
    let root = lint::resolve_project_root_or_canonical_path(path)?;
    let hinted_file_abs = file_hint
        .map(|file| resolve_source_file(&root, file))
        .transpose()?;
    // The hinted file (when present) also steers which index database is queried.
    let path_hint = hinted_file_abs.as_deref().unwrap_or(root.as_path());
    let db_path = resolve_query_db_path(&root, path_hint, scope)?;
    if !db_path.exists() {
        bail!(
            "index refs unavailable: no index found at {}",
            db_path.display()
        );
    }
    let db = index::IndexDb::open_read_only_resilient(&db_path)
        .with_context(|| format!("opening symbol index {}", db_path.display()))?;
    // Always ask for at least 10 hits so a file hint has candidates to filter.
    let search_limit = budget.follow_up_items().max(10);
    let hits = db
        .symbol_search(symbol, search_limit)
        .with_context(|| format!("searching symbols for {symbol:?}"))?;
    let selected = hits
        .into_iter()
        .find(|hit| {
            // Without a hint the first hit wins.
            let Some(hinted_file_abs) = &hinted_file_abs else {
                return true;
            };
            // A hit whose file cannot be resolved is treated as a non-match.
            resolve_source_file(&root, Path::new(&hit.file))
                .map(|hit_file| hit_file == *hinted_file_abs)
                .unwrap_or(false)
        })
        .with_context(|| {
            let hint = file_hint
                .map(|file| format!(" in {}", file.display()))
                .unwrap_or_default();
            format!("no indexed symbol matched {symbol:?}{hint}")
        })?;

    let file_abs = resolve_source_file(&root, Path::new(&selected.file))?;
    let file_display = if absolute {
        file_abs.to_string_lossy().to_string()
    } else {
        relativize_pathbuf(&file_abs, &root)
            .to_string_lossy()
            .to_string()
    };
    let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
    // Only the compact output prints this hash.
    let content_hash = blake3::hash(&source).to_hex().to_string();
    let text = String::from_utf8_lossy(&source);
    let all_lines: Vec<&str> = text.lines().collect();
    let total_lines = all_lines.len();
    let file_symbols = db
        .symbols_for_file(&file_abs.to_string_lossy())
        .with_context(|| format!("loading symbols for {}", file_abs.display()))?;
    let max_items = budget.preview_items();
    let max_bytes = budget.preview_bytes();
    let selected_start = symbol_hit_line(&selected);
    let selected_end = symbol_hit_end_line(&selected)
        .unwrap_or(selected_start)
        .max(selected_start);
    // Match the search hit to its stored symbol row by name, kind and start line.
    let stored_target = file_symbols.iter().find(|candidate| {
        candidate.name == selected.name
            && candidate.kind == selected.kind
            && source_symbol_line(candidate) == selected_start
    });
    // Prefer the span derived from the stored symbol; fall back to the search hit.
    let target_span = stored_target
        .and_then(|stored| stored_symbol_ast_span(stored, &source, &file_symbols, max_items))
        .or_else(|| symbol_hit_ast_span(&selected, &source));
    let target_start = target_span
        .as_ref()
        .map(|span| span.start_line)
        .unwrap_or(selected_start);
    let target_end = target_span
        .as_ref()
        .map(|span| span.end_line)
        .or_else(|| stored_target.and_then(source_symbol_end_line))
        .unwrap_or(selected_end)
        .max(target_start);
    let target_bounds = stored_target
        .and_then(stored_symbol_span_bounds)
        .or_else(|| symbol_hit_span_bounds(&selected));
    // NOTE(review): this shadows the `target_end` computed above, so the stored symbol's
    // end line overrides the span's end line whenever it exists — confirm this precedence
    // is intended.
    let target_end = stored_target
        .and_then(source_symbol_end_line)
        .unwrap_or(target_end)
        .max(target_start);
    // Line budget for the body: 16 lines per preview item, at least 16.
    let body_line_budget = budget.preview_items().max(1).saturating_mul(16);
    let line_capped_end = target_start
        .saturating_add(body_line_budget)
        .saturating_sub(1)
        .min(target_end)
        .min(total_lines.max(target_start));
    let token_cap = budget.body_token_cap();
    // Skip preview construction when the file is empty or the symbol starts past its end.
    let (body, effective_preview_end, body_truncated) = if total_lines == 0 || target_start > total_lines {
        (Vec::new(), line_capped_end, false)
    } else {
        let capped = build_token_capped_preview(&all_lines, target_start, line_capped_end, max_bytes, token_cap);
        (capped.preview, capped.capped_end, capped.was_capped)
    };
    let preview_end = if body_truncated { effective_preview_end } else { line_capped_end };
    let child_symbols = file_symbols
        .iter()
        .filter(|candidate| {
            // With byte bounds available, a child is any symbol contained in the target's
            // bounds other than the target itself; symbols without bounds are dropped.
            if let Some((target_start_byte, target_end_byte)) = target_bounds {
                let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
                else {
                    return false;
                };
                return candidate_start >= target_start_byte
                    && candidate_end <= target_end_byte
                    && (candidate_start, candidate_end) != (target_start_byte, target_end_byte);
            }
            // Otherwise fall back to line containment (start line strictly after the target's).
            let line = source_symbol_line(candidate);
            line > target_start && line <= target_end
        })
        .take(max_items)
        .map(|symbol| {
            let line = source_symbol_line(symbol);
            let end_line = source_symbol_end_line(symbol);
            SourceSymbolRef {
                handle: stable_handle(
                    "ssym",
                    &format!("{}:{}:{}", file_display, symbol.name, line),
                ),
                name: truncate_for_budget(&symbol.name, max_bytes),
                kind: symbol.kind.clone(),
                language: symbol.language.clone(),
                file: file_display.clone(),
                line,
                end_line,
                signature: symbol
                    .signature
                    .clone()
                    .map(|signature| truncate_for_budget(&signature, max_bytes)),
                span: stored_symbol_ast_span(symbol, &source, &file_symbols, max_items),
                expand: source_symbol_read_command(&root, &symbol.name, &file_display),
            }
        })
        .collect::<Vec<_>>();
    let mut warnings = Vec::new();
    if body_truncated {
        warnings.push(format!(
            "body preview capped at ~{token_cap} tokens at line {preview_end} of {target_end}"
        ));
    }
    let summaries =
        load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
    let symbol_handle = stable_handle(
        "sread",
        &format!("{}:{}:{}", file_display, selected.name, target_start),
    );
    // Number of lines actually previewed, never less than one.
    let source_lines = preview_end
        .saturating_sub(target_start)
        .saturating_add(1)
        .max(1);
    let expand = SymbolReadExpandCommands {
        source_window: source_read_command(&root, &file_display, target_start, source_lines),
        // Only offered when the token cap cut the body short: reads the remaining lines.
        body: body_truncated.then(|| {
            let remaining = target_end.saturating_sub(preview_end);
            source_read_command(&root, &file_display, preview_end + 1, remaining)
        }),
        file: source_read_command(&root, &file_display, 1, total_lines.max(source_lines)),
        explain: source_symbol_expand_command(&root, &selected.name),
        callers: source_symbol_graph_command(&root, &selected.name, "callers"),
        callees: source_symbol_graph_command(&root, &selected.name, "callees"),
        markdown_ast: (selected.language == "markdown").then(|| {
            markdown_ast_command(
                &root,
                &file_display,
                target_span.as_ref().map(|span| span.handle.as_str()),
            )
        }),
    };
    let report = SymbolReadReport {
        handle: symbol_handle.clone(),
        root: root.to_string_lossy().to_string(),
        query: symbol.to_string(),
        symbol: SymbolReadTarget {
            handle: symbol_handle,
            name: selected.name.clone(),
            kind: selected.kind.clone(),
            language: selected.language.clone(),
            file: file_display.clone(),
            line: target_start,
            end_line: Some(target_end),
            signature: stored_target
                .and_then(|stored| stored.signature.clone())
                .map(|signature| truncate_for_budget(&signature, max_bytes)),
            parent_module: stored_target.and_then(|stored| stored.parent_module.clone()),
            visibility: stored_target.and_then(|stored| stored.visibility.clone()),
            span: target_span,
        },
        range: SourceRangePreview {
            start: target_start,
            end: preview_end,
            total_lines,
            // The preview always begins at the symbol's first line.
            truncated_before: false,
            truncated_after: preview_end < target_end,
        },
        body,
        child_symbols,
        summaries,
        expand,
        warnings,
    };

    if format.json_output {
        // Also flagged as truncated when a body line reaches the byte cap or the child
        // list reaches the item cap.
        let truncated = report.range.truncated_after
            || report.body.iter().any(|line| line.text.len() >= max_bytes)
            || report.child_symbols.len() >= max_items;
        let follow_up = [
            Some(report.expand.source_window.clone()),
            report.expand.body.clone(),
            Some(report.expand.file.clone()),
            Some(report.expand.explain.clone()),
            Some(report.expand.callers.clone()),
            Some(report.expand.callees.clone()),
        ]
        .into_iter()
        .flatten()
        .chain(report.expand.markdown_ast.clone())
        .collect::<Vec<_>>();
        print_json_or_envelope(
            &report,
            &format,
            "symbol-read",
            "symbol",
            ToolEnvelopeSummary {
                text: format!(
                    "symbol {} {}:{}-{}",
                    report.symbol.name, report.symbol.file, report.range.start, report.range.end
                ),
                metrics: vec![
                    envelope_metric("body_lines", report.body.len()),
                    envelope_metric("child_symbols", report.child_symbols.len()),
                    envelope_metric("summaries", report.summaries.len()),
                ],
            },
            truncated,
            follow_up,
        )?;
    } else if format.compact {
        // Compact layout: header with handle and content hash, body lines, child list.
        println!(
            "symbol {} {}:{}-{} handle:{} hash:{}",
            report.symbol.name,
            report.symbol.file,
            report.range.start,
            report.range.end,
            report.handle,
            content_hash
        );
        for line in &report.body {
            println!("{:>5} {}", line.line, line.text);
        }
        if !report.child_symbols.is_empty() {
            println!("children[{}]:", report.child_symbols.len());
            for child in &report.child_symbols {
                println!(" {} {}:{}", child.name, child.file, child.line);
            }
        }
    } else {
        // Human layout: header, body, child symbols, expand commands, then warnings on stderr.
        println!(
            "Symbol `{}` in `{}` lines {}-{} ({})",
            report.symbol.name,
            report.symbol.file,
            report.range.start,
            report.range.end,
            report.handle
        );
        for line in &report.body {
            println!("{:>5} | {}", line.line, line.text);
        }
        if !report.child_symbols.is_empty() {
            println!();
            println!("Child symbols:");
            for child in &report.child_symbols {
                println!(
                    " {} `{}` {}:{} — {}",
                    child.handle, child.name, child.file, child.line, child.expand
                );
            }
        }
        println!();
        println!("Expand:");
        println!(" source: {}", report.expand.source_window);
        println!(" file: {}", report.expand.file);
        println!(" explain: {}", report.expand.explain);
        println!(" callers: {}", report.expand.callers);
        println!(" callees: {}", report.expand.callees);
        for warning in &report.warnings {
            eprintln!("warning: {warning}");
        }
    }

    Ok(())
}
17506
17507#[allow(clippy::too_many_arguments)]
17508#[derive(Serialize)]
17509struct ExplainBudgetDefinitionPreview {
17510 handle: String,
17511 #[serde(skip_serializing_if = "Option::is_none")]
17512 tag_alias: Option<String>,
17513 kind: String,
17514 name: String,
17515 file: String,
17516 line: i64,
17517 expand: String,
17518}
17519
/// Budget-capped preview of one call edge (caller or callee) inside an `ExplainBudgetReport`.
#[derive(Serialize)]
struct ExplainBudgetEdgePreview {
    /// Compact handle produced by `build_compact_symbol_ref`.
    handle: String,
    /// Optional tag alias from the compact symbol ref; omitted from output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Caller name for caller previews, callee name for callee previews.
    name: String,
    /// File of the calling side of the edge, truncated to the byte budget.
    file: String,
    /// Line of the call site.
    line: i64,
    /// `tsift explain …` command that expands this edge's symbol.
    expand: String,
}
17530
/// Budget-capped preview of the community a symbol belongs to.
#[derive(Serialize)]
struct ExplainBudgetCommunityPreview {
    /// Total number of members in the community (not capped).
    size: usize,
    /// Member names, limited to the item cap and each truncated to the byte budget.
    members: Vec<String>,
}
17536
/// Serializable result of `build_explain_budget_report`: capped previews plus the totals
/// needed to tell how much was left out.
#[derive(Serialize)]
struct ExplainBudgetReport {
    /// The symbol that was explained.
    symbol: String,
    /// Item cap applied to every preview list.
    max_items: usize,
    /// Byte cap applied to previewed strings.
    max_bytes: usize,
    /// Number of definitions supplied before capping.
    definition_total: usize,
    /// Caller total as reported by the caller of the builder.
    callers_total: usize,
    /// Passed through unchanged from the builder's caller.
    callers_truncated_by_limit: bool,
    /// Callee total as reported by the caller of the builder.
    callees_total: usize,
    /// Passed through unchanged from the builder's caller.
    callees_truncated_by_limit: bool,
    /// True when any preview list shows fewer entries than its total.
    truncated: bool,
    /// Previewed definitions.
    definitions: Vec<ExplainBudgetDefinitionPreview>,
    /// Previewed callers.
    callers: Vec<ExplainBudgetEdgePreview>,
    /// Previewed callees.
    callees: Vec<ExplainBudgetEdgePreview>,
    /// Community preview; omitted from output when no community was supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    community: Option<ExplainBudgetCommunityPreview>,
}
17554
17555#[allow(clippy::too_many_arguments)]
17556pub(crate) fn build_explain_budget_report(
17557 symbol: &str,
17558 _root: &Path,
17559 symbols: &[index::StoredSymbol],
17560 callers: &[index::StoredEdge],
17561 callers_total: usize,
17562 callers_truncated_by_limit: bool,
17563 callees: &[index::StoredEdge],
17564 callees_total: usize,
17565 callees_truncated_by_limit: bool,
17566 community: Option<&graph::Community>,
17567 budget: ResponseBudget,
17568) -> ExplainBudgetReport {
17569 let max_items = budget.preview_items();
17570 let max_bytes = budget.preview_bytes();
17571 let definitions = symbols
17572 .iter()
17573 .take(max_items)
17574 .map(|entry| {
17575 let symbol_ref = build_compact_symbol_ref(
17576 "edef",
17577 &format!(
17578 "{}:{}:{}:{}",
17579 entry.kind, entry.name, entry.file, entry.line
17580 ),
17581 &entry.name,
17582 entry.tags.as_deref(),
17583 max_bytes,
17584 );
17585 ExplainBudgetDefinitionPreview {
17586 handle: symbol_ref.handle,
17587 tag_alias: symbol_ref.tag_alias,
17588 kind: entry.kind.clone(),
17589 name: symbol_ref.name,
17590 file: truncate_for_budget(&entry.file, max_bytes),
17591 line: entry.line,
17592 expand: format!(
17593 "tsift search {} --exact --path {} --limit 20",
17594 shell_quote(&entry.name),
17595 shell_quote(&entry.file)
17596 ),
17597 }
17598 })
17599 .collect();
17600 let callers_preview: Vec<ExplainBudgetEdgePreview> = callers
17601 .iter()
17602 .take(max_items)
17603 .map(|entry| {
17604 let symbol_ref = build_compact_symbol_ref(
17605 "ecall",
17606 &format!(
17607 "{}:{}:{}:{}",
17608 entry.caller_name, entry.caller_file, entry.call_site_line, symbol
17609 ),
17610 &entry.caller_name,
17611 None,
17612 max_bytes,
17613 );
17614 ExplainBudgetEdgePreview {
17615 handle: symbol_ref.handle,
17616 tag_alias: symbol_ref.tag_alias,
17617 name: symbol_ref.name,
17618 file: truncate_for_budget(&entry.caller_file, max_bytes),
17619 line: entry.call_site_line,
17620 expand: format!(
17621 "tsift explain {} --path {} --limit 0",
17622 shell_quote(&entry.caller_name),
17623 shell_quote(&entry.caller_file)
17624 ),
17625 }
17626 })
17627 .collect();
17628 let callees_preview: Vec<ExplainBudgetEdgePreview> = callees
17629 .iter()
17630 .take(max_items)
17631 .map(|entry| {
17632 let symbol_ref = build_compact_symbol_ref(
17633 "eces",
17634 &format!(
17635 "{}:{}:{}:{}",
17636 entry.callee_name, entry.caller_file, entry.call_site_line, symbol
17637 ),
17638 &entry.callee_name,
17639 None,
17640 max_bytes,
17641 );
17642 ExplainBudgetEdgePreview {
17643 handle: symbol_ref.handle,
17644 tag_alias: symbol_ref.tag_alias,
17645 name: symbol_ref.name,
17646 file: truncate_for_budget(&entry.caller_file, max_bytes),
17647 line: entry.call_site_line,
17648 expand: format!(
17649 "tsift explain {} --path {} --limit 0",
17650 shell_quote(&entry.callee_name),
17651 shell_quote(&entry.caller_file)
17652 ),
17653 }
17654 })
17655 .collect();
17656 let community_preview = community.map(|entry| ExplainBudgetCommunityPreview {
17657 size: entry.members.len(),
17658 members: entry
17659 .members
17660 .iter()
17661 .take(max_items)
17662 .map(|member| truncate_for_budget(&member.name, max_bytes))
17663 .collect(),
17664 });
17665
17666 ExplainBudgetReport {
17667 symbol: symbol.to_string(),
17668 max_items,
17669 max_bytes,
17670 definition_total: symbols.len(),
17671 callers_total,
17672 callers_truncated_by_limit,
17673 callees_total,
17674 callees_truncated_by_limit,
17675 truncated: symbols.len() > max_items
17676 || callers_total > callers_preview.len()
17677 || callees_total > callees_preview.len()
17678 || community
17679 .map(|entry| entry.members.len() > max_items)
17680 .unwrap_or(false),
17681 definitions,
17682 callers: callers_preview,
17683 callees: callees_preview,
17684 community: community_preview,
17685 }
17686}
17687
17688pub(crate) fn print_explain_budget_human(report: &ExplainBudgetReport) {
17689 println!(
17690 "explain-budget sym:{} defs:{}/{} crs:{}/{} ces:{}/{}",
17691 shell_quote(&report.symbol),
17692 report.definitions.len(),
17693 report.definition_total,
17694 report.callers.len(),
17695 report.callers_total,
17696 report.callees.len(),
17697 report.callees_total
17698 );
17699 for entry in &report.definitions {
17700 println!(
17701 "def {} {} {}:{} expand:{}",
17702 format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
17703 entry.kind,
17704 entry.file,
17705 entry.line,
17706 entry.expand
17707 );
17708 }
17709 for entry in &report.callers {
17710 println!(
17711 "caller {} {}:{} expand:{}",
17712 format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
17713 entry.file,
17714 entry.line,
17715 entry.expand
17716 );
17717 }
17718 for entry in &report.callees {
17719 println!(
17720 "callee {} {}:{} expand:{}",
17721 format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
17722 entry.file,
17723 entry.line,
17724 entry.expand
17725 );
17726 }
17727 if let Some(community) = &report.community {
17728 println!(
17729 "community size:{} members:{}",
17730 community.size,
17731 community.members.join(", ")
17732 );
17733 }
17734 if report.truncated {
17735 println!(
17736 "budget truncated items:{} bytes:{}",
17737 report.max_items, report.max_bytes
17738 );
17739 }
17740}
17741
/// Directory names that make `tagpath_audit_policy_hints` emit a `skip_dir:<name>` hint
/// when they appear among a path's parent components.
const TAGPATH_AUDIT_SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    "__pycache__",
    ".venv",
    "vendor",
];
17759
/// Built-in file extensions (lowercase, no leading dot) that seed the supported set
/// returned by `tagpath_audit_supported_extensions`.
const TAGPATH_AUDIT_SOURCE_EXTENSIONS: &[&str] = &[
    "rs", "py", "ts", "js", "go", "java", "rb", "c", "cpp", "h", "hpp", "cs", "swift", "kt",
    "scala", "zig", "nim", "ex", "exs", "erl", "hs", "ml", "clj", "r", "lua", "php", "pl", "d",
    "cr", "dart", "jl", "v", "odin", "gleam", "rkt", "scm", "lisp", "lsp", "f", "fs", "fsi", "fsx",
    "sh", "bash", "zsh", "sql", "css", "tsx",
];
17766
17767pub(crate) fn tagpath_audit_supported_extensions(root: &Path) -> BTreeSet<String> {
17768 let mut extensions = TAGPATH_AUDIT_SOURCE_EXTENSIONS
17769 .iter()
17770 .map(|ext| (*ext).to_string())
17771 .collect::<BTreeSet<_>>();
17772
17773 let config_path = root.join(".naming.toml");
17774 if !config_path.exists() {
17775 return extensions;
17776 }
17777
17778 match tagpath::config::resolve(&config_path) {
17779 Ok(config) => {
17780 if let Some(grammars) = config.grammars {
17781 for grammar in grammars.languages.values() {
17782 for ext in &grammar.extensions {
17783 if let Some(normalized) = normalize_extension(ext) {
17784 extensions.insert(normalized);
17785 }
17786 }
17787 }
17788 }
17789 }
17790 Err(err) => {
17791 eprintln!("tagpath_policy_hint_config_unreadable: {err}");
17792 }
17793 }
17794 extensions
17795}
17796
17797pub(crate) fn tagpath_audit_policy_hints(
17798 rel_path: &str,
17799 supported_extensions: &BTreeSet<String>,
17800) -> Vec<String> {
17801 let path = Path::new(rel_path);
17802 let mut hints = BTreeSet::new();
17803 if let Some(parent) = path.parent() {
17804 for component in parent.components() {
17805 if let std::path::Component::Normal(name) = component {
17806 let name = name.to_string_lossy();
17807 if TAGPATH_AUDIT_SKIP_DIRS.contains(&name.as_ref()) {
17808 hints.insert(format!("skip_dir:{name}"));
17809 }
17810 }
17811 }
17812 }
17813 if path
17814 .extension()
17815 .and_then(|ext| ext.to_str())
17816 .and_then(normalize_extension)
17817 .is_some_and(|ext| !supported_extensions.contains(&ext))
17818 {
17819 hints.insert("extension_unsupported".to_string());
17820 }
17821 hints.into_iter().collect()
17822}
17823
/// Canonicalizes a file extension: trims surrounding whitespace, strips leading dots and
/// lowercases ASCII letters. Returns `None` when nothing is left.
fn normalize_extension(ext: &str) -> Option<String> {
    let bare = ext.trim().trim_start_matches('.');
    (!bare.is_empty()).then(|| bare.to_ascii_lowercase())
}
17832
17833pub(crate) fn diff_digest_status_label(status: diff_digest::DiffDigestFileStatus) -> &'static str {
17834 match status {
17835 diff_digest::DiffDigestFileStatus::Added => "added",
17836 diff_digest::DiffDigestFileStatus::Modified => "modified",
17837 diff_digest::DiffDigestFileStatus::Deleted => "deleted",
17838 }
17839}
17840
17841pub(crate) fn diff_digest_summary_label(
17842 state: diff_digest::DiffDigestSummaryState,
17843) -> &'static str {
17844 match state {
17845 diff_digest::DiffDigestSummaryState::Current => "current",
17846 diff_digest::DiffDigestSummaryState::Stale => "stale",
17847 diff_digest::DiffDigestSummaryState::Missing => "missing",
17848 diff_digest::DiffDigestSummaryState::Unavailable => "unavailable",
17849 }
17850}
17851
17852fn test_digest_summary_label(state: test_digest::TestDigestSummaryState) -> &'static str {
17853 match state {
17854 test_digest::TestDigestSummaryState::Current => "current",
17855 test_digest::TestDigestSummaryState::Stale => "stale",
17856 test_digest::TestDigestSummaryState::Missing => "missing",
17857 test_digest::TestDigestSummaryState::Unavailable => "unavailable",
17858 }
17859}
17860
17861fn log_digest_summary_label(state: log_digest::LogDigestSummaryState) -> &'static str {
17862 match state {
17863 log_digest::LogDigestSummaryState::Current => "current",
17864 log_digest::LogDigestSummaryState::Stale => "stale",
17865 log_digest::LogDigestSummaryState::Missing => "missing",
17866 log_digest::LogDigestSummaryState::Unavailable => "unavailable",
17867 }
17868}
17869
17870pub(crate) fn diff_digest_mode_label(mode: diff_digest::DiffDigestMode) -> &'static str {
17871 match mode {
17872 diff_digest::DiffDigestMode::WorkingTree => "worktree",
17873 diff_digest::DiffDigestMode::Cached => "cached",
17874 diff_digest::DiffDigestMode::Revision => "revision",
17875 }
17876}
17877
17878pub(crate) fn diff_digest_mode_display(report: &diff_digest::DiffDigestReport) -> String {
17879 match (&report.mode, &report.revision) {
17880 (diff_digest::DiffDigestMode::WorkingTree, _) => "working tree".to_string(),
17881 (diff_digest::DiffDigestMode::Cached, _) => "staged index".to_string(),
17882 (diff_digest::DiffDigestMode::Revision, Some(revision)) => {
17883 format!("revision {revision}")
17884 }
17885 (diff_digest::DiffDigestMode::Revision, None) => "revision".to_string(),
17886 }
17887}
17888
17889pub(crate) fn diff_digest_empty_message(report: &diff_digest::DiffDigestReport) -> String {
17890 match (&report.mode, &report.revision) {
17891 (diff_digest::DiffDigestMode::WorkingTree, _) => "No git changes found.".to_string(),
17892 (diff_digest::DiffDigestMode::Cached, _) => "No staged git changes found.".to_string(),
17893 (diff_digest::DiffDigestMode::Revision, Some(revision)) => {
17894 format!("No diff found for revision {revision}.")
17895 }
17896 (diff_digest::DiffDigestMode::Revision, None) => "No revision diff found.".to_string(),
17897 }
17898}
17899
17900fn cmd_impact(
17901 path: &Path,
17902 cached: bool,
17903 revision: Option<&str>,
17904 scope: Option<&str>,
17905 limit: usize,
17906 format: OutputFormat,
17907) -> Result<()> {
17908 let report = impact::compute(
17909 path,
17910 impact::ImpactOptions {
17911 cached,
17912 revision,
17913 scope,
17914 limit,
17915 },
17916 )?;
17917 if format.json_output {
17918 println!(
17919 "{}",
17920 to_json_schema(
17921 &report,
17922 format.pretty,
17923 format.terse,
17924 format.ultra_terse,
17925 format.schema
17926 )?
17927 );
17928 return Ok(());
17929 }
17930
17931 if format.compact {
17932 println!(
17933 "impact mode:{} changed:{} symbols:{} tests:{}/{}",
17934 diff_digest_mode_label(report.mode),
17935 report.changed_files.len(),
17936 report.changed_symbols.len(),
17937 report.affected_tests.len(),
17938 report.affected_tests_total
17939 );
17940 for target in &report.affected_tests {
17941 println!(
17942 "{} reasons:{} command:{}",
17943 target.path,
17944 target.reasons.len(),
17945 target.commands.join(" && ")
17946 );
17947 }
17948 for warning in &report.warnings {
17949 println!("warning {warning}");
17950 }
17951 return Ok(());
17952 }
17953
17954 println!("Impact ({})", diff_digest_mode_label(report.mode));
17955 println!(" changed files: {}", report.changed_files.len());
17956 println!(" changed symbols: {}", report.changed_symbols.len());
17957 println!(
17958 " affected tests: {}/{}",
17959 report.affected_tests.len(),
17960 report.affected_tests_total
17961 );
17962 for target in &report.affected_tests {
17963 println!();
17964 println!("{}", target.path);
17965 for reason in &target.reasons {
17966 println!(" - {reason}");
17967 }
17968 if !target.symbols.is_empty() {
17969 println!(" symbols: {}", target.symbols.join(", "));
17970 }
17971 for command in &target.commands {
17972 println!(" run: {}", command);
17973 }
17974 }
17975 for warning in &report.warnings {
17976 println!("warning: {warning}");
17977 }
17978 Ok(())
17979}
17980
17981pub(crate) fn render_test_digest_from_input(
17982 path: &Path,
17983 input: &str,
17984 runner: Option<&str>,
17985 format: OutputFormat,
17986) -> Result<()> {
17987 let report = test_digest::compute(path, input, runner)?;
17988 if format.json_output {
17989 println!(
17990 "{}",
17991 to_json_schema(
17992 &report,
17993 format.pretty,
17994 format.terse,
17995 format.ultra_terse,
17996 format.schema
17997 )?
17998 );
17999 return Ok(());
18000 }
18001
18002 if report.failure_groups.is_empty() {
18003 println!("No failures detected (runner: {}).", report.runner);
18004 for warning in &report.warnings {
18005 println!("warning: {warning}");
18006 }
18007 return Ok(());
18008 }
18009
18010 if format.compact {
18011 println!(
18012 "test runner:{} failures:{} groups:{} passed:{} failed:{} skipped:{}",
18013 report.runner,
18014 report.failures,
18015 report.grouped_failures,
18016 report.counts.passed.unwrap_or(0),
18017 report.counts.failed.unwrap_or(report.grouped_failures),
18018 report.counts.skipped.unwrap_or(0),
18019 );
18020 for failure in &report.failure_groups {
18021 let tests = truncate_for_compact(&failure.tests.join(","), 60);
18022 let location = match (&failure.path, failure.line) {
18023 (Some(path), Some(line)) => format!("{path}:{line}"),
18024 (Some(path), None) => path.clone(),
18025 _ => "-".to_string(),
18026 };
18027 println!(
18028 "{} tests:{} count:{} summaries:{} msg:{}",
18029 location,
18030 tests,
18031 failure.occurrences,
18032 test_digest_summary_label(failure.summary_state),
18033 truncate_for_compact(&failure.message, 80)
18034 );
18035 }
18036 for warning in &report.warnings {
18037 println!("warning: {warning}");
18038 }
18039 return Ok(());
18040 }
18041
18042 println!("Test digest ({})", report.runner);
18043 println!(" failures: {}", report.failures);
18044 println!(" failure groups: {}", report.grouped_failures);
18045 if let Some(passed) = report.counts.passed {
18046 println!(" passed: {}", passed);
18047 }
18048 if let Some(failed) = report.counts.failed {
18049 println!(" failed: {}", failed);
18050 }
18051 if let Some(skipped) = report.counts.skipped {
18052 println!(" skipped: {}", skipped);
18053 }
18054
18055 for failure in &report.failure_groups {
18056 println!();
18057 match (&failure.path, failure.line, failure.column) {
18058 (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
18059 (Some(path), Some(line), None) => println!("{path}:{line}"),
18060 (Some(path), None, _) => println!("{path}"),
18061 (None, _, _) => println!("(no file anchor)"),
18062 }
18063 println!(" tests: {}", failure.tests.join(", "));
18064 println!(" occurrences: {}", failure.occurrences);
18065 println!(" message: {}", failure.message);
18066 println!(
18067 " cached summaries: {}",
18068 test_digest_summary_label(failure.summary_state)
18069 );
18070 for summary in &failure.current_summaries {
18071 println!(
18072 " - {}: {}",
18073 summary.symbol,
18074 truncate_for_compact(&summary.summary, 160)
18075 );
18076 }
18077 }
18078 for warning in &report.warnings {
18079 println!("warning: {warning}");
18080 }
18081 Ok(())
18082}
18083
18084#[derive(Clone, Serialize, Deserialize)]
18085struct DispatchTraceSummary {
18086 backlog: usize,
18087 job_packet: usize,
18088 worker_result: usize,
18089 worker_context: usize,
18090 source_handle: usize,
18091 semantic_rows: usize,
18092}
18093
18094#[derive(Clone, Serialize, Deserialize)]
18095struct DispatchTraceReport {
18096 contract_version: String,
18097 root: String,
18098 #[serde(skip_serializing_if = "Option::is_none")]
18099 scope: Option<String>,
18100 targets: Vec<String>,
18101 projection_freshness: GraphDbFreshnessReport,
18102 projection_hashes: Vec<String>,
18103 evidence_packet_ids: Vec<String>,
18104 shared_preparation: ConflictMatrixSharedPreparationSummary,
18105 worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
18106 worker_feedback: Vec<ConflictMatrixWorkerFeedback>,
18107 summary: DispatchTraceSummary,
18108 nodes: Vec<SubstrateTerseGraphNode>,
18109 edges: Vec<SubstrateTerseGraphEdge>,
18110 conflict_matrix_decisions: Vec<String>,
18111 replay_commands: Vec<String>,
18112 repair_commands: Vec<String>,
18113 truncated: bool,
18114 #[serde(skip_serializing_if = "Vec::is_empty", default)]
18115 warnings: Vec<String>,
18116}
18117
/// Returns `true` when `kind` is one of the graph node kinds a dispatch trace may include.
///
/// The comparison is exact and case-sensitive.
fn dispatch_trace_allowed_node_kind(kind: &str) -> bool {
    const ALLOWED_KINDS: [&str; 11] = [
        "session",
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "semantic_concept",
        "semantic_entity",
        "file",
        "symbol",
        "route",
    ];
    ALLOWED_KINDS.contains(&kind)
}
18134
/// Returns the sort rank of a node kind for dispatch-trace output.
///
/// Known kinds rank by their position in the table below (`backlog` first, `session`
/// last); any other kind ranks 99 and therefore sorts after all known kinds.
fn dispatch_trace_kind_rank(kind: &str) -> usize {
    const RANKED_KINDS: [&str; 11] = [
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "file",
        "symbol",
        "route",
        "semantic_concept",
        "semantic_entity",
        "session",
    ];
    RANKED_KINDS
        .iter()
        .position(|ranked| *ranked == kind)
        .unwrap_or(99)
}
18151
18152fn dispatch_trace_summary(nodes: &[SubstrateGraphNode]) -> DispatchTraceSummary {
18153 DispatchTraceSummary {
18154 backlog: nodes.iter().filter(|node| node.kind == "backlog").count(),
18155 job_packet: nodes
18156 .iter()
18157 .filter(|node| node.kind == "job_packet")
18158 .count(),
18159 worker_result: nodes
18160 .iter()
18161 .filter(|node| node.kind == "worker_result")
18162 .count(),
18163 worker_context: nodes
18164 .iter()
18165 .filter(|node| node.kind == "worker_context")
18166 .count(),
18167 source_handle: nodes
18168 .iter()
18169 .filter(|node| node.kind == "source_handle")
18170 .count(),
18171 semantic_rows: nodes
18172 .iter()
18173 .filter(|node| matches!(node.kind.as_str(), "semantic_concept" | "semantic_entity"))
18174 .count(),
18175 }
18176}
18177
18178fn dispatch_trace_shared_preparation_summary(
18179 graph_nodes: &[SubstrateGraphNode],
18180 graph_edges: &[SubstrateGraphEdge],
18181 conflict: &ConflictMatrixReport,
18182) -> ConflictMatrixSharedPreparationSummary {
18183 ConflictMatrixSharedPreparationSummary {
18184 evidence_cache_status: conflict
18185 .inputs
18186 .shared_preparation
18187 .evidence_cache_status
18188 .clone(),
18189 graph_nodes: graph_nodes.len(),
18190 graph_edges: graph_edges.len(),
18191 evidence_packets: conflict.orchestration.evidence_packet_ids.len(),
18192 source_handles: conflict
18193 .candidates
18194 .iter()
18195 .map(|candidate| candidate.source_handles.len())
18196 .sum(),
18197 worker_context: conflict
18198 .candidates
18199 .iter()
18200 .map(|candidate| candidate.worker_context_handles.len())
18201 .sum(),
18202 worker_results: conflict
18203 .candidates
18204 .iter()
18205 .map(|candidate| candidate.worker_feedback.total)
18206 .sum(),
18207 semantic_rows: conflict
18208 .candidates
18209 .iter()
18210 .map(|candidate| candidate.semantic_related.len())
18211 .sum(),
18212 dispatch_trace_snapshot_nodes: graph_nodes.len(),
18213 dispatch_trace_snapshot_edges: graph_edges.len(),
18214 }
18215}
18216
18217fn dispatch_trace_collect_ids(
18218 targets: &[String],
18219 candidates: &[ConflictMatrixCandidate],
18220 graph_nodes: &[SubstrateGraphNode],
18221 graph_edges: &[SubstrateGraphEdge],
18222 depth: usize,
18223 limit: usize,
18224) -> (BTreeSet<String>, bool) {
18225 let target_refs = targets
18226 .iter()
18227 .map(|target| target.trim_start_matches('#').to_string())
18228 .collect::<BTreeSet<_>>();
18229 let mut ids = BTreeSet::new();
18230 for candidate in candidates {
18231 ids.insert(candidate.target_node_id.clone());
18232 for source in &candidate.source_handles {
18233 ids.insert(source.handle.clone());
18234 }
18235 for handle in &candidate.worker_context_handles {
18236 ids.insert(handle.clone());
18237 }
18238 for semantic in &candidate.semantic_related {
18239 ids.insert(semantic.handle.clone());
18240 }
18241 }
18242 for node in graph_nodes {
18243 if !dispatch_trace_allowed_node_kind(&node.kind) {
18244 continue;
18245 }
18246 if node
18247 .properties
18248 .get("ref_id")
18249 .is_some_and(|ref_id| target_refs.contains(ref_id))
18250 {
18251 ids.insert(node.id.clone());
18252 }
18253 }
18254
18255 let node_by_id = graph_nodes
18256 .iter()
18257 .map(|node| (node.id.as_str(), node))
18258 .collect::<BTreeMap<_, _>>();
18259 let max_nodes = if limit == 0 {
18260 usize::MAX
18261 } else {
18262 limit
18263 .saturating_mul(targets.len().max(1))
18264 .saturating_mul(12)
18265 .max(64)
18266 };
18267 let mut truncated = false;
18268 for _ in 0..depth.max(1) {
18269 let before = ids.len();
18270 let current_ids = ids.clone();
18271 for edge in graph_edges {
18272 if ids.len() >= max_nodes {
18273 truncated = true;
18274 break;
18275 }
18276 let touches = current_ids.contains(&edge.from_id) || current_ids.contains(&edge.to_id);
18277 if !touches {
18278 continue;
18279 }
18280 for endpoint in [&edge.from_id, &edge.to_id] {
18281 let Some(node) = node_by_id.get(endpoint.as_str()) else {
18282 continue;
18283 };
18284 if dispatch_trace_allowed_node_kind(&node.kind) {
18285 ids.insert(endpoint.clone());
18286 }
18287 }
18288 }
18289 if ids.len() == before || truncated {
18290 break;
18291 }
18292 }
18293 (ids, truncated)
18294}
18295
18296#[allow(clippy::too_many_arguments)]
18297fn build_dispatch_trace_report_from_conflict_snapshot(
18298 root: &Path,
18299 scope: Option<&str>,
18300 conflict: ConflictMatrixReport,
18301 graph_nodes: Vec<SubstrateGraphNode>,
18302 graph_edges: Vec<SubstrateGraphEdge>,
18303 depth: usize,
18304 limit: usize,
18305 extra_warnings: Vec<String>,
18306) -> Result<DispatchTraceReport> {
18307 let shared_preparation =
18308 dispatch_trace_shared_preparation_summary(&graph_nodes, &graph_edges, &conflict);
18309 let (ids, truncated) = dispatch_trace_collect_ids(
18310 &conflict.targets,
18311 &conflict.candidates,
18312 &graph_nodes,
18313 &graph_edges,
18314 depth,
18315 limit,
18316 );
18317 let mut nodes = graph_nodes
18318 .into_iter()
18319 .filter(|node| ids.contains(&node.id))
18320 .collect::<Vec<_>>();
18321 nodes.sort_by(|left, right| {
18322 dispatch_trace_kind_rank(&left.kind)
18323 .cmp(&dispatch_trace_kind_rank(&right.kind))
18324 .then(left.id.cmp(&right.id))
18325 });
18326 let node_ids = nodes
18327 .iter()
18328 .map(|node| node.id.as_str())
18329 .collect::<BTreeSet<_>>();
18330 let mut edges = graph_edges
18331 .into_iter()
18332 .filter(|edge| {
18333 node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
18334 })
18335 .collect::<Vec<_>>();
18336 edges.sort_by(|left, right| {
18337 left.from_id
18338 .cmp(&right.from_id)
18339 .then(left.kind.cmp(&right.kind))
18340 .then(left.to_id.cmp(&right.to_id))
18341 });
18342 let mut warnings = conflict.warnings;
18343 warnings.extend(extra_warnings);
18344
18345 Ok(DispatchTraceReport {
18346 contract_version: DISPATCH_TRACE_CONTRACT_VERSION.to_string(),
18347 root: conflict.root,
18348 scope: conflict.scope,
18349 targets: conflict.targets,
18350 projection_freshness: conflict.orchestration.projection_freshness,
18351 projection_hashes: conflict.orchestration.projection_hashes,
18352 evidence_packet_ids: conflict.orchestration.evidence_packet_ids,
18353 shared_preparation,
18354 worker_prompt_packets: conflict.worker_prompt_packets,
18355 worker_feedback: conflict
18356 .candidates
18357 .iter()
18358 .map(|candidate| candidate.worker_feedback.clone())
18359 .collect(),
18360 summary: dispatch_trace_summary(&nodes),
18361 nodes: nodes.into_iter().map(Into::into).collect(),
18362 edges: edges.into_iter().map(Into::into).collect(),
18363 conflict_matrix_decisions: conflict.orchestration.conflict_matrix_decisions,
18364 replay_commands: conflict.next_commands,
18365 repair_commands: graph_db_repair_commands(root, scope),
18366 truncated,
18367 warnings,
18368 })
18369}
18370
18371fn build_dispatch_trace_report(
18372 path: &Path,
18373 scope: Option<&str>,
18374 raw_targets: &[String],
18375 depth: usize,
18376 limit: usize,
18377 impact_limit: usize,
18378) -> Result<DispatchTraceReport> {
18379 let root = lint::resolve_project_root_or_canonical_path(path)?;
18380 let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
18381 if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
18382 write_traversal_graph_store(&root, path, scope)
18383 .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
18384 }
18385 let graph_db = graph_substrate_db_path(&root, scope);
18386 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
18387 .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
18388 let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
18389 let extra_warnings = store
18390 .read_only_recovery()
18391 .map(graph_db_read_recovery_diagnostic)
18392 .into_iter()
18393 .collect::<Vec<_>>();
18394 let prepared = prepare_conflict_matrix_inputs(&root, path, scope, impact_limit)?;
18395 let graph_prepared = prepare_conflict_matrix_graph_orchestration(
18396 &root,
18397 scope,
18398 "sqlite",
18399 raw_targets,
18400 &prepared,
18401 depth,
18402 limit,
18403 &store,
18404 freshness.clone(),
18405 )?;
18406 let dt_cache_key = cycle_packet_cache::cycle_packet_watermark_key(
18407 &prepared.preparation_cache.source_watermark,
18408 &prepared.preparation_cache.document_watermark,
18409 &prepared.preparation_cache.staged_diff_watermark,
18410 &[
18411 &format!("targets:{}", raw_targets.join(",")),
18412 &format!("depth:{depth}"),
18413 &format!("limit:{limit}"),
18414 ],
18415 );
18416 if let Some(cached_report) = cycle_packet_cache::cycle_packet_read_cache::<DispatchTraceReport>(
18417 &root,
18418 cycle_packet_cache::CyclePacketKind::ConflictMatrix,
18419 &dt_cache_key,
18420 ) {
18421 return Ok(cached_report);
18422 }
18423 let conflict = build_conflict_matrix_report_from_prepared_graph(
18424 &root,
18425 path,
18426 scope,
18427 depth,
18428 limit,
18429 impact_limit,
18430 freshness,
18431 extra_warnings.clone(),
18432 &prepared,
18433 &graph_prepared,
18434 )?;
18435 let report = build_dispatch_trace_report_from_conflict_snapshot(
18436 &root,
18437 scope,
18438 conflict,
18439 graph_prepared.graph.nodes,
18440 graph_prepared.graph.edges,
18441 depth,
18442 limit,
18443 extra_warnings,
18444 )?;
18445 cycle_packet_cache::cycle_packet_write_cache(
18446 &root,
18447 cycle_packet_cache::CyclePacketKind::ConflictMatrix,
18448 &dt_cache_key,
18449 &report,
18450 );
18451 Ok(report)
18452}
18453
18454fn dispatch_trace_html(report: &DispatchTraceReport) -> Result<String> {
18455 let json = serde_json::to_string(report)?.replace("</", "<\\/");
18456 let mut html = String::new();
18457 html.push_str(
18458 "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift dispatch trace</title>",
18459 );
18460 html.push_str(
18461 r#"<style>
18462:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#fff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e}
18463@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf}}
18464*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 360px;gap:14px}.panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.side{padding:14px;overflow:auto;max-height:720px}.side h2{font-size:15px;margin:12px 0 8px}.side h2:first-child{margin-top:0}.list{display:grid;gap:8px}.row{border:1px solid var(--line);border-radius:6px;padding:8px}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted);overflow-wrap:anywhere}svg{width:100%;height:680px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.node{stroke:var(--panel);stroke-width:2}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text)}@media(max-width:900px){.top{display:block}.layout{grid-template-columns:1fr}.side{max-height:none}svg{height:560px}}
18465</style>"#,
18466 );
18467 html.push_str("</head><body><div class=\"page\">");
18468 html.push_str(&format!(
18469 "<header class=\"top\"><div><h1>tsift dispatch trace</h1><div class=\"meta\">targets <code>{}</code> | evidence <code>{}</code> | nodes <code>{}</code> | worker_prompt_packets <code>{}</code></div></div><div class=\"meta\"><code>{}</code></div></header>",
18470 html_escape(&report.targets.join(", ")),
18471 report.evidence_packet_ids.len(),
18472 report.nodes.len(),
18473 report.worker_prompt_packets.len(),
18474 html_escape(&report.contract_version)
18475 ));
18476 html.push_str(
18477 r#"<main class="layout"><section class="panel"><svg id="graph-canvas" role="img" aria-label="Dispatch trace graph"></svg></section><aside class="side"><h2>Worker Prompt Packets</h2><div id="packets" class="list"></div><h2>Worker Feedback</h2><div id="feedback" class="list"></div><h2>Nodes</h2><div id="nodes" class="list"></div></aside></main>"#,
18478 );
18479 html.push_str("<script id=\"trace-data\" type=\"application/json\">");
18480 html.push_str(&json);
18481 html.push_str(
18482 r##"</script><script>
18483const report = JSON.parse(document.getElementById("trace-data").textContent);
18484const svg = document.getElementById("graph-canvas");
18485const nodeList = document.getElementById("nodes");
18486const packets = document.getElementById("packets");
18487const feedback = document.getElementById("feedback");
18488const nodes = report.nodes.map((node, index) => ({...node, index}));
18489const nodeById = new Map(nodes.map(node => [node.id, node]));
18490const edges = report.edges.filter(edge => nodeById.has(edge.from_id) && nodeById.has(edge.to_id));
18491const colorByKind = new Map([["backlog","#dc2626"],["job_packet","#ea580c"],["worker_result","#15803d"],["worker_context","#475569"],["source_handle","#64748b"],["semantic_concept","#9a3412"],["semantic_entity","#b45309"],["file","#2563eb"],["symbol","#16a34a"],["route","#7c3aed"],["session","#0891b2"]]);
18492function color(kind){return colorByKind.get(kind)||"#6b7280";}
18493function text(value){return value == null ? "" : String(value);}
18494function escapeHtml(value){return text(value).replace(/[&<>"']/g, ch => ({"&":"&","<":"<",">":">","\"":""","'":"'"}[ch]));}
18495function layout(){
18496 const rect = svg.getBoundingClientRect();
18497 const width = rect.width || 900, height = rect.height || 680, cx = width / 2, cy = height / 2;
18498 const kinds = [...new Set(nodes.map(node => node.kind))].sort();
18499 const counts = new Map();
18500 for (const node of nodes) counts.set(node.kind, (counts.get(node.kind)||0)+1);
18501 const offsets = new Map();
18502 for (const node of nodes) {
18503 const group = kinds.indexOf(node.kind);
18504 const index = offsets.get(node.kind) || 0;
18505 offsets.set(node.kind, index + 1);
18506 const total = counts.get(node.kind) || 1;
18507 const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
18508 const angle = Math.PI * 2 * index / Math.max(total, 1) + group * 0.53;
18509 node.x = cx + Math.cos(angle) * ring;
18510 node.y = cy + Math.sin(angle) * ring;
18511 }
18512}
18513function draw(){
18514 svg.innerHTML = "";
18515 for (const edge of edges) {
18516 const from = nodeById.get(edge.from_id), to = nodeById.get(edge.to_id);
18517 const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
18518 line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
18519 line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
18520 line.setAttribute("class", "edge");
18521 line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.kind;
18522 svg.appendChild(line);
18523 }
18524 for (const node of nodes) {
18525 const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
18526 circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
18527 circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
18528 circle.setAttribute("fill", color(node.kind));
18529 circle.setAttribute("class", "node");
18530 circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
18531 svg.appendChild(circle);
18532 const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
18533 label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
18534 label.setAttribute("class", "node-label");
18535 label.textContent = node.label.length > 34 ? node.label.slice(0,31) + "..." : node.label;
18536 svg.appendChild(label);
18537 }
18538}
18539packets.innerHTML = report.worker_prompt_packets.map(packet => `<div class="row"><div class="kind">${escapeHtml(packet.contract_version)} - ${escapeHtml(packet.risk)} - parallel_safe ${packet.parallel_safe ? "true" : "false"} - closure ${packet.worker_feedback ? packet.worker_feedback.closure_rank_score : 0}</div><div class="label">${escapeHtml(packet.title)}</div><div class="handle">${escapeHtml(packet.packet_id)}</div><div class="handle">blocks ${escapeHtml((packet.blocks||[]).join(", ") || "none")} | blocked_by ${escapeHtml((packet.blocked_by||[]).join(", ") || "none")}</div></div>`).join("") || "<div class=\"meta\">No packets.</div>";
18540feedback.innerHTML = report.worker_feedback.map(item => `<div class="row"><div class="kind">completed ${item.completed} - blocked ${item.blocked} - closure ${item.closure_rank_score}</div><div>files ${escapeHtml((item.touched_files||[]).join(", ") || "none")}</div><div>tests ${escapeHtml((item.expected_tests||[]).join(" && ") || "none")}</div>${item.repeated_blockage ? "<div class=\"label\">Repeated blockage</div>" : ""}${(item.stale_expected_tests||[]).length ? `<div class="label">Stale tests: ${escapeHtml(item.stale_expected_tests.join(", "))}</div>` : ""}${(item.follow_up_debt||[]).length ? `<div class="label">Follow-up debt: ${escapeHtml(item.follow_up_debt.join(", "))}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No worker results.</div>";
18541nodeList.innerHTML = nodes.map(node => `<div class="row"><div class="kind">${escapeHtml(node.kind)}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${escapeHtml(node.id)}</div></div>`).join("");
18542window.addEventListener("resize", () => { layout(); draw(); });
18543layout(); draw();
18544</script></div></body></html>"##,
18545 );
18546 Ok(html)
18547}
18548
18549struct DispatchTraceOptions<'a> {
18550 path: &'a Path,
18551 scope: Option<&'a str>,
18552 raw_targets: &'a [String],
18553 depth: usize,
18554 limit: usize,
18555 impact_limit: usize,
18556 trace_format: DispatchTraceFormat,
18557}
18558
18559fn cmd_dispatch_trace(
18560 options: DispatchTraceOptions<'_>,
18561 output_format: OutputFormat,
18562) -> Result<()> {
18563 let report = build_dispatch_trace_report(
18564 options.path,
18565 options.scope,
18566 options.raw_targets,
18567 options.depth,
18568 options.limit,
18569 options.impact_limit,
18570 )?;
18571 match options.trace_format {
18572 DispatchTraceFormat::Json => {
18573 if output_format.envelope {
18574 print_json_or_envelope(
18575 &report,
18576 &output_format,
18577 "dispatch-trace",
18578 "operator-review",
18579 ToolEnvelopeSummary {
18580 text: format!(
18581 "Dispatch trace for {} target(s): {} graph node(s), {} worker prompt packet(s)",
18582 report.targets.len(),
18583 report.nodes.len(),
18584 report.worker_prompt_packets.len()
18585 ),
18586 metrics: vec![
18587 envelope_metric("targets", report.targets.len()),
18588 envelope_metric("nodes", report.nodes.len()),
18589 envelope_metric("edges", report.edges.len()),
18590 envelope_metric(
18591 "worker_prompt_packets",
18592 report.worker_prompt_packets.len(),
18593 ),
18594 ],
18595 },
18596 report.truncated,
18597 report.replay_commands.clone(),
18598 )
18599 } else {
18600 println!(
18601 "{}",
18602 to_json_schema(
18603 &report,
18604 output_format.pretty,
18605 output_format.terse,
18606 output_format.ultra_terse,
18607 output_format.schema
18608 )?
18609 );
18610 Ok(())
18611 }
18612 }
18613 DispatchTraceFormat::Html => {
18614 println!("{}", dispatch_trace_html(&report)?);
18615 Ok(())
18616 }
18617 }
18618}
18619
18620#[derive(Clone, Debug)]
18621struct DependencyDagProfile {
18622 id: String,
18623 graph_node_id: String,
18624 label: String,
18625 path: Option<String>,
18626 line: Option<i64>,
18627 detail: Option<String>,
18628 source_files: BTreeSet<String>,
18629 source_symbols: BTreeSet<String>,
18630 config_files: BTreeSet<String>,
18631 expected_tests: BTreeSet<String>,
18632 semantic_refs: BTreeMap<String, ConflictMatrixSemanticRef>,
18633 worker_feedback: ConflictMatrixWorkerFeedback,
18634}
18635
18636#[derive(Clone, Debug, Serialize)]
18637struct DependencyDagNode {
18638 id: String,
18639 graph_node_id: String,
18640 label: String,
18641 #[serde(skip_serializing_if = "Option::is_none")]
18642 path: Option<String>,
18643 #[serde(skip_serializing_if = "Option::is_none")]
18644 line: Option<i64>,
18645 #[serde(skip_serializing_if = "Option::is_none")]
18646 detail: Option<String>,
18647 source_files: Vec<String>,
18648 source_symbols: Vec<String>,
18649 config_files: Vec<String>,
18650 expected_tests: Vec<String>,
18651 semantic_refs: Vec<ConflictMatrixSemanticRef>,
18652 worker_feedback: ConflictMatrixWorkerFeedback,
18653}
18654
18655#[derive(Clone, Debug, Serialize)]
18656struct DependencyDagEdge {
18657 from: String,
18658 to: String,
18659 kind: String,
18660 weight: usize,
18661 reasons: Vec<String>,
18662 #[serde(skip_serializing_if = "Vec::is_empty", default)]
18663 shared_files: Vec<String>,
18664 #[serde(skip_serializing_if = "Vec::is_empty", default)]
18665 shared_symbols: Vec<String>,
18666 #[serde(skip_serializing_if = "Vec::is_empty", default)]
18667 shared_tests: Vec<String>,
18668 #[serde(skip_serializing_if = "Vec::is_empty", default)]
18669 shared_config_files: Vec<String>,
18670 #[serde(skip_serializing_if = "Vec::is_empty", default)]
18671 shared_semantic_refs: Vec<String>,
18672}
18673
18674#[derive(Clone, Debug, Serialize)]
18675struct DependencyDagTopoBatch {
18676 batch: usize,
18677 targets: Vec<String>,
18678}
18679
18680#[derive(Clone, Debug, Serialize)]
18681struct DependencyDagCycleDiagnostics {
18682 has_cycles: bool,
18683 blocked_nodes: Vec<String>,
18684 cycle_edges: Vec<DependencyDagEdge>,
18685}
18686
18687#[derive(Serialize)]
18688struct DependencyDagSummary {
18689 nodes: usize,
18690 edges: usize,
18691 topo_batches: usize,
18692 has_cycles: bool,
18693}
18694
18695#[derive(Serialize)]
18696struct DependencyDagReport {
18697 contract_version: &'static str,
18698 root: String,
18699 #[serde(skip_serializing_if = "Option::is_none")]
18700 scope: Option<String>,
18701 path: String,
18702 targets: Vec<String>,
18703 projection_freshness: GraphDbFreshnessReport,
18704 projection_hashes: Vec<String>,
18705 nodes: Vec<DependencyDagNode>,
18706 edges: Vec<DependencyDagEdge>,
18707 topo_batches: Vec<DependencyDagTopoBatch>,
18708 cycle_diagnostics: DependencyDagCycleDiagnostics,
18709 summary: DependencyDagSummary,
18710 replay_commands: Vec<String>,
18711 repair_commands: Vec<String>,
18712 #[serde(skip_serializing_if = "Vec::is_empty", default)]
18713 warnings: Vec<String>,
18714}
18715
18716fn dependency_dag_backlog_node_for_target(
18717 store: &impl GraphStore,
18718 target: &str,
18719) -> Result<SubstrateGraphNode> {
18720 let resolved = graph_db_resolve_evidence_target(store, target)?
18721 .with_context(|| format!("dependency-dag target not found: {target}"))?;
18722 if resolved.kind == "backlog" {
18723 return Ok(resolved);
18724 }
18725 let Some(ref_id) = resolved.properties.get("ref_id").cloned() else {
18726 bail!(
18727 "dependency-dag target {} resolved to {} without a backlog ref_id",
18728 target,
18729 resolved.kind
18730 );
18731 };
18732 store
18733 .nodes_by_kind("backlog")?
18734 .into_iter()
18735 .filter(|node| node.properties.get("ref_id") == Some(&ref_id))
18736 .min_by(|left, right| {
18737 left.properties
18738 .get("line")
18739 .and_then(|value| value.parse::<i64>().ok())
18740 .cmp(
18741 &right
18742 .properties
18743 .get("line")
18744 .and_then(|value| value.parse::<i64>().ok()),
18745 )
18746 .then(left.id.cmp(&right.id))
18747 })
18748 .with_context(|| format!("dependency-dag backlog node not found for #{ref_id}"))
18749}
18750
18751fn dependency_dag_resolve_backlog_nodes(
18752 root: &Path,
18753 path: &Path,
18754 store: &impl GraphStore,
18755 raw_targets: &[String],
18756) -> Result<Vec<SubstrateGraphNode>> {
18757 let mut nodes = Vec::new();
18758 let mut seen = BTreeSet::new();
18759 if raw_targets.is_empty() {
18760 let hinted_path = if path.is_absolute() {
18761 path.to_path_buf()
18762 } else {
18763 root.join(path)
18764 };
18765 let hinted_markdown = hinted_path
18766 .extension()
18767 .and_then(|ext| ext.to_str())
18768 .is_some_and(|ext| ext.eq_ignore_ascii_case("md"));
18769 let hinted_rel = hinted_markdown.then(|| {
18770 relativize_pathbuf(&hinted_path, root)
18771 .to_string_lossy()
18772 .replace('\\', "/")
18773 });
18774 for node in store.nodes_by_kind("backlog")? {
18775 if let Some(expected_path) = &hinted_rel
18776 && node.properties.get("path") != Some(expected_path)
18777 {
18778 continue;
18779 }
18780 if seen.insert(node.id.clone()) {
18781 nodes.push(node);
18782 }
18783 }
18784 if nodes.is_empty() && hinted_rel.is_some() {
18785 for node in store.nodes_by_kind("backlog")? {
18786 if seen.insert(node.id.clone()) {
18787 nodes.push(node);
18788 }
18789 }
18790 }
18791 } else {
18792 for target in raw_targets {
18793 let normalized = normalize_conflict_target(target).unwrap_or_else(|| target.clone());
18794 let node = dependency_dag_backlog_node_for_target(store, &normalized)?;
18795 if seen.insert(node.id.clone()) {
18796 nodes.push(node);
18797 }
18798 }
18799 }
18800 if nodes.is_empty() {
18801 bail!("dependency-dag needs at least one resolvable backlog id");
18802 }
18803 nodes.sort_by(|left, right| {
18804 left.properties
18805 .get("line")
18806 .and_then(|value| value.parse::<i64>().ok())
18807 .cmp(
18808 &right
18809 .properties
18810 .get("line")
18811 .and_then(|value| value.parse::<i64>().ok()),
18812 )
18813 .then(left.id.cmp(&right.id))
18814 });
18815 Ok(nodes)
18816}
18817
18818fn dependency_dag_node_id(node: &SubstrateGraphNode) -> String {
18819 node.properties
18820 .get("ref_id")
18821 .cloned()
18822 .unwrap_or_else(|| node.label.trim_start_matches('#').to_string())
18823}
18824
18825fn dependency_dag_node_profile(
18826 root: &Path,
18827 store: &impl GraphStore,
18828 node: &SubstrateGraphNode,
18829 graph_nodes_by_id: &BTreeMap<String, SubstrateGraphNode>,
18830 graph_edges: &[SubstrateGraphEdge],
18831 depth: usize,
18832 limit: usize,
18833) -> Result<DependencyDagProfile> {
18834 let id = dependency_dag_node_id(node);
18835 let mut source_files = BTreeSet::new();
18836 let mut source_symbols = BTreeSet::new();
18837 for edge in graph_edges
18838 .iter()
18839 .filter(|edge| edge.from_id == node.id && edge.kind == "mentions")
18840 {
18841 let Some(target) = graph_nodes_by_id.get(&edge.to_id) else {
18842 continue;
18843 };
18844 match target.kind.as_str() {
18845 "file" | "route" => {
18846 if let Some(path) = target.properties.get("path") {
18847 source_files.insert(path.clone());
18848 }
18849 }
18850 "symbol" => {
18851 source_symbols.insert(target.label.clone());
18852 if let Some(path) = target.properties.get("path") {
18853 source_files.insert(path.clone());
18854 }
18855 }
18856 _ => {}
18857 }
18858 }
18859
18860 let max_rows = if limit == 0 { usize::MAX } else { limit };
18861 for (source, _) in
18862 graph_db_reachable_nodes_by_kind(store, &node.id, "source_handle", depth, max_rows)?
18863 {
18864 let terse: SubstrateTerseGraphNode = (&source).into();
18865 if let Some(handle) = conflict_matrix_source_handle(&terse) {
18866 source_files.insert(handle.file);
18867 }
18868 }
18869
18870 let worker_results = graph_nodes_by_id
18871 .values()
18872 .filter(|candidate| {
18873 candidate.kind == "worker_result"
18874 && candidate.properties.get("ref_id").map(String::as_str) == Some(id.as_str())
18875 })
18876 .map(SubstrateTerseGraphNode::from)
18877 .collect::<Vec<_>>();
18878 let worker_feedback = conflict_matrix_worker_feedback(&worker_results);
18879 let expected_tests = worker_feedback.expected_tests.iter().cloned().collect();
18880 let config_files = source_files
18881 .iter()
18882 .filter(|file| is_planner_config_path(file))
18883 .cloned()
18884 .collect();
18885
18886 let mut semantic_refs = BTreeMap::new();
18887 for kind in ["semantic_concept", "semantic_entity"] {
18888 for (semantic, _) in
18889 graph_db_reachable_nodes_by_kind(store, &node.id, kind, depth, max_rows)?
18890 {
18891 let terse: SubstrateTerseGraphNode = (&semantic).into();
18892 let item = conflict_matrix_semantic_ref(root, &terse);
18893 semantic_refs
18894 .entry(format!("{}:{}", item.kind, item.label))
18895 .or_insert(item);
18896 }
18897 }
18898
18899 Ok(DependencyDagProfile {
18900 id,
18901 graph_node_id: node.id.clone(),
18902 label: node.label.clone(),
18903 path: node.properties.get("path").cloned(),
18904 line: node
18905 .properties
18906 .get("line")
18907 .and_then(|value| value.parse::<i64>().ok()),
18908 detail: node.properties.get("detail").cloned(),
18909 source_files,
18910 source_symbols,
18911 config_files,
18912 expected_tests,
18913 semantic_refs,
18914 worker_feedback,
18915 })
18916}
18917
18918fn dependency_dag_marker_refs(text: &str, markers: &[&str]) -> Vec<String> {
18919 let lower = text.to_ascii_lowercase();
18920 let mut refs = Vec::new();
18921 for marker in markers {
18922 let mut offset = 0usize;
18923 while let Some(pos) = lower[offset..].find(marker) {
18924 let start = offset + pos + marker.len();
18925 let segment = text[start..]
18926 .split(['\n', '.'])
18927 .next()
18928 .unwrap_or(&text[start..]);
18929 refs.extend(extract_conflict_target_refs(segment));
18930 offset = start;
18931 }
18932 }
18933 dedupe_preserve_order(refs)
18934}
18935
18936fn dependency_dag_push_edge(
18937 edges: &mut Vec<DependencyDagEdge>,
18938 seen: &mut BTreeSet<(String, String, String)>,
18939 edge: DependencyDagEdge,
18940) {
18941 if edge.from == edge.to {
18942 return;
18943 }
18944 if seen.insert((edge.from.clone(), edge.to.clone(), edge.kind.clone())) {
18945 edges.push(edge);
18946 }
18947}
18948
18949fn dependency_dag_explicit_edges(
18950 profiles: &[DependencyDagProfile],
18951 target_ids: &BTreeSet<String>,
18952 edges: &mut Vec<DependencyDagEdge>,
18953 seen: &mut BTreeSet<(String, String, String)>,
18954) {
18955 for profile in profiles {
18956 let detail = profile.detail.as_deref().unwrap_or_default();
18957 for dep in dependency_dag_marker_refs(
18958 detail,
18959 &[
18960 "depends on",
18961 "depends-on",
18962 "deps:",
18963 "after",
18964 "blocked by",
18965 "requires",
18966 ],
18967 ) {
18968 if target_ids.contains(&dep) {
18969 dependency_dag_push_edge(
18970 edges,
18971 seen,
18972 DependencyDagEdge {
18973 from: dep.clone(),
18974 to: profile.id.clone(),
18975 kind: "explicit_depends_on".to_string(),
18976 weight: 1000,
18977 reasons: vec![format!("{} declares dependency on #{dep}", profile.id)],
18978 shared_files: Vec::new(),
18979 shared_symbols: Vec::new(),
18980 shared_tests: Vec::new(),
18981 shared_config_files: Vec::new(),
18982 shared_semantic_refs: Vec::new(),
18983 },
18984 );
18985 }
18986 }
18987 for downstream in dependency_dag_marker_refs(detail, &["before", "unblocks"]) {
18988 if target_ids.contains(&downstream) {
18989 dependency_dag_push_edge(
18990 edges,
18991 seen,
18992 DependencyDagEdge {
18993 from: profile.id.clone(),
18994 to: downstream.clone(),
18995 kind: "explicit_before".to_string(),
18996 weight: 900,
18997 reasons: vec![format!(
18998 "{} declares it should run before #{downstream}",
18999 profile.id
19000 )],
19001 shared_files: Vec::new(),
19002 shared_symbols: Vec::new(),
19003 shared_tests: Vec::new(),
19004 shared_config_files: Vec::new(),
19005 shared_semantic_refs: Vec::new(),
19006 },
19007 );
19008 }
19009 }
19010 }
19011}
19012
19013fn dependency_dag_worker_follow_up_edges(
19014 profiles: &[DependencyDagProfile],
19015 target_ids: &BTreeSet<String>,
19016 edges: &mut Vec<DependencyDagEdge>,
19017 seen: &mut BTreeSet<(String, String, String)>,
19018) {
19019 for profile in profiles {
19020 for follow_up in &profile.worker_feedback.follow_up_ids {
19021 if target_ids.contains(follow_up) {
19022 dependency_dag_push_edge(
19023 edges,
19024 seen,
19025 DependencyDagEdge {
19026 from: profile.id.clone(),
19027 to: follow_up.clone(),
19028 kind: "worker_result_follow_up".to_string(),
19029 weight: 700,
19030 reasons: vec![format!(
19031 "worker_result for #{} references follow-up #{}",
19032 profile.id, follow_up
19033 )],
19034 shared_files: Vec::new(),
19035 shared_symbols: Vec::new(),
19036 shared_tests: Vec::new(),
19037 shared_config_files: Vec::new(),
19038 shared_semantic_refs: Vec::new(),
19039 },
19040 );
19041 }
19042 }
19043 }
19044}
19045
19046fn dependency_dag_overlap_edges(
19047 profiles: &[DependencyDagProfile],
19048 edges: &mut Vec<DependencyDagEdge>,
19049 seen: &mut BTreeSet<(String, String, String)>,
19050) {
19051 for left_idx in 0..profiles.len() {
19052 for right_idx in (left_idx + 1)..profiles.len() {
19053 let left = &profiles[left_idx];
19054 let right = &profiles[right_idx];
19055 let shared_files = sorted_intersection(&left.source_files, &right.source_files);
19056 let shared_symbols = sorted_intersection(&left.source_symbols, &right.source_symbols);
19057 let shared_tests = sorted_intersection(&left.expected_tests, &right.expected_tests);
19058 let shared_config_files = sorted_intersection(&left.config_files, &right.config_files);
19059 let left_semantic = left.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
19060 let right_semantic = right.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
19061 let shared_semantic_refs = sorted_intersection(&left_semantic, &right_semantic);
19062 if shared_files.is_empty()
19063 && shared_symbols.is_empty()
19064 && shared_tests.is_empty()
19065 && shared_config_files.is_empty()
19066 && shared_semantic_refs.is_empty()
19067 {
19068 continue;
19069 }
19070 let kind = if shared_files.is_empty()
19071 && shared_symbols.is_empty()
19072 && shared_tests.is_empty()
19073 && shared_config_files.is_empty()
19074 {
19075 "semantic_relation"
19076 } else {
19077 "shared_resource"
19078 };
19079 let mut reasons = Vec::new();
19080 if !shared_files.is_empty() {
19081 reasons.push(format!("shared files: {}", shared_files.join(", ")));
19082 }
19083 if !shared_symbols.is_empty() {
19084 reasons.push(format!("shared symbols: {}", shared_symbols.join(", ")));
19085 }
19086 if !shared_tests.is_empty() {
19087 reasons.push(format!("shared tests: {}", shared_tests.join(" && ")));
19088 }
19089 if !shared_config_files.is_empty() {
19090 reasons.push(format!(
19091 "shared config files: {}",
19092 shared_config_files.join(", ")
19093 ));
19094 }
19095 if !shared_semantic_refs.is_empty() {
19096 reasons.push(format!(
19097 "shared semantic refs: {}",
19098 shared_semantic_refs.join(", ")
19099 ));
19100 }
19101 let weight = shared_files.len() * 100
19102 + shared_config_files.len() * 100
19103 + shared_symbols.len() * 40
19104 + shared_tests.len() * 10
19105 + shared_semantic_refs.len() * 5;
19106 dependency_dag_push_edge(
19107 edges,
19108 seen,
19109 DependencyDagEdge {
19110 from: left.id.clone(),
19111 to: right.id.clone(),
19112 kind: kind.to_string(),
19113 weight,
19114 reasons,
19115 shared_files,
19116 shared_symbols,
19117 shared_tests,
19118 shared_config_files,
19119 shared_semantic_refs,
19120 },
19121 );
19122 }
19123 }
19124}
19125
19126fn dependency_dag_topo_batches(
19127 targets: &[String],
19128 edges: &[DependencyDagEdge],
19129) -> (Vec<DependencyDagTopoBatch>, DependencyDagCycleDiagnostics) {
19130 let target_set = targets.iter().cloned().collect::<BTreeSet<_>>();
19131 let order = targets
19132 .iter()
19133 .enumerate()
19134 .map(|(idx, id)| (id.clone(), idx))
19135 .collect::<BTreeMap<_, _>>();
19136 let mut indegree = targets
19137 .iter()
19138 .map(|id| (id.clone(), 0usize))
19139 .collect::<BTreeMap<_, _>>();
19140 let mut outgoing = BTreeMap::<String, Vec<String>>::new();
19141 let mut seen_pairs = BTreeSet::<(String, String)>::new();
19142 for edge in edges {
19143 if !target_set.contains(&edge.from) || !target_set.contains(&edge.to) {
19144 continue;
19145 }
19146 if !seen_pairs.insert((edge.from.clone(), edge.to.clone())) {
19147 continue;
19148 }
19149 *indegree.entry(edge.to.clone()).or_default() += 1;
19150 outgoing
19151 .entry(edge.from.clone())
19152 .or_default()
19153 .push(edge.to.clone());
19154 }
19155 for values in outgoing.values_mut() {
19156 values.sort_by_key(|id| order.get(id).copied().unwrap_or(usize::MAX));
19157 values.dedup();
19158 }
19159
19160 let mut processed = BTreeSet::new();
19161 let mut batches = Vec::new();
19162 loop {
19163 let mut ready = targets
19164 .iter()
19165 .filter(|id| !processed.contains(*id))
19166 .filter(|id| indegree.get(*id).copied().unwrap_or(0) == 0)
19167 .cloned()
19168 .collect::<Vec<_>>();
19169 ready.sort_by_key(|id| order.get(id).copied().unwrap_or(usize::MAX));
19170 if ready.is_empty() {
19171 break;
19172 }
19173 for id in &ready {
19174 processed.insert(id.clone());
19175 for next in outgoing.get(id).into_iter().flatten() {
19176 if let Some(value) = indegree.get_mut(next) {
19177 *value = value.saturating_sub(1);
19178 }
19179 }
19180 }
19181 batches.push(DependencyDagTopoBatch {
19182 batch: batches.len() + 1,
19183 targets: ready,
19184 });
19185 }
19186
19187 let blocked_nodes = targets
19188 .iter()
19189 .filter(|id| !processed.contains(*id))
19190 .cloned()
19191 .collect::<Vec<_>>();
19192 let blocked_set = blocked_nodes.iter().cloned().collect::<BTreeSet<_>>();
19193 let cycle_edges = edges
19194 .iter()
19195 .filter(|edge| blocked_set.contains(&edge.from) && blocked_set.contains(&edge.to))
19196 .cloned()
19197 .collect::<Vec<_>>();
19198 (
19199 batches,
19200 DependencyDagCycleDiagnostics {
19201 has_cycles: !blocked_nodes.is_empty(),
19202 blocked_nodes,
19203 cycle_edges,
19204 },
19205 )
19206}
19207
19208fn dependency_dag_replay_commands(
19209 path: &Path,
19210 scope: Option<&str>,
19211 targets: &[String],
19212 depth: usize,
19213 limit: usize,
19214) -> Vec<String> {
19215 let target_args = targets
19216 .iter()
19217 .map(|target| shell_quote(target))
19218 .collect::<Vec<_>>()
19219 .join(" ");
19220 let mut command = format!(
19221 "tsift dependency-dag --path {}{} --depth {} --limit {} --json",
19222 shell_quote(path.to_string_lossy().as_ref()),
19223 scope
19224 .map(|scope| format!(" --scope {}", shell_quote(scope)))
19225 .unwrap_or_default(),
19226 depth,
19227 limit
19228 );
19229 if !target_args.is_empty() {
19230 command.push(' ');
19231 command.push_str(&target_args);
19232 }
19233 vec![command]
19234}
19235
19236fn build_dependency_dag_report(
19237 path: &Path,
19238 scope: Option<&str>,
19239 raw_targets: &[String],
19240 depth: usize,
19241 limit: usize,
19242) -> Result<DependencyDagReport> {
19243 let root = lint::resolve_project_root_or_canonical_path(path)?;
19244 write_traversal_graph_store(&root, path, scope)
19245 .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
19246 let graph_db = graph_substrate_db_path(&root, scope);
19247 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
19248 .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
19249 let mut warnings = Vec::new();
19250 if let Some(recovery) = store.read_only_recovery() {
19251 warnings.push(graph_db_read_recovery_diagnostic(recovery));
19252 }
19253 let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
19254 if freshness.fail_closed {
19255 bail!(
19256 "dependency-dag graph projection failed closed: {}; repair: {}",
19257 freshness.diagnostics.join("; "),
19258 graph_db_repair_commands(&root, scope).join("; ")
19259 );
19260 }
19261
19262 let target_nodes = dependency_dag_resolve_backlog_nodes(&root, path, &store, raw_targets)?;
19263 let graph_nodes = store.all_nodes()?;
19264 let graph_edges = store.all_edges()?;
19265 let graph_nodes_by_id = graph_nodes
19266 .into_iter()
19267 .map(|node| (node.id.clone(), node))
19268 .collect::<BTreeMap<_, _>>();
19269 let profiles = target_nodes
19270 .iter()
19271 .map(|node| {
19272 dependency_dag_node_profile(
19273 &root,
19274 &store,
19275 node,
19276 &graph_nodes_by_id,
19277 &graph_edges,
19278 depth,
19279 limit,
19280 )
19281 })
19282 .collect::<Result<Vec<_>>>()?;
19283 let targets = profiles
19284 .iter()
19285 .map(|profile| profile.id.clone())
19286 .collect::<Vec<_>>();
19287 let target_ids = targets.iter().cloned().collect::<BTreeSet<_>>();
19288
19289 let mut edges = Vec::new();
19290 let mut seen_edges = BTreeSet::new();
19291 dependency_dag_explicit_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
19292 dependency_dag_worker_follow_up_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
19293 dependency_dag_overlap_edges(&profiles, &mut edges, &mut seen_edges);
19294 edges.sort_by(|left, right| {
19295 left.from
19296 .cmp(&right.from)
19297 .then(left.to.cmp(&right.to))
19298 .then(left.kind.cmp(&right.kind))
19299 });
19300 let (topo_batches, cycle_diagnostics) = dependency_dag_topo_batches(&targets, &edges);
19301
19302 let nodes = profiles
19303 .into_iter()
19304 .map(|profile| DependencyDagNode {
19305 id: profile.id,
19306 graph_node_id: profile.graph_node_id,
19307 label: profile.label,
19308 path: profile.path,
19309 line: profile.line,
19310 detail: profile.detail,
19311 source_files: sorted_set(&profile.source_files),
19312 source_symbols: sorted_set(&profile.source_symbols),
19313 config_files: sorted_set(&profile.config_files),
19314 expected_tests: sorted_set(&profile.expected_tests),
19315 semantic_refs: profile.semantic_refs.into_values().collect(),
19316 worker_feedback: profile.worker_feedback,
19317 })
19318 .collect::<Vec<_>>();
19319 let projection_hashes = freshness
19320 .content_hash
19321 .clone()
19322 .into_iter()
19323 .collect::<Vec<_>>();
19324 let replay_commands = dependency_dag_replay_commands(path, scope, &targets, depth, limit);
19325 let repair_commands = graph_db_repair_commands(&root, scope);
19326 let summary = DependencyDagSummary {
19327 nodes: nodes.len(),
19328 edges: edges.len(),
19329 topo_batches: topo_batches.len(),
19330 has_cycles: cycle_diagnostics.has_cycles,
19331 };
19332
19333 Ok(DependencyDagReport {
19334 contract_version: DEPENDENCY_DAG_CONTRACT_VERSION,
19335 root: root.to_string_lossy().to_string(),
19336 scope: scope.map(str::to_string),
19337 path: path.to_string_lossy().to_string(),
19338 targets,
19339 projection_freshness: freshness,
19340 projection_hashes,
19341 nodes,
19342 edges,
19343 topo_batches,
19344 cycle_diagnostics,
19345 summary,
19346 replay_commands,
19347 repair_commands,
19348 warnings,
19349 })
19350}
19351
19352fn print_dependency_dag_human(report: &DependencyDagReport, compact: bool) {
19353 if compact {
19354 println!(
19355 "dependency-dag targets:{} edges:{} batches:{} cycles:{}",
19356 report.targets.len(),
19357 report.edges.len(),
19358 report.topo_batches.len(),
19359 report.cycle_diagnostics.has_cycles
19360 );
19361 } else {
19362 println!("Dependency DAG");
19363 println!(" targets: {}", report.targets.join(", "));
19364 println!(" edges: {}", report.edges.len());
19365 println!(" cycles: {}", report.cycle_diagnostics.has_cycles);
19366 }
19367 for batch in &report.topo_batches {
19368 println!("batch #{}: {}", batch.batch, batch.targets.join(", "));
19369 }
19370 for edge in &report.edges {
19371 println!(
19372 "edge {} -> {} kind:{} weight:{}",
19373 edge.from, edge.to, edge.kind, edge.weight
19374 );
19375 for reason in &edge.reasons {
19376 println!(" reason: {reason}");
19377 }
19378 }
19379 if report.cycle_diagnostics.has_cycles {
19380 println!(
19381 "cycle blocked nodes: {}",
19382 report.cycle_diagnostics.blocked_nodes.join(", ")
19383 );
19384 }
19385 for command in &report.replay_commands {
19386 println!("replay: {command}");
19387 }
19388 for command in &report.repair_commands {
19389 println!("repair: {command}");
19390 }
19391 for warning in &report.warnings {
19392 println!("warning: {warning}");
19393 }
19394}
19395
19396fn cmd_dependency_dag(
19397 path: &Path,
19398 scope: Option<&str>,
19399 raw_targets: &[String],
19400 depth: usize,
19401 limit: usize,
19402 format: OutputFormat,
19403) -> Result<()> {
19404 let report = build_dependency_dag_report(path, scope, raw_targets, depth, limit)?;
19405 if format.json_output {
19406 print_json_or_envelope(
19407 &report,
19408 &format,
19409 "dependency-dag",
19410 "topological-planning",
19411 ToolEnvelopeSummary {
19412 text: format!(
19413 "Dependency DAG for {} target(s): edges={} batches={} cycles={}",
19414 report.targets.len(),
19415 report.edges.len(),
19416 report.topo_batches.len(),
19417 report.cycle_diagnostics.has_cycles
19418 ),
19419 metrics: vec![
19420 envelope_metric("targets", report.targets.len()),
19421 envelope_metric("edges", report.edges.len()),
19422 envelope_metric("topo_batches", report.topo_batches.len()),
19423 envelope_metric("has_cycles", report.cycle_diagnostics.has_cycles),
19424 ],
19425 },
19426 report.cycle_diagnostics.has_cycles,
19427 report.replay_commands.clone(),
19428 )
19429 } else {
19430 print_dependency_dag_human(&report, format.compact);
19431 Ok(())
19432 }
19433}
19434
19435pub(crate) fn render_log_digest_from_input(
19436 path: &Path,
19437 input: &str,
19438 format: OutputFormat,
19439) -> Result<()> {
19440 let report = log_digest::compute(path, input)?;
19441 if format.json_output {
19442 println!(
19443 "{}",
19444 to_json_schema(
19445 &report,
19446 format.pretty,
19447 format.terse,
19448 format.ultra_terse,
19449 format.schema
19450 )?
19451 );
19452 return Ok(());
19453 }
19454
19455 if format.compact {
19456 println!(
19457 "log lines:{} signals:{} repeats:{} files:{} syms:{} stacks:{}",
19458 report.non_empty_lines,
19459 report.signal_groups,
19460 report.repeated_line_groups,
19461 report.file_ref_groups,
19462 report.symbol_ref_groups,
19463 report.stack_groups
19464 );
19465 for signal in &report.signals {
19466 let location = match (&signal.path, signal.line) {
19467 (Some(path), Some(line)) => format!("{path}:{line}"),
19468 (Some(path), None) => path.clone(),
19469 _ => "-".to_string(),
19470 };
19471 println!(
19472 "{} sev:{} count:{} sums:{} msg:{}",
19473 location,
19474 signal.severity,
19475 signal.occurrences,
19476 log_digest_summary_label(signal.summary_state),
19477 truncate_for_compact(&signal.message, 80)
19478 );
19479 }
19480 for repeated in &report.repeated_lines {
19481 println!(
19482 "repeat count:{} line:{}",
19483 repeated.occurrences,
19484 truncate_for_compact(&repeated.line, 80)
19485 );
19486 }
19487 for symbol in &report.symbol_refs {
19488 println!(
19489 "sym:{} count:{} sums:{}",
19490 symbol.symbol,
19491 symbol.occurrences,
19492 log_digest_summary_label(symbol.summary_state)
19493 );
19494 }
19495 for warning in &report.warnings {
19496 println!("warning: {warning}");
19497 }
19498 return Ok(());
19499 }
19500
19501 println!("Log digest");
19502 println!(" lines: {}", report.total_lines);
19503 println!(" non-empty lines: {}", report.non_empty_lines);
19504 println!(" signal groups: {}", report.signal_groups);
19505 println!(
19506 " repeated lines: {}",
19507 report.repeated_line_groups
19508 );
19509 println!(
19510 " repeated line instances: {}",
19511 report.repeated_line_occurrences
19512 );
19513 println!(" file refs: {}", report.file_ref_groups);
19514 println!(" symbol refs: {}", report.symbol_ref_groups);
19515 println!(" stack groups: {}", report.stack_groups);
19516
19517 if !report.signals.is_empty() {
19518 println!();
19519 println!("Signals:");
19520 for signal in &report.signals {
19521 match (&signal.path, signal.line, signal.column) {
19522 (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
19523 (Some(path), Some(line), None) => println!("{path}:{line}"),
19524 (Some(path), None, _) => println!("{path}"),
19525 (None, _, _) => println!("(no file anchor)"),
19526 }
19527 println!(" severity: {}", signal.severity);
19528 println!(" occurrences: {}", signal.occurrences);
19529 println!(" message: {}", signal.message);
19530 println!(
19531 " cached summaries: {}",
19532 log_digest_summary_label(signal.summary_state)
19533 );
19534 for summary in &signal.current_summaries {
19535 println!(
19536 " - {}: {}",
19537 summary.symbol,
19538 truncate_for_compact(&summary.summary, 160)
19539 );
19540 }
19541 }
19542 }
19543
19544 if !report.repeated_lines.is_empty() {
19545 println!();
19546 println!("Repeated lines:");
19547 for repeated in &report.repeated_lines {
19548 println!(
19549 " {}x {}",
19550 repeated.occurrences,
19551 truncate_for_compact(&repeated.line, 180)
19552 );
19553 }
19554 }
19555
19556 if !report.file_refs.is_empty() {
19557 println!();
19558 println!("Anchored files:");
19559 for file_ref in &report.file_refs {
19560 match (file_ref.line, file_ref.column) {
19561 (Some(line), Some(column)) => println!("{}:{}:{}", file_ref.path, line, column),
19562 (Some(line), None) => println!("{}:{}", file_ref.path, line),
19563 (None, _) => println!("{}", file_ref.path),
19564 }
19565 println!(" occurrences: {}", file_ref.occurrences);
19566 println!(
19567 " cached summaries: {}",
19568 log_digest_summary_label(file_ref.summary_state)
19569 );
19570 for summary in &file_ref.current_summaries {
19571 println!(
19572 " - {}: {}",
19573 summary.symbol,
19574 truncate_for_compact(&summary.summary, 160)
19575 );
19576 }
19577 }
19578 }
19579
19580 if !report.symbol_refs.is_empty() {
19581 println!();
19582 println!("Symbol candidates:");
19583 for symbol in &report.symbol_refs {
19584 println!("{}", symbol.symbol);
19585 println!(" occurrences: {}", symbol.occurrences);
19586 println!(
19587 " cached summaries: {}",
19588 log_digest_summary_label(symbol.summary_state)
19589 );
19590 for summary in &symbol.current_summaries {
19591 println!(
19592 " - {}: {}",
19593 summary.symbol,
19594 truncate_for_compact(&summary.summary, 160)
19595 );
19596 }
19597 }
19598 }
19599
19600 if !report.stack_traces.is_empty() {
19601 println!();
19602 println!("Stack groups:");
19603 for stack in &report.stack_traces {
19604 println!(" occurrences: {}", stack.occurrences);
19605 for frame in &stack.frames {
19606 println!(" - {}", frame);
19607 }
19608 }
19609 }
19610
19611 for warning in &report.warnings {
19612 println!("warning: {warning}");
19613 }
19614 Ok(())
19615}
19616
19617pub(crate) fn metric_digest_trend_label(trend: metric_digest::MetricDigestTrend) -> &'static str {
19618 match trend {
19619 metric_digest::MetricDigestTrend::Improved => "improved",
19620 metric_digest::MetricDigestTrend::Regressed => "regressed",
19621 metric_digest::MetricDigestTrend::Flat => "flat",
19622 metric_digest::MetricDigestTrend::Unknown => "changed",
19623 }
19624}
19625
19626pub(crate) fn metric_digest_gate_label(
19627 decision: metric_digest::CommunitySearchGateDecision,
19628) -> &'static str {
19629 match decision {
19630 metric_digest::CommunitySearchGateDecision::Pass => "pass",
19631 metric_digest::CommunitySearchGateDecision::Block => "block",
19632 }
19633}
19634
19635fn cmd_dci_benchmark(fixture_path: &Path, format: OutputFormat) -> Result<()> {
19636 let input = fs::read_to_string(fixture_path)
19637 .with_context(|| format!("reading dci-benchmark fixture: {}", fixture_path.display()))?;
19638 let report = dci_benchmark::compute(&input)?;
19639
19640 if format.json_output {
19641 println!(
19642 "{}",
19643 to_json_schema(
19644 &report,
19645 format.pretty,
19646 format.terse,
19647 format.ultra_terse,
19648 format.schema
19649 )?
19650 );
19651 return Ok(());
19652 }
19653
19654 if format.compact {
19655 println!(
19656 "dci tasks:{} strategies:{} warnings:{}",
19657 report.tasks_loaded,
19658 report.strategies_compared,
19659 report.warnings.len()
19660 );
19661 for summary in &report.strategy_summaries {
19662 println!(
19663 "{} rank:{} loc:{}/{} rate:{} useful_hits:{} zero_output:{} calls:{} latency_ms:{} tokens:{} output_tokens:{}",
19664 summary.strategy,
19665 summary.rank,
19666 summary.localized,
19667 summary.task_runs,
19668 dci_benchmark::format_number(summary.localization_rate * 100.0),
19669 dci_benchmark::format_number(summary.avg_useful_hits),
19670 dci_benchmark::format_number(summary.zero_output_rate * 100.0),
19671 dci_benchmark::format_number(summary.avg_tool_calls),
19672 dci_benchmark::format_number(summary.avg_latency_ms),
19673 dci_benchmark::format_number(summary.avg_estimated_tokens),
19674 dci_benchmark::format_number(summary.avg_output_tokens)
19675 );
19676 }
19677 if let Some(gate) = &report.memory_retrieval_gate {
19678 println!(
19679 "memory_retrieval_gate decision:{} baseline:{} min_avg_useful_hits:{} max_zero_output_failures:{} diagnostics:{}",
19680 gate.decision,
19681 gate.baseline_strategy,
19682 dci_benchmark::format_number(gate.min_avg_useful_hits),
19683 gate.max_zero_output_failures,
19684 gate.diagnostics.len()
19685 );
19686 }
19687 for warning in &report.warnings {
19688 println!("warning: {warning}");
19689 }
19690 return Ok(());
19691 }
19692
19693 println!("DCI benchmark");
19694 if let Some(description) = &report.description {
19695 println!(" description: {}", description);
19696 }
19697 println!(" tasks loaded: {}", report.tasks_loaded);
19698 println!(" strategies compared: {}", report.strategies_compared);
19699
19700 println!();
19701 println!("Strategy summary:");
19702 for summary in &report.strategy_summaries {
19703 println!(
19704 " #{} {}: localization {}/{} ({:.1}%), avg useful hits {}, zero output {:.1}%, avg calls {}, avg latency {}ms, avg tokens {}, avg output tokens {}",
19705 summary.rank,
19706 summary.strategy,
19707 summary.localized,
19708 summary.task_runs,
19709 summary.localization_rate * 100.0,
19710 dci_benchmark::format_number(summary.avg_useful_hits),
19711 summary.zero_output_rate * 100.0,
19712 dci_benchmark::format_number(summary.avg_tool_calls),
19713 dci_benchmark::format_number(summary.avg_latency_ms),
19714 dci_benchmark::format_number(summary.avg_estimated_tokens),
19715 dci_benchmark::format_number(summary.avg_output_tokens)
19716 );
19717 }
19718
19719 if let Some(gate) = &report.memory_retrieval_gate {
19720 println!();
19721 println!("Memory retrieval gate:");
19722 println!(" decision: {}", gate.decision);
19723 println!(
19724 " baseline: {}, min avg useful hits {}, max zero-output failures {}",
19725 gate.baseline_strategy,
19726 dci_benchmark::format_number(gate.min_avg_useful_hits),
19727 gate.max_zero_output_failures
19728 );
19729 for row in &gate.rows {
19730 println!(
19731 " {}: status {}, avg useful hits {}, zero-output failures {}",
19732 row.strategy,
19733 row.status,
19734 dci_benchmark::format_number(row.avg_useful_hits),
19735 row.zero_output_failures
19736 );
19737 }
19738 for diagnostic in &gate.diagnostics {
19739 println!(" diagnostic: {diagnostic}");
19740 }
19741 }
19742
19743 println!();
19744 println!("Task winners:");
19745 for row in &report.task_rows {
19746 let label = row
19747 .label
19748 .as_ref()
19749 .map(|value| format!(" ({value})"))
19750 .unwrap_or_default();
19751 println!(" {}{}", row.task_id, label);
19752 println!(" localized: {}", row.best_localization.join(", "));
19753 println!(" most useful hits: {}", row.most_useful_hits.join(", "));
19754 println!(
19755 " lowest calls: {}, lowest latency: {}, lowest tokens: {}, lowest output tokens: {}",
19756 row.lowest_tool_calls.as_deref().unwrap_or("-"),
19757 row.lowest_latency.as_deref().unwrap_or("-"),
19758 row.lowest_token_budget.as_deref().unwrap_or("-"),
19759 row.lowest_output_tokens.as_deref().unwrap_or("-")
19760 );
19761 if !row.zero_output_failures.is_empty() {
19762 println!(" zero output: {}", row.zero_output_failures.join(", "));
19763 }
19764 }
19765
19766 for warning in &report.warnings {
19767 println!("warning: {warning}");
19768 }
19769 Ok(())
19770}
19771
/// Formats a count compactly: plain digits below 1 000, one decimal with a `K`
/// suffix below one million, and one decimal with an `M` suffix from there on.
///
/// Values just under one million that would round up to `1000.0K` are promoted
/// to the `M` form, so the output never shows a four-digit `K` figure.
pub(crate) fn format_compact_count(value: u64) -> String {
    const THOUSAND: u64 = 1_000;
    const MILLION: u64 = 1_000_000;

    if value < THOUSAND {
        return value.to_string();
    }
    if value < MILLION {
        let thousands = format!("{:.1}", value as f64 / THOUSAND as f64);
        // Rounding to one decimal can carry into the next unit (999_999 ->
        // "1000.0"); in that case fall through and render as millions.
        if thousands != "1000.0" {
            return format!("{thousands}K");
        }
    }
    format!("{:.1}M", value as f64 / MILLION as f64)
}
19781
/// Runs `shell_command`, digests what it printed, and reports the result.
///
/// `kind` is parsed by `DigestRunnerKind::parse` and selects the test or log
/// digest. With both `format.json_output` and `format.envelope` set, the
/// captured output is persisted as a transcript artifact (unless it is blank)
/// and a JSON report goes through `print_json_or_envelope`. In every other
/// case the captured text is rendered by the matching `render_*_from_input`
/// helper.
///
/// After reporting, a wrapped command that did not succeed ends the process
/// with that command's exit code via `std::process::exit`. When the status
/// carries no exit code, an error mentioning signal termination is returned.
fn cmd_digest_runner(
    kind: &str,
    path: &Path,
    runner: Option<&str>,
    shell_command: &str,
    format: OutputFormat,
) -> Result<()> {
    let digest_kind = DigestRunnerKind::parse(kind)?;
    let root = transcript_artifact_root(path)?;
    let execution = run_digest_runner_command(shell_command)?;
    let output = &execution.output;
    // Only stdout is read here; the runner's shell wrapper redirects stderr into it.
    let captured = String::from_utf8_lossy(&output.stdout).into_owned();
    // -1 stands in when the exit status carries no code.
    let exit_code = output.status.code().unwrap_or(-1);
    if format.json_output && format.envelope {
        // The key covers the digest kind, both command strings, and the full
        // captured text.
        let artifact_key = format!(
            "{}:{}:{}:{}",
            digest_kind.as_str(),
            shell_command,
            execution.executed_command,
            captured
        );
        // Blank output produces no artifact.
        let artifact = if captured.trim().is_empty() {
            None
        } else {
            // `suffix` is the artifact file suffix; `expand` is the follow-up
            // command that re-digests the stored file.
            let (suffix, expand) = match digest_kind {
                DigestRunnerKind::Test => (
                    "test.log",
                    format!(
                        "tsift test-digest --path {} --input {}{} --json",
                        shell_quote(root.to_string_lossy().as_ref()),
                        shell_quote(
                            root.join(".tsift/artifacts")
                                .join(format!("{}.test.log", stable_handle("tart", &artifact_key)))
                                .to_string_lossy()
                                .as_ref()
                        ),
                        runner
                            .map(|value| format!(" --runner {}", shell_quote(value)))
                            .unwrap_or_default()
                    ),
                ),
                DigestRunnerKind::Log => (
                    "log",
                    format!(
                        "tsift log-digest --path {} --input {} --json",
                        shell_quote(root.to_string_lossy().as_ref()),
                        shell_quote(
                            root.join(".tsift/artifacts")
                                .join(format!("{}.log", stable_handle("tart", &artifact_key)))
                                .to_string_lossy()
                                .as_ref()
                        )
                    ),
                ),
            };
            Some(persist_transcript_artifact(
                &root,
                "tart",
                suffix,
                &artifact_key,
                &captured,
                expand,
            )?)
        };
        let filter_report = execution.filter.as_ref().map(DigestRunnerFilter::to_json);

        match digest_kind {
            DigestRunnerKind::Test => {
                let digest_report = test_digest::compute(path, &captured, runner)?;
                let report = serde_json::json!({
                    "kind": digest_kind.as_str(),
                    "command": shell_command,
                    "executed_command": execution.executed_command,
                    "exit_code": exit_code,
                    "success": output.status.success(),
                    "filter": filter_report,
                    "artifact": artifact,
                    "digest": digest_report,
                });
                // Follow-ups: the artifact's expand command (when one was stored),
                // then a `tsift rewrite --run` of the original command.
                let mut follow_up = artifact
                    .as_ref()
                    .map(|entry| vec![entry.expand.clone()])
                    .unwrap_or_default();
                follow_up.push(format!(
                    "tsift rewrite --run {}",
                    shell_quote(shell_command)
                ));
                // "passed" needs both a successful exit and zero digested failures.
                let summary_text = if output.status.success() && digest_report.failures == 0 {
                    format!("test run passed for {}", runner.unwrap_or("auto"))
                } else {
                    format!("test run captured {} failure(s)", digest_report.failures)
                };
                print_json_or_envelope(
                    &report,
                    &format,
                    "digest-runner",
                    "test-run",
                    ToolEnvelopeSummary {
                        text: summary_text,
                        metrics: vec![
                            envelope_metric("runner", &digest_report.runner),
                            envelope_metric("exit_code", exit_code),
                            envelope_metric("filter", execution.filter_label()),
                            envelope_metric("failures", digest_report.failures),
                            envelope_metric("groups", digest_report.grouped_failures),
                            envelope_metric(
                                "artifact",
                                artifact
                                    .as_ref()
                                    .map(|entry| entry.handle.as_str())
                                    .unwrap_or("-"),
                            ),
                        ],
                    },
                    false,
                    follow_up,
                )?;
            }
            DigestRunnerKind::Log => {
                let digest_report = log_digest::compute(path, &captured)?;
                let report = serde_json::json!({
                    "kind": digest_kind.as_str(),
                    "command": shell_command,
                    "executed_command": execution.executed_command,
                    "exit_code": exit_code,
                    "success": output.status.success(),
                    "filter": filter_report,
                    "artifact": artifact,
                    "digest": digest_report,
                });
                // Same follow-up list as the test arm.
                let mut follow_up = artifact
                    .as_ref()
                    .map(|entry| vec![entry.expand.clone()])
                    .unwrap_or_default();
                follow_up.push(format!(
                    "tsift rewrite --run {}",
                    shell_quote(shell_command)
                ));
                let summary_text = if output.status.success() && digest_report.signal_groups == 0 {
                    "command finished without log signals".to_string()
                } else {
                    format!(
                        "command emitted {} log signal group(s)",
                        digest_report.signal_groups
                    )
                };
                print_json_or_envelope(
                    &report,
                    &format,
                    "digest-runner",
                    "command-run",
                    ToolEnvelopeSummary {
                        text: summary_text,
                        metrics: vec![
                            envelope_metric("exit_code", exit_code),
                            envelope_metric("filter", execution.filter_label()),
                            envelope_metric("signals", digest_report.signal_groups),
                            envelope_metric("file_refs", digest_report.file_ref_groups),
                            envelope_metric(
                                "artifact",
                                artifact
                                    .as_ref()
                                    .map(|entry| entry.handle.as_str())
                                    .unwrap_or("-"),
                            ),
                        ],
                    },
                    false,
                    follow_up,
                )?;
            }
        }

        // Mirror the wrapped command's outcome: Ok on success, its exit code
        // otherwise, and an error when there is no code to forward.
        if output.status.success() {
            return Ok(());
        }
        if let Some(code) = output.status.code() {
            std::process::exit(code);
        }
        bail!("digest-wrapped command terminated by signal: {shell_command}");
    }

    // Non-envelope path: render through the regular digest printers.
    if captured.trim().is_empty() {
        let label = match digest_kind {
            DigestRunnerKind::Test => "test",
            DigestRunnerKind::Log => "log",
        };
        println!("No {label} output captured.");
    } else {
        match digest_kind {
            DigestRunnerKind::Test => {
                render_test_digest_from_input(path, &captured, runner, format)?
            }
            DigestRunnerKind::Log => render_log_digest_from_input(path, &captured, format)?,
        }
    }

    // Same exit-status forwarding as the envelope path above.
    if output.status.success() {
        return Ok(());
    }
    if let Some(code) = output.status.code() {
        std::process::exit(code);
    }
    bail!("digest-wrapped command terminated by signal: {shell_command}");
}
19987
19988struct DigestRunnerExecution {
19989 output: std::process::Output,
19990 executed_command: String,
19991 filter: Option<DigestRunnerFilter>,
19992}
19993
19994impl DigestRunnerExecution {
19995 fn filter_label(&self) -> &'static str {
19996 self.filter
19997 .as_ref()
19998 .map(|filter| filter.tool)
19999 .unwrap_or("none")
20000 }
20001}
20002
20003struct DigestRunnerFilter {
20004 tool: &'static str,
20005 command: String,
20006}
20007
20008impl DigestRunnerFilter {
20009 fn to_json(&self) -> serde_json::Value {
20010 serde_json::json!({
20011 "tool": self.tool,
20012 "command": self.command,
20013 })
20014 }
20015}
20016
20017fn run_digest_runner_command(shell_command: &str) -> Result<DigestRunnerExecution> {
20018 let filter = rtk_rewrite_for_digest_runner(shell_command);
20019 let executed_command = filter
20020 .as_ref()
20021 .map(|filter| filter.command.as_str())
20022 .unwrap_or(shell_command);
20023 let output = Command::new("sh")
20024 .arg("-lc")
20025 .arg(format!("({executed_command}) 2>&1"))
20026 .stdout(Stdio::piped())
20027 .output()
20028 .with_context(|| format!("running digest-wrapped command: {executed_command}"))?;
20029
20030 Ok(DigestRunnerExecution {
20031 output,
20032 executed_command: executed_command.to_string(),
20033 filter,
20034 })
20035}
20036
20037fn rtk_rewrite_for_digest_runner(shell_command: &str) -> Option<DigestRunnerFilter> {
20038 if shell_command.trim_start().starts_with("rtk ") || find_command_on_path("rtk").is_none() {
20039 return None;
20040 }
20041 let output = Command::new("rtk")
20042 .arg("rewrite")
20043 .arg(shell_command)
20044 .output()
20045 .ok()?;
20046 if !output.status.success() {
20047 return None;
20048 }
20049 let rewritten = String::from_utf8_lossy(&output.stdout).trim().to_string();
20050 if rewritten.is_empty() || rewritten == shell_command {
20051 return None;
20052 }
20053 Some(DigestRunnerFilter {
20054 tool: "rtk",
20055 command: rewritten,
20056 })
20057}
20058
/// Looks for a file named `command` in each directory listed in `PATH`.
///
/// Returns the first `<dir>/<command>` that is a regular file, or `None` when
/// `PATH` is unset or no directory contains such a file.
fn find_command_on_path(command: &str) -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    for dir in std::env::split_paths(&search_path) {
        let candidate = dir.join(command);
        if candidate.is_file() {
            return Some(candidate);
        }
    }
    None
}
20065
20066pub(crate) fn open_existing_summary_db_read_only(db_path: &Path) -> Result<summarize::SummaryDb> {
20067 if !db_path.exists() {
20068 bail!("no summaries.db found — run `tsift summarize --extract <path>` first");
20069 }
20070 summarize::SummaryDb::open_read_only_resilient(db_path)
20071}
20072
20073fn status_index_needs_fix(report: &status::StatusReport) -> bool {
20074 !matches!(report.index, status::IndexStatus::Fresh { .. })
20075}
20076
20077fn status_instructions_need_fix(report: &status::StatusReport) -> bool {
20078 !matches!(report.instructions, init::InstructionStatus::Current { .. })
20079}
20080
20081pub(crate) fn apply_status_fixes(root: &Path, report: &status::StatusReport) -> Result<()> {
20082 if status_instructions_need_fix(report) {
20083 eprintln!("status fix: refreshing tsift instructions");
20084 init::init(root, false, false)?;
20085 }
20086
20087 let eviction = cycle_packet_cache::cycle_packet_cache_evict(
20088 root,
20089 cycle_packet_cache::CYCLE_PACKET_CACHE_DEFAULT_TTL_SECS,
20090 cycle_packet_cache::CYCLE_PACKET_CACHE_DEFAULT_MAX_BYTES,
20091 );
20092 if eviction.evicted_entries > 0 {
20093 eprintln!(
20094 "status fix: evicted {} cycle packet cache entry/entries ({} bytes, {} remaining)",
20095 eviction.evicted_entries,
20096 eviction.evicted_bytes,
20097 eviction.remaining_entries
20098 );
20099 }
20100
20101 if !status_index_needs_fix(report) {
20102 return Ok(());
20103 }
20104
20105 let scopes = config::Config::submodule_dirs(root)?;
20106 if scopes.is_empty() {
20107 eprintln!("status fix: refreshing index");
20108 run_index_update(
20109 &root.join(".tsift/index.db"),
20110 root,
20111 "status --fix refreshing index".to_string(),
20112 root,
20113 None,
20114 false,
20115 false,
20116 )?;
20117 return Ok(());
20118 }
20119
20120 let cfg = config::Config::load(root)?;
20121 for scope in scopes {
20122 if !scope.source_root.exists() {
20123 eprintln!(
20124 "status fix: skipping missing submodule `{}` ({})",
20125 scope.id,
20126 scope.source_root.display()
20127 );
20128 continue;
20129 }
20130 eprintln!("status fix: refreshing submodule `{}` index", scope.id);
20131 run_index_update(
20132 &cfg.db_path_for(root, &scope.id),
20133 &scope.source_root,
20134 format!("status --fix refreshing submodule `{}` index", scope.id),
20135 root,
20136 Some(scope.id.as_str()),
20137 false,
20138 false,
20139 )?;
20140 }
20141
20142 Ok(())
20143}
20144
20145pub(crate) fn status_missing_workspace_scopes(report: &status::StatusReport) -> bool {
20146 match &report.index {
20147 status::IndexStatus::Fresh { missing_scopes, .. }
20148 | status::IndexStatus::Stale { missing_scopes, .. }
20149 | status::IndexStatus::Missing { missing_scopes } => !missing_scopes.is_empty(),
20150 }
20151}
20152
20153pub(crate) fn autoindex_missing_workspace_scopes(
20154 root: &Path,
20155 report: &status::StatusReport,
20156) -> Result<()> {
20157 let missing_scopes = match &report.index {
20158 status::IndexStatus::Fresh { missing_scopes, .. }
20159 | status::IndexStatus::Stale { missing_scopes, .. }
20160 | status::IndexStatus::Missing { missing_scopes } => missing_scopes,
20161 };
20162 if missing_scopes.is_empty() {
20163 return Ok(());
20164 }
20165
20166 let missing_scope_ids = missing_scopes
20167 .iter()
20168 .map(|scope| scope.scope.as_str())
20169 .collect::<std::collections::HashSet<_>>();
20170 let cfg = config::Config::load(root)?;
20171 for scope in config::Config::submodule_dirs(root)? {
20172 if !missing_scope_ids.contains(scope.id.as_str()) || !scope.source_root.exists() {
20173 continue;
20174 }
20175 let db_path = cfg.db_path_for(root, &scope.id);
20176 run_index_update(
20177 &db_path,
20178 &scope.source_root,
20179 format!(
20180 "autoindexing missing submodule `{}` during status",
20181 scope.id
20182 ),
20183 root,
20184 Some(scope.id.as_str()),
20185 false,
20186 false,
20187 )?;
20188 }
20189 Ok(())
20190}
20191
20192pub(crate) fn emit_summary_stats_warnings(stats: &summarize::SummaryStats, root: &Path) {
20193 for warning in &stats.warnings {
20194 let rel_path = relativize_pathbuf(&warning.path, root);
20195 eprintln!(
20196 "warning: summarize stats {}: {}",
20197 rel_path.display(),
20198 warning.message
20199 );
20200 }
20201}
20202
20203fn contextualize_error(err: anyhow::Error, context: String) -> anyhow::Error {
20204 Result::<(), anyhow::Error>::Err(err)
20205 .context(context)
20206 .unwrap_err()
20207}
20208
20209fn should_attach_lock_diagnostics(err: &anyhow::Error) -> bool {
20210 let message = err.to_string();
20211 message.contains("another tsift index writer is already active")
20212 || substrate::error_mentions_locked_db(err)
20213}
20214
20215fn add_write_lock_context(
20216 err: anyhow::Error,
20217 action: String,
20218 root: &std::path::Path,
20219 scope: Option<&str>,
20220) -> anyhow::Error {
20221 if !should_attach_lock_diagnostics(&err) {
20222 return contextualize_error(err, action);
20223 }
20224
20225 let Ok(report) = status::check_locks(root, None, scope) else {
20226 return contextualize_error(err, action);
20227 };
20228
20229 contextualize_error(
20230 err,
20231 format!(
20232 "{}\n\nlock diagnostics:\n{}",
20233 action,
20234 status::format_locks_human(&report, false).trim_end()
20235 ),
20236 )
20237}
20238
20239pub(crate) fn run_index_update(
20240 db_path: &std::path::Path,
20241 source_root: &std::path::Path,
20242 action: String,
20243 root: &std::path::Path,
20244 scope: Option<&str>,
20245 rebuild: bool,
20246 prune: bool,
20247) -> Result<index::IndexSummary> {
20248 let result = (|| {
20249 let db = index::IndexDb::open(db_path)?;
20250 if rebuild {
20251 db.rebuild(source_root)
20252 } else if prune {
20253 db.apply_changes_pruned(source_root)
20254 } else {
20255 db.apply_changes(source_root)
20256 }
20257 })();
20258
20259 let summary = result.map_err(|err| add_write_lock_context(err, action, root, scope))?;
20260 emit_index_warnings(&summary, source_root, scope);
20261 Ok(summary)
20262}
20263
20264pub(crate) fn relativize_index_summary(summary: &mut index::IndexSummary, root: &Path) {
20265 for change in &mut summary.changes {
20266 change.path = relativize_pathbuf(&change.path, root);
20267 }
20268 for warning in &mut summary.warnings {
20269 warning.path = relativize_pathbuf(&warning.path, root);
20270 }
20271}
20272
20273fn emit_index_warnings(summary: &index::IndexSummary, root: &Path, scope: Option<&str>) {
20274 for warning in &summary.warnings {
20275 let rel_path = relativize_pathbuf(&warning.path, root);
20276 let stage = match warning.stage {
20277 index::IndexWarningStage::ReadSource => "read failed",
20278 index::IndexWarningStage::ExtractSymbols => "symbol extraction failed",
20279 index::IndexWarningStage::ExtractCallSites => "call extraction failed",
20280 index::IndexWarningStage::ExtractRoutes => "route extraction failed",
20281 };
20282 let scope_prefix = scope.map(|name| format!("[{}] ", name)).unwrap_or_default();
20283 let lang_suffix = warning
20284 .language
20285 .as_deref()
20286 .map(|lang| format!(" [{}]", lang))
20287 .unwrap_or_default();
20288 eprintln!(
20289 "warning: {}{}{}: {}: {}",
20290 scope_prefix,
20291 rel_path.display(),
20292 lang_suffix,
20293 stage,
20294 warning.message
20295 );
20296 }
20297}
20298
20299pub(crate) fn load_summarize_config(root: &std::path::Path) -> summarize::SummarizeConfig {
20300 let config_path = root.join(".tsift/config.toml");
20301 if !config_path.exists() {
20302 return summarize::SummarizeConfig::default();
20303 }
20304 #[derive(serde::Deserialize, Default)]
20305 struct RawConfig {
20306 #[serde(default)]
20307 summarize: Option<RawSummarize>,
20308 }
20309 #[derive(serde::Deserialize)]
20310 struct RawSummarize {
20311 model: Option<String>,
20312 max_file_tokens: Option<usize>,
20313 api_key_env: Option<String>,
20314 }
20315 let content = std::fs::read_to_string(&config_path).unwrap_or_default();
20316 let raw: RawConfig = toml::from_str(&content).unwrap_or_default();
20317 let defaults = summarize::SummarizeConfig::default();
20318 match raw.summarize {
20319 Some(s) => summarize::SummarizeConfig {
20320 model: s.model.unwrap_or(defaults.model),
20321 max_file_tokens: s.max_file_tokens.unwrap_or(defaults.max_file_tokens),
20322 api_key_env: s.api_key_env.unwrap_or(defaults.api_key_env),
20323 },
20324 None => defaults,
20325 }
20326}
20327
/// An index database paired with the source root it was selected for.
///
/// Returned by `find_symbols_db_for_file`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExtractSymbolContext {
    /// Path of the index database file.
    db_path: PathBuf,
    /// Source root associated with `db_path`: a submodule's source root, or
    /// the workspace root for the single root index.
    source_root: PathBuf,
}
20333
20334pub(crate) fn find_symbols_db_for_file(
20335 root: &Path,
20336 file_path: &Path,
20337) -> Result<Option<ExtractSymbolContext>> {
20338 let cfg = config::Config::load(root)?;
20339 let mut submodules = config::Config::submodule_dirs(root)?;
20340 submodules.sort_by(|left, right| {
20341 right
20342 .source_root
20343 .components()
20344 .count()
20345 .cmp(&left.source_root.components().count())
20346 });
20347
20348 for scope in submodules {
20349 if !file_path.starts_with(&scope.source_root) {
20350 continue;
20351 }
20352 let db_path = cfg.db_path_for(root, &scope.id);
20353 if db_path.exists() {
20354 return Ok(Some(ExtractSymbolContext {
20355 db_path,
20356 source_root: scope.source_root,
20357 }));
20358 }
20359 }
20360
20361 let single = root.join(".tsift/index.db");
20362 if single.exists() && file_path.starts_with(root) {
20363 return Ok(Some(ExtractSymbolContext {
20364 db_path: single,
20365 source_root: root.to_path_buf(),
20366 }));
20367 }
20368
20369 Ok(None)
20370}
20371
20372pub(crate) fn resolve_extract_base(path: &Path) -> Result<PathBuf> {
20373 let canonical = path
20374 .canonicalize()
20375 .with_context(|| format!("canonicalizing {}", path.display()))?;
20376
20377 Ok(if canonical.is_dir() {
20378 canonical
20379 } else {
20380 canonical
20381 .parent()
20382 .map(Path::to_path_buf)
20383 .unwrap_or(canonical)
20384 })
20385}
20386
20387fn normalize_extract_scope_path(path: &Path) -> Result<PathBuf> {
20388 if path.exists() {
20389 return path
20390 .canonicalize()
20391 .with_context(|| format!("canonicalizing extract scope {}", path.display()));
20392 }
20393
20394 Ok(summarize::normalize_lexical_path(path))
20395}
20396
20397pub(crate) fn resolve_extract_scope(root: &Path, extract_path: &Path) -> Result<PathBuf> {
20398 let scope = if extract_path.is_absolute() {
20399 extract_path.to_path_buf()
20400 } else {
20401 root.join(extract_path)
20402 };
20403 normalize_extract_scope_path(&scope)
20404}
20405
20406pub(crate) fn summarize_diff_matches_scope(changed_path: &Path, extract_scope: &Path) -> bool {
20407 normalize_extract_scope_path(changed_path)
20408 .unwrap_or_else(|_| summarize::normalize_lexical_path(changed_path))
20409 .starts_with(extract_scope)
20410}
20411
20412pub(crate) fn summarize_relative_file_path(root: &Path, file_path: &Path) -> String {
20413 summarize::normalize_summary_file_key(file_path.strip_prefix(root).unwrap_or(file_path))
20414}
20415
20416pub(crate) fn summarize_full_extract_deleted_summary_paths(
20417 summary_db: &summarize::SummaryDb,
20418 root: &Path,
20419 extract_scope: &Path,
20420 files_to_extract: &[PathBuf],
20421) -> Result<BTreeSet<String>> {
20422 let live_paths = files_to_extract
20423 .iter()
20424 .map(|file_path| summarize_relative_file_path(root, file_path))
20425 .collect::<BTreeSet<_>>();
20426 let mut deleted = BTreeSet::new();
20427
20428 for cached_path in summary_db.cached_file_paths()? {
20429 if !summarize_diff_matches_scope(&root.join(&cached_path), extract_scope) {
20430 continue;
20431 }
20432 if !live_paths.contains(&cached_path) {
20433 deleted.insert(cached_path);
20434 }
20435 }
20436
20437 Ok(deleted)
20438}
20439
/// One index database that a search should consult, plus the metadata needed
/// to check it, refresh it, and describe it to the user.
#[derive(Debug, Clone)]
struct SearchIndexTarget {
    /// Human-readable name used in messages (for example "submodule `x` index").
    label: String,
    /// Path of the index database file.
    db_path: PathBuf,
    /// Root directory of the sources this index covers.
    source_root: PathBuf,
    /// Scope identifier for submodule and cargo package targets; `None` for
    /// the single root index.
    scope_name: Option<String>,
    /// Command line suggested to the user for rebuilding this index.
    reindex_cmd: String,
}
20448
20449fn cargo_package_index_target(
20450 root: &Path,
20451 package: multiplicity::CargoPackageInfo,
20452) -> SearchIndexTarget {
20453 SearchIndexTarget {
20454 label: format!("cargo package `{}` index", package.scope_id),
20455 db_path: multiplicity::cargo_package_db_path(root, &package.scope_id),
20456 source_root: package.package_root.clone(),
20457 scope_name: Some(package.scope_id.clone()),
20458 reindex_cmd: format!(
20459 "tsift index --submodule {} {}",
20460 package.scope_id,
20461 root.display()
20462 ),
20463 }
20464}
20465
/// Freshness of a search index, as reported by `inspect_search_index`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchIndexState {
    /// The source root or the database file does not exist.
    Missing,
    /// The inspection found no new, modified or deleted files.
    Fresh,
    /// The inspection found `stale_files` new, modified or deleted files.
    Stale { stale_files: usize },
}
20472
20473fn resolve_search_index_targets(
20474 root: &Path,
20475 path_hint: &Path,
20476 scope: Option<&str>,
20477 federated: bool,
20478) -> Result<Vec<SearchIndexTarget>> {
20479 if let Some(scope_name) = scope {
20480 if let Some(scope) = config::Config::find_submodule(root, scope_name)? {
20481 let cfg = config::Config::load(root)?;
20482 return Ok(vec![SearchIndexTarget {
20483 label: format!("submodule `{}` index", scope.id),
20484 db_path: cfg.db_path_for(root, &scope.id),
20485 source_root: scope.source_root.clone(),
20486 scope_name: Some(scope.id.clone()),
20487 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
20488 }]);
20489 }
20490 if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
20491 return Ok(vec![cargo_package_index_target(root, package)]);
20492 }
20493 config::Config::resolve_submodule(root, scope_name)?;
20494 }
20495
20496 if federated {
20497 let cfg = config::Config::load(root)?;
20498 let mut targets = Vec::new();
20499 for scope in config::Config::submodule_dirs(root)? {
20500 if !cfg.federation_for_scope(&scope) {
20501 continue;
20502 }
20503 targets.push(SearchIndexTarget {
20504 label: format!("submodule `{}` index", scope.id),
20505 db_path: cfg.db_path_for(root, &scope.id),
20506 source_root: scope.source_root.clone(),
20507 scope_name: Some(scope.id.clone()),
20508 reindex_cmd: format!("tsift index --workspace {}", root.display()),
20509 });
20510 }
20511 return Ok(targets);
20512 }
20513
20514 if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
20515 let cfg = config::Config::load(root)?;
20516 return Ok(vec![SearchIndexTarget {
20517 label: format!("submodule `{}` index", scope.id),
20518 db_path: cfg.db_path_for(root, &scope.id),
20519 source_root: scope.source_root.clone(),
20520 scope_name: Some(scope.id.clone()),
20521 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
20522 }]);
20523 }
20524
20525 if let Some(package) = multiplicity::infer_cargo_package_from_path(root, path_hint)? {
20526 return Ok(vec![cargo_package_index_target(root, package)]);
20527 }
20528
20529 if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
20530 let cfg = config::Config::load(root)?;
20531 return Ok(vec![SearchIndexTarget {
20532 label: format!("submodule `{}` index", scope.id),
20533 db_path: cfg.db_path_for(root, &scope.id),
20534 source_root: scope.source_root.clone(),
20535 scope_name: Some(scope.id.clone()),
20536 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
20537 }]);
20538 }
20539
20540 let scopes = config::Config::submodule_dirs(root)?;
20541 if !scopes.is_empty() {
20542 let root_db = root.join(".tsift/index.db");
20543 if !root_db.exists() {
20544 let available_scopes = scopes
20545 .iter()
20546 .map(|scope| scope.id.as_str())
20547 .collect::<Vec<_>>()
20548 .join(", ");
20549 let cfg = config::Config::load(root)?;
20550 let indexed_scopes = scopes
20551 .iter()
20552 .filter(|scope| cfg.db_path_for(root, &scope.id).exists())
20553 .map(|scope| scope.id.as_str())
20554 .collect::<Vec<_>>();
20555 let indexed_label = if indexed_scopes.is_empty() {
20556 "none".to_string()
20557 } else {
20558 indexed_scopes.join(", ")
20559 };
20560 bail!(
20561 "workspace root {} has no shared root index at {}. Default search requires `--scope <scope>` or `--federated` when the workspace uses scoped `.tsift/indexes/*/index.db` files. Available scopes: {}. Indexed scopes: {}.",
20562 root.display(),
20563 root_db.display(),
20564 available_scopes,
20565 indexed_label,
20566 );
20567 }
20568 }
20569
20570 Ok(vec![SearchIndexTarget {
20571 label: "index".to_string(),
20572 db_path: root.join(".tsift/index.db"),
20573 source_root: root.to_path_buf(),
20574 scope_name: None,
20575 reindex_cmd: format!("tsift index {}", root.display()),
20576 }])
20577}
20578
20579fn inspect_search_index(target: &SearchIndexTarget) -> Result<SearchIndexState> {
20580 if !target.source_root.exists() || !target.db_path.exists() {
20581 return Ok(SearchIndexState::Missing);
20582 }
20583
20584 let inspection =
20585 index::IndexDb::inspect_read_only(&target.db_path, &target.source_root, false)?;
20586 let stale_files =
20587 inspection.summary.new + inspection.summary.modified + inspection.summary.deleted;
20588 if stale_files == 0 {
20589 Ok(SearchIndexState::Fresh)
20590 } else {
20591 Ok(SearchIndexState::Stale { stale_files })
20592 }
20593}
20594
/// An index that needs rebuilding before a search can use it reliably.
#[derive(Debug, Clone, PartialEq, Eq)]
struct RebuildSearchTarget {
    /// Human-readable name of the index, copied from the search target.
    label: String,
    /// Why the index needs rebuilding.
    reason: RebuildSearchReason,
    /// Command line suggested to the user for rebuilding the index.
    reindex_cmd: String,
}
20601
/// Why an index needs rebuilding.
#[derive(Debug, Clone, PartialEq, Eq)]
enum RebuildSearchReason {
    /// The index does not exist.
    Missing,
    /// The index exists but `stale_files` files are out of date.
    Stale { stale_files: usize },
}
20607
/// An index that could not be refreshed because another tsift index writer
/// was active; the search continues in a degraded mode for it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DegradedSearchTarget {
    /// Human-readable name of the index, copied from the search target.
    label: String,
    /// The state the index was in when the refresh was skipped.
    reason: RebuildSearchReason,
    /// Command line suggested to the user for refreshing the index later.
    reindex_cmd: String,
}
20614
/// How a search proceeds when some indexes could not be refreshed.
///
/// Chosen by `degraded_search_mode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DegradedSearchMode {
    /// Search read-only against the current index snapshot.
    ReadOnly,
    /// Fall back to exact live-file search.
    Exact,
}
20620
/// Result of `precheck_search_indexes`.
#[derive(Debug)]
struct SearchPrecheck {
    /// All index targets resolved for the search.
    targets: Vec<SearchIndexTarget>,
    /// Targets whose refresh was skipped because another writer was active.
    degraded_targets: Vec<DegradedSearchTarget>,
}
20626
20627fn is_active_writer_lock_error(err: &anyhow::Error) -> bool {
20628 err.chain().any(|cause| {
20629 cause
20630 .to_string()
20631 .contains("another tsift index writer is already active")
20632 })
20633}
20634
20635fn infer_agent_doc_task_submodule(
20636 root: &Path,
20637 path_hint: &Path,
20638) -> Result<Option<config::WorkspaceScope>> {
20639 let hinted_path = if path_hint.is_absolute() {
20640 path_hint.to_path_buf()
20641 } else {
20642 root.join(path_hint)
20643 };
20644 let Ok(relative) = hinted_path.strip_prefix(root) else {
20645 return Ok(None);
20646 };
20647 let mut components = relative.components();
20648 let Some(std::path::Component::Normal(first)) = components.next() else {
20649 return Ok(None);
20650 };
20651 if first != "tasks" {
20652 return Ok(None);
20653 }
20654 let Some(file_stem) = relative.file_stem().and_then(|stem| stem.to_str()) else {
20655 return Ok(None);
20656 };
20657 config::Config::find_submodule(root, file_stem)
20658}
20659
20660fn degraded_search_target(
20661 target: &SearchIndexTarget,
20662 reason: RebuildSearchReason,
20663) -> DegradedSearchTarget {
20664 DegradedSearchTarget {
20665 label: target.label.clone(),
20666 reason,
20667 reindex_cmd: target.reindex_cmd.clone(),
20668 }
20669}
20670
20671fn apply_search_index_update(
20672 root: &Path,
20673 target: &SearchIndexTarget,
20674) -> Result<index::IndexSummary> {
20675 run_index_update(
20676 &target.db_path,
20677 &target.source_root,
20678 format!("autoindexing {}", target.label),
20679 root,
20680 target.scope_name.as_deref(),
20681 false,
20682 false,
20683 )
20684}
20685
20686fn collect_rebuild_search_targets(
20687 targets: &[SearchIndexTarget],
20688) -> Result<Vec<RebuildSearchTarget>> {
20689 let mut rebuild_targets = Vec::new();
20690 for target in targets {
20691 let reason = match inspect_search_index(target)? {
20692 SearchIndexState::Missing => RebuildSearchReason::Missing,
20693 SearchIndexState::Fresh => continue,
20694 SearchIndexState::Stale { stale_files } => RebuildSearchReason::Stale { stale_files },
20695 };
20696 rebuild_targets.push(RebuildSearchTarget {
20697 label: target.label.clone(),
20698 reason,
20699 reindex_cmd: target.reindex_cmd.clone(),
20700 });
20701 }
20702 Ok(rebuild_targets)
20703}
20704
20705fn rebuild_search_target_detail(target: &RebuildSearchTarget) -> String {
20706 match target.reason {
20707 RebuildSearchReason::Missing => format!("{} is missing", target.label),
20708 RebuildSearchReason::Stale { stale_files } => {
20709 let file_suffix = if stale_files == 1 { "" } else { "s" };
20710 format!(
20711 "{} is stale ({} file{})",
20712 target.label, stale_files, file_suffix
20713 )
20714 }
20715 }
20716}
20717
20718fn rebuild_search_targets_message(rebuild_targets: &[RebuildSearchTarget]) -> String {
20719 if rebuild_targets.len() == 1 {
20720 let target = &rebuild_targets[0];
20721 return format!(
20722 "{}. Run `{}` to rebuild before retrying.",
20723 rebuild_search_target_detail(target),
20724 target.reindex_cmd
20725 );
20726 }
20727
20728 let summary: Vec<String> = rebuild_targets
20729 .iter()
20730 .take(3)
20731 .map(rebuild_search_target_detail)
20732 .collect();
20733 let overflow = rebuild_targets.len().saturating_sub(summary.len());
20734 let mut details = summary.join(", ");
20735 if overflow > 0 {
20736 details.push_str(&format!(", +{} more", overflow));
20737 }
20738 let reindex_cmd = rebuild_targets[0].reindex_cmd.clone();
20739 format!(
20740 "{} indexes need rebuild: {}. Run `{}` to rebuild before retrying.",
20741 rebuild_targets.len(),
20742 details,
20743 reindex_cmd
20744 )
20745}
20746
20747pub(crate) fn precheck_search_indexes(
20748 root: &Path,
20749 path_hint: &Path,
20750 scope: Option<&str>,
20751 federated: bool,
20752 autoindex: bool,
20753) -> Result<SearchPrecheck> {
20754 let targets = resolve_search_index_targets(root, path_hint, scope, federated)?;
20755 let mut stale_targets = Vec::new();
20756 let mut degraded_targets = Vec::new();
20757
20758 for target in &targets {
20759 match inspect_search_index(target)? {
20760 SearchIndexState::Missing => {
20761 if autoindex && let Err(err) = apply_search_index_update(root, target) {
20762 if is_active_writer_lock_error(&err) {
20763 degraded_targets
20764 .push(degraded_search_target(target, RebuildSearchReason::Missing));
20765 } else {
20766 return Err(err);
20767 }
20768 }
20769 }
20770 SearchIndexState::Fresh => {}
20771 SearchIndexState::Stale { stale_files } => {
20772 if autoindex {
20773 if let Err(err) = apply_search_index_update(root, target) {
20774 if is_active_writer_lock_error(&err) {
20775 degraded_targets.push(degraded_search_target(
20776 target,
20777 RebuildSearchReason::Stale { stale_files },
20778 ));
20779 } else {
20780 return Err(err);
20781 }
20782 }
20783 } else {
20784 stale_targets.push(RebuildSearchTarget {
20785 label: target.label.clone(),
20786 reason: RebuildSearchReason::Stale { stale_files },
20787 reindex_cmd: target.reindex_cmd.clone(),
20788 });
20789 }
20790 }
20791 }
20792 }
20793
20794 if stale_targets.is_empty() {
20795 return Ok(SearchPrecheck {
20796 targets,
20797 degraded_targets,
20798 });
20799 }
20800
20801 bail!(
20802 "tsift search aborted: {} \
20803 or re-run without `--no-autoindex`.",
20804 rebuild_search_targets_message(&stale_targets),
20805 );
20806}
20807
20808pub(crate) fn degraded_search_mode(targets: &[DegradedSearchTarget]) -> Option<DegradedSearchMode> {
20809 if targets.is_empty() {
20810 return None;
20811 }
20812
20813 if targets
20814 .iter()
20815 .all(|target| matches!(target.reason, RebuildSearchReason::Missing))
20816 {
20817 Some(DegradedSearchMode::Exact)
20818 } else {
20819 Some(DegradedSearchMode::ReadOnly)
20820 }
20821}
20822
20823fn degraded_search_targets_summary(targets: &[DegradedSearchTarget]) -> String {
20824 if targets.len() == 1 {
20825 let target = &targets[0];
20826 return match target.reason {
20827 RebuildSearchReason::Missing => format!("{} is missing", target.label),
20828 RebuildSearchReason::Stale { stale_files } => {
20829 let file_suffix = if stale_files == 1 { "" } else { "s" };
20830 format!(
20831 "{} is stale ({} file{})",
20832 target.label, stale_files, file_suffix
20833 )
20834 }
20835 };
20836 }
20837
20838 let missing = targets
20839 .iter()
20840 .filter(|target| matches!(target.reason, RebuildSearchReason::Missing))
20841 .count();
20842 let stale = targets.len().saturating_sub(missing);
20843 let mut parts = Vec::new();
20844 if stale > 0 {
20845 let suffix = if stale == 1 { "" } else { "es" };
20846 parts.push(format!("{stale} stale index{suffix}"));
20847 }
20848 if missing > 0 {
20849 let suffix = if missing == 1 { "" } else { "es" };
20850 parts.push(format!("{missing} missing index{suffix}"));
20851 }
20852 parts.join(", ")
20853}
20854
20855pub(crate) fn emit_degraded_search_note(
20856 targets: &[DegradedSearchTarget],
20857 mode: DegradedSearchMode,
20858) {
20859 let summary = degraded_search_targets_summary(targets);
20860 let reindex_cmd = &targets[0].reindex_cmd;
20861 match mode {
20862 DegradedSearchMode::ReadOnly => eprintln!(
20863 "note: active tsift writer detected; skipping autoindex because {}. \
20864 Continuing with read-only search and the current index snapshot; symbol hits may lag. \
20865 Retry `{}` after the active writer finishes for fresh index results.",
20866 summary, reindex_cmd
20867 ),
20868 DegradedSearchMode::Exact => eprintln!(
20869 "note: active tsift writer detected; skipping autoindex because {}. \
20870 Continuing with exact live-file search. Retry `{}` after the active writer finishes \
20871 for indexed symbol hits.",
20872 summary, reindex_cmd
20873 ),
20874 }
20875}
20876
20877fn search_timeout_message(
20878 timeout_secs: u64,
20879 strategy: &str,
20880 targets: &[SearchIndexTarget],
20881) -> Result<String> {
20882 let rebuild_targets = collect_rebuild_search_targets(targets)?;
20883 if rebuild_targets.is_empty() {
20884 return Ok(format!(
20885 "tsift search timed out after {}s (strategy: {}). \
20886 The search root looks fresh, so reindexing is unlikely to help. \
20887 Re-run with `--timeout 0` to disable the timeout, narrow `--path` / `--scope`, \
20888 or try a different strategy.",
20889 timeout_secs, strategy,
20890 ));
20891 }
20892
20893 Ok(format!(
20894 "tsift search timed out after {}s (strategy: {}). {}",
20895 timeout_secs,
20896 strategy,
20897 rebuild_search_targets_message(&rebuild_targets),
20898 ))
20899}
20900
/// Returns `true` for the punctuation characters that make a query look like
/// an identifier or path: `-`, `_`, `/`, `\`, `.`, `:`, `#` and `@`.
fn is_exact_preferring_query_char(ch: char) -> bool {
    const MARKERS: [char; 8] = ['-', '_', '/', '\\', '.', ':', '#', '@'];
    MARKERS.contains(&ch)
}
20904
20905fn query_prefers_exact_search(query: &str) -> bool {
20906 let trimmed = query.trim();
20907 !trimmed.is_empty()
20908 && !trimmed.chars().any(char::is_whitespace)
20909 && trimmed.chars().any(|ch| ch.is_alphanumeric())
20910 && trimmed.chars().any(is_exact_preferring_query_char)
20911 && trimmed
20912 .chars()
20913 .all(|ch| ch.is_alphanumeric() || is_exact_preferring_query_char(ch))
20914}
20915
20916pub(crate) fn resolve_search_strategy(query: &str, strategy: Option<String>) -> String {
20917 strategy.unwrap_or_else(|| {
20918 if query_prefers_exact_search(query) {
20919 "exact".to_string()
20920 } else {
20921 "lexical".to_string()
20922 }
20923 })
20924}
20925
20926
20927pub(crate) fn collect_source_files(path: &std::path::Path) -> Result<Vec<PathBuf>> {
20928 let mut files = Vec::new();
20929 if path.is_file() {
20930 files.push(path.to_path_buf());
20931 return Ok(files);
20932 }
20933 let walker = ignore::WalkBuilder::new(path)
20934 .hidden(true)
20935 .git_ignore(true)
20936 .build();
20937 for entry in walker {
20938 let entry = entry?;
20939 if entry.file_type().is_some_and(|ft| ft.is_file()) {
20940 let p = entry.path();
20941 if let Some(ext) = p.extension() {
20942 let ext = ext.to_string_lossy();
20943 if matches!(
20944 ext.as_ref(),
20945 "rs" | "py"
20946 | "ts"
20947 | "tsx"
20948 | "js"
20949 | "jsx"
20950 | "kt"
20951 | "kts"
20952 | "zig"
20953 | "sh"
20954 | "bash"
20955 | "zsh"
20956 ) {
20957 files.push(p.to_path_buf());
20958 }
20959 }
20960 }
20961 }
20962 Ok(files)
20963}
20964
20965#[cfg(test)]
20966 mod tests {
20967 use super::*;
20968 use super::semantic_edit::{
20969 EditOp,
20970 apply_edit_op, apply_edit_plan_atomically_inner, markdown_block_spans,
20971 markdown_section_spans,
20972 };
20973 use tsift_memory::{MemoryEventKind, MemoryStore};
20974
20975 use std::cell::RefCell;
20976 use substrate::{ConvexEdgeRow, ConvexGraphClient, ConvexGraphStore, ConvexNodeRow};
20977 fn parse_cli<I, T>(itr: I) -> Cli
20978 where
20979 I: IntoIterator<Item = T> + Send + 'static,
20980 T: Into<std::ffi::OsString> + Clone + Send + 'static,
20981 {
20982 std::thread::Builder::new()
20983 .name("cli-parse".to_string())
20984 .stack_size(16 * 1024 * 1024)
20985 .spawn(move || Cli::parse_from(itr))
20986 .unwrap()
20987 .join()
20988 .unwrap()
20989 }
20990
20991 fn try_parse_cli<I, T>(itr: I) -> std::result::Result<Cli, clap::Error>
20992 where
20993 I: IntoIterator<Item = T> + Send + 'static,
20994 T: Into<std::ffi::OsString> + Clone + Send + 'static,
20995 {
20996 std::thread::Builder::new()
20997 .name("cli-try-parse".to_string())
20998 .stack_size(16 * 1024 * 1024)
20999 .spawn(move || Cli::try_parse_from(itr))
21000 .unwrap()
21001 .join()
21002 .unwrap()
21003 }
21004
21005 fn build_relative_search_budget_report(
21006 query: &str,
21007 strategy: &str,
21008 root: &Path,
21009 response: &sift::SearchResponse,
21010 symbol_hits: &[index::SymbolHit],
21011 budget: ResponseBudget,
21012 filters: &SearchFacetFilters,
21013 ) -> SearchBudgetReport {
21014 build_search_budget_report(SearchBudgetReportInput {
21015 query,
21016 strategy,
21017 root,
21018 response,
21019 symbol_hits,
21020 absolute: false,
21021 budget,
21022 filters,
21023 })
21024 }
21025
21026 #[derive(Default)]
21027 struct MemoryConvexGraphClient {
21028 nodes: RefCell<BTreeMap<String, ConvexNodeRow>>,
21029 edges: RefCell<BTreeMap<String, ConvexEdgeRow>>,
21030 }
21031
21032 impl ConvexGraphClient for MemoryConvexGraphClient {
21033 fn upsert_node_row(&self, row: &ConvexNodeRow) -> Result<()> {
21034 self.nodes
21035 .borrow_mut()
21036 .insert(row.external_id.clone(), row.clone());
21037 Ok(())
21038 }
21039
21040 fn upsert_edge_row(&self, row: &ConvexEdgeRow) -> Result<()> {
21041 self.edges
21042 .borrow_mut()
21043 .insert(row.edge_key.clone(), row.clone());
21044 Ok(())
21045 }
21046
21047 fn delete_node_row(&self, external_id: &str) -> Result<usize> {
21048 Ok(usize::from(
21049 self.nodes.borrow_mut().remove(external_id).is_some(),
21050 ))
21051 }
21052
21053 fn delete_edge_row(&self, edge_key: &str) -> Result<usize> {
21054 Ok(usize::from(
21055 self.edges.borrow_mut().remove(edge_key).is_some(),
21056 ))
21057 }
21058
21059 fn node_row(&self, external_id: &str) -> Result<Option<ConvexNodeRow>> {
21060 Ok(self.nodes.borrow().get(external_id).cloned())
21061 }
21062
21063 fn node_rows(&self) -> Result<Vec<ConvexNodeRow>> {
21064 Ok(self.nodes.borrow().values().cloned().collect())
21065 }
21066
21067 fn edge_rows(&self) -> Result<Vec<ConvexEdgeRow>> {
21068 Ok(self.edges.borrow().values().cloned().collect())
21069 }
21070
21071 fn node_rows_by_kind(&self, kind: &str) -> Result<Vec<ConvexNodeRow>> {
21072 Ok(self
21073 .nodes
21074 .borrow()
21075 .values()
21076 .filter(|row| row.kind == kind)
21077 .cloned()
21078 .collect())
21079 }
21080
21081 fn outgoing_edge_rows(
21082 &self,
21083 from_external_id: &str,
21084 kind: Option<&str>,
21085 ) -> Result<Vec<ConvexEdgeRow>> {
21086 Ok(self
21087 .edges
21088 .borrow()
21089 .values()
21090 .filter(|row| row.from_external_id == from_external_id)
21091 .filter(|row| kind.is_none_or(|kind| row.kind == kind))
21092 .cloned()
21093 .collect())
21094 }
21095 }
21096
/// Turns `path` into a git repository with a single commit: runs `git init`,
/// `git add .`, then a quiet `git commit -m init` with a fixed test identity.
///
/// Panics if `git` cannot be launched or any step exits unsuccessfully.
fn init_git_repo(path: &Path) {
    // Runs one git invocation inside `path` and asserts that it succeeded.
    let run_git = |args: &[&str], failure: &str| {
        let status = std::process::Command::new("git")
            .args(args)
            .current_dir(path)
            .status()
            .unwrap();
        assert!(status.success(), "{failure}");
    };

    run_git(&["init"], "git init failed");
    run_git(&["add", "."], "git add failed");
    // Identity is supplied via `-c` flags on the command line.
    run_git(
        &[
            "-c",
            "user.name=tsift-tests",
            "-c",
            "user.email=tsift-tests@example.com",
            "commit",
            "--quiet",
            "-m",
            "init",
        ],
        "git commit failed",
    );
}
21128
/// Creates `<root>/.tsift/` (and any missing parents) and writes a zero-byte
/// `index.db` inside it. Panics on any I/O failure.
fn write_empty_root_index(root: &Path) {
    let tsift_dir = root.join(".tsift");
    fs::create_dir_all(&tsift_dir).unwrap();
    let db_file = tsift_dir.join("index.db");
    fs::write(db_file, b"").unwrap();
}
21134
/// Writes `line` repeated `lines` times to `path`, newline-separated and with
/// one trailing newline, creating the parent directory first when the path
/// has one. Returns `path` as an owned `PathBuf`. Panics on any I/O failure.
fn write_repeated_lines(path: &Path, line: &str, lines: usize) -> PathBuf {
    if let Some(dir) = path.parent() {
        fs::create_dir_all(dir).unwrap();
    }
    let mut contents = vec![line; lines].join("\n");
    contents.push('\n');
    fs::write(path, contents).unwrap();
    path.to_path_buf()
}
21145
/// Three short lines with a final argument of 1000 come back in full: the
/// result is not flagged as capped and `capped_end` stays at 3.
#[test]
fn token_capped_preview_returns_all_lines_when_under_cap() {
    let snippet: Vec<&str> = Vec::from(["fn foo() {", " 1 + 1", "}"]);
    let outcome = build_token_capped_preview(&snippet, 1, 3, 160, 1000);
    assert!(!outcome.was_capped);
    assert_eq!(outcome.preview.len(), 3);
    assert_eq!(outcome.capped_end, 3);
}
21156
/// Two hundred identical lines with a final argument of 100 are cut short:
/// the result is flagged as capped and ends before line 200.
#[test]
fn token_capped_preview_truncates_when_over_cap() {
    let snippet: Vec<&str> = vec![" let x = some_very_long_expression_here();"; 200];
    let outcome = build_token_capped_preview(&snippet, 1, 200, 160, 100);
    assert!(outcome.was_capped);
    assert!(outcome.preview.len() < 200);
    assert!(outcome.capped_end < 200);
}
21165
/// A single 8000-character line is still returned, and not reported as
/// capped, even when the final argument is only 10.
#[test]
fn token_capped_preview_keeps_at_least_one_line() {
    let oversized = "x".repeat(8000);
    let snippet: Vec<&str> = vec![oversized.as_str()];
    let outcome = build_token_capped_preview(&snippet, 1, 1, 160, 10);
    assert!(!outcome.was_capped);
    assert_eq!(outcome.preview.len(), 1);
}
21174
/// Four 4-character lines with a final argument of 4 fit exactly: nothing is
/// capped and all four lines are previewed.
#[test]
fn token_capped_preview_cap_at_boundary() {
    let snippet: Vec<&str> = Vec::from(["aaaa", "bbbb", "cccc", "dddd"]);
    let outcome = build_token_capped_preview(&snippet, 1, 4, 160, 4);
    assert!(!outcome.was_capped);
    assert_eq!(outcome.preview.len(), 4);
}
21182
/// The same four 4-character lines with a final argument of 3 overflow by
/// one: the result is capped to three lines and `capped_end` is 3.
#[test]
fn token_capped_preview_cap_just_over_boundary() {
    let snippet: Vec<&str> = Vec::from(["aaaa", "bbbb", "cccc", "dddd"]);
    let outcome = build_token_capped_preview(&snippet, 1, 4, 160, 3);
    assert!(outcome.was_capped);
    assert_eq!(outcome.preview.len(), 3);
    assert_eq!(outcome.capped_end, 3);
}
21191
/// An empty input yields an empty preview that is not flagged as capped.
#[test]
fn token_capped_preview_empty_lines() {
    let snippet: Vec<&str> = Vec::new();
    let outcome = build_token_capped_preview(&snippet, 1, 0, 160, 100);
    assert!(!outcome.was_capped);
    assert!(outcome.preview.is_empty());
}
21199
/// With the fourth argument set to 20, a 500-character line is shortened to
/// at most 23 bytes ending in `...`, while both lines are still previewed.
#[test]
fn token_capped_preview_per_line_truncation_applied() {
    let oversized = "x".repeat(500);
    let snippet: Vec<&str> = vec![oversized.as_str(), "short"];
    let outcome = build_token_capped_preview(&snippet, 1, 2, 20, 10000);
    assert!(!outcome.was_capped);
    assert_eq!(outcome.preview.len(), 2);
    let first_text = &outcome.preview[0].text;
    assert!(first_text.len() <= 23);
    assert!(first_text.ends_with("..."));
}
21210
/// A plain lookup prompt is classified into the `haiku` tier, and the
/// returned model name contains `haiku`.
#[test]
fn route_search_defaults_to_haiku() {
    let (tier, model) = classify_task("find all uses of authenticate");
    assert_eq!(tier, "haiku");
    let names_haiku = model.contains("haiku");
    assert!(names_haiku, "expected haiku model, got {model}");
}
21223
/// Each of the listed edit-style prompts is classified into the `sonnet` tier.
#[test]
fn route_edit_keywords_to_sonnet() {
    let prompts = [
        "edit the file",
        "fix the bug",
        "update the config",
        "remove dead code",
        "create a new module",
    ];
    for prompt in prompts {
        let (tier, _) = classify_task(prompt);
        assert_eq!(tier, "sonnet", "expected sonnet for {:?}", prompt);
    }
}
21237
/// Each of the listed design/analysis prompts is classified into the `opus` tier.
#[test]
fn route_architecture_keywords_to_opus() {
    let prompts = [
        "design the API",
        "architecture review",
        "plan the migration",
        "analyze the system",
        "evaluate trade-offs",
    ];
    for prompt in prompts {
        let (tier, _) = classify_task(prompt);
        assert_eq!(tier, "opus", "expected opus for {:?}", prompt);
    }
}
21251
/// A prompt mixing a design keyword with an implementation keyword is still
/// classified into the `opus` tier.
#[test]
fn route_architecture_beats_edit() {
    let routed = classify_task("design and implement the new auth service");
    assert_eq!(routed.0, "opus");
}
21258
/// `--compact` placed before the subcommand sets `compact` on the parsed
/// `Cli` and the `status` subcommand is still selected.
#[test]
fn cli_accepts_global_compact_flag() {
    let argv = ["tsift", "--compact", "status"];
    let parsed = parse_cli(argv);
    assert!(parsed.compact);
    let is_status = matches!(parsed.command, Some(Commands::Status { .. }));
    assert!(is_status);
}
21265
/// A scope resolved from the relative directory `src/feature` under `/repo`
/// matches a file inside that directory and rejects one in a sibling directory.
#[test]
fn summarize_diff_scope_matches_relative_directory() {
    let repo_root = Path::new("/repo");
    let scope = resolve_extract_scope(repo_root, Path::new("src/feature")).unwrap();

    let inside = Path::new("/repo/src/feature/main.rs");
    let outside = Path::new("/repo/src/other/main.rs");
    assert!(summarize_diff_matches_scope(inside, &scope));
    assert!(!summarize_diff_matches_scope(outside, &scope));
}
21280
/// A scope resolved from the relative file `src/feature/main.rs` under `/repo`
/// matches exactly that file and rejects a sibling file in the same directory.
#[test]
fn summarize_diff_scope_matches_relative_file() {
    let repo_root = Path::new("/repo");
    let scope = resolve_extract_scope(repo_root, Path::new("src/feature/main.rs")).unwrap();

    let same_file = Path::new("/repo/src/feature/main.rs");
    let sibling = Path::new("/repo/src/feature/lib.rs");
    assert!(summarize_diff_matches_scope(same_file, &scope));
    assert!(!summarize_diff_matches_scope(sibling, &scope));
}
21295
/// Resolving the relative scope `src` against a temp root and collecting
/// source files from it yields exactly the one `main.rs` written there.
#[test]
fn summarize_extract_scope_walks_relative_paths_from_root() {
    let workspace = tempfile::tempdir().unwrap();
    let src = workspace.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let main_rs = src.join("main.rs");
    std::fs::write(&main_rs, "fn alpha() {}\n").unwrap();

    let scope = resolve_extract_scope(workspace.path(), Path::new("src")).unwrap();
    let collected = collect_source_files(&scope).unwrap();

    assert_eq!(collected, vec![main_rs]);
}
21309
/// When the base is resolved from a nested directory, the scope `.` equals
/// that nested directory and only the nested file is collected — the
/// `root.rs` at the top of the temp tree is not.
#[test]
fn summarize_extract_base_uses_nested_path_instead_of_project_root() {
    let workspace = tempfile::tempdir().unwrap();
    let nested = workspace.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();
    std::fs::write(workspace.path().join("root.rs"), "fn root_level() {}\n").unwrap();
    let nested_file = nested.join("main.rs");
    std::fs::write(&nested_file, "fn nested_only() {}\n").unwrap();

    let base = resolve_extract_base(&nested).unwrap();
    let scope = resolve_extract_scope(&base, Path::new(".")).unwrap();
    let collected = collect_source_files(&scope).unwrap();

    assert_eq!(scope, nested);
    assert_eq!(collected, vec![nested_file]);
}
21326
/// Resolving the extract base from a file path yields the file's parent directory.
#[test]
fn summarize_extract_base_uses_parent_of_file_path() {
    let workspace = tempfile::tempdir().unwrap();
    let nested = workspace.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();
    let source_file = nested.join("main.rs");
    std::fs::write(&source_file, "fn nested_only() {}\n").unwrap();

    let base = resolve_extract_base(&source_file).unwrap();

    assert_eq!(base, nested);
}
21339
/// A scope written as `src/../src` resolves to the canonical `src` directory
/// and matches a file path below the non-canonicalised `src` path.
#[test]
fn summarize_extract_scope_normalizes_dotdot_segments() {
    let workspace = tempfile::tempdir().unwrap();
    let src = workspace.path().join("src");
    std::fs::create_dir_all(&src).unwrap();

    let scope = resolve_extract_scope(workspace.path(), Path::new("src/../src")).unwrap();

    let canonical_src = src.canonicalize().unwrap();
    assert_eq!(scope, canonical_src);
    let candidate = src.join("main.rs");
    assert!(summarize_diff_matches_scope(&candidate, &scope));
}
21354
/// An absolute scope given through a symlink resolves to the canonical
/// target directory and matches a file path below the real directory.
#[cfg(unix)]
#[test]
fn summarize_extract_scope_canonicalizes_absolute_symlink_paths() {
    let workspace = tempfile::tempdir().unwrap();
    let real_root = workspace.path().join("real");
    let src = real_root.join("src");
    std::fs::create_dir_all(&src).unwrap();
    let link = workspace.path().join("scope-link");
    std::os::unix::fs::symlink(&src, &link).unwrap();

    let scope = resolve_extract_scope(&real_root, &link).unwrap();

    let canonical_src = src.canonicalize().unwrap();
    assert_eq!(scope, canonical_src);
    let candidate = src.join("lib.rs");
    assert!(summarize_diff_matches_scope(&candidate, &scope));
}
21375
/// A file created after the initial commit and never added to git is
/// reported under `existing`, with nothing under `deleted`.
#[test]
fn summarize_diff_extract_includes_untracked_files() {
    let repo = tempfile::tempdir().unwrap();
    std::fs::write(repo.path().join("README.md"), "# repo\n").unwrap();
    init_git_repo(repo.path());

    let src = repo.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let untracked = src.join("new.rs");
    std::fs::write(&untracked, "fn alpha_helper() {}\n").unwrap();

    let changes = summarize::git_changed_files(repo.path()).unwrap();

    assert_eq!(changes.existing, vec![untracked]);
    assert!(changes.deleted.is_empty());
}
21392
/// In a repository that has been initialised but has no commit yet, a new
/// file is still reported under `existing`, with nothing under `deleted`.
#[test]
fn summarize_diff_extract_treats_unborn_head_as_untracked_only() {
    let repo = tempfile::tempdir().unwrap();
    let init_status = std::process::Command::new("git")
        .arg("init")
        .current_dir(repo.path())
        .status()
        .unwrap();
    assert!(init_status.success(), "git init failed");

    let src = repo.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let untracked = src.join("new.rs");
    std::fs::write(&untracked, "fn alpha_helper() {}\n").unwrap();

    let changes = summarize::git_changed_files(repo.path()).unwrap();

    assert_eq!(changes.existing, vec![untracked]);
    assert!(changes.deleted.is_empty());
}
21413
/// A committed file that is removed from disk is reported under `deleted`,
/// with nothing under `existing`.
#[test]
fn summarize_diff_extract_tracks_deleted_files() {
    let repo = tempfile::tempdir().unwrap();
    let src = repo.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let gone = src.join("gone.rs");
    std::fs::write(&gone, "fn stale() {}\n").unwrap();
    init_git_repo(repo.path());

    std::fs::remove_file(&gone).unwrap();

    let changes = summarize::git_changed_files(repo.path()).unwrap();

    assert!(changes.existing.is_empty());
    assert_eq!(changes.deleted, vec![gone]);
}
21430
/// After `git mv src/old.rs src/new.rs`, the new path is reported under
/// `existing` and the old path under `deleted`.
#[test]
fn summarize_diff_extract_tracks_git_renames() {
    let repo = tempfile::tempdir().unwrap();
    let src = repo.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let before = src.join("old.rs");
    let after = src.join("new.rs");
    std::fs::write(&before, "fn stale() {}\n").unwrap();
    init_git_repo(repo.path());

    let mv_status = std::process::Command::new("git")
        .args(["mv", "src/old.rs", "src/new.rs"])
        .current_dir(repo.path())
        .status()
        .unwrap();
    assert!(mv_status.success(), "git mv failed");

    let changes = summarize::git_changed_files(repo.path()).unwrap();

    assert_eq!(changes.existing, vec![after]);
    assert_eq!(changes.deleted, vec![before]);
}
21453
/// Seeds a summary row for `src/gone.rs`, deletes that committed file, runs
/// `cmd_summarize` scoped to `src`, and expects no rows left for the file.
#[test]
fn summarize_diff_extract_deletes_removed_summary_rows() {
    let repo = tempfile::tempdir().unwrap();
    let src = repo.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let gone = src.join("gone.rs");
    std::fs::write(&gone, "fn stale() {}\n").unwrap();
    std::fs::write(repo.path().join("README.md"), "# repo\n").unwrap();
    init_git_repo(repo.path());

    let db_path = repo.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let stale_row = summarize::Summary {
        id: 0,
        symbol_name: String::from("stale"),
        file_path: String::from("src/gone.rs"),
        content_hash: String::from("hash1"),
        summary: String::from("stale summary"),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: String::from("1700000000"),
        model: String::from("test"),
        tokens_input: Some(100),
        tokens_output: Some(50),
    };
    summary_db.insert(&stale_row).unwrap();

    std::fs::remove_file(&gone).unwrap();

    let scope = Some(PathBuf::from("src"));
    cmd_summarize(
        None,
        None,
        scope,
        true,
        false,
        repo.path(),
        false,
        true,
        false,
        false,
        false,
    )
    .unwrap();

    let remaining = summary_db.get_by_file("src/gone.rs").unwrap();
    assert!(remaining.is_empty());
}
21502
/// Seeds a summary row for `src/old.rs`, renames the file with `git mv`, runs
/// `cmd_summarize` scoped to `src`, and expects no rows left for the old path.
#[test]
fn summarize_diff_extract_deletes_renamed_summary_rows() {
    let repo = tempfile::tempdir().unwrap();
    let src = repo.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let before = src.join("old.rs");
    std::fs::write(&before, "fn stale() {}\n").unwrap();
    std::fs::write(repo.path().join("README.md"), "# repo\n").unwrap();
    init_git_repo(repo.path());

    let db_path = repo.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let stale_row = summarize::Summary {
        id: 0,
        symbol_name: String::from("stale"),
        file_path: String::from("src/old.rs"),
        content_hash: String::from("hash1"),
        summary: String::from("stale summary"),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: String::from("1700000000"),
        model: String::from("test"),
        tokens_input: Some(100),
        tokens_output: Some(50),
    };
    summary_db.insert(&stale_row).unwrap();

    let mv_status = std::process::Command::new("git")
        .args(["mv", "src/old.rs", "src/new.rs"])
        .current_dir(repo.path())
        .status()
        .unwrap();
    assert!(mv_status.success(), "git mv failed");

    let scope = Some(PathBuf::from("src"));
    cmd_summarize(
        None,
        None,
        scope,
        true,
        false,
        repo.path(),
        false,
        true,
        false,
        false,
        false,
    )
    .unwrap();

    let remaining = summary_db.get_by_file("src/old.rs").unwrap();
    assert!(remaining.is_empty());
}
21556
/// Without any git repository: seeds a summary row for `src/gone.rs`, removes
/// the only file in `src`, runs `cmd_summarize` scoped to `src`, and expects
/// no rows left for the removed file.
#[test]
fn summarize_full_extract_deletes_removed_summary_rows_when_scope_is_empty() {
    let workspace = tempfile::tempdir().unwrap();
    let src = workspace.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let gone = src.join("gone.rs");
    std::fs::write(&gone, "fn stale() {}\n").unwrap();

    let db_path = workspace.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let stale_row = summarize::Summary {
        id: 0,
        symbol_name: String::from("stale"),
        file_path: String::from("src/gone.rs"),
        content_hash: String::from("hash1"),
        summary: String::from("stale summary"),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: String::from("1700000000"),
        model: String::from("test"),
        tokens_input: Some(100),
        tokens_output: Some(50),
    };
    summary_db.insert(&stale_row).unwrap();

    std::fs::remove_file(&gone).unwrap();

    let scope = Some(PathBuf::from("src"));
    cmd_summarize(
        None,
        None,
        scope,
        false,
        false,
        workspace.path(),
        false,
        true,
        false,
        false,
        false,
    )
    .unwrap();

    let remaining = summary_db.get_by_file("src/gone.rs").unwrap();
    assert!(remaining.is_empty());
}
21603
/// With the summary writer lock held by this test, `cmd_summarize` scoped to
/// `src` returns an error whose text names the already-active extractor and
/// mentions `tsift summarize --extract`.
#[test]
fn summarize_extract_fails_fast_when_summary_writer_lock_is_live() {
    let workspace = tempfile::tempdir().unwrap();
    let src = workspace.path().join("src");
    std::fs::create_dir_all(&src).unwrap();
    let lib_rs = src.join("lib.rs");
    std::fs::write(&lib_rs, "fn helper() {}\n").unwrap();

    // Seed a row whose hash is computed from the file's current bytes.
    let bytes = std::fs::read(&lib_rs).unwrap();
    let db_path = workspace.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let cached_row = summarize::Summary {
        id: 0,
        symbol_name: String::from("lib.rs"),
        file_path: String::from("src/lib.rs"),
        content_hash: summarize::content_hash(&bytes),
        summary: String::from("cached summary"),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: String::from("1700000000"),
        model: String::from("test"),
        tokens_input: Some(100),
        tokens_output: Some(50),
    };
    summary_db.insert(&cached_row).unwrap();
    drop(summary_db);

    // Keep the guard bound for the rest of the test so the lock stays held.
    let lock_path = summarize::writer_lock_path(&db_path);
    let _writer_guard = hold_writer_lock(&lock_path);

    let scope = Some(PathBuf::from("src"));
    let outcome = cmd_summarize(
        None,
        None,
        scope,
        false,
        false,
        workspace.path(),
        false,
        true,
        false,
        false,
        false,
    );
    let message = outcome.unwrap_err().to_string();

    assert!(message.contains("another tsift summarize extractor is already active"));
    assert!(message.contains("tsift summarize --extract"));
}
21655
/// In an empty directory `cmd_summarize` fails with a "no summaries.db found"
/// error and does not create `.tsift/summaries.db` as a side effect.
#[test]
fn summarize_stats_fails_closed_when_cache_missing() {
    let workspace = tempfile::tempdir().unwrap();
    let outcome = cmd_summarize(
        None,
        None,
        None,
        false,
        true,
        workspace.path(),
        false,
        false,
        false,
        false,
        false,
    );
    let err = outcome.unwrap_err();

    let message = err.to_string();
    assert!(message.contains("no summaries.db found"), "got: {err}");
    let db_path = workspace.path().join(".tsift/summaries.db");
    assert!(!db_path.exists());
}
21680
/// With one summary row stored and the guard from `hold_rollback_journal_lock`
/// held on the database path, `cmd_summarize` (no symbol, no scope) still succeeds.
#[test]
fn summarize_stats_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let workspace = tempfile::tempdir().unwrap();
    let db_path = workspace.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let cached_row = summarize::Summary {
        id: 0,
        symbol_name: String::from("alpha_helper"),
        file_path: String::from("src/lib.rs"),
        content_hash: String::from("hash1"),
        summary: String::from("cached summary"),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: String::from("1700000000"),
        model: String::from("claude-haiku-4-5-20251001"),
        tokens_input: Some(100),
        tokens_output: Some(40),
    };
    summary_db.insert(&cached_row).unwrap();
    drop(summary_db);
    // Keep the guard bound so the lock stays held while the command runs.
    let _journal_guard = hold_rollback_journal_lock(&db_path);

    let outcome = cmd_summarize(
        None,
        None,
        None,
        false,
        true,
        workspace.path(),
        false,
        false,
        false,
        false,
        false,
    );

    assert!(outcome.is_ok());
}
21721
/// With one summary row stored and the guard from `hold_rollback_journal_lock`
/// held on the database path, querying `cmd_summarize` for the symbol
/// `alpha_helper` still succeeds.
#[test]
fn summarize_symbol_query_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let workspace = tempfile::tempdir().unwrap();
    let db_path = workspace.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let cached_row = summarize::Summary {
        id: 0,
        symbol_name: String::from("alpha_helper"),
        file_path: String::from("src/lib.rs"),
        content_hash: String::from("hash1"),
        summary: String::from("cached summary"),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: String::from("1700000000"),
        model: String::from("claude-haiku-4-5-20251001"),
        tokens_input: Some(100),
        tokens_output: Some(40),
    };
    summary_db.insert(&cached_row).unwrap();
    drop(summary_db);
    // Keep the guard bound so the lock stays held while the command runs.
    let _journal_guard = hold_rollback_journal_lock(&db_path);

    let symbol = Some(String::from("alpha_helper"));
    let outcome = cmd_summarize(
        symbol,
        None,
        None,
        false,
        false,
        workspace.path(),
        false,
        true,
        false,
        false,
        false,
    );

    assert!(outcome.is_ok());
}
21762
/// With the summaries database at the temp root, running `cmd_summarize` from
/// `src/nested` succeeds and does not create a second database under the
/// nested directory.
#[test]
fn summarize_cmd_uses_ancestor_project_root_for_nested_paths() {
    let workspace = tempfile::tempdir().unwrap();
    let nested = workspace.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let db_path = workspace.path().join(".tsift/summaries.db");
    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    let cached_row = summarize::Summary {
        id: 0,
        symbol_name: String::from("alpha_helper"),
        file_path: String::from("src/lib.rs"),
        content_hash: String::from("hash1"),
        summary: String::from("cached summary"),
        entities: None,
        relationships: None,
        concept_labels: None,
        extracted_at: String::from("1700000000"),
        model: String::from("claude-haiku-4-5-20251001"),
        tokens_input: Some(100),
        tokens_output: Some(40),
    };
    summary_db.insert(&cached_row).unwrap();

    let symbol = Some(String::from("alpha_helper"));
    let outcome = cmd_summarize(
        symbol,
        None,
        None,
        false,
        false,
        &nested,
        false,
        true,
        false,
        false,
        false,
    );

    assert!(outcome.is_ok());
    let nested_db = nested.join(".tsift/summaries.db");
    assert!(!nested_db.exists());
}
21805
/// Builds a workspace with two submodule entries (`src/alpha`, `src/beta`),
/// each with an empty `index.db` under `.tsift/indexes/<name>/`, and checks
/// that looking up a file inside `src/beta` selects the `beta` index with
/// `src/beta` as its source root.
#[test]
fn summarize_extract_uses_matching_scoped_index_for_workspace_file() {
    let dir = tempfile::tempdir().unwrap();
    // `.gitmodules` fixture declaring the two submodule paths.
    std::fs::write(
        dir.path().join(".gitmodules"),
        r#"[submodule "src/alpha"]
 path = src/alpha
 url = https://example.com/alpha
[submodule "src/beta"]
 path = src/beta
 url = https://example.com/beta
"#,
    )
    .unwrap();

    // Source trees and per-scope index directories for both submodules.
    let alpha_root = dir.path().join("src/alpha");
    let beta_root = dir.path().join("src/beta");
    std::fs::create_dir_all(alpha_root.join("src")).unwrap();
    std::fs::create_dir_all(beta_root.join("src")).unwrap();
    std::fs::create_dir_all(dir.path().join(".tsift/indexes/alpha")).unwrap();
    std::fs::create_dir_all(dir.path().join(".tsift/indexes/beta")).unwrap();
    std::fs::write(alpha_root.join("src/lib.rs"), "fn alpha_helper() {}\n").unwrap();
    let beta_file = beta_root.join("src/lib.rs");
    std::fs::write(&beta_file, "fn beta_helper() {}\n").unwrap();
    // The index files are empty placeholders; only their paths are asserted below.
    std::fs::write(dir.path().join(".tsift/indexes/alpha/index.db"), "").unwrap();
    std::fs::write(dir.path().join(".tsift/indexes/beta/index.db"), "").unwrap();

    let context = find_symbols_db_for_file(dir.path(), &beta_file)
        .unwrap()
        .expect("expected matching scoped index");

    assert_eq!(
        context.db_path,
        dir.path().join(".tsift/indexes/beta/index.db")
    );
    assert_eq!(context.source_root, beta_root);
}
21843
21844 fn make_op(old: &str, new: &str, replace_all: bool) -> EditOp {
21847 EditOp {
21848 file: PathBuf::from("dummy.txt"),
21849 old: old.to_string(),
21850 new: new.to_string(),
21851 replace_all,
21852 }
21853 }
21854
/// A unique match is replaced once and the reported replacement count is 1.
#[test]
fn edit_replaces_single_occurrence() {
    let op = make_op("world", "rust", false);
    let (rewritten, replacements) = apply_edit_op("hello world", &op).unwrap();
    assert_eq!(rewritten, "hello rust");
    assert_eq!(replacements, 1);
}
21863
/// With `replace_all` set, all three matches are replaced and the count is 3.
#[test]
fn edit_replace_all_replaces_every_occurrence() {
    let op = make_op("foo", "bar", true);
    let (rewritten, replacements) = apply_edit_op("foo foo foo", &op).unwrap();
    assert_eq!(rewritten, "bar bar bar");
    assert_eq!(replacements, 3);
}
21872
/// Applying an edit whose search text does not occur in the content is an error.
#[test]
fn edit_fails_when_old_not_found() {
    let op = make_op("missing", "x", false);
    let outcome = apply_edit_op("hello world", &op);
    assert!(outcome.is_err());
}
21879
/// Two matches without `replace_all` produce an error whose text mentions "2 times".
#[test]
fn edit_fails_when_ambiguous_without_replace_all() {
    let op = make_op("foo", "bar", false);
    let err = apply_edit_op("foo foo", &op).unwrap_err();
    let rendered = err.to_string();
    assert!(rendered.contains("2 times"), "got: {}", err);
}
21887
/// An edit whose replacement text equals its search text is rejected.
#[test]
fn edit_fails_when_old_equals_new() {
    let op = make_op("hello", "hello", false);
    let outcome = apply_edit_op("hello", &op);
    assert!(outcome.is_err());
}
21894
/// Applies a two-file edit plan whose callback fails on commit index 1, then
/// checks that the error is surfaced and both files still hold their
/// original contents.
#[test]
fn edit_batch_rolls_back_when_later_swap_fails() {
    let workspace = tempfile::tempdir().unwrap();
    let alpha = workspace.path().join("alpha.txt");
    let beta = workspace.path().join("beta.txt");
    fs::write(&alpha, "alpha old\n").unwrap();
    fs::write(&beta, "beta old\n").unwrap();

    // Both edits perform the same single `old` -> `new` replacement.
    let old_to_new = |target: &PathBuf| EditOp {
        file: target.clone(),
        old: "old".to_string(),
        new: "new".to_string(),
        replace_all: false,
    };
    let batch = EditBatch {
        edits: vec![old_to_new(&alpha), old_to_new(&beta)],
    };

    let plan = build_edit_plan(&batch).unwrap();
    let outcome = apply_edit_plan_atomically_inner(plan, |commit_index, _| {
        // Fail the second commit step (index 1) to trigger the rollback path.
        if commit_index == 1 {
            bail!("simulated swap failure");
        }
        Ok(())
    });
    let Err(err) = outcome else {
        panic!("expected simulated swap failure");
    };

    assert!(err.to_string().contains("simulated swap failure"));
    assert_eq!(fs::read_to_string(&alpha).unwrap(), "alpha old\n");
    assert_eq!(fs::read_to_string(&beta).unwrap(), "beta old\n");
}
21935
/// Creates a SQLite database in a fresh named temp file and seeds it with a
/// `users` table (2 rows) and a `posts` table (3 rows) that references
/// `users(id)`.
///
/// Returns the temp-file handle together with the open connection.
// NOTE(review): the handle is presumably returned so the backing file is not
// removed while the connection is in use — confirm against `tempfile` docs.
fn setup_test_db() -> (tempfile::NamedTempFile, Connection) {
    let tmp = tempfile::NamedTempFile::new().unwrap();
    let conn = Connection::open(tmp.path()).unwrap();
    // Some rows deliberately carry NULL in nullable columns (`email`, `body`).
    conn.execute_batch(
        "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT);
 INSERT INTO users VALUES (1, 'Alice', 'alice@example.com');
 INSERT INTO users VALUES (2, 'Bob', NULL);
 CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER NOT NULL, title TEXT NOT NULL, body TEXT,
 FOREIGN KEY(user_id) REFERENCES users(id));
 INSERT INTO posts VALUES (1, 1, 'Hello World', 'First post');
 INSERT INTO posts VALUES (2, 1, 'Second', NULL);
 INSERT INTO posts VALUES (3, 2, 'Bob post', 'Content here');"
    ).unwrap();
    (tmp, conn)
}
21953
/// `rg <pattern>` is rewritten to an exact, normal-budget `tsift` envelope search.
#[test]
fn rewrite_rg_simple_pattern() {
    let rewritten = rewrite_command("rg authenticate");
    let expected = "tsift --envelope search \"authenticate\" --exact --budget normal";
    assert_eq!(rewritten.as_deref(), Some(expected));
}
21964
/// A trailing path argument to `rg` is carried over as `--path "<path>"`.
#[test]
fn rewrite_rg_with_path() {
    let rewritten = rewrite_command("rg authenticate src/");
    let expected =
        "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"";
    assert_eq!(rewritten.as_deref(), Some(expected));
}
21976
/// The `-i` flag does not appear in the rewrite; pattern and path are kept.
#[test]
fn rewrite_rg_with_flags_ignored() {
    let rewritten = rewrite_command("rg -i authenticate src/");
    let expected =
        "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"";
    assert_eq!(rewritten.as_deref(), Some(expected));
}
21988
/// `-t rs` and its value are dropped: the rewrite searches for the pattern only.
#[test]
fn rewrite_rg_with_type_flag() {
    let rewritten = rewrite_command("rg -t rs authenticate");
    let expected = "tsift --envelope search \"authenticate\" --exact --budget normal";
    assert_eq!(rewritten.as_deref(), Some(expected));
}
21998
/// An `rg` command piped into another program is not rewritten.
#[test]
fn rewrite_rg_pipe_passthrough() {
    assert_eq!(rewrite_command("rg authenticate | head -5"), None);
}
22005
/// `rg --files ...` (file listing rather than a pattern search) is not rewritten.
#[test]
fn rewrite_rg_files_passthrough() {
    assert_eq!(rewrite_command("rg --files src/tsift .agent-doc logs"), None);
}
22011
/// `find` invocations are not rewritten.
#[test]
fn rewrite_find_passthrough() {
    let command = "find src/tsift .agent-doc -type f -name '*.rs'";
    assert_eq!(rewrite_command(command), None);
}
22017
/// `grep -r <pattern> <path>` is rewritten to the same search form as `rg`.
#[test]
fn rewrite_grep_recursive() {
    let rewritten = rewrite_command("grep -r authenticate src/");
    let expected =
        "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"";
    assert_eq!(rewritten.as_deref(), Some(expected));
}
22029
/// `grep` without `-r` is not rewritten.
#[test]
fn rewrite_grep_non_recursive_passthrough() {
    assert_eq!(rewrite_command("grep authenticate file.txt"), None);
}
22035
/// A command that already invokes `tsift` is returned unchanged, as `Some`.
#[test]
fn rewrite_tsift_passthrough() {
    let command = "tsift search \"foo\"";
    let rewritten = rewrite_command(command);
    assert_eq!(rewritten.as_deref(), Some("tsift search \"foo\""));
}
22041
/// A `tsift search` command without `--timeout` gets ` --timeout 0` appended.
#[test]
fn rewrite_run_tsift_search_disables_timeout_by_default() {
    let command = "tsift search hookcaps --exact --path /tmp/x";
    let effective = effective_rewrite_run_command(command);
    let expected = "tsift search hookcaps --exact --path /tmp/x --timeout 0";
    assert_eq!(effective, expected);
}
22050
/// A `tsift search` command that already carries `--timeout 5` is left unchanged.
#[test]
fn rewrite_run_preserves_explicit_search_timeout() {
    let command = "tsift search hookcaps --exact --path /tmp/x --timeout 5";
    let effective = effective_rewrite_run_command(command);
    assert_eq!(effective, command);
}
22061
22062 #[test]
22063 fn rewrite_unrelated_passthrough() {
22064 let result = rewrite_command("echo cargo build");
22065 assert_eq!(result, None);
22066 }
22067
22068 #[test]
22069 fn rewrite_rg_quoted_pattern() {
22070 let result = rewrite_command("rg \"fn main\"");
22071 assert_eq!(
22072 result,
22073 Some("tsift --envelope search \"fn main\" --exact --budget normal".to_string())
22074 );
22075 }
22076
22077 #[test]
22078 fn rewrite_git_diff_to_diff_digest() {
22079 let result = rewrite_command("git diff");
22080 assert_eq!(result, Some("tsift diff-digest .".to_string()));
22081 }
22082
22083 #[test]
22084 fn rewrite_git_diff_cached_to_diff_digest() {
22085 let result = rewrite_command("git diff --cached");
22086 assert_eq!(result, Some("tsift diff-digest --cached .".to_string()));
22087 }
22088
22089 #[test]
22090 fn rewrite_git_diff_with_path_to_diff_digest() {
22091 let result = rewrite_command("git diff -- src/");
22092 assert_eq!(result, Some("tsift diff-digest \"src/\"".to_string()));
22093 }
22094
22095 #[test]
22096 fn rewrite_git_diff_with_revision_passthrough() {
22097 let result = rewrite_command("git diff HEAD~1");
22098 assert_eq!(result, None);
22099 }
22100
22101 #[test]
22102 fn rewrite_git_show_to_revision_diff_digest() {
22103 let result = rewrite_command("git show HEAD~1");
22104 assert_eq!(
22105 result,
22106 Some("tsift diff-digest --revision \"HEAD~1\" .".to_string())
22107 );
22108 }
22109
22110 #[test]
22111 fn rewrite_git_log_patch_history_to_revision_diff_digest() {
22112 let result = rewrite_command("git log -p -1 HEAD~2");
22113 assert_eq!(
22114 result,
22115 Some("tsift diff-digest --revision \"HEAD~2\" .".to_string())
22116 );
22117 }
22118
22119 #[test]
22120 fn rewrite_cat_long_agent_doc_session_to_session_digest() {
22121 let dir = tempfile::tempdir().unwrap();
22122 let session = dir.path().join("tsift.md");
22123 let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
22124 for index in 0..90 {
22125 body.push_str(&format!("❯ prompt {index}?\n"));
22126 }
22127 fs::write(&session, body).unwrap();
22128
22129 let result = rewrite_command(&format!("cat {}", shell_quote(session.to_str().unwrap())));
22130 assert_eq!(
22131 result,
22132 Some(format!(
22133 "tsift session-digest --path {} --input {} --source markdown",
22134 shell_quote(&resolve_digest_context_path(&session)),
22135 shell_quote(session.to_str().unwrap())
22136 ))
22137 );
22138 }
22139
22140 #[test]
22141 fn rewrite_head_long_claude_jsonl_to_session_digest() {
22142 let dir = tempfile::tempdir().unwrap();
22143 let session = dir.path().join("session.jsonl");
22144 let line =
22145 r#"{"message":{"role":"assistant","content":[{"type":"text","text":"❯ do [#yyhd]"}]}}"#;
22146 let body = std::iter::repeat_n(line, 120)
22147 .collect::<Vec<_>>()
22148 .join("\n");
22149 fs::write(&session, format!("{body}\n")).unwrap();
22150
22151 let result = rewrite_command(&format!(
22152 "head -n 120 {}",
22153 shell_quote(session.to_str().unwrap())
22154 ));
22155 assert_eq!(
22156 result,
22157 Some(format!(
22158 "tsift session-digest --path {} --input {} --source claude-jsonl",
22159 shell_quote(&resolve_digest_context_path(&session)),
22160 shell_quote(session.to_str().unwrap())
22161 ))
22162 );
22163 }
22164
22165 #[test]
22166 fn rewrite_head_long_codex_jsonl_to_session_digest() {
22167 let dir = tempfile::tempdir().unwrap();
22168 let session = dir.path().join("codex.jsonl");
22169 let line = r#"{"type":"event_msg","payload":{"type":"user_message","message":"do [#cdxlog]. spec-test-build-install-commit-push"}}"#;
22170 let body = std::iter::repeat_n(line, 120)
22171 .collect::<Vec<_>>()
22172 .join("\n");
22173 fs::write(&session, format!("{body}\n")).unwrap();
22174
22175 let result = rewrite_command(&format!(
22176 "head -n 120 {}",
22177 shell_quote(session.to_str().unwrap())
22178 ));
22179 assert_eq!(
22180 result,
22181 Some(format!(
22182 "tsift session-digest --path {} --input {} --source codex-jsonl",
22183 shell_quote(&resolve_digest_context_path(&session)),
22184 shell_quote(session.to_str().unwrap())
22185 ))
22186 );
22187 }
22188
22189 #[test]
22190 fn rewrite_small_transcript_window_passthrough() {
22191 let dir = tempfile::tempdir().unwrap();
22192 let session = dir.path().join("session.jsonl");
22193 let line = r#"{"message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}"#;
22194 let body = std::iter::repeat_n(line, 120)
22195 .collect::<Vec<_>>()
22196 .join("\n");
22197 fs::write(&session, format!("{body}\n")).unwrap();
22198
22199 let result = rewrite_command(&format!(
22200 "tail -n 20 {}",
22201 shell_quote(session.to_str().unwrap())
22202 ));
22203 assert_eq!(result, None);
22204 }
22205
22206 #[test]
22207 fn rewrite_sed_large_agent_doc_range_to_session_digest() {
22208 let dir = tempfile::tempdir().unwrap();
22209 let session = dir.path().join("tsift.md");
22210 let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
22211 for index in 0..120 {
22212 body.push_str(&format!("### Re: topic {index}\n"));
22213 }
22214 fs::write(&session, body).unwrap();
22215
22216 let result = rewrite_command(&format!(
22217 "sed -n '1,120p' {}",
22218 shell_quote(session.to_str().unwrap())
22219 ));
22220 assert_eq!(
22221 result,
22222 Some(format!(
22223 "tsift session-digest --path {} --input {} --source markdown",
22224 shell_quote(&resolve_digest_context_path(&session)),
22225 shell_quote(session.to_str().unwrap())
22226 ))
22227 );
22228 }
22229
22230 #[test]
22231 fn rewrite_cat_large_agent_doc_log_to_session_digest() {
22232 let dir = tempfile::tempdir().unwrap();
22233 let session = dir.path().join("tsift.log");
22234 let line = "[1776528398] claude_start mode=fresh_restart restart_count=1";
22235 let body = std::iter::repeat_n(line, 120)
22236 .collect::<Vec<_>>()
22237 .join("\n");
22238 fs::write(&session, format!("{body}\n")).unwrap();
22239
22240 let result = rewrite_command(&format!("cat {}", shell_quote(session.to_str().unwrap())));
22241 assert_eq!(
22242 result,
22243 Some(format!(
22244 "tsift session-digest --path {} --input {} --source agent-doc-log",
22245 shell_quote(&resolve_digest_context_path(&session)),
22246 shell_quote(session.to_str().unwrap())
22247 ))
22248 );
22249 }
22250
    /// For a session file inside a git submodule, the rewritten session-digest
    /// command must use the submodule directory, not the outer temp directory,
    /// as its `--path`.
    #[test]
    fn rewrite_session_reads_prefer_submodule_root_for_digest_path() {
        let dir = tempfile::tempdir().unwrap();
        // Outer directory: a `.gitmodules` file declaring `src/tsift` as a submodule.
        fs::write(
            dir.path().join(".gitmodules"),
            r#"[submodule "src/tsift"]
    path = src/tsift
    url = https://example.com/tsift
"#,
        )
        .unwrap();
        let submodule = dir.path().join("src/tsift");
        fs::create_dir_all(submodule.join("tasks")).unwrap();
        // Inside the submodule, `.git` is a file holding a `gitdir:` pointer.
        fs::write(
            submodule.join(".git"),
            "gitdir: ../../.git/modules/src/tsift\n",
        )
        .unwrap();
        // Agent-doc session with 90 prompt lines, stored under the submodule's `tasks/`.
        let session = submodule.join("tasks/plan.md");
        let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
        for index in 0..90 {
            body.push_str(&format!("❯ prompt {index}?\n"));
        }
        fs::write(&session, body).unwrap();

        let result = rewrite_command(&format!("cat {}", shell_quote(session.to_str().unwrap())));

        // `--path` is expected to be the submodule directory itself.
        assert_eq!(
            result,
            Some(format!(
                "tsift session-digest --path {} --input {} --source markdown",
                shell_quote(submodule.to_str().unwrap()),
                shell_quote(session.to_str().unwrap())
            ))
        );
    }
22287
22288 #[test]
22289 fn rewrite_regular_markdown_read_passthrough() {
22290 let dir = tempfile::tempdir().unwrap();
22291 let readme = dir.path().join("README.md");
22292 let body = std::iter::repeat_n("plain markdown", 120)
22293 .collect::<Vec<_>>()
22294 .join("\n");
22295 fs::write(&readme, format!("{body}\n")).unwrap();
22296
22297 let result = rewrite_command(&format!("cat {}", shell_quote(readme.to_str().unwrap())));
22298 assert_eq!(result, None);
22299 }
22300
22301 #[test]
22302 fn rewrite_cat_large_source_to_source_read_in_indexed_repo() {
22303 let dir = tempfile::tempdir().unwrap();
22304 write_empty_root_index(dir.path());
22305 let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);
22306
22307 let result = rewrite_command(&format!("cat {}", shell_quote(source.to_str().unwrap())));
22308
22309 assert_eq!(
22310 result,
22311 Some(format!(
22312 "tsift --envelope source-read \"src/lib.rs\" --path {} --start 1 --lines 80 --budget normal",
22313 shell_quote(&dir.path().to_string_lossy())
22314 ))
22315 );
22316 }
22317
22318 #[test]
22319 fn rewrite_head_small_source_window_passthrough() {
22320 let dir = tempfile::tempdir().unwrap();
22321 write_empty_root_index(dir.path());
22322 let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);
22323
22324 let result = rewrite_command(&format!(
22325 "head -n 20 {}",
22326 shell_quote(source.to_str().unwrap())
22327 ));
22328
22329 assert_eq!(result, None);
22330 }
22331
22332 #[test]
22333 fn rewrite_sed_large_source_range_to_source_read() {
22334 let dir = tempfile::tempdir().unwrap();
22335 write_empty_root_index(dir.path());
22336 let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 200);
22337
22338 let result = rewrite_command(&format!(
22339 "sed -n '40,160p' {}",
22340 shell_quote(source.to_str().unwrap())
22341 ));
22342
22343 assert_eq!(
22344 result,
22345 Some(format!(
22346 "tsift --envelope source-read \"src/lib.rs\" --path {} --start 40 --lines 121 --budget normal",
22347 shell_quote(&dir.path().to_string_lossy())
22348 ))
22349 );
22350 }
22351
22352 #[test]
22353 fn rewrite_tail_large_source_window_preserves_tail_anchor() {
22354 let dir = tempfile::tempdir().unwrap();
22355 write_empty_root_index(dir.path());
22356 let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 200);
22357
22358 let result = rewrite_command(&format!(
22359 "tail -n 120 {}",
22360 shell_quote(source.to_str().unwrap())
22361 ));
22362
22363 assert_eq!(
22364 result,
22365 Some(format!(
22366 "tsift --envelope source-read \"src/lib.rs\" --path {} --start 81 --lines 120 --budget normal",
22367 shell_quote(&dir.path().to_string_lossy())
22368 ))
22369 );
22370 }
22371
22372 #[test]
22373 fn rewrite_large_non_source_read_passthrough_even_when_indexed() {
22374 let dir = tempfile::tempdir().unwrap();
22375 write_empty_root_index(dir.path());
22376 let text = write_repeated_lines(&dir.path().join("notes.txt"), "plain text", 120);
22377
22378 let result = rewrite_command(&format!("cat {}", shell_quote(text.to_str().unwrap())));
22379
22380 assert_eq!(result, None);
22381 }
22382
22383 #[test]
22384 fn rewrite_large_source_read_passthrough_without_index() {
22385 let dir = tempfile::tempdir().unwrap();
22386 let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);
22387
22388 let result = rewrite_command(&format!("cat {}", shell_quote(source.to_str().unwrap())));
22389
22390 assert_eq!(result, None);
22391 }
22392
22393 #[test]
22394 fn rewrite_cargo_test_to_digest_runner() {
22395 let result = rewrite_command("cargo test --lib");
22396 assert_eq!(
22397 result,
22398 Some(
22399 "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"cargo test --lib\" --runner \"cargo\"".to_string()
22400 )
22401 );
22402 }
22403
22404 #[test]
22405 fn rewrite_pytest_to_digest_runner() {
22406 let result = rewrite_command("pytest -q tests/test_cli.py");
22407 assert_eq!(
22408 result,
22409 Some(
22410 "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"pytest -q tests/test_cli.py\" --runner \"pytest\"".to_string()
22411 )
22412 );
22413 }
22414
22415 #[test]
22416 fn rewrite_python_m_pytest_to_digest_runner() {
22417 let result = rewrite_command("python -m pytest tests/test_cli.py");
22418 assert_eq!(
22419 result,
22420 Some(
22421 "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"python -m pytest tests/test_cli.py\" --runner \"pytest\"".to_string()
22422 )
22423 );
22424 }
22425
22426 #[test]
22427 fn rewrite_cargo_build_to_log_digest_runner() {
22428 let result = rewrite_command("cargo build --release");
22429 assert_eq!(
22430 result,
22431 Some(
22432 "tsift --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\"".to_string()
22433 )
22434 );
22435 }
22436
22437 #[test]
22438 fn rewrite_cargo_install_to_log_digest_runner() {
22439 let result = rewrite_command("cargo install --path . --force");
22440 assert_eq!(
22441 result,
22442 Some(
22443 "tsift --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo install --path . --force\"".to_string()
22444 )
22445 );
22446 }
22447
22448 #[test]
22449 fn rewrite_metacharacter_command_passthrough() {
22450 let result = rewrite_command("cargo test | head");
22451 assert_eq!(result, None);
22452 }
22453
22454 #[test]
22455 fn rewrite_output_cap_detects_search_even_with_global_flag() {
22456 let cap = rewrite_output_cap("tsift --compact search foo").expect("cap");
22457 assert_eq!(cap.max_lines, 50);
22458 assert_eq!(cap.strip_prefix, Some("Strategy:"));
22459 }
22460
22461 #[test]
22462 fn rewrite_output_cap_skips_structured_output() {
22463 assert!(rewrite_output_cap("tsift search foo --json").is_none());
22464 assert!(rewrite_output_cap("tsift --schema graph foo").is_none());
22465 assert!(rewrite_output_cap("tsift --envelope search foo").is_none());
22466 }
22467
22468 #[test]
22469 fn rewrite_output_format_forwards_envelope_to_digest_runner() {
22470 let command = rewrite_command("cargo test --lib").expect("rewrite");
22471 let forwarded = apply_rewrite_output_format(
22472 &command,
22473 OutputFormat {
22474 json_output: true,
22475 compact: false,
22476 pretty: false,
22477 terse: false,
22478 ultra_terse: false,
22479 schema: false,
22480 envelope: true,
22481 },
22482 );
22483 assert_eq!(
22484 forwarded,
22485 "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"cargo test --lib\" --runner \"cargo\""
22486 );
22487 }
22488
22489 #[test]
22490 fn rewrite_output_format_forwards_json_when_requested() {
22491 let command = rewrite_command("cargo build --release").expect("rewrite");
22492 let forwarded = apply_rewrite_output_format(
22493 &command,
22494 OutputFormat {
22495 json_output: true,
22496 compact: false,
22497 pretty: true,
22498 terse: false,
22499 ultra_terse: false,
22500 schema: false,
22501 envelope: false,
22502 },
22503 );
22504 assert_eq!(
22505 forwarded,
22506 "tsift --pretty --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\""
22507 );
22508 }
22509
22510 #[test]
22511 fn output_cap_strips_search_header_and_truncates() {
22512 let capped = apply_output_cap(
22513 b"Strategy: exact | Indexed: 0 | Skipped: 0\n\nline1\nline2\nline3\n",
22514 OutputCap {
22515 max_lines: 2,
22516 strip_prefix: Some("Strategy:"),
22517 },
22518 );
22519 assert_eq!(
22520 capped,
22521 "line1\nline2\n... (+1 more lines; rerun the underlying tsift command directly for the full output)\n"
22522 );
22523 }
22524
22525 #[test]
22526 fn sql_schema_overview_lists_tables() {
22527 let (_tmp, conn) = setup_test_db();
22528 let tables = schema_overview(&conn).unwrap();
22529 let names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
22530 assert_eq!(names, &["posts", "users"]);
22531 }
22532
22533 #[test]
22534 fn sql_schema_overview_row_counts() {
22535 let (_tmp, conn) = setup_test_db();
22536 let tables = schema_overview(&conn).unwrap();
22537 let users = tables.iter().find(|t| t.name == "users").unwrap();
22538 let posts = tables.iter().find(|t| t.name == "posts").unwrap();
22539 assert_eq!(users.row_count, 2);
22540 assert_eq!(posts.row_count, 3);
22541 }
22542
22543 #[test]
22544 fn sql_table_columns_metadata() {
22545 let (_tmp, conn) = setup_test_db();
22546 let cols = table_columns(&conn, "users").unwrap();
22547 assert_eq!(cols.len(), 3);
22548 assert_eq!(cols[0].name, "id");
22549 assert!(cols[0].pk);
22550 assert_eq!(cols[1].name, "name");
22551 assert!(cols[1].notnull);
22552 assert_eq!(cols[2].name, "email");
22553 assert!(!cols[2].notnull);
22554 }
22555
22556 #[test]
22557 fn sql_execute_query_returns_rows() {
22558 let (_tmp, conn) = setup_test_db();
22559 let (columns, rows) =
22560 execute_query(&conn, "SELECT name, email FROM users ORDER BY id").unwrap();
22561 assert_eq!(columns, &["name", "email"]);
22562 assert_eq!(rows.len(), 2);
22563 assert_eq!(rows[0][0], serde_json::json!("Alice"));
22564 assert_eq!(rows[0][1], serde_json::json!("alice@example.com"));
22565 assert_eq!(rows[1][1], serde_json::Value::Null);
22566 }
22567
22568 #[test]
22569 fn sql_execute_query_aggregate() {
22570 let (_tmp, conn) = setup_test_db();
22571 let (columns, rows) = execute_query(&conn, "SELECT COUNT(*) as cnt FROM posts").unwrap();
22572 assert_eq!(columns, &["cnt"]);
22573 assert_eq!(rows[0][0], serde_json::json!(3));
22574 }
22575
22576 #[test]
22577 fn sql_execute_query_join() {
22578 let (_tmp, conn) = setup_test_db();
22579 let (_cols, rows) = execute_query(
22580 &conn,
22581 "SELECT u.name, p.title FROM users u JOIN posts p ON u.id = p.user_id ORDER BY p.id",
22582 )
22583 .unwrap();
22584 assert_eq!(rows.len(), 3);
22585 assert_eq!(rows[0][0], serde_json::json!("Alice"));
22586 assert_eq!(rows[2][0], serde_json::json!("Bob"));
22587 }
22588
22589 #[test]
22590 fn sql_open_db_read_only() {
22591 let (tmp, _conn) = setup_test_db();
22592 drop(_conn);
22593 let ro_conn = open_db(tmp.path()).unwrap();
22594 let result = ro_conn.execute("INSERT INTO users VALUES (99, 'Fail', NULL)", []);
22595 assert!(result.is_err(), "read-only connection should reject writes");
22596 }
22597
22598 #[test]
22599 fn sql_empty_table_schema() {
22600 let tmp = tempfile::NamedTempFile::new().unwrap();
22601 let conn = Connection::open(tmp.path()).unwrap();
22602 conn.execute_batch("CREATE TABLE empty_tbl (id INTEGER PRIMARY KEY, data BLOB)")
22603 .unwrap();
22604 let tables = schema_overview(&conn).unwrap();
22605 assert_eq!(tables[0].row_count, 0);
22606 assert_eq!(tables[0].columns.len(), 2);
22607 }
22608
22609 fn setup_graph_index() -> tempfile::TempDir {
22612 let dir = tempfile::tempdir().unwrap();
22613 std::fs::write(
22614 dir.path().join("main.rs"),
22615 "fn helper() { println!(\"hi\"); }\nfn main() { helper(); Vec::new(); }",
22616 )
22617 .unwrap();
22618 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
22619 db.apply_changes(dir.path()).unwrap();
22620 dir
22621 }
22622
22623 fn setup_traversal_project() -> tempfile::TempDir {
22624 let dir = setup_graph_index();
22625 let task_dir = dir.path().join("tasks/software");
22626 std::fs::create_dir_all(&task_dir).unwrap();
22627 std::fs::write(
22628 task_dir.join("tsift.md"),
22629 r#"---
22630agent_doc_session: tsift-v0.1
22631agent_doc_format: template
22632---
22633
22634## Exchange
22635
22636<!-- agent:exchange patch=append -->
22637❯ do [#kgnv]
22638Completed `#kgnv`; touched files `main.rs`; tests `cargo test traversal_graph`; follow-up `#gfix`.
22639<!-- /agent:exchange -->
22640
22641<!-- agent:queue -->
22642dispatch #spec-test-build-install-commit-push
22643- do [#kgnv]
22644<!-- /agent:queue -->
22645
22646## Backlog
22647
22648<!-- agent:backlog -->
22649- [ ] [#kgnv] Fix helper traversal handles while preserving graph navigation.
22650<!-- /agent:backlog -->
22651"#,
22652 )
22653 .unwrap();
22654 dir
22655 }
22656
22657 fn resolve_ast_span_node<'a>(
22658 graph: &'a TraversalGraphBuild,
22659 label: &str,
22660 symbol_kind: &str,
22661 ) -> &'a TraversalNode {
22662 graph
22663 .nodes
22664 .values()
22665 .find(|node| {
22666 node.kind == "ast_span"
22667 && node.label == label
22668 && node.properties.get("symbol_kind") == Some(&symbol_kind.to_string())
22669 })
22670 .unwrap_or_else(|| panic!("missing ast_span {symbol_kind} {label}"))
22671 }
22672
    /// Builds a temp project holding one small fixture file per language (Rust,
    /// Python, TypeScript, JavaScript, Kotlin, Zig, Bash) plus a Markdown README,
    /// then applies index changes into `.tsift/index.db`.
    ///
    /// Each code fixture defines a `fixture_nav_<language>_entry` function that
    /// calls a helper defined in the same file. The returned guard owns the
    /// directory.
    fn setup_multilingual_ast_navigation_project() -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        // Rust: a module with a helper and an entry function that calls it.
        std::fs::write(
            dir.path().join("rust.rs"),
            r#"mod fixture_nav_rust_mod {
    pub fn fixture_nav_rust_helper() {}
    pub fn fixture_nav_rust_entry() {
        fixture_nav_rust_helper();
    }
}
"#,
        )
        .unwrap();
        // Python: the helper is defined before the entry function.
        std::fs::write(
            dir.path().join("python.py"),
            r#"def fixture_nav_python_helper():
    return 1

def fixture_nav_python_entry():
    return fixture_nav_python_helper()
"#,
        )
        .unwrap();
        // TypeScript: an exported entry calling a helper declared after it.
        std::fs::write(
            dir.path().join("typescript.ts"),
            r#"export function fixture_nav_typescript_entry(): number {
    return fixtureNavTsHelper();
}

function fixtureNavTsHelper(): number {
    return 1;
}
"#,
        )
        .unwrap();
        // JavaScript: an entry calling a helper declared after it.
        std::fs::write(
            dir.path().join("javascript.js"),
            r#"function fixture_nav_javascript_entry() {
    return fixtureNavJsHelper();
}

function fixtureNavJsHelper() {
    return 1;
}
"#,
        )
        .unwrap();
        // Kotlin: a block-bodied entry and an expression-bodied helper.
        std::fs::write(
            dir.path().join("kotlin.kt"),
            r#"fun fixture_nav_kotlin_entry(): Int {
    return fixtureNavKotlinHelper()
}

fun fixtureNavKotlinHelper(): Int = 1
"#,
        )
        .unwrap();
        // Zig: a `pub` entry and a private helper.
        std::fs::write(
            dir.path().join("zig.zig"),
            r#"pub fn fixture_nav_zig_entry() i32 {
    return fixtureNavZigHelper();
}

fn fixtureNavZigHelper() i32 {
    return 1;
}
"#,
        )
        .unwrap();
        // Bash: two functions plus an alias definition.
        std::fs::write(
            dir.path().join("bash.sh"),
            r#"#!/usr/bin/env bash
fixture_nav_bash_entry() {
    fixture_nav_bash_helper
}

fixture_nav_bash_helper() {
    echo ok
}

alias fixture_nav_bash_alias='echo alias'
"#,
        )
        .unwrap();
        // Markdown: headings, a nested list, and a fenced Python block.
        std::fs::write(
            dir.path().join("README.md"),
            r#"# Fixture Guide

## Fixture Section

- Fixture step
  - Nested fixture step

```python
def fixture_nav_markdown_embedded():
    return 1
```
"#,
        )
        .unwrap();

        // Index everything written above.
        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
        db.apply_changes(dir.path()).unwrap();
        dir
    }
22778
22779 fn assert_cli_expand_command_parses(command: &str) {
22780 let args = shell_split(command)
22781 .into_iter()
22782 .map(str::to_string)
22783 .collect::<Vec<_>>();
22784 assert!(
22785 try_parse_cli(args).is_ok(),
22786 "expand command should parse as a tsift CLI command: {command}"
22787 );
22788 }
22789
22790 fn setup_multiplicity_project() -> tempfile::TempDir {
22791 let dir = tempfile::tempdir().unwrap();
22792 std::fs::write(
22793 dir.path().join("Cargo.toml"),
22794 r#"[workspace]
22795members = ["crates/core-lib", "crates/cli-app"]
22796"#,
22797 )
22798 .unwrap();
22799 std::fs::create_dir_all(dir.path().join("crates/core-lib/src")).unwrap();
22800 std::fs::write(
22801 dir.path().join("crates/core-lib/Cargo.toml"),
22802 r#"[package]
22803name = "core-lib"
22804
22805[lib]
22806name = "core_lib"
22807
22808[features]
22809default = []
22810"#,
22811 )
22812 .unwrap();
22813 std::fs::write(
22814 dir.path().join("crates/core-lib/src/lib.rs"),
22815 "pub fn run() {}\n",
22816 )
22817 .unwrap();
22818 std::fs::create_dir_all(dir.path().join("crates/cli-app/src")).unwrap();
22819 std::fs::write(
22820 dir.path().join("crates/cli-app/Cargo.toml"),
22821 r#"[package]
22822name = "cli-app"
22823
22824[[bin]]
22825name = "cli-app"
22826
22827[dependencies]
22828core-lib = { path = "../core-lib" }
22829"#,
22830 )
22831 .unwrap();
22832 std::fs::write(
22833 dir.path().join("crates/cli-app/src/main.rs"),
22834 "use core_lib::run;\nfn main() { run(); }\n",
22835 )
22836 .unwrap();
22837 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
22838 db.apply_changes(dir.path()).unwrap();
22839
22840 let task_dir = dir.path().join("tasks/software");
22841 std::fs::create_dir_all(&task_dir).unwrap();
22842 std::fs::write(
22843 task_dir.join("tsift.md"),
22844 r#"---
22845agent_doc_session: tsift-multiplicity
22846agent_doc_format: template
22847---
22848
22849## Backlog
22850
22851<!-- agent:backlog -->
22852- [ ] [#corepkg] Update the core-lib Cargo package ownership model.
22853<!-- /agent:backlog -->
22854"#,
22855 )
22856 .unwrap();
22857 init_git_repo(dir.path());
22858 dir
22859 }
22860
22861 fn setup_dependency_dag_project() -> tempfile::TempDir {
22862 let dir = tempfile::tempdir().unwrap();
22863 std::fs::write(
22864 dir.path().join("main.rs"),
22865 "fn shared_helper() {}\nfn main() { shared_helper(); }\n",
22866 )
22867 .unwrap();
22868 std::fs::write(
22869 dir.path().join("Cargo.toml"),
22870 "[package]\nname = \"dag-fixture\"\n",
22871 )
22872 .unwrap();
22873 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
22874 db.apply_changes(dir.path()).unwrap();
22875
22876 let task_dir = dir.path().join("tasks/software");
22877 std::fs::create_dir_all(&task_dir).unwrap();
22878 std::fs::write(
22879 task_dir.join("tsift.md"),
22880 r#"---
22881agent_doc_session: tsift-dag
22882agent_doc_format: template
22883---
22884
22885## Exchange
22886
22887<!-- agent:exchange patch=append -->
22888Completed `#alpha`; touched files `main.rs`; tests `cargo test dependency_dag`; follow-up `#gamma`.
22889<!-- /agent:exchange -->
22890
22891## Backlog
22892
22893<!-- agent:backlog -->
22894- [ ] [#prep] Prepare Cargo.toml configuration before shared helper work.
22895- [ ] [#alpha] Update shared_helper in main.rs after #prep.
22896- [ ] [#beta] Refactor shared_helper tests in main.rs.
22897- [ ] [#gamma] Follow-up review for graph navigation.
22898<!-- /agent:backlog -->
22899"#,
22900 )
22901 .unwrap();
22902 dir
22903 }
22904
22905 fn setup_dependency_dag_cycle_project() -> tempfile::TempDir {
22906 let dir = setup_graph_index();
22907 let task_dir = dir.path().join("tasks/software");
22908 std::fs::create_dir_all(&task_dir).unwrap();
22909 std::fs::write(
22910 task_dir.join("tsift.md"),
22911 r#"---
22912agent_doc_session: tsift-dag-cycle
22913agent_doc_format: template
22914---
22915
22916## Backlog
22917
22918<!-- agent:backlog -->
22919- [ ] [#left] Left side depends on #right.
22920- [ ] [#right] Right side depends on #left.
22921<!-- /agent:backlog -->
22922"#,
22923 )
22924 .unwrap();
22925 dir
22926 }
22927
22928 fn seed_traversal_semantic_summaries(dir: &Path) {
22929 let summary_db = summarize::SummaryDb::open(&dir.join(".tsift/summaries.db")).unwrap();
22930 summary_db
22931 .insert(&summarize::Summary {
22932 id: 0,
22933 symbol_name: "helper".to_string(),
22934 file_path: "main.rs".to_string(),
22935 content_hash: "hash-main".to_string(),
22936 summary: "helper builds graph navigation handles for traversal.".to_string(),
22937 entities: Some(vec![
22938 summarize::Entity {
22939 name: "helper".to_string(),
22940 kind: "function".to_string(),
22941 description: "Builds graph navigation handles.".to_string(),
22942 },
22943 summarize::Entity {
22944 name: "TraversalGraph".to_string(),
22945 kind: "type".to_string(),
22946 description: "Carries GraphStore-backed traversal rows.".to_string(),
22947 },
22948 ]),
22949 relationships: Some(vec![summarize::Relationship {
22950 from: "helper".to_string(),
22951 to: "TraversalGraph".to_string(),
22952 kind: "uses".to_string(),
22953 }]),
22954 concept_labels: Some(vec![
22955 "graph navigation".to_string(),
22956 "semantic extraction".to_string(),
22957 ]),
22958 extracted_at: "1700000000".to_string(),
22959 model: "test-model".to_string(),
22960 tokens_input: Some(10),
22961 tokens_output: Some(5),
22962 })
22963 .unwrap();
22964 }
22965
22966 fn seed_tsift_memory_graph_db(dir: &Path) {
22967 let db = dir.join(".tsift").join("memory.db");
22968 let store = MemoryStore::open_or_create(&db).unwrap();
22969 let project = dir.to_string_lossy().to_string();
22970 let observation = MemoryEvent::new(
22971 MemoryEventKind::ImportedObservation,
22972 "claude-mem:observations:1",
22973 [
22974 "Graph memory adapter",
22975 "read-only projection",
22976 "graph-db should retrieve tsift memory observations",
22977 "Project memory is queried from .tsift/memory.db",
22978 "graph memory, tsift memory, semantic query",
22979 ]
22980 .join("\n\n"),
22981 )
22982 .with_session_id("claude-session-a")
22983 .with_observed_at_unix(1_700_000_000)
22984 .with_import("claude-mem", "observations:1")
22985 .with_metadata("project", project.clone())
22986 .with_metadata("observation_type", "fact")
22987 .with_metadata("prompt_number", "7")
22988 .with_metadata("discovery_tokens", "42")
22989 .with_metadata("content_hash", "hash-observation-1");
22990 store.insert_event(&observation).unwrap();
22991
22992 let summary = MemoryEvent::new(
22993 MemoryEventKind::ImportedSessionSummary,
22994 "claude-mem:session_summaries:2",
22995 [
22996 "Query old memory from graph-db",
22997 "Read-only tsift memory SQLite projection",
22998 "Semantic graph rows can point at existing memory",
22999 "Projected source and session nodes",
23000 "Keep capture ownership inside tsift-memory",
23001 "summary note",
23002 ]
23003 .join("\n\n"),
23004 )
23005 .with_session_id("claude-session-a")
23006 .with_observed_at_unix(1_700_000_010)
23007 .with_import("claude-mem", "session_summaries:2")
23008 .with_metadata("project", project)
23009 .with_metadata("prompt_number", "8")
23010 .with_metadata("discovery_tokens", "36");
23011 store.insert_event(&summary).unwrap();
23012
23013 let prompt = MemoryEvent::new(
23014 MemoryEventKind::ImportedUserPrompt,
23015 "claude-mem:user_prompts:3",
23016 "How can graph-db query tsift memory semantic history?",
23017 )
23018 .with_session_id("claude-session-a")
23019 .with_observed_at_unix(1_700_000_020)
23020 .with_import("claude-mem", "user_prompts:3")
23021 .with_metadata("prompt_number", "9");
23022 store.insert_event(&prompt).unwrap();
23023 }
23024
23025 #[test]
23026 fn graph_callers_query() {
23027 let dir = setup_graph_index();
23028 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23029 let callers = db.callers_of("helper").unwrap();
23030 assert_eq!(callers.len(), 1);
23031 assert_eq!(callers[0].caller_name, "main");
23032 }
23033
23034 #[test]
23035 fn graph_callees_query() {
23036 let dir = setup_graph_index();
23037 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23038 let callees = db.callees_of("main").unwrap();
23039 let names: Vec<&str> = callees.iter().map(|e| e.callee_name.as_str()).collect();
23040 assert!(names.contains(&"helper"));
23041 assert!(names.contains(&"new"));
23042 }
23043
23044 #[test]
23045 fn graph_no_callers_returns_empty() {
23046 let dir = setup_graph_index();
23047 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23048 let callers = db.callers_of("nonexistent").unwrap();
23049 assert!(callers.is_empty());
23050 }
23051
23052 #[test]
23053 fn graph_cmd_autoindexes_missing_index_by_default() {
23054 let dir = tempfile::tempdir().unwrap();
23055 std::fs::write(
23056 dir.path().join("main.rs"),
23057 "fn helper() {}\nfn main() { helper(); }\n",
23058 )
23059 .unwrap();
23060 let result = cmd_graph(
23061 "helper",
23062 dir.path(),
23063 true,
23064 false,
23065 None,
23066 20,
23067 false,
23068 true,
23069 false,
23070 false,
23071 false,
23072 false,
23073 false,
23074 TagpathSearchOpts::default(),
23075 );
23076
23077 assert!(result.is_ok());
23078 let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
23079 let summary = db.compute_changes(dir.path()).unwrap();
23080 assert_eq!(summary.new + summary.modified + summary.deleted, 0);
23081 }
23082
23083 #[test]
23084 fn traversal_graph_has_stable_typed_handles() {
23085 let dir = setup_traversal_project();
23086 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23087 let graph_again = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23088
23089 let file = resolve_traversal_node(&graph, "main.rs").unwrap();
23090 let symbol = resolve_traversal_node(&graph, "helper").unwrap();
23091 let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
23092 let session = resolve_traversal_node(&graph, "tsift-v0.1").unwrap();
23093
23094 assert!(file.handle.starts_with("gfil-"));
23095 assert!(symbol.handle.starts_with("gsym-"));
23096 assert!(backlog.handle.starts_with("gbak-"));
23097 assert!(session.handle.starts_with("gses-"));
23098
23099 assert_eq!(
23100 symbol.handle,
23101 resolve_traversal_node(&graph_again, "helper")
23102 .unwrap()
23103 .handle
23104 );
23105 assert_eq!(
23106 backlog.handle,
23107 resolve_traversal_node(&graph_again, "#kgnv")
23108 .unwrap()
23109 .handle
23110 );
23111 }
23112
23113 #[test]
23114 fn traversal_graph_links_backlog_items_to_code_tokens() {
23115 let dir = setup_traversal_project();
23116 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23117 let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
23118 let helper = resolve_traversal_node(&graph, "helper").unwrap();
23119
23120 assert!(graph.edges.iter().any(|edge| {
23121 edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
23122 }));
23123 }
23124
23125 #[test]
23126 fn session_hinted_traversal_skips_global_call_edges() {
23127 let dir = setup_traversal_project();
23128 let session = dir.path().join("tasks/software/tsift.md");
23129 let bounded = build_traversal_graph_source(dir.path(), &session, None).unwrap();
23130 let backlog = resolve_traversal_node(&bounded, "#kgnv").unwrap();
23131 let helper = resolve_traversal_node(&bounded, "helper").unwrap();
23132
23133 assert!(bounded.edges.iter().any(|edge| {
23134 edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
23135 }));
23136 assert!(
23137 !bounded.edges.iter().any(|edge| edge.relation == "calls"),
23138 "session-hinted graph-db projections should not materialize unrelated global call edges"
23139 );
23140
23141 let full = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
23142 assert!(
23143 full.edges.iter().any(|edge| edge.relation == "calls"),
23144 "root/full projections still carry the complete indexed call graph"
23145 );
23146 }
23147
23148 #[test]
23149 fn agent_doc_task_path_infers_matching_workspace_scope() {
23150 let dir = tempfile::tempdir().unwrap();
23151 std::fs::create_dir_all(dir.path().join("src/tsift")).unwrap();
23152 std::fs::create_dir_all(dir.path().join("tasks/software")).unwrap();
23153 std::fs::write(
23154 dir.path().join(".gitmodules"),
23155 "[submodule \"src/tsift\"]\n\tpath = src/tsift\n\turl = https://example.invalid/tsift.git\n",
23156 )
23157 .unwrap();
23158 let task = dir.path().join("tasks/software/tsift.md");
23159 std::fs::write(&task, "# tsift\n").unwrap();
23160
23161 let targets = resolve_search_index_targets(dir.path(), &task, None, false).unwrap();
23162 let query_db_path = resolve_query_db_path(dir.path(), &task, None).unwrap();
23163 let cfg = config::Config::load(dir.path()).unwrap();
23164
23165 assert_eq!(targets.len(), 1);
23166 assert_eq!(targets[0].scope_name.as_deref(), Some("tsift"));
23167 assert_eq!(targets[0].source_root, dir.path().join("src/tsift"));
23168 assert!(
23169 targets[0]
23170 .db_path
23171 .ends_with(".tsift/indexes/tsift/index.db")
23172 );
23173 assert_eq!(query_db_path, cfg.db_path_for(dir.path(), "tsift"));
23174 }
23175
23176 #[test]
23177 fn cargo_package_scope_selector_indexes_package_db() {
23178 let dir = setup_multiplicity_project();
23179 let targets =
23180 resolve_search_index_targets(dir.path(), dir.path(), Some("core_lib"), false).unwrap();
23181
23182 assert_eq!(targets.len(), 1);
23183 assert_eq!(targets[0].scope_name.as_deref(), Some("core-lib"));
23184 assert_eq!(targets[0].source_root, dir.path().join("crates/core-lib"));
23185 assert!(
23186 targets[0]
23187 .db_path
23188 .ends_with(".tsift/indexes/cargo/core-lib/index.db")
23189 );
23190
23191 cmd_index(
23192 dir.path(),
23193 false,
23194 false,
23195 false,
23196 false,
23197 true,
23198 false,
23199 Some("core_lib"),
23200 false,
23201 true,
23202 false,
23203 false,
23204 false,
23205 false,
23206 )
23207 .unwrap();
23208 assert!(targets[0].db_path.exists());
23209 }
23210
23211 #[test]
23212 fn path_inference_prefers_nested_cargo_package_without_submodule() {
23213 let dir = setup_multiplicity_project();
23214 let source = dir.path().join("crates/cli-app/src/main.rs");
23215 let targets = resolve_search_index_targets(dir.path(), &source, None, false).unwrap();
23216
23217 assert_eq!(targets.len(), 1);
23218 assert_eq!(targets[0].scope_name.as_deref(), Some("cli-app"));
23219 assert_eq!(targets[0].source_root, dir.path().join("crates/cli-app"));
23220 }
23221
23222 #[test]
23223 fn traversal_graph_projects_cargo_multiplicity_nodes_and_edges() {
23224 let dir = setup_multiplicity_project();
23225 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23226 let workspace = resolve_traversal_node(&graph, "root cargo workspace").unwrap();
23227 let core = resolve_traversal_node(&graph, "core-lib").unwrap();
23228 let cli = resolve_traversal_node(&graph, "cli-app").unwrap();
23229 let core_file = resolve_traversal_node(&graph, "crates/core-lib/src/lib.rs").unwrap();
23230
23231 assert_eq!(workspace.kind, "cargo_workspace");
23232 assert_eq!(core.kind, "cargo_package");
23233 assert_eq!(
23234 core.properties.get("features"),
23235 Some(&"default".to_string())
23236 );
23237 assert!(graph.edges.iter().any(|edge| {
23238 edge.from == workspace.handle
23239 && edge.to == core.handle
23240 && edge.relation == "contains_package"
23241 }));
23242 assert!(graph.edges.iter().any(|edge| {
23243 edge.from == core.handle && edge.to == core_file.handle && edge.relation == "owns_file"
23244 }));
23245 assert!(graph.edges.iter().any(|edge| {
23246 edge.from == cli.handle
23247 && edge.to == core.handle
23248 && (edge.relation == "declares_dependency" || edge.relation == "uses_crate")
23249 }));
23250 }
23251
23252 #[test]
23253 fn conflict_matrix_uses_cargo_package_mentions_as_ownership_evidence() {
23254 let dir = setup_multiplicity_project();
23255 let session = dir.path().join("tasks/software/tsift.md");
23256 let report =
23257 build_conflict_matrix_report(&session, None, &["corepkg".to_string()], 3, 8, 20)
23258 .unwrap();
23259
23260 assert!(report.per_target_fail_closed.is_empty());
23261 let candidate = report
23262 .candidates
23263 .iter()
23264 .find(|candidate| candidate.target == "corepkg")
23265 .unwrap();
23266 assert!(
23267 candidate
23268 .owned_files
23269 .iter()
23270 .any(|file| file == "crates/core-lib/Cargo.toml"),
23271 "{:?}",
23272 candidate.owned_files
23273 );
23274 }
23275
23276 #[test]
23277 fn traversal_graph_links_agent_doc_queue_job_packets_to_backlog() {
23278 let dir = setup_traversal_project();
23279 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23280 let job = resolve_traversal_node(&graph, "do #kgnv").unwrap();
23281 let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
23282
23283 assert_eq!(job.kind, "job_packet");
23284 assert!(job.handle.starts_with("gjob-"));
23285 assert!(graph.edges.iter().any(|edge| {
23286 edge.from == job.handle && edge.to == backlog.handle && edge.relation == "targets"
23287 }));
23288
23289 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23290 let jobs = store.nodes_by_kind("job_packet").unwrap();
23291 assert!(
23292 jobs.iter()
23293 .any(|node| node.properties.get("ref_id") == Some(&"kgnv".to_string())),
23294 "expected queued job packet in graph store, got {jobs:?}"
23295 );
23296 }
23297
23298 #[test]
23299 fn traversal_graph_includes_routes_and_handler_edges() {
23300 let dir = tempfile::tempdir().unwrap();
23301 std::fs::write(
23302 dir.path().join("api.py"),
23303 r#"@router.get("/items")
23304def list_items():
23305 return []
23306"#,
23307 )
23308 .unwrap();
23309 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23310 db.apply_changes(dir.path()).unwrap();
23311
23312 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23313 let route = resolve_traversal_node(&graph, "/items").unwrap();
23314 let handler = resolve_traversal_node(&graph, "list_items").unwrap();
23315
23316 assert_eq!(route.kind, "route");
23317 assert!(graph.edges.iter().any(|edge| {
23318 edge.from == route.handle && edge.to == handler.handle && edge.relation == "handled_by"
23319 }));
23320 }
23321
23322 #[test]
23323 fn traversal_graph_projects_rust_ast_navigation_edges() {
23324 let dir = tempfile::tempdir().unwrap();
23325 std::fs::write(
23326 dir.path().join("main.rs"),
23327 r#"mod api {
23328 pub fn helper() {}
23329 pub fn handler() { helper(); }
23330}
23331
23332fn main() { api::handler(); }
23333"#,
23334 )
23335 .unwrap();
23336 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23337 db.apply_changes(dir.path()).unwrap();
23338
23339 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23340 let api = resolve_ast_span_node(&graph, "api", "mod");
23341 let helper = resolve_ast_span_node(&graph, "helper", "function");
23342 let handler = resolve_ast_span_node(&graph, "handler", "function");
23343
23344 assert_eq!(helper.kind, "ast_span");
23345 assert!(helper.handle.starts_with("span-"));
23346 assert_eq!(helper.properties.get("language"), Some(&"rust".to_string()));
23347 assert!(graph.edges.iter().any(|edge| {
23348 edge.from == api.handle && edge.to == helper.handle && edge.relation == "contains"
23349 }));
23350 assert!(graph.edges.iter().any(|edge| {
23351 edge.from == api.handle && edge.to == helper.handle && edge.relation == "child"
23352 }));
23353 assert!(graph.edges.iter().any(|edge| {
23354 edge.from == helper.handle && edge.to == api.handle && edge.relation == "parent"
23355 }));
23356 assert!(graph.edges.iter().any(|edge| {
23357 edge.from == helper.handle
23358 && edge.to == handler.handle
23359 && edge.relation == "next_sibling"
23360 }));
23361 assert!(graph.edges.iter().any(|edge| {
23362 edge.from == handler.handle
23363 && edge.to == helper.handle
23364 && edge.relation == "previous_sibling"
23365 }));
23366 assert!(graph.edges.iter().any(|edge| {
23367 edge.from == helper.handle
23368 && edge.to == api.handle
23369 && edge.relation == "enclosing_module"
23370 }));
23371 assert!(graph.edges.iter().any(|edge| {
23372 edge.from == handler.handle && edge.to == helper.handle && edge.relation == "calls"
23373 }));
23374
23375 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23376 let ast_nodes = store.nodes_by_kind("ast_span").unwrap();
23377 assert!(
23378 ast_nodes.iter().any(|node| node.id == helper.handle
23379 && node.properties.get("symbol_kind") == Some(&"function".to_string())),
23380 "expected helper AST span in graph store, got {ast_nodes:?}"
23381 );
23382 assert!(
23383 store
23384 .outgoing_edges(&helper.handle, Some("parent"))
23385 .unwrap()
23386 .iter()
23387 .any(|edge| edge.to_id == api.handle),
23388 "expected persisted AST parent edge"
23389 );
23390 }
23391
23392 #[test]
23393 fn traversal_graph_projects_markdown_section_block_edges() {
23394 let dir = tempfile::tempdir().unwrap();
23395 std::fs::write(
23396 dir.path().join("README.md"),
23397 "# Guide\n\n- Setup\n- Verify\n\n```rust\nfn demo() {}\n```\n",
23398 )
23399 .unwrap();
23400 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23401 db.apply_changes(dir.path()).unwrap();
23402
23403 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23404 let guide = resolve_ast_span_node(&graph, "Guide", "heading");
23405 let code = resolve_ast_span_node(&graph, "rust", "code_block");
23406 let embedded = resolve_ast_span_node(&graph, "demo", "function");
23407 let list_item = graph
23408 .nodes
23409 .values()
23410 .find(|node| {
23411 node.kind == "ast_span"
23412 && node.properties.get("symbol_kind") == Some(&"list_item".to_string())
23413 && node.properties.get("section_handle") == Some(&guide.handle)
23414 })
23415 .expect("missing Markdown list item AST span");
23416
23417 assert_eq!(
23418 code.properties.get("markdown_block_kind"),
23419 Some(&"fenced_code_block".to_string())
23420 );
23421 assert_eq!(
23422 guide.properties.get("heading_level"),
23423 Some(&"1".to_string())
23424 );
23425 assert_eq!(
23426 embedded.properties.get("embedded"),
23427 Some(&"true".to_string())
23428 );
23429 assert_eq!(
23430 embedded.properties.get("language"),
23431 Some(&"rust".to_string())
23432 );
23433 assert_eq!(
23434 embedded.properties.get("markdown_block_handle"),
23435 Some(&code.handle)
23436 );
23437 assert!(graph.edges.iter().any(|edge| {
23438 edge.from == guide.handle
23439 && edge.to == code.handle
23440 && edge.relation == "contains_markdown_block"
23441 }));
23442 assert!(graph.edges.iter().any(|edge| {
23443 edge.from == code.handle
23444 && edge.to == guide.handle
23445 && edge.relation == "enclosing_section"
23446 }));
23447 assert!(graph.edges.iter().any(|edge| {
23448 edge.from == guide.handle
23449 && edge.to == list_item.handle
23450 && edge.relation == "contains_markdown_block"
23451 }));
23452 assert!(graph.edges.iter().any(|edge| {
23453 edge.from == code.handle
23454 && edge.to == embedded.handle
23455 && edge.relation == "contains_embedded_symbol"
23456 }));
23457 assert!(graph.edges.iter().any(|edge| {
23458 edge.from == embedded.handle
23459 && edge.to == code.handle
23460 && edge.relation == "embedded_in_fence"
23461 }));
23462 assert!(graph.edges.iter().any(|edge| {
23463 edge.from == guide.handle
23464 && edge.to == embedded.handle
23465 && edge.relation == "contains_embedded_code"
23466 }));
23467
23468 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23469 assert!(
23470 store
23471 .outgoing_edges(&guide.handle, Some("contains_markdown_block"))
23472 .unwrap()
23473 .iter()
23474 .any(|edge| edge.to_id == code.handle),
23475 "expected persisted Markdown section/block edge"
23476 );
23477 assert!(
23478 store
23479 .outgoing_edges(&code.handle, Some("contains_embedded_symbol"))
23480 .unwrap()
23481 .iter()
23482 .any(|edge| edge.to_id == embedded.handle),
23483 "expected persisted Markdown fence/embedded symbol edge"
23484 );
23485 }
23486
    // End-to-end lock test over the multilingual AST navigation fixture. It checks, in order:
    //   1. every expected (language, name, kind, file) symbol is indexed with byte spans;
    //   2. each expected AST span node has a `span-` handle that is identical across two
    //      graph builds and keeps its language label;
    //   3. Markdown section -> code block -> embedded symbol edges exist;
    //   4. the graph stays within fixed node/edge ceilings;
    //   5. the search budget report ranks the Python entry first with a stable handle, and
    //      the expand commands it exposes parse as CLI commands;
    //   6. the Markdown code-block symbol exposes its embedded symbol.
    #[test]
    fn multilingual_ast_navigation_fixture_locks_recall_handles_expands_and_budget() {
        let dir = setup_multilingual_ast_navigation_project();
        let db =
            index::IndexDb::open_read_only_resilient(&dir.path().join(".tsift/index.db")).unwrap();
        let symbols = db.all_symbols().unwrap();
        // Tuples are (language, symbol name, symbol kind, file-name suffix).
        let expected_symbols = [
            ("rust", "fixture_nav_rust_entry", "function", "rust.rs"),
            (
                "python",
                "fixture_nav_python_entry",
                "function",
                "python.py",
            ),
            (
                "typescript",
                "fixture_nav_typescript_entry",
                "function",
                "typescript.ts",
            ),
            (
                "javascript",
                "fixture_nav_javascript_entry",
                "function",
                "javascript.js",
            ),
            (
                "kotlin",
                "fixture_nav_kotlin_entry",
                "function",
                "kotlin.kt",
            ),
            ("zig", "fixture_nav_zig_entry", "function", "zig.zig"),
            ("bash", "fixture_nav_bash_entry", "function", "bash.sh"),
            ("markdown", "Fixture Section", "heading", "README.md"),
            ("markdown", "Fixture step", "list_item", "README.md"),
            ("markdown", "python", "code_block", "README.md"),
        ];

        // Recall: each expected symbol must be present in the index and carry both byte offsets.
        for (language, name, kind, file) in expected_symbols {
            let symbol = symbols
                .iter()
                .find(|symbol| {
                    symbol.language == language
                        && symbol.name == name
                        && symbol.kind == kind
                        && symbol.file.ends_with(file)
                })
                .unwrap_or_else(|| panic!("missing indexed {language} {kind} {name}"));
            assert!(
                symbol.start_byte.is_some() && symbol.end_byte.is_some(),
                "{language} {name} should carry AST byte spans"
            );
        }

        // The graph is built twice so handles can be compared across builds.
        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
        let graph_again = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
        // Tuples are (node name, symbol kind, expected `language` property). The last entry is
        // the function embedded in the Markdown fence, which is expected to be labelled python.
        let expected_ast_nodes = [
            ("fixture_nav_rust_entry", "function", "rust"),
            ("fixture_nav_python_entry", "function", "python"),
            ("fixture_nav_typescript_entry", "function", "typescript"),
            ("fixture_nav_javascript_entry", "function", "javascript"),
            ("fixture_nav_kotlin_entry", "function", "kotlin"),
            ("fixture_nav_zig_entry", "function", "zig"),
            ("fixture_nav_bash_entry", "function", "bash"),
            ("Fixture Section", "heading", "markdown"),
            ("Fixture step", "list_item", "markdown"),
            ("python", "code_block", "markdown"),
            ("fixture_nav_markdown_embedded", "function", "python"),
        ];

        for (name, kind, language) in expected_ast_nodes {
            let node = resolve_ast_span_node(&graph, name, kind);
            let repeated = resolve_ast_span_node(&graph_again, name, kind);
            assert!(
                node.handle.starts_with("span-"),
                "{name} handle: {}",
                node.handle
            );
            assert_eq!(
                node.handle, repeated.handle,
                "{language} {name} handle drifted"
            );
            assert_eq!(
                node.properties.get("language"),
                Some(&language.to_string()),
                "{name} should keep its language label"
            );
        }

        // Markdown structure: section contains the fence, the fence contains the embedded symbol.
        let markdown_section = resolve_ast_span_node(&graph, "Fixture Section", "heading");
        let markdown_code = resolve_ast_span_node(&graph, "python", "code_block");
        let embedded = resolve_ast_span_node(&graph, "fixture_nav_markdown_embedded", "function");
        assert!(graph.edges.iter().any(|edge| {
            edge.from == markdown_section.handle
                && edge.to == markdown_code.handle
                && edge.relation == "contains_markdown_block"
        }));
        assert!(graph.edges.iter().any(|edge| {
            edge.from == markdown_code.handle
                && edge.to == embedded.handle
                && edge.relation == "contains_embedded_symbol"
        }));
        // Size ceilings guard against the fixture's projection growing unnoticed.
        assert!(
            graph.nodes.len() <= 80,
            "multilingual AST fixture should stay bounded, got {} nodes",
            graph.nodes.len()
        );
        assert!(
            graph.edges.len() <= 180,
            "multilingual AST fixture should stay bounded, got {} edges",
            graph.edges.len()
        );

        // Search budget report: built twice from identical inputs to compare ranked handles.
        let response = empty_search_response(dir.path(), "lexical");
        let symbol_hits = db.symbol_search("fixture_nav_python_entry", 20).unwrap();
        let report = build_relative_search_budget_report(
            "fixture_nav_python_entry",
            "lexical",
            dir.path(),
            &response,
            &symbol_hits,
            ResponseBudget::new(Some(8), Some(120)),
            &SearchFacetFilters::default(),
        );
        let report_again = build_relative_search_budget_report(
            "fixture_nav_python_entry",
            "lexical",
            dir.path(),
            &response,
            &symbol_hits,
            ResponseBudget::new(Some(8), Some(120)),
            &SearchFacetFilters::default(),
        );

        let top = report
            .ranked
            .first()
            .expect("ranked preview should not be empty");
        assert_eq!(top.source, "symbol_span");
        assert_eq!(top.name.as_deref(), Some("fixture_nav_python_entry"));
        assert!(top.handle.starts_with("srnk-"));
        assert_eq!(top.handle, report_again.ranked[0].handle);
        assert!(
            top.reasons.iter().any(|reason| reason == "ast_span"),
            "expected AST span ranking reason, got {:?}",
            top.reasons
        );
        // Both lists are capped at 8, the first value passed to `ResponseBudget::new` above.
        assert!(report.ranked.len() <= 8);
        assert!(report.symbols.len() <= 8);

        // Every expand command exposed for the Python entry must parse as a CLI command.
        let symbol = report
            .symbols
            .iter()
            .find(|symbol| symbol.name == "fixture_nav_python_entry")
            .expect("missing search preview symbol");
        assert_cli_expand_command_parses(&symbol.expand);
        let ast = symbol
            .ast
            .as_ref()
            .expect("search symbol should expose AST");
        assert_cli_expand_command_parses(&ast.expand.source_window);
        assert_cli_expand_command_parses(ast.expand.source_body.as_ref().unwrap());
        assert_cli_expand_command_parses(&ast.expand.symbol_read);

        // Searching for "python" should surface the Markdown code block named after its fence
        // language, together with the symbol embedded inside it.
        let markdown_hits = db.symbol_search("python", 20).unwrap();
        let markdown_report = build_relative_search_budget_report(
            "python",
            "lexical",
            dir.path(),
            &response,
            &markdown_hits,
            ResponseBudget::new(Some(8), Some(120)),
            &SearchFacetFilters::default(),
        );
        let markdown_symbol = markdown_report
            .symbols
            .iter()
            .find(|symbol| symbol.kind == "code_block" && symbol.language == "markdown")
            .expect("missing Markdown code-block symbol");
        let markdown_ast = markdown_symbol
            .ast
            .as_ref()
            .expect("Markdown code block should expose AST");
        assert_cli_expand_command_parses(markdown_ast.expand.markdown_ast.as_ref().unwrap());
        assert_eq!(
            markdown_ast
                .span
                .markdown
                .as_ref()
                .unwrap()
                .embedded_symbols[0]
                .name,
            "fixture_nav_markdown_embedded"
        );
    }
23683
23684 #[test]
23685 fn traversal_neighborhood_handles_prioritizes_high_signal_edges_when_limited() {
23686 let edges = vec![
23687 TraversalEdge {
23688 from: "origin".to_string(),
23689 to: "aaa_low".to_string(),
23690 relation: "unknown".to_string(),
23691 label: None,
23692 weight: 1,
23693 },
23694 TraversalEdge {
23695 from: "origin".to_string(),
23696 to: "zzz_high".to_string(),
23697 relation: "mentions".to_string(),
23698 label: None,
23699 weight: 1,
23700 },
23701 ];
23702
23703 let handles = traversal_neighborhood_handles(&edges, "origin", 1, 2);
23704
23705 assert!(handles.contains("origin"));
23706 assert!(handles.contains("zzz_high"), "{handles:?}");
23707 assert!(!handles.contains("aaa_low"), "{handles:?}");
23708 }
23709
23710 #[test]
23711 fn traversal_materializes_provider_neutral_sqlite_graph() {
23712 let dir = setup_traversal_project();
23713 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23714 let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
23715
23716 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23717 let backlog_nodes = store.nodes_by_kind("backlog").unwrap();
23718 assert!(
23719 backlog_nodes.iter().any(|node| node.id == backlog.handle
23720 && node.properties.get("ref_id") == Some(&"kgnv".to_string())),
23721 "expected materialized backlog node, got {backlog_nodes:?}"
23722 );
23723 assert!(
23724 store
23725 .all_nodes()
23726 .unwrap()
23727 .iter()
23728 .any(|node| node.kind == GRAPH_PROJECTION_META_KIND
23729 && node.properties.get("projection_version")
23730 == Some(&GRAPH_PROJECTION_VERSION.to_string())),
23731 "expected projection metadata node"
23732 );
23733 let source_handles = store.nodes_by_kind("source_handle").unwrap();
23734 assert!(
23735 source_handles
23736 .iter()
23737 .any(|node| node.properties.get("file") == Some(&"main.rs".to_string())),
23738 "expected bounded source_handle rows, got {source_handles:?}"
23739 );
23740 let worker_context = store.nodes_by_kind("worker_context").unwrap();
23741 assert!(
23742 worker_context
23743 .iter()
23744 .any(|node| node.properties.get("target")
23745 == Some(&"tasks/software/tsift.md".to_string())),
23746 "expected bounded worker_context rows, got {worker_context:?}"
23747 );
23748 let worker_results = store.nodes_by_kind("worker_result").unwrap();
23749 assert!(
23750 worker_results.iter().any(|node| {
23751 node.properties.get("ref_id") == Some(&"kgnv".to_string())
23752 && node.properties.get("status") == Some(&"completed".to_string())
23753 && node.properties.get("touched_files") == Some(&"main.rs".to_string())
23754 && node.properties.get("follow_up_ids") == Some(&"gfix".to_string())
23755 }),
23756 "expected worker_result rows, got {worker_results:?}"
23757 );
23758 }
23759
23760 #[test]
23761 fn traversal_projection_materializes_cached_semantic_rows() {
23762 let dir = setup_traversal_project();
23763 seed_traversal_semantic_summaries(dir.path());
23764 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23765 let helper = resolve_traversal_node(&graph, "helper").unwrap();
23766 let concept = resolve_traversal_node(&graph, "graph navigation").unwrap();
23767 let entity = resolve_traversal_node(&graph, "TraversalGraph").unwrap();
23768
23769 assert_eq!(concept.kind, "semantic_concept");
23770 assert_eq!(entity.kind, "semantic_entity");
23771 assert!(concept.handle.starts_with("gcon-"));
23772 assert!(entity.handle.starts_with("gent-"));
23773
23774 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23775 assert!(
23776 store
23777 .nodes_by_kind("semantic_concept")
23778 .unwrap()
23779 .iter()
23780 .any(|node| node.label == "semantic extraction"
23781 && node.properties.contains_key("embedding")),
23782 "expected persisted concept embeddings"
23783 );
23784 assert!(
23785 store
23786 .outgoing_edges(&helper.handle, Some("mentions_concept"))
23787 .unwrap()
23788 .iter()
23789 .any(|edge| edge.to_id == concept.handle),
23790 "expected helper symbol to link to cached summary concept"
23791 );
23792 assert!(
23793 store
23794 .outgoing_edges(
23795 &semantic_entity_handle("helper", "function"),
23796 Some("semantic_relation")
23797 )
23798 .unwrap()
23799 .iter()
23800 .any(|edge| edge.to_id == entity.handle
23801 && edge.properties.get("relationship_kind") == Some(&"uses".to_string())),
23802 "expected LLM relationship rows projected into GraphStore"
23803 );
23804 }
23805
    // Checks that rows from the tsift memory database are projected into the SQLite graph
    // store after a refresh: per-event source handles, session/event/concept nodes, the edges
    // linking them, and their visibility to semantic and graph-db related queries.
    #[test]
    fn traversal_projection_materializes_tsift_memory_rows() {
        let dir = setup_traversal_project();
        // NOTE(review): presumably seeds the claude-mem-imported rows (`claude-session-a`,
        // `claude-mem:observations:1`) queried further down; confirm against the helper.
        seed_tsift_memory_graph_db(dir.path());
        let memory_db = dir.path().join(".tsift").join("memory.db");
        let store = MemoryStore::open_or_create(&memory_db).unwrap();
        // Two native events that share source, session id and timestamp and differ only in
        // their summary text; the assertion below requires them to stay distinguishable.
        for summary in ["first closeout", "second closeout"] {
            let event = MemoryEvent::new(
                MemoryEventKind::ResponseSummary,
                "tasks/software/tsift.md",
                summary,
            )
            .with_session_id("tasks/software/tsift.md")
            .with_observed_at_unix(1_700_000_100);
            store.insert_event(&event).unwrap();
        }
        // The refresh must run after the inserts so the projection can see the new events.
        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
        // From here on `store` is the graph store; it shadows the memory store binding above.
        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();

        let native_sources = store
            .nodes_by_kind("source_handle")
            .unwrap()
            .into_iter()
            .filter(|node| {
                node.properties.get("provider") == Some(&"tsift-memory".to_string())
                    && node.properties.get("source_ref")
                        == Some(&"tasks/software/tsift.md".to_string())
            })
            .collect::<Vec<_>>();
        assert_eq!(
            native_sources.len(),
            2,
            "same-source native memory events must get distinct source handles"
        );

        // Locate the imported source handle, session, event and concept nodes by property.
        let source = store
            .nodes_by_kind("source_handle")
            .unwrap()
            .into_iter()
            .find(|node| {
                node.properties.get("source_ref") == Some(&"claude-mem:observations:1".to_string())
            })
            .expect("expected tsift-memory source handle");
        let session = store
            .nodes_by_kind("memory_session")
            .unwrap()
            .into_iter()
            .find(|node| {
                node.properties.get("provider") == Some(&"tsift-memory".to_string())
                    && node.properties.get("session_id") == Some(&"claude-session-a".to_string())
            })
            .expect("expected tsift-memory session node");
        let event = store
            .nodes_by_kind("memory_event")
            .unwrap()
            .into_iter()
            .find(|node| {
                node.properties.get("source_ref") == Some(&"claude-mem:observations:1".to_string())
                    && node.properties.get("provider") == Some(&"tsift-memory".to_string())
                    && node.properties.get("imported_from") == Some(&"claude-mem".to_string())
            })
            .expect("expected tsift-memory event node");
        let concept = store
            .nodes_by_kind("semantic_concept")
            .unwrap()
            .into_iter()
            .find(|node| {
                node.properties.get("provider") == Some(&"tsift-memory".to_string())
                    && node.label.contains("Graph memory adapter")
                    && node.properties.contains_key("embedding")
            })
            .expect("expected tsift-memory semantic concept");

        // Edge checks: session -> source, session -> event, event -> source, source -> concept.
        assert!(
            store
                .outgoing_edges(&session.id, Some("records_memory_source"))
                .unwrap()
                .iter()
                .any(|edge| edge.to_id == source.id),
            "expected session to link to source handle"
        );
        assert!(
            store
                .outgoing_edges(&session.id, Some("records_memory_event"))
                .unwrap()
                .iter()
                .any(|edge| edge.to_id == event.id),
            "expected session to link to memory event"
        );
        assert!(
            store
                .outgoing_edges(&event.id, Some("projects_source"))
                .unwrap()
                .iter()
                .any(|edge| edge.to_id == source.id),
            "expected memory event to project source handle"
        );
        assert!(
            store
                .outgoing_edges(&source.id, Some("mentions_concept"))
                .unwrap()
                .iter()
                .any(|edge| edge.to_id == concept.id),
            "expected source handle to seed semantic concept"
        );

        // The concept found above must be retrievable by a semantic query with a positive score.
        let related = semantic_related_report_from_store(
            dir.path(),
            None,
            "tsift memory graph adapter",
            5,
            SemanticRelatedKind::Concept,
            &store,
        )
        .unwrap();
        assert!(
            related
                .items
                .iter()
                .any(|item| item.handle == concept.id && item.score > 0.0),
            "expected semantic query to retrieve tsift-memory concept, got {:?}",
            related.items
        );

        // The same query through the graph-db report must report `ready` and include a
        // tsift-memory semantic concept among the returned nodes.
        let graph_related = graph_db_report_from_store(
            dir.path(),
            None,
            "sqlite",
            GraphDbQuery::Related {
                query: "tsift memory graph adapter".to_string(),
                kind: SemanticRelatedKind::Concept,
                depth: 1,
                seed_limit: 5,
                limit: 20,
            },
            &store,
            sqlite_graph_freshness(&store, "root").unwrap(),
            Vec::new(),
        )
        .unwrap();
        assert_eq!(
            graph_related
                .readiness
                .as_ref()
                .map(|readiness| readiness.status.as_str()),
            Some("ready"),
            "tsift-memory semantic rows should satisfy graph-db related readiness"
        );
        assert!(
            graph_related.nodes.iter().any(|node| {
                node.kind == "semantic_concept"
                    && node.properties.get("provider") == Some(&"tsift-memory".to_string())
            }),
            "expected related graph output to include tsift-memory semantic rows"
        );
    }
23962
23963 #[test]
23964 fn semantic_related_query_uses_persisted_graph_embeddings() {
23965 let dir = setup_traversal_project();
23966 seed_traversal_semantic_summaries(dir.path());
23967 refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
23968 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23969
23970 let report = semantic_related_report_from_store(
23971 dir.path(),
23972 None,
23973 "graph navigation",
23974 5,
23975 SemanticRelatedKind::Concept,
23976 &store,
23977 )
23978 .unwrap();
23979
23980 assert_eq!(report.embedding_model, SEMANTIC_EMBEDDING_MODEL);
23981 assert!(
23982 report
23983 .items
23984 .iter()
23985 .any(|item| item.label == "graph navigation"
23986 && item.kind == "semantic_concept"
23987 && item.score > 0.9),
23988 "expected nearest concept match from graph embeddings, got {:?}",
23989 report.items
23990 );
23991 }
23992
/// Runs a `Related` graph-db query over a refreshed SQLite store and checks the
/// knowledge-retrieval metadata, readiness, semantic seeds, expanded nodes/edges,
/// and output-budget diagnostics of the resulting report.
#[test]
fn graph_db_related_query_uses_semantic_seeds_and_incident_neighborhoods() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);
    refresh_traversal_graph_store(root, root, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let related_query = GraphDbQuery::Related {
        query: "graph navigation".to_string(),
        kind: SemanticRelatedKind::All,
        depth: 1,
        seed_limit: 2,
        limit: 20,
    };
    let freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        related_query,
        &store,
        freshness,
        Vec::new(),
    )
    .unwrap();

    // Retrieval metadata describes how the neighborhood was produced.
    let knowledge = report.knowledge_retrieval.as_ref().unwrap();
    assert_eq!(knowledge.mode, "semantic_seeded_neighborhood");
    assert_eq!(knowledge.seed_kind, "all");
    assert_eq!(knowledge.depth, 1);

    let readiness_status = report
        .readiness
        .as_ref()
        .map(|readiness| readiness.status.as_str());
    assert_eq!(readiness_status, Some("ready"));

    let mentions_incident = knowledge
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("incident"));
    assert!(mentions_incident);

    // The natural-language query must seed the matching semantic concept.
    let seeded_concept = report.semantic_related.iter().any(|item| {
        item.label == "graph navigation" && item.kind == "semantic_concept" && item.score > 0.9
    });
    assert!(
        seeded_concept,
        "expected natural-language query to seed the graph navigation concept, got {:?}",
        report.semantic_related
    );

    let concept_node_present = report
        .nodes
        .iter()
        .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation");
    assert!(concept_node_present);

    let helper_symbol_present = report
        .nodes
        .iter()
        .any(|node| node.kind == "symbol" && node.label == "helper");
    assert!(
        helper_symbol_present,
        "incident expansion from semantic seed should recover source symbols, got {:?}",
        report
            .nodes
            .iter()
            .map(|node| (&node.kind, &node.label))
            .collect::<Vec<_>>()
    );

    let has_concept_mention_edge = report
        .edges
        .iter()
        .any(|edge| edge.kind == "mentions_concept");
    assert!(has_concept_mention_edge);

    let budget_reports_ranking_signals = match report.output_budget.as_ref() {
        Some(budget) => budget
            .diagnostics
            .iter()
            .any(|diagnostic| diagnostic.contains("budget ranking signals")),
        None => false,
    };
    assert!(
        budget_reports_ranking_signals,
        "expected related output budget diagnostics, got {:?}",
        report.output_budget
    );
}
24077
/// Without seeded semantic summaries, a `Related` query should report a blocked,
/// fail-closed readiness gate with `summary_cache_empty` as the reason, list the
/// follow-up commands, and repeat the gate in the retrieval diagnostics.
#[test]
fn graph_db_related_reports_summary_extract_gate_when_summary_cache_empty() {
    let project = setup_graph_index();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let related_query = GraphDbQuery::Related {
        query: "graph navigation".to_string(),
        kind: SemanticRelatedKind::All,
        depth: 1,
        seed_limit: 2,
        limit: 20,
    };
    let freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        related_query,
        &store,
        freshness,
        Vec::new(),
    )
    .unwrap();

    let gate = report.readiness.as_ref().unwrap();
    assert_eq!(gate.status, "blocked");
    assert_eq!(gate.reason, "summary_cache_empty");
    assert!(gate.fail_closed);

    let expected_next_commands = vec![
        "tsift summarize --extract .".to_string(),
        graph_db_refresh_command(root, None),
    ];
    assert_eq!(gate.next_commands, expected_next_commands);

    let retrieval_diagnostics = &report.knowledge_retrieval.as_ref().unwrap().diagnostics;
    let carries_gate = retrieval_diagnostics.iter().any(|diagnostic| {
        diagnostic.contains("summary cache empty")
            && diagnostic.contains("graph-db materialized code/session rows")
    });
    assert!(
        carries_gate,
        "expected related diagnostics to carry readiness gate, got {:?}",
        report.knowledge_retrieval.as_ref().unwrap().diagnostics
    );
}
24125
/// Builds a seed with one `mentions_concept` neighbor (`zzz_high`) and 24
/// `weak_link` neighbors whose ids sort earlier, then checks that a node cap of 3
/// keeps the seed first and `zzz_high` second while reporting truncation.
#[test]
fn graph_db_semantic_seeded_neighborhood_scores_before_caps() {
    let low_signal_ids: Vec<String> = (0..24).map(|idx| format!("aaa_low_{idx:02}")).collect();

    let mut nodes = vec![
        SubstrateGraphNode::new("seed", "semantic_concept", "graph budget"),
        SubstrateGraphNode::new("zzz_high", "symbol", "high_signal"),
    ];
    nodes.extend(
        low_signal_ids
            .iter()
            .enumerate()
            .map(|(idx, id)| SubstrateGraphNode::new(id.clone(), "note", format!("low {idx}"))),
    );

    let mut edges = vec![SubstrateGraphEdge::new(
        "zzz_high",
        "seed",
        "mentions_concept",
    )];
    edges.extend(
        low_signal_ids
            .into_iter()
            .map(|id| SubstrateGraphEdge::new(id, "seed", "weak_link")),
    );

    let mut store = SqliteGraphStore::in_memory().unwrap();
    let projection = GraphProjection { nodes, edges };
    store.replace_projection(&projection).unwrap();

    let neighborhood =
        graph_db_semantic_seeded_neighborhood(&store, &["seed".to_string()], 1, 3).unwrap();

    assert_eq!(neighborhood.nodes.len(), 3);
    assert_eq!(neighborhood.nodes[0].id, "seed");
    assert_eq!(
        neighborhood.nodes[1].id, "zzz_high",
        "expected semantic mention edge to survive caps before lexicographic low-signal nodes: {:?}",
        neighborhood.nodes
    );
    assert!(neighborhood.truncated);

    let reports_scan_cap = neighborhood
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("per-node edge scan cap"));
    assert!(reports_scan_cap, "{:?}", neighborhood.diagnostics);

    let reports_skipped = neighborhood
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("skipped"));
    assert!(reports_skipped, "{:?}", neighborhood.diagnostics);
}
24179
/// Checks that semantic rows from graph-db evidence flow into the conflict-matrix
/// candidate (score, reasons, related rows) and that the semantic dispatch score
/// decides the ranking between two candidates with identical risk.
#[test]
fn conflict_matrix_uses_semantic_rows_as_dispatch_ranking_signal() {
    let dir = setup_traversal_project();
    seed_traversal_semantic_summaries(dir.path());
    init_git_repo(dir.path());
    let session = dir.path().join("tasks/software/tsift.md");
    refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
    let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
    let freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let evidence = graph_db_evidence_report_from_store(GraphDbEvidenceInput {
        root: dir.path(),
        scope: None,
        backend: "sqlite",
        target: "kgnv",
        depth: 4,
        limit: 8,
        cursor: None,
        store: &store,
        freshness,
        warnings: Vec::new(),
    })
    .unwrap();
    assert!(
        evidence
            .semantic_related
            .iter()
            .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation"),
        "expected semantic evidence rows, got {:?}",
        evidence
            .semantic_related
            .iter()
            .map(|node| (&node.kind, &node.label))
            .collect::<Vec<_>>()
    );
    assert!(
        evidence
            .output_budget
            .as_ref()
            .is_some_and(|budget| budget.diagnostics.iter().any(|diagnostic| {
                diagnostic.contains("semantic_match")
                    && diagnostic.contains("source_handle_coverage")
            })),
        "expected evidence output budget diagnostics, got {:?}",
        evidence.output_budget
    );

    let cached_diff = diff_digest::compute(
        dir.path(),
        diff_digest::DiffDigestOptions {
            cached: true,
            revision: None,
            max_parsed_files: None,
        },
    )
    .unwrap();
    let impact_report = impact::compute(
        dir.path(),
        impact::ImpactOptions {
            cached: true,
            revision: None,
            scope: None,
            limit: 10,
        },
    )
    .unwrap();
    let graph_nodes = store.all_nodes().unwrap();
    let graph_index = conflict_matrix_graph_index(&graph_nodes);
    let semantic_candidate = conflict_matrix_candidate_from_evidence(
        dir.path(),
        &evidence,
        &graph_index,
        &cached_diff,
        &impact_report,
    );
    assert!(semantic_candidate.semantic_dispatch_score > 0);
    assert!(
        semantic_candidate
            .semantic_dispatch_reasons
            .iter()
            .any(|reason| reason.contains("semantic_concept") && reason.contains("owned file")),
        "expected semantic ranking explanations, got {:?}",
        semantic_candidate.semantic_dispatch_reasons
    );
    assert!(
        semantic_candidate
            .semantic_related
            .iter()
            .any(|item| item.label == "graph navigation")
    );

    // Clone the candidate and strip its semantic signal. The plain clone gets a
    // target that sorts BEFORE the semantic candidate's target, so the final
    // `target` tie-breaker alone would rank the plain clone first; only the
    // semantic score comparison can put the semantic candidate on top.
    let mut plain_candidate = semantic_candidate.clone();
    plain_candidate.target = "aaaa-plain".to_string();
    plain_candidate.semantic_related.clear();
    plain_candidate.semantic_dispatch_score = 0;
    plain_candidate.semantic_dispatch_reasons.clear();
    assert!(
        plain_candidate.target < semantic_candidate.target,
        "plain target must sort before the semantic target so the ranking assertion is meaningful"
    );
    let mut ranked = [plain_candidate, semantic_candidate];
    ranked.sort_by(|left, right| {
        left.risk
            .cmp(&right.risk)
            .then_with(|| left.risk_score.cmp(&right.risk_score))
            .then_with(|| {
                // Higher semantic dispatch score ranks first (descending).
                right
                    .semantic_dispatch_score
                    .cmp(&left.semantic_dispatch_score)
            })
            .then_with(|| left.target.cmp(&right.target))
    });
    assert_eq!(ranked[0].target, "kgnv");
    assert_eq!(ranked[1].target, "aaaa-plain");
}
24289
/// Builds the dependency DAG report for the fixture project and checks its
/// contract version, target order, edge kinds, cycle status, first two topological
/// batches, and replay command; then runs the JSON-output command for two targets.
#[test]
fn dependency_dag_extracts_explicit_overlap_and_follow_up_edges() {
    let project = setup_dependency_dag_project();
    let root = project.path();
    let session = root.join("tasks/software/tsift.md");
    let report = build_dependency_dag_report(root, None, &[], 4, 12).unwrap();

    assert_eq!(report.contract_version, "dependency-dag-v1");
    let expected_targets = vec![
        "prep".to_string(),
        "alpha".to_string(),
        "beta".to_string(),
        "gamma".to_string(),
    ];
    assert_eq!(report.targets, expected_targets);

    // True when the report contains an edge with the given endpoints and kind.
    let has_edge = |from: &str, to: &str, kind: &str| {
        report
            .edges
            .iter()
            .any(|edge| edge.from == from && edge.to == to && edge.kind == kind)
    };
    assert!(has_edge("prep", "alpha", "explicit_depends_on"));
    assert!(has_edge("alpha", "gamma", "worker_result_follow_up"));

    let has_shared_resource_edge = report.edges.iter().any(|edge| {
        edge.from == "alpha"
            && edge.to == "beta"
            && edge.kind == "shared_resource"
            && edge.shared_files.contains(&"main.rs".to_string())
            && edge.shared_symbols.contains(&"shared_helper".to_string())
    });
    assert!(has_shared_resource_edge);

    assert!(
        !report.cycle_diagnostics.has_cycles,
        "{:?}",
        report.cycle_diagnostics
    );
    assert_eq!(report.topo_batches[0].targets, vec!["prep".to_string()]);
    assert_eq!(report.topo_batches[1].targets, vec!["alpha".to_string()]);
    assert!(
        report.replay_commands[0].contains("dependency-dag"),
        "{:?}",
        report.replay_commands
    );

    let json_format = OutputFormat {
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        ultra_terse: false,
        schema: false,
        envelope: false,
    };
    cmd_dependency_dag(
        &session,
        None,
        &["alpha".to_string(), "beta".to_string()],
        4,
        12,
        json_format,
    )
    .unwrap();
}
24350
/// The cycle fixture should produce cycle diagnostics that block `left` and `right`
/// and list `explicit_depends_on` edges in both directions between them.
#[test]
fn dependency_dag_reports_cycles_from_explicit_depends_on_text() {
    let project = setup_dependency_dag_cycle_project();
    let report = build_dependency_dag_report(project.path(), None, &[], 4, 12).unwrap();
    let cycles = &report.cycle_diagnostics;

    assert!(cycles.has_cycles);
    let expected_blocked = vec!["left".to_string(), "right".to_string()];
    assert_eq!(cycles.blocked_nodes, expected_blocked);

    // True when the cycle edges contain an explicit dependency `from -> to`.
    let has_explicit_cycle_edge = |from: &str, to: &str| {
        cycles
            .cycle_edges
            .iter()
            .any(|edge| edge.from == from && edge.to == to && edge.kind == "explicit_depends_on")
    };
    assert!(has_explicit_cycle_edge("left", "right"));
    assert!(has_explicit_cycle_edge("right", "left"));
}
24368
/// Loads the same traversal projection into an in-memory SQLite store and an
/// in-memory Convex store, then checks that both yield graphs of equal size and
/// that the Convex graph contains the backlog-to-helper `mentions` edge.
#[test]
fn traversal_projection_queries_match_sqlite_and_convex_stores() {
    let project = setup_traversal_project();
    let root = project.path();
    let source_graph = build_traversal_graph_source(root, root, None).unwrap();
    let projection = traversal_projection_from_graph(root, None, &source_graph).unwrap();

    let mut sqlite_store = SqliteGraphStore::in_memory().unwrap();
    sqlite_store.replace_projection(&projection).unwrap();
    let convex_store = ConvexGraphStore::new(MemoryConvexGraphClient::default());
    projection.upsert_into(&convex_store).unwrap();

    let from_sqlite = traversal_graph_from_store(root, &sqlite_store).unwrap();
    let from_convex = traversal_graph_from_store(root, &convex_store).unwrap();
    assert_eq!(from_sqlite.nodes.len(), from_convex.nodes.len());
    assert_eq!(from_sqlite.edges.len(), from_convex.edges.len());

    // Resolve one endpoint from each store's graph, then look the edge up in the Convex graph.
    let backlog_node = resolve_traversal_node(&from_sqlite, "#kgnv").unwrap();
    let helper_node = resolve_traversal_node(&from_convex, "helper").unwrap();
    let has_mention_edge = from_convex.edges.iter().any(|edge| {
        edge.from == backlog_node.handle
            && edge.to == helper_node.handle
            && edge.relation == "mentions"
    });
    assert!(has_mention_edge);
}
24393
/// Exercises several graph-db queries against the on-disk SQLite store:
/// `Neighborhood`, `Edges`, `Edge`, `Incident`, and `Schema`, checking the ranked
/// neighbors, ranking gate, paging diagnostics, and edge lookups each one reports.
#[test]
fn graph_db_api_queries_sqlite_neighborhood_and_schema() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();
    let initial_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    assert_eq!(initial_freshness.status, "current");

    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();

    // --- Neighborhood query restricted to `mentions` edges ---
    let neighborhood_query = GraphDbQuery::Neighborhood {
        id: backlog.handle.clone(),
        depth: 1,
        edge_kind: Some("mentions".to_string()),
        cursor: None,
        limit: None,
        property_filters: Vec::new(),
    };
    let report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        neighborhood_query,
        &store,
        initial_freshness,
        Vec::new(),
    )
    .unwrap();

    let has_backlog_mention = report
        .edges
        .iter()
        .any(|edge| edge.from_id == backlog.handle && edge.kind == "mentions");
    assert!(
        has_backlog_mention,
        "expected backlog mention edge, got {:?}",
        report.edges
    );

    let has_quality_neighbor = report.ranked_neighbors.iter().any(|neighbor| {
        neighbor.depth == Some(1)
            && neighbor.edge_kinds.iter().any(|kind| kind == "mentions")
            && neighbor.node_id != backlog.handle
            && neighbor.handle_coverage_pct >= 95.0
            && neighbor.duplicate_name_precision >= 0.99
    });
    assert!(
        has_quality_neighbor,
        "expected ranked neighborhood neighbors with quality scores, got {:?}",
        report.ranked_neighbors
    );
    assert!(report.ranked_neighbors.len() <= GRAPH_DB_RANKED_NEIGHBOR_CAP);

    let ranking_gate = report.neighborhood_ranking_gate.as_ref().unwrap();
    assert!(!ranking_gate.ranked_output_default);
    assert_eq!(ranking_gate.default_order, "stable_node_id");
    let gate_mentions_score_cap = ranking_gate
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("score-capped"));
    assert!(gate_mentions_score_cap, "{ranking_gate:?}");
    let requires_handle_coverage = ranking_gate
        .required_metrics
        .iter()
        .any(|metric| metric == "handle_coverage_pct");
    assert!(requires_handle_coverage);
    let requires_duplicate_precision = ranking_gate
        .required_metrics
        .iter()
        .any(|metric| metric == "duplicate_name_precision");
    assert!(requires_duplicate_precision);

    let page_mentions_index = report
        .page
        .as_ref()
        .unwrap()
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("idx_graph_edges_from_kind"));
    assert!(
        page_mentions_index,
        "expected SQLite neighborhood query plan diagnostics, got {:?}",
        report.page.as_ref().unwrap().diagnostics
    );

    // --- Paged edge listing ---
    let edges_query = GraphDbQuery::Edges {
        edge_kind: Some("mentions".to_string()),
        cursor: None,
        limit: Some(2),
        property_filters: Vec::new(),
    };
    let edges_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let edges_report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        edges_query,
        &store,
        edges_freshness,
        Vec::new(),
    )
    .unwrap();
    let edge_id = edges_report
        .edges
        .first()
        .map(|edge| edge.id.clone())
        .expect("expected at least one paged mentions edge");
    assert!(edges_report.edges.iter().any(|edge| edge.id == edge_id));
    assert_eq!(
        edges_report.page.as_ref().unwrap().returned_edges,
        edges_report.edges.len()
    );

    // --- Single edge lookup by id ---
    let edge_query = GraphDbQuery::Edge {
        id: edge_id.clone(),
    };
    let edge_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let edge_report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        edge_query,
        &store,
        edge_freshness,
        Vec::new(),
    )
    .unwrap();
    let looked_up_key = edge_report
        .edge
        .as_ref()
        .map(|e| graph_db_edge_key(&SubstrateGraphEdge::from(e)));
    assert_eq!(looked_up_key, Some(edge_id));

    // --- Incident edges of the backlog node, capped at one ---
    let incident_query = GraphDbQuery::Incident {
        id: backlog.handle.clone(),
        edge_kind: Some("mentions".to_string()),
        cursor: None,
        limit: Some(1),
        property_filters: Vec::new(),
    };
    let incident_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let incident_report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        incident_query,
        &store,
        incident_freshness,
        Vec::new(),
    )
    .unwrap();
    assert_eq!(incident_report.page.as_ref().unwrap().returned_edges, 1);
    let all_touch_backlog = incident_report
        .edges
        .iter()
        .all(|edge| edge.from_id == backlog.handle || edge.to_id == backlog.handle);
    assert!(all_touch_backlog, "{:?}", incident_report.edges);

    // --- Schema listing ---
    let schema_freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let schema_report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        GraphDbQuery::Schema,
        &store,
        schema_freshness,
        Vec::new(),
    )
    .unwrap();
    let schema = schema_report.schema.unwrap();
    let lists_neighborhood = schema
        .operations
        .iter()
        .any(|operation| operation.command.starts_with("neighborhood"));
    assert!(lists_neighborhood);
}
24561
/// Builds an origin node with 32 `source_handle` neighbors carrying 600-character
/// `detail` properties, then checks that an unlimited neighborhood query reports
/// per-kind budget drops, a truncated page, and budget ranking diagnostics.
#[test]
fn graph_db_neighborhood_reports_dropped_by_budget_diagnostics() {
    let source_ids: Vec<String> = (0..32).map(|idx| format!("src-{idx:02}")).collect();

    let mut nodes = vec![SubstrateGraphNode::new(
        "origin",
        "backlog",
        "#budgeted-neighborhood",
    )];
    nodes.extend(source_ids.iter().enumerate().map(|(idx, id)| {
        SubstrateGraphNode::new(id.clone(), "source_handle", format!("source {idx}"))
            .with_property("source_ref", format!("fixture:{idx}"))
            .with_property("detail", "x".repeat(600))
    }));
    let edges: Vec<_> = source_ids
        .into_iter()
        .map(|id| SubstrateGraphEdge::new("origin", id, "mentions"))
        .collect();

    let store = SqliteGraphStore::in_memory().unwrap();
    let projection = GraphProjection { nodes, edges };
    projection.upsert_into(&store).unwrap();

    let neighborhood_query = GraphDbQuery::Neighborhood {
        id: "origin".to_string(),
        depth: 1,
        edge_kind: None,
        cursor: None,
        limit: None,
        property_filters: Vec::new(),
    };
    let report = graph_db_report_from_store(
        Path::new("."),
        None,
        "fixture",
        neighborhood_query,
        &store,
        current_graph_db_freshness(),
        Vec::new(),
    )
    .unwrap();

    let budget = report.output_budget.as_ref().unwrap();
    assert!(budget.selected_nodes < budget.candidate_nodes);

    let dropped_source_handles = budget.dropped_by_budget.iter().any(|drop| {
        drop.item == "node" && drop.kind == "source_handle" && drop.reason == "per_kind_quota"
    });
    assert!(
        dropped_source_handles,
        "expected source_handle budget drops, got {:?}",
        budget.dropped_by_budget
    );

    assert!(report.page.as_ref().unwrap().truncated);
    let page_reports_ranking_signals = report
        .page
        .as_ref()
        .unwrap()
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("budget ranking signals"));
    assert!(page_reports_ranking_signals, "{:?}", report.page);
}
24625
/// Gives one `note` row depth 1 and eight `note` rows depth 6, applies the output
/// budget with a limit of 3, and checks that the depth-1 row is kept, that
/// per-kind quota drops are reported, and that the diagnostics mention depth.
#[test]
fn graph_db_output_budget_uses_depth_overrides_for_evidence_rows() {
    let deep_ids: Vec<String> = (0..8).map(|idx| format!("far-{idx:02}")).collect();

    let mut nodes = vec![SubstrateGraphNode::new("near", "note", "zzz shallow row")];
    nodes.extend(deep_ids.iter().enumerate().map(|(idx, id)| {
        SubstrateGraphNode::new(id.clone(), "note", format!("aaa deeper row {idx}"))
    }));

    let mut depth_by_id = BTreeMap::from([("near".to_string(), 1usize)]);
    depth_by_id.extend(deep_ids.into_iter().map(|id| (id, 6)));

    let origin_ids = vec!["target".to_string()];
    let budgeted = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &BTreeMap::new(),
        nodes,
        Vec::new(),
        Some(3),
        Some(&depth_by_id),
        None,
    );

    let kept_shallow_row = budgeted.nodes.iter().any(|node| node.id == "near");
    assert!(
        kept_shallow_row,
        "expected the shallow evidence row to outrank deeper rows, got {:?}",
        budgeted
            .nodes
            .iter()
            .map(|node| (&node.id, &node.label))
            .collect::<Vec<_>>()
    );

    let dropped_notes = budgeted
        .report
        .dropped_by_budget
        .iter()
        .any(|drop| drop.item == "node" && drop.kind == "note" && drop.reason == "per_kind_quota");
    assert!(
        dropped_notes,
        "expected node quota drops, got {:?}",
        budgeted.report.dropped_by_budget
    );

    let mentions_depth = budgeted
        .report
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("depth"));
    assert!(mentions_depth, "{:?}", budgeted.report.diagnostics);
}
24677
/// With 20 evidence rows and a limit of 3, the first page should be truncated and
/// carry a non-empty cursor; re-running with that cursor should yield a page that
/// shares no node ids with the first and reports a "cursor skipped" diagnostic.
#[test]
fn evidence_pagination_returns_next_cursor_when_truncated() {
    let mut nodes = vec![SubstrateGraphNode::new(
        "target".to_string(),
        "backlog_item",
        "target item".to_string(),
    )];
    let mut depth_by_id = BTreeMap::new();
    depth_by_id.insert("target".to_string(), 0);
    for row in 0..20 {
        let row_id = format!("ev-{row}");
        let evidence_node = SubstrateGraphNode::new(
            row_id.clone(),
            "source_handle",
            format!("evidence row {row}"),
        )
        .with_property("detail", "x".repeat(400));
        nodes.push(evidence_node);
        depth_by_id.insert(row_id, 1);
    }
    let origin_ids = vec!["target".to_string()];

    let first_page = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &BTreeMap::new(),
        nodes.clone(),
        Vec::new(),
        Some(3),
        Some(&depth_by_id),
        None,
    );
    assert!(
        first_page.truncated,
        "expected first page to be truncated with 20 candidates and low limit, got {} selected of {} candidates",
        first_page.nodes.len(),
        first_page.report.candidate_nodes
    );
    assert!(
        first_page.next_cursor.is_some(),
        "expected next_cursor when truncated"
    );
    let cursor = first_page.next_cursor.unwrap();
    assert!(!cursor.is_empty(), "cursor should be a non-empty node id");
    let first_ids: BTreeSet<_> = first_page
        .nodes
        .iter()
        .map(|node| node.id.clone())
        .collect();

    // `nodes` is not needed after this call, so it is moved rather than cloned.
    let second_page = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &BTreeMap::new(),
        nodes,
        Vec::new(),
        Some(3),
        Some(&depth_by_id),
        Some(&cursor),
    );
    let second_ids: BTreeSet<_> = second_page
        .nodes
        .iter()
        .map(|node| node.id.clone())
        .collect();

    let overlap: BTreeSet<_> = first_ids.intersection(&second_ids).cloned().collect();
    assert!(
        overlap.is_empty(),
        "pages should not overlap, but found shared ids: {overlap:?}"
    );

    let reports_cursor_skip = second_page
        .report
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("cursor skipped"));
    assert!(
        reports_cursor_skip,
        "expected cursor skip diagnostic, got {:?}",
        second_page.report.diagnostics
    );
}
24743
/// With only three evidence rows, no limit, and no cursor, the budgeted result
/// should be neither truncated nor carry a next cursor.
#[test]
fn evidence_pagination_no_cursor_returns_all_when_within_budget() {
    let mut nodes = vec![SubstrateGraphNode::new(
        "target".to_string(),
        "backlog_item",
        "target item".to_string(),
    )];
    let mut depth_by_id = BTreeMap::new();
    depth_by_id.insert("target".to_string(), 0);
    for row in 0..3 {
        let row_id = format!("ev-{row}");
        let evidence_node = SubstrateGraphNode::new(
            row_id.clone(),
            "source_handle",
            format!("evidence row {row}"),
        );
        nodes.push(evidence_node);
        depth_by_id.insert(row_id, 1);
    }
    let origin_ids = vec!["target".to_string()];

    let page = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &BTreeMap::new(),
        nodes,
        Vec::new(),
        None,
        Some(&depth_by_id),
        None,
    );

    assert!(
        !page.truncated,
        "expected no truncation with small candidate set and default budget"
    );
    assert!(
        page.next_cursor.is_none(),
        "expected no next_cursor when not truncated"
    );
}
24781
/// A cursor that matches no node id should produce a "cursor skipped 0"
/// diagnostic in the budget report.
#[test]
fn evidence_pagination_invalid_cursor_returns_first_page() {
    let mut nodes = vec![SubstrateGraphNode::new(
        "target".to_string(),
        "backlog_item",
        "target item".to_string(),
    )];
    let mut depth_by_id = BTreeMap::new();
    depth_by_id.insert("target".to_string(), 0);
    for row in 0..5 {
        let row_id = format!("ev-{row}");
        let evidence_node = SubstrateGraphNode::new(
            row_id.clone(),
            "source_handle",
            format!("evidence row {row}"),
        );
        nodes.push(evidence_node);
        depth_by_id.insert(row_id, 1);
    }
    let origin_ids = vec!["target".to_string()];

    // `nodes` is not used after this call, so it is moved rather than cloned.
    let page = graph_db_apply_output_budget_with_depths_and_cursor(
        &origin_ids,
        &BTreeMap::new(),
        nodes,
        Vec::new(),
        None,
        Some(&depth_by_id),
        Some("nonexistent-id"),
    );

    let skipped_nothing = page
        .report
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("cursor skipped 0"));
    assert!(
        skipped_nothing,
        "invalid cursor should skip 0 candidates, got {:?}",
        page.report.diagnostics
    );
}
24816
/// While a rollback-journal lock is held on the graph database, the `status`
/// operator report should still be "current", record `SnapshotFallback` recovery,
/// and warn about the rollback-journal lock.
#[test]
fn graph_db_status_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_db = root.join(".tsift/graph.db");
    // Bound to a named variable so the lock stays held until the end of the test.
    let _journal_lock = hold_rollback_journal_lock(&graph_db);

    let status_report =
        graph_db_operator_report_from_disk(root, None, &graph_db, "status", None, vec![])
            .unwrap();

    assert_eq!(status_report.status, "current");
    let expected_recovery = Some(index::ReadOnlyRecovery::SnapshotFallback);
    assert_eq!(status_report.recovery, expected_recovery);

    let warns_about_lock = status_report
        .warnings
        .iter()
        .any(|warning| warning.contains("rollback-journal lock"));
    assert!(
        warns_about_lock,
        "expected rollback-journal recovery warning, got {:?}",
        status_report.warnings
    );
}
24842
/// While a WAL database lock is held on the graph database, the `status` operator
/// report should still be "current", record `SnapshotFallbackWal` recovery, and
/// warn about the WAL-aware snapshot fallback.
#[test]
fn graph_db_status_copies_wal_sidecars_when_locked() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_db = root.join(".tsift/graph.db");
    // Bound to a named variable so the lock stays held until the end of the test.
    let _wal_lock = hold_wal_database_lock(&graph_db);

    let status_report =
        graph_db_operator_report_from_disk(root, None, &graph_db, "status", None, vec![])
            .unwrap();

    assert_eq!(status_report.status, "current");
    let expected_recovery = Some(index::ReadOnlyRecovery::SnapshotFallbackWal);
    assert_eq!(status_report.recovery, expected_recovery);

    let warns_about_wal_fallback = status_report
        .warnings
        .iter()
        .any(|warning| warning.contains("WAL-aware snapshot fallback"));
    assert!(
        warns_about_wal_fallback,
        "expected WAL recovery warning, got {:?}",
        status_report.warnings
    );
}
24868
/// While a rollback-journal lock is held on the graph database, running the
/// `Evidence` graph-db command for target `kgnv` should still succeed.
#[test]
fn graph_db_evidence_uses_snapshot_fallback_when_graph_db_is_locked() {
    let dir = setup_traversal_project();
    let session = dir.path().join("tasks/software/tsift.md");
    refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
    let graph_db = dir.path().join(".tsift/graph.db");
    // Bound to a named variable so the lock stays held for the whole command run.
    let _lock = hold_rollback_journal_lock(&graph_db);

    let result = cmd_graph_db(
        &session,
        None,
        GraphDbBackend::Sqlite,
        None,
        GraphDbQuery::Evidence {
            target: "kgnv".to_string(),
            depth: 3,
            limit: 8,
            cursor: None,
        },
        OutputFormat {
            json_output: false,
            compact: true,
            pretty: false,
            terse: false,
            ultra_terse: false,
            schema: false,
            envelope: false,
        },
    );

    // Surface the underlying error on failure instead of a bare `is_ok()` panic.
    assert!(
        result.is_ok(),
        "expected graph-db evidence to succeed while graph.db is locked, got {:?}",
        result.err()
    );
}
24901
24902 fn current_graph_db_freshness() -> GraphDbFreshnessReport {
24903 GraphDbFreshnessReport {
24904 status: "current".to_string(),
24905 fail_closed: false,
24906 projection_version: Some(GRAPH_PROJECTION_VERSION.to_string()),
24907 content_hash: Some("fixture".to_string()),
24908 source_watermark: None,
24909 diagnostics: Vec::new(),
24910 }
24911 }
24912
/// Passing a stale, fail-closed freshness report to the evidence query must yield
/// an error whose message mentions failing closed and the refresh command.
#[test]
fn graph_db_evidence_fails_closed_with_repair_command_for_stale_freshness() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let stale_freshness = GraphDbFreshnessReport {
        status: "stale".to_string(),
        fail_closed: true,
        projection_version: Some("old-v0".to_string()),
        content_hash: None,
        source_watermark: None,
        diagnostics: vec!["projection content hash is missing".to_string()],
    };
    let evidence_input = GraphDbEvidenceInput {
        root,
        scope: None,
        backend: "sqlite",
        target: "kgnv",
        depth: 3,
        limit: 8,
        cursor: None,
        store: &store,
        freshness: stale_freshness,
        warnings: Vec::new(),
    };

    let outcome = graph_db_evidence_report_from_store(evidence_input);
    let Err(err) = outcome else {
        panic!("stale graph freshness should fail closed")
    };

    let message = err.to_string();
    for expected_fragment in ["failed closed", "graph-db --path", "refresh --json"] {
        assert!(message.contains(expected_fragment), "{message}");
    }
}
24947
24948 fn paged_graph_ids(
24949 store: &impl GraphStore,
24950 cursor: Option<&str>,
24951 ) -> (Vec<String>, GraphDbPageReport) {
24952 let report = graph_db_report_from_store(
24953 Path::new("."),
24954 None,
24955 "fixture",
24956 GraphDbQuery::Kind {
24957 kind: "backlog".to_string(),
24958 cursor: cursor.map(str::to_string),
24959 limit: Some(2),
24960 property_filters: vec!["phase=open".to_string()],
24961 },
24962 store,
24963 current_graph_db_freshness(),
24964 Vec::new(),
24965 )
24966 .unwrap();
24967 (
24968 report.nodes.iter().map(|node| node.id.clone()).collect(),
24969 report.page.unwrap(),
24970 )
24971 }
24972
/// Loads five backlog nodes (one of them `closed`) into both an in-memory SQLite
/// store and an in-memory Convex store, then pages through the `phase=open` nodes
/// two at a time and checks that both stores return the same ids and page data.
#[test]
fn graph_db_query_pagination_and_filters_match_sqlite_and_convex() {
    let mut nodes = Vec::with_capacity(5);
    for idx in 0..5 {
        // Only index 1 is closed, so the open filter should skip exactly that node.
        let phase = match idx {
            1 => "closed",
            _ => "open",
        };
        nodes.push(
            SubstrateGraphNode::new(format!("gbak-{idx:02}"), "backlog", format!("#{idx:02}"))
                .with_property("phase", phase),
        );
    }
    let projection = GraphProjection {
        nodes,
        edges: Vec::new(),
    };

    let sqlite = SqliteGraphStore::in_memory().unwrap();
    projection.upsert_into(&sqlite).unwrap();
    let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
    projection.upsert_into(&convex).unwrap();

    // First page: no cursor.
    let (sqlite_first_ids, sqlite_first_page) = paged_graph_ids(&sqlite, None);
    let (convex_first_ids, convex_first_page) = paged_graph_ids(&convex, None);
    assert_eq!(sqlite_first_ids, vec!["gbak-00", "gbak-02"]);
    assert_eq!(sqlite_first_ids, convex_first_ids);
    assert_eq!(sqlite_first_page.next_cursor.as_deref(), Some("gbak-02"));
    assert!(sqlite_first_page.truncated);
    assert_eq!(
        sqlite_first_page.returned_nodes,
        convex_first_page.returned_nodes
    );
    assert_eq!(
        sqlite_first_page.property_filters,
        convex_first_page.property_filters
    );
    let first_page_mentions_index = sqlite_first_page
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("idx_graph_nodes_kind"));
    assert!(
        first_page_mentions_index,
        "expected SQLite kind query plan diagnostics, got {:?}",
        sqlite_first_page.diagnostics
    );

    // Second page: resume from the SQLite cursor on both stores.
    let resume_cursor = sqlite_first_page.next_cursor.as_deref();
    let (sqlite_next_ids, sqlite_next_page) = paged_graph_ids(&sqlite, resume_cursor);
    let (convex_next_ids, convex_next_page) = paged_graph_ids(&convex, resume_cursor);
    assert_eq!(sqlite_next_ids, vec!["gbak-03", "gbak-04"]);
    assert_eq!(sqlite_next_ids, convex_next_ids);
    assert_eq!(sqlite_next_page.next_cursor, None);
    assert!(!sqlite_next_page.truncated);
    assert_eq!(
        sqlite_next_page.returned_nodes,
        convex_next_page.returned_nodes
    );
    assert_eq!(
        sqlite_next_page.property_filters,
        convex_next_page.property_filters
    );
}
25030
/// The shortest path between the `#kgnv` node and the `main` node must start and end on
/// those two handles and contain at least three handles in total.
#[test]
fn traversal_shortest_path_crosses_artifacts_and_symbols() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let start = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let goal = resolve_traversal_node(&graph, "main").unwrap();

    let path = traversal_shortest_handles(&graph.edges, &start.handle, &goal.handle).unwrap();

    // Endpoints first, then the minimum length that implies an intermediate node.
    assert_eq!(path.first(), Some(&start.handle));
    assert_eq!(path.last(), Some(&goal.handle));
    let handle_count = path.len();
    assert!(
        handle_count >= 3,
        "expected backlog -> symbol -> main, got {path:?}"
    );
}
25046
/// A report rooted at `#kgnv` is in "neighborhood" mode, recommends the `helper` node with a
/// "matched" reason, and carries exploration source windows plus no-reread guidance.
#[test]
fn traversal_report_recommends_next_bugfix_nodes() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let report = traversal_report(root, None, graph, Some("#kgnv"), None, 1, 50).unwrap();

    assert_eq!(report.mode, "neighborhood");

    let recommends_helper = report
        .recommendations
        .iter()
        .any(|rec| rec.label == "helper" && rec.reason.contains("matched"));
    assert!(
        recommends_helper,
        "expected helper recommendation, got {:?}",
        report.recommendations
    );

    let exploration = &report.exploration;
    assert!(
        !exploration.source_windows.is_empty(),
        "expected exploration source windows"
    );
    assert!(
        exploration
            .no_reread_guidance
            .contains("avoid whole-file reads")
    );
}
25073
/// After `main.rs` is rewritten, building the traversal graph must report an index refresh,
/// expose the newly written `fresh_helper` symbol, and leave the index with no pending changes.
#[test]
fn traversal_graph_refreshes_stale_index_before_loading_symbols() {
    let project = setup_traversal_project();
    let root = project.path();

    // Pause before rewriting the file — presumably so the change is detectable by
    // timestamp; TODO confirm against the index's change detection.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten = "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();

    let refresh_reported = graph.warnings.iter().any(|warning| {
        warning.contains("index refreshed") && warning.contains("graph traversal packet")
    });
    assert!(
        refresh_reported,
        "expected refresh diagnostic, got {:?}",
        graph.warnings
    );
    assert!(resolve_traversal_node(&graph, "fresh_helper").is_some());

    // The on-disk index should now agree with the working tree.
    let index_path = root.join(".tsift/index.db");
    let db = index::IndexDb::open_read_only(&index_path).unwrap();
    let summary = db.compute_changes(root).unwrap();
    assert_eq!(summary.new + summary.modified + summary.deleted, 0);
}
25101
/// While the writer lock file is held, a stale index cannot be refreshed: the graph must warn
/// about the raw-source fallback, describe `main.rs` as a raw-source node with a `source-read`
/// expansion, and omit the stale `helper` symbol.
#[test]
fn traversal_graph_falls_back_to_raw_source_when_stale_refresh_is_blocked() {
    let project = setup_traversal_project();
    let root = project.path();
    let db_path = root.join(".tsift/index.db");
    let lock_path = index::writer_lock_path(&db_path);
    // Keep the guard bound for the whole test so the lock stays held.
    let _writer = hold_writer_lock(&lock_path);

    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten = "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let file = resolve_traversal_node(&graph, "main.rs").unwrap();

    let fallback_reported = graph
        .warnings
        .iter()
        .any(|warning| warning.contains("falling back to raw source file nodes"));
    assert!(
        fallback_reported,
        "expected raw-source fallback diagnostic, got {:?}",
        graph.warnings
    );

    let detail_mentions_fallback = file
        .detail
        .as_deref()
        .is_some_and(|detail| detail.contains("raw source fallback"));
    assert!(
        detail_mentions_fallback,
        "expected raw-source detail, got {:?}",
        file.detail
    );

    assert!(
        file.expand.contains("source-read"),
        "expected source-read fallback command, got {}",
        file.expand
    );

    let stale_symbol = resolve_traversal_node(&graph, "helper");
    assert!(
        stale_symbol.is_none(),
        "stale symbol evidence should be skipped when refresh is blocked"
    );
}
25142
/// Smoke test: `cmd_traverse` succeeds for a JSON run between two named nodes and for an
/// HTML run without endpoints.
#[test]
fn traversal_cmd_supports_json_and_html_outputs() {
    let project = setup_traversal_project();
    let root = project.path();

    // JSON output, explicit endpoints.
    let json_run = cmd_traverse(
        Some("#kgnv"), Some("main"), root, None, 1, 50,
        TraverseFormat::Json, false, false, false, None,
    );
    json_run.unwrap();

    // HTML output, no endpoints.
    let html_run = cmd_traverse(
        None, None, root, None, 1, 50,
        TraverseFormat::Html, false, false, false, None,
    );
    html_run.unwrap();
}
25175
/// The HTML rendering of a traversal report (with semantic summaries seeded) must contain the
/// graph canvas element, the `semantic_concept` kind, the navigation label and a `JSON.parse`
/// call.
#[test]
fn traversal_html_renders_inline_graph_visualization() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);

    let graph = build_traversal_graph(root, root, None).unwrap();
    let report = traversal_report(root, None, graph, None, None, 1, 50).unwrap();
    let page = traversal_report_html(&report).unwrap();

    assert!(page.contains("id=\"graph-canvas\""));
    assert!(page.contains("semantic_concept"));
    assert!(page.contains("graph navigation"));
    assert!(page.contains("JSON.parse"));
}
25189
/// `format_score` yields two decimals in compact mode and four otherwise; `compact_snippet`
/// keeps only the trimmed first line.
#[test]
fn compact_helpers_trim_scores_and_snippets() {
    let compact_score = format_score(0.12345, true);
    let full_score = format_score(0.12345, false);
    assert_eq!(compact_score, "0.12");
    assert_eq!(full_score, "0.1235");

    let raw = " first line with useful context\nsecond";
    let snippet = compact_snippet(raw);
    assert_eq!(snippet.as_deref(), Some("first line with useful context"));
}
25197
/// With six members and a cap of five, `compact_members` lists five names and a "+1 more"
/// suffix.
#[test]
fn compact_members_caps_list() {
    let names = ["a", "b", "c", "d", "e", "f"];
    let members: Vec<graph::CommunityMember> = names
        .iter()
        .map(|name| graph::CommunityMember::new(*name))
        .collect();

    let rendered = compact_members(&members, 5);
    assert_eq!(rendered, "a, b, c, d, e (+1 more)");
}
25206
/// `abbreviate_kind` shortens the listed kinds and returns every other kind unchanged.
#[test]
fn abbreviate_kind_maps_common_kinds() {
    // (input kind, expected abbreviation), checked in the listed order.
    let expectations = [
        ("function", "fn"),
        ("method", "meth"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
        // Kinds that come back unchanged.
        ("struct", "struct"),
        ("trait", "trait"),
        ("enum", "enum"),
        ("const", "const"),
        ("unknown_kind", "unknown_kind"),
    ];
    for (kind, abbreviation) in expectations {
        assert_eq!(abbreviate_kind(kind), abbreviation);
    }
}
25228
/// `abbreviate_match_type` shortens `exact_name` and `partial_tags` and leaves the other
/// checked inputs unchanged.
#[test]
fn abbreviate_match_type_maps_search_types() {
    let expectations = [
        ("exact_name", "exact"),
        ("partial_tags", "partial"),
        ("all_tags", "all_tags"),
        ("other_type", "other_type"),
    ];
    for (match_type, abbreviation) in expectations {
        assert_eq!(abbreviate_match_type(match_type), abbreviation);
    }
}
25236
/// Two edges from the same caller file are rendered by `format_edge_groups` as one line with
/// the count and both callee names.
#[test]
fn explain_compact_groups_edges_by_file() {
    // (callee name, call-site line) for two calls made by `main` in src/main.rs.
    let edges: Vec<index::StoredEdge> = vec![("helper", 2), ("render", 3)]
        .into_iter()
        .map(|(callee, call_site_line)| index::StoredEdge {
            caller_file: "src/main.rs".to_string(),
            caller_name: "main".to_string(),
            caller_line: 1,
            callee_name: callee.to_string(),
            call_site_line,
            tagpath_handle: None,
        })
        .collect();

    let lines = format_edge_groups(&edges, false);
    assert_eq!(lines, vec![" src/main.rs (2): helper, render"]);
}
25260
/// Three hits in the same file collapse into one group that reports the root-relative path,
/// the full hit count and only the first two samples; such a result set should be collapsed.
#[test]
fn search_hit_groups_preserve_file_counts_and_samples() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let main_rs = root.join("src/main.rs");
    fs::create_dir_all(main_rs.parent().unwrap()).unwrap();
    fs::write(&main_rs, "claudescore-3 anchor\nclaudescore-3 follow-up\n").unwrap();
    let freshness = exact_search_file_timestamp(&main_rs);

    // (artifact id, rank, score, location, snippet) — everything else is shared.
    let hits: Vec<sift::SearchHit> = vec![
        ("a", 1, 10.0, "line 3", "claudescore-3 anchor"),
        ("b", 2, 9.0, "line 7", "claudescore-3 follow-up"),
        ("c", 3, 8.0, "line 9", "claudescore-3 tail"),
    ]
    .into_iter()
    .map(|(id, rank, score, location, snippet)| sift::SearchHit {
        artifact_id: id.to_string(),
        artifact_kind: sift::ContextArtifactKind::File,
        path: main_rs.display().to_string(),
        rank,
        score,
        confidence: sift::ScoreConfidence::High,
        location: Some(location.to_string()),
        snippet: snippet.to_string(),
        provenance: sift::ArtifactProvenance {
            adapter: sift::AcquisitionAdapterKind::FileSystem,
            source: "ripgrep -F".to_string(),
            synthetic: false,
        },
        freshness: freshness.clone(),
        budget: sift::ArtifactBudget::from_text(snippet, 1),
    })
    .collect();

    let groups = group_search_hits(&hits, root, false);
    assert_eq!(groups.len(), 1);

    let group = &groups[0];
    assert_eq!(group.path, "src/main.rs");
    assert_eq!(group.hits, 3);
    let expected_samples = vec![
        "line 3: claudescore-3 anchor".to_string(),
        "line 7: claudescore-3 follow-up".to_string(),
    ];
    assert_eq!(group.samples, expected_samples);
    assert!(should_collapse_search_hits(&hits, root, false));
}
25336
/// Three callers of `helper` in one file are dense enough for `should_collapse_edge_groups`
/// to report true.
#[test]
fn dense_edge_groups_trigger_collapse() {
    // (caller name, caller line, call-site line); all edges target `helper` in src/main.rs.
    let edges: Vec<index::StoredEdge> = vec![("main", 1, 2), ("beta", 5, 6), ("gamma", 9, 10)]
        .into_iter()
        .map(|(caller, caller_line, call_site_line)| index::StoredEdge {
            caller_file: "src/main.rs".to_string(),
            caller_name: caller.to_string(),
            caller_line,
            callee_name: "helper".to_string(),
            call_site_line,
            tagpath_handle: None,
        })
        .collect();

    assert!(should_collapse_edge_groups(&edges));
}
25367
25368 fn setup_workspace() -> tempfile::TempDir {
25371 let dir = tempfile::tempdir().unwrap();
25372 let root = dir.path();
25373 std::fs::write(
25374 root.join(".gitmodules"),
25375 r#"[submodule "src/alpha"]
25376 path = src/alpha
25377 url = https://example.com/alpha
25378[submodule "src/beta"]
25379 path = src/beta
25380 url = https://example.com/beta
25381"#,
25382 )
25383 .unwrap();
25384 let alpha = root.join("src/alpha");
25385 let beta = root.join("src/beta");
25386 std::fs::create_dir_all(&alpha).unwrap();
25387 std::fs::create_dir_all(&beta).unwrap();
25388 std::fs::write(
25389 alpha.join("lib.rs"),
25390 "fn alpha_helper() {}\nfn alpha_main() { alpha_helper(); }",
25391 )
25392 .unwrap();
25393 std::fs::write(beta.join("lib.rs"), "fn beta_func() {}").unwrap();
25394 dir
25395 }
25396
25397 fn setup_workspace_with_duplicate_leaf_names() -> tempfile::TempDir {
25398 let dir = tempfile::tempdir().unwrap();
25399 let root = dir.path();
25400 std::fs::write(
25401 root.join(".gitmodules"),
25402 r#"[submodule "pkg/app/foo"]
25403 path = pkg/app/foo
25404 url = https://example.com/pkg-app-foo
25405[submodule "vendor/foo"]
25406 path = vendor/foo
25407 url = https://example.com/vendor-foo
25408"#,
25409 )
25410 .unwrap();
25411 let pkg_foo = root.join("pkg/app/foo");
25412 let vendor_foo = root.join("vendor/foo");
25413 std::fs::create_dir_all(&pkg_foo).unwrap();
25414 std::fs::create_dir_all(&vendor_foo).unwrap();
25415 std::fs::write(
25416 pkg_foo.join("lib.rs"),
25417 "fn pkg_only() {}\nfn shared_name() { pkg_only(); }\n",
25418 )
25419 .unwrap();
25420 std::fs::write(
25421 vendor_foo.join("lib.rs"),
25422 "fn vendor_only() {}\nfn shared_name() { vendor_only(); }\n",
25423 )
25424 .unwrap();
25425 dir
25426 }
25427
/// Indexing the workspace fixture must create an index database for both `alpha` and `beta`.
#[test]
fn workspace_index_creates_per_submodule_dbs() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let alpha_db = root.join(".tsift/indexes/alpha/index.db");
    let beta_db = root.join(".tsift/indexes/beta/index.db");
    assert!(alpha_db.exists());
    assert!(beta_db.exists());
}
25451
/// Indexing with the `alpha` scope selected must create only the `alpha` index database.
#[test]
fn workspace_index_single_submodule() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, false,
        Some("alpha"), false, false, false, false, false, false,
    )
    .unwrap();

    let alpha_db = root.join(".tsift/indexes/alpha/index.db");
    let beta_db = root.join(".tsift/indexes/beta/index.db");
    assert!(alpha_db.exists());
    assert!(!beta_db.exists());
}
25475
/// Indexing with the unknown scope `missing` must fail, name the available scopes in the
/// error, and create no index database for that scope.
#[test]
fn workspace_index_single_submodule_errors_on_unknown_scope() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let outcome = cmd_index(
        root, false, false, false, false, false, false,
        Some("missing"), false, false, false, false, false, false,
    );
    let message = outcome.unwrap_err().to_string();

    assert!(message.contains("unknown scope `missing`"));
    assert!(message.contains("Available scopes: alpha, beta"));
    assert!(!root.join(".tsift/indexes/missing/index.db").exists());
}
25503
/// When two submodules share the leaf name `foo`, each must get its own index database under
/// a directory derived from its full path.
#[test]
fn workspace_index_uses_unique_scope_ids_when_leaf_names_collide() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let pkg_db = root.join(".tsift/indexes/pkg/app/foo/index.db");
    let vendor_db = root.join(".tsift/indexes/vendor/foo/index.db");
    assert!(pkg_db.exists());
    assert!(vendor_db.exists());
}
25536
/// After indexing the workspace, a federated symbol search from the root must find
/// `alpha_helper`.
#[test]
fn federated_search_across_submodules() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diag) = federated_symbol_search(root, "alpha_helper", 10, &opts).unwrap();

    assert!(
        !hits.is_empty(),
        "should find alpha_helper via federated search"
    );
}
25572
/// With `alpha` configured as tier "isolated", a federated symbol search for `alpha_helper`
/// must return no hits.
#[test]
fn federated_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // The override has to be on disk before the workspace is indexed.
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config = r#"
[overrides.alpha]
tier = "isolated"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config).unwrap();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diag) = federated_symbol_search(root, "alpha_helper", 10, &opts).unwrap();

    assert!(
        hits.is_empty(),
        "isolated submodule should not appear in federated search"
    );
}
25618
/// With `alpha` configured as tier "isolated", a federated lexical search for "fn" must still
/// return hits, and every hit must come from `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config = r#"
[overrides.alpha]
tier = "isolated"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config).unwrap();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();

    assert!(
        !response.hits.is_empty(),
        "shared scopes should still contribute lexical hits"
    );
    let only_beta_hits = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta_hits,
        "isolated scope should not leak lexical hits: {:?}",
        response.hits
    );
}
25673
/// With `alpha` configured as tier "private", a federated lexical search for "fn" must still
/// return hits, and every hit must come from `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_private_tier() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config = r#"
[overrides.alpha]
tier = "private"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config).unwrap();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();

    assert!(
        !response.hits.is_empty(),
        "shared scopes should still contribute lexical hits"
    );
    let only_beta_hits = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta_hits,
        "private scope should not leak lexical hits: {:?}",
        response.hits
    );
}
25728
/// The index database resolved for scope `alpha` must return `alpha_main` as the first
/// symbol-search hit.
#[test]
fn scoped_search_finds_submodule_symbols() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let cfg = config::Config::load(root).unwrap();
    let alpha_db_path = cfg.db_path_for(root, "alpha");
    let db = index::IndexDb::open(&alpha_db_path).unwrap();

    let hits = db.symbol_search("alpha_main", 10).unwrap();
    assert!(!hits.is_empty());
    assert_eq!(hits[0].name, "alpha_main");
}
25756
/// `cmd_search` with the unknown scope `missing` must fail and list the available scopes.
#[test]
fn scoped_search_cmd_errors_on_unknown_scope() {
    let workspace = setup_workspace();
    let root = workspace.path().to_path_buf();

    let outcome = cmd_search(
        "alpha_main".to_string(),
        Some(root),
        5,
        Some("lexical".to_string()),
        Some("missing".to_string()),
        false, false, false,
        0,
        false, false, false, false, false, false, false,
    );
    let message = outcome.unwrap_err().to_string();

    assert!(message.contains("unknown scope `missing`"));
    assert!(message.contains("Available scopes: alpha, beta"));
}
25785
/// When two submodules share the leaf name `foo`, `cmd_search` scoped to `foo` must fail as
/// ambiguous and mention both full submodule paths.
#[test]
fn scoped_search_cmd_errors_on_ambiguous_legacy_scope_name() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let outcome = cmd_search(
        "vendor_only".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("foo".to_string()),
        false, false, false,
        0,
        false, false, false, false, false, false, false,
    );
    let message = outcome.unwrap_err().to_string();

    assert!(message.contains("ambiguous scope `foo`"));
    assert!(message.contains("pkg/app/foo"));
    assert!(message.contains("vendor/foo"));
}
25832
/// The `alpha` scope's index must record `alpha_helper` among the callees of `alpha_main`.
#[test]
fn scoped_graph_query() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let cfg = config::Config::load(root).unwrap();
    let alpha_db_path = cfg.db_path_for(root, "alpha");
    let db = index::IndexDb::open(&alpha_db_path).unwrap();

    let callees = db.callees_of("alpha_main").unwrap();
    let names: Vec<&str> = callees
        .iter()
        .map(|edge| edge.callee_name.as_str())
        .collect();
    assert!(names.contains(&"alpha_helper"));
}
25860
25861 fn assert_workspace_query_requires_scope(err: anyhow::Error) {
25862 let msg = err.to_string();
25863 assert!(msg.contains("require `--scope <scope>`"), "{msg}");
25864 assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
25865 assert!(msg.contains("Indexed scopes: alpha, beta"), "{msg}");
25866 assert!(
25867 !msg.contains("no index found at"),
25868 "workspace query should fail with scope guidance, got: {msg}"
25869 );
25870 }
25871
25872 fn assert_workspace_search_requires_explicit_target(err: anyhow::Error) {
25873 let msg = err.to_string();
25874 assert!(
25875 msg.contains("requires `--scope <scope>` or `--federated`"),
25876 "{msg}"
25877 );
25878 assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
25879 assert!(msg.contains("Indexed scopes: alpha, beta"), "{msg}");
25880 assert!(
25881 !msg.contains("autoindexing index"),
25882 "workspace search should fail before creating a shared root index: {msg}"
25883 );
25884 }
25885
/// Running `cmd_graph` at the workspace root after indexing must fail with the scope-guidance
/// error checked by `assert_workspace_query_requires_scope`.
#[test]
fn graph_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let outcome = cmd_graph(
        "alpha_main", root, false, false, None, 20,
        false, false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );

    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
25927
/// Running `cmd_graph` from a directory nested inside `src/alpha` must succeed after the
/// workspace has been indexed.
#[test]
fn graph_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let result = cmd_graph(
        "alpha_main",
        &nested,
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_graph failed for a nested workspace path: {:?}",
        result.err()
    );
}
25970
/// Running `cmd_communities` at the workspace root after indexing must fail with the
/// scope-guidance error checked by `assert_workspace_query_requires_scope`.
#[test]
fn communities_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let outcome = cmd_communities(
        root, None, 1, 10,
        false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );

    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
26009
/// Running `cmd_communities` from a directory nested inside `src/alpha` must succeed after
/// the workspace has been indexed.
#[test]
fn communities_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let result = cmd_communities(
        &nested,
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_communities failed for a nested workspace path: {:?}",
        result.err()
    );
}
26049
/// Running `cmd_path` at the workspace root after indexing must fail with the scope-guidance
/// error checked by `assert_workspace_query_requires_scope`.
#[test]
fn path_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let outcome = cmd_path(
        "alpha_main", "alpha_helper", root, None,
        false, false, false, false, false,
        TagpathSearchOpts::default(),
    );

    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
26087
/// Running `cmd_path` from a directory nested inside `src/alpha` must succeed after the
/// workspace has been indexed.
#[test]
fn path_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let result = cmd_path(
        "alpha_main",
        "alpha_helper",
        &nested,
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_path failed for a nested workspace path: {:?}",
        result.err()
    );
}
26126
/// `cmd_path` must still succeed while another connection holds the lock set up by
/// `hold_rollback_journal_lock` on the index database.
#[test]
fn path_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let dir = setup_graph_index();
    let db_path = dir.path().join(".tsift/index.db");
    // Keep the guard bound so the lock is held for the duration of the command.
    let _lock = hold_rollback_journal_lock(&db_path);

    let result = cmd_path(
        "main",
        "helper",
        dir.path(),
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_path failed while the rollback journal was locked: {:?}",
        result.err()
    );
}
26148
/// Running `cmd_explain` at the workspace root after indexing must fail with the
/// scope-guidance error checked by `assert_workspace_query_requires_scope`.
#[test]
fn explain_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();

    cmd_index(
        root, false, false, false, false, false, true,
        None, false, false, false, false, false, false,
    )
    .unwrap();

    let outcome = cmd_explain(
        "alpha_main", root, None, 15,
        false, false, false, false, false, false, false, false,
    );

    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
26188
/// Running `cmd_explain` from a directory nested inside `src/alpha` must succeed after the
/// workspace has been indexed.
#[test]
fn explain_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let result = cmd_explain(
        "alpha_main",
        &nested,
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_explain failed for a nested workspace path: {:?}",
        result.err()
    );
}
26229
/// `cmd_explain` must still succeed while another connection holds the lock set up by
/// `hold_rollback_journal_lock` on the index database.
#[test]
fn explain_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let dir = setup_graph_index();
    let db_path = dir.path().join(".tsift/index.db");
    // Keep the guard bound so the lock is held for the duration of the command.
    let _lock = hold_rollback_journal_lock(&db_path);

    let result = cmd_explain(
        "main",
        dir.path(),
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_explain failed while the rollback journal was locked: {:?}",
        result.err()
    );
}
26253
/// Community detection over the graph fixture's edges must report at least one node and at
/// least one community.
#[test]
fn community_detection_groups_related() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&index_path).unwrap();
    let edges = db.all_edges().unwrap();

    let detection = graph::detect_communities(&edges);

    assert!(detection.node_count > 0);
    assert!(!detection.communities.is_empty());
}
26265
/// `cmd_communities` on an empty directory with no index must succeed and leave an index
/// database at `.tsift/index.db`.
#[test]
fn community_cmd_autoindexes_missing_index_by_default() {
    let dir = tempfile::tempdir().unwrap();
    let result = cmd_communities(
        dir.path(),
        None,
        2,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_communities failed without an existing index: {:?}",
        result.err()
    );
    assert!(dir.path().join(".tsift/index.db").exists());
}
26286
/// In the graph fixture, `main` reaches `helper` in exactly one hop.
#[test]
fn path_finds_connected_symbols() {
    let dir = setup_graph_index();
    let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
    let edges = db.all_edges().unwrap();

    // A single `expect` replaces the `is_some()` assertion followed by `unwrap()`.
    let path = graph::shortest_path(&edges, "main", "helper")
        .expect("expected a path from `main` to `helper`");
    assert_eq!(path.hops, 1);
}
26299
/// `graph::shortest_path` returns `None` when the target symbol is not in the graph.
#[test]
fn path_returns_none_for_unknown() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&index_path).unwrap();
    let edges = db.all_edges().unwrap();

    let route = graph::shortest_path(&edges, "main", "nonexistent");
    assert!(route.is_none());
}
26307
/// `cmd_path` on an empty directory with no index must succeed and leave an index database
/// at `.tsift/index.db`.
#[test]
fn path_cmd_autoindexes_missing_index_by_default() {
    let dir = tempfile::tempdir().unwrap();
    let result = cmd_path(
        "a",
        "b",
        dir.path(),
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_path failed without an existing index: {:?}",
        result.err()
    );
    assert!(dir.path().join(".tsift/index.db").exists());
}
26327
/// The graph fixture's index must describe `main` as a symbol of kind "function".
#[test]
fn explain_shows_symbol_info() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open(&index_path).unwrap();

    let symbols = db.symbol_info("main").unwrap();
    assert!(!symbols.is_empty());

    let first = &symbols[0];
    assert_eq!(first.name, "main");
    assert_eq!(first.kind, "function");
}
26339
/// `cmd_explain` on an empty directory with no index must succeed and leave an index
/// database at `.tsift/index.db`.
#[test]
fn explain_cmd_autoindexes_missing_index_by_default() {
    let dir = tempfile::tempdir().unwrap();
    let result = cmd_explain(
        "main",
        dir.path(),
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    );

    // Include the error in the failure message; a bare `is_ok()` assertion hides the cause.
    assert!(
        result.is_ok(),
        "cmd_explain failed without an existing index: {:?}",
        result.err()
    );
    assert!(dir.path().join(".tsift/index.db").exists());
}
26361
26362 fn hold_write_lock(db_path: &std::path::Path) -> Connection {
26363 let conn = Connection::open(db_path).unwrap();
26364 conn.execute_batch("BEGIN IMMEDIATE").unwrap();
26365 conn
26366 }
26367
26368 fn hold_writer_lock(lock_path: &std::path::Path) -> std::fs::File {
26369 use fs4::fs_std::FileExt;
26370 use std::io::Write;
26371
26372 let mut file = std::fs::OpenOptions::new()
26373 .read(true)
26374 .write(true)
26375 .create(true)
26376 .truncate(false)
26377 .open(lock_path)
26378 .unwrap();
26379 assert!(file.try_lock_exclusive().unwrap());
26380 writeln!(file, "{}", std::process::id()).unwrap();
26381 file
26382 }
26383
26384 fn hold_rollback_journal_lock(db_path: &std::path::Path) -> Connection {
26385 let conn = Connection::open(db_path).unwrap();
26386 conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
26387 .unwrap();
26388 std::fs::write(substrate::rollback_journal_path(db_path), "locked").unwrap();
26389 conn
26390 }
26391
26392 fn hold_wal_database_lock(db_path: &std::path::Path) -> Connection {
26393 let conn = Connection::open(db_path).unwrap();
26394 conn.execute_batch(
26395 "PRAGMA journal_mode=WAL;
26396 PRAGMA wal_autocheckpoint=0;
26397 CREATE TABLE IF NOT EXISTS wal_lock_probe (id INTEGER PRIMARY KEY);
26398 INSERT INTO wal_lock_probe DEFAULT VALUES;
26399 PRAGMA locking_mode=EXCLUSIVE;
26400 BEGIN EXCLUSIVE;",
26401 )
26402 .unwrap();
26403 assert!(substrate::wal_sidecar_path(db_path).exists());
26404 conn
26405 }
26406
26407 #[test]
26408 fn index_cmd_reports_wal_sidecar_diagnostics_without_tsift_writer_lock() {
26409 let dir = setup_graph_index();
26410 let db_path = dir.path().join(".tsift/index.db");
26411 let _lock = hold_wal_database_lock(&db_path);
26412
26413 let err = cmd_index(
26414 dir.path(),
26415 false,
26416 false,
26417 false,
26418 false,
26419 false,
26420 false,
26421 None,
26422 false,
26423 false,
26424 false,
26425 false,
26426 false,
26427 false,
26428 )
26429 .unwrap_err();
26430
26431 let msg = err.to_string();
26432 assert!(msg.contains("indexing"));
26433 assert!(msg.contains("lock diagnostics:"));
26434 assert!(msg.contains("lock: absent"));
26435 assert!(msg.contains("wal: present") || msg.contains("shm: present"));
26436 assert!(msg.contains("wedged writer holding live WAL sidecars"));
26437 assert!(msg.contains("snapshot fallback"));
26438 }
26439
26440 #[test]
26441 fn search_cmd_succeeds_while_writer_lock_is_held() {
26442 let dir = setup_graph_index();
26443 let db_path = dir.path().join(".tsift/index.db");
26444 let _lock = hold_write_lock(&db_path);
26445
26446 let result = cmd_search(
26447 "main".to_string(),
26448 Some(dir.path().to_path_buf()),
26449 5,
26450 Some("lexical".to_string()),
26451 None,
26452 false,
26453 false,
26454 false,
26455 0,
26456 true,
26457 false,
26458 false,
26459 false,
26460 false,
26461 false,
26462 false,
26463 );
26464
26465 assert!(result.is_ok());
26466 }
26467
26468 #[test]
26469 fn search_cmd_uses_snapshot_fallback_when_rollback_journal_lock_appears_after_precheck() {
26470 let dir = setup_graph_index();
26471 let _hook = install_search_post_precheck_lock(dir.path().join(".tsift/index.db"));
26472
26473 let result = cmd_search(
26474 "main".to_string(),
26475 Some(dir.path().to_path_buf()),
26476 5,
26477 Some("lexical".to_string()),
26478 None,
26479 false,
26480 false,
26481 false,
26482 0,
26483 true,
26484 false,
26485 false,
26486 false,
26487 false,
26488 false,
26489 false,
26490 );
26491
26492 assert!(result.is_ok());
26493 }
26494
26495 #[test]
26496 fn search_cmd_uses_wal_snapshot_fallback_when_lock_appears_after_precheck() {
26497 let dir = setup_graph_index();
26498 let _hook = install_search_post_precheck_wal_lock(dir.path().join(".tsift/index.db"));
26499
26500 let result = cmd_search(
26501 "main".to_string(),
26502 Some(dir.path().to_path_buf()),
26503 5,
26504 Some("lexical".to_string()),
26505 None,
26506 false,
26507 false,
26508 false,
26509 0,
26510 true,
26511 false,
26512 false,
26513 false,
26514 false,
26515 false,
26516 false,
26517 );
26518
26519 assert!(result.is_ok());
26520 }
26521
26522 #[test]
26523 fn search_cmd_fails_fast_when_autoindex_disabled_and_index_is_stale() {
26524 let dir = setup_graph_index();
26525 std::thread::sleep(std::time::Duration::from_millis(50));
26526 std::fs::write(
26527 dir.path().join("main.rs"),
26528 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26529 )
26530 .unwrap();
26531
26532 let err = cmd_search(
26533 "helper".to_string(),
26534 Some(dir.path().to_path_buf()),
26535 5,
26536 Some("lexical".to_string()),
26537 None,
26538 false,
26539 false,
26540 false,
26541 0,
26542 false,
26543 false,
26544 false,
26545 false,
26546 false,
26547 false,
26548 false,
26549 )
26550 .unwrap_err();
26551
26552 assert!(err.to_string().contains("search aborted"));
26553 assert!(err.to_string().contains("index is stale"));
26554 assert!(err.to_string().contains("--no-autoindex"));
26555 }
26556
26557 #[test]
26558 fn search_cmd_reports_stale_when_root_index_is_locked_by_rollback_journal() {
26559 let dir = setup_graph_index();
26560 std::thread::sleep(std::time::Duration::from_millis(50));
26561 std::fs::write(
26562 dir.path().join("main.rs"),
26563 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26564 )
26565 .unwrap();
26566 let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/index.db"));
26567
26568 let err = cmd_search(
26569 "helper".to_string(),
26570 Some(dir.path().to_path_buf()),
26571 5,
26572 Some("lexical".to_string()),
26573 None,
26574 false,
26575 false,
26576 false,
26577 0,
26578 false,
26579 false,
26580 false,
26581 false,
26582 false,
26583 false,
26584 false,
26585 )
26586 .unwrap_err();
26587
26588 assert!(err.to_string().contains("search aborted"));
26589 assert!(err.to_string().contains("index is stale"));
26590 assert!(!err.to_string().contains("database is locked"));
26591 }
26592
26593 #[test]
26594 fn search_cmd_autoindexes_stale_index_by_default() {
26595 let dir = setup_graph_index();
26596 std::thread::sleep(std::time::Duration::from_millis(50));
26597 std::fs::write(
26598 dir.path().join("main.rs"),
26599 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26600 )
26601 .unwrap();
26602
26603 let result = cmd_search(
26604 "helper".to_string(),
26605 Some(dir.path().to_path_buf()),
26606 5,
26607 Some("lexical".to_string()),
26608 None,
26609 false,
26610 false,
26611 true,
26612 0,
26613 false,
26614 false,
26615 false,
26616 false,
26617 false,
26618 false,
26619 false,
26620 );
26621
26622 assert!(result.is_ok());
26623
26624 let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
26625 let summary = db.compute_changes(dir.path()).unwrap();
26626 assert_eq!(summary.new + summary.modified + summary.deleted, 0);
26627 }
26628
26629 #[test]
26630 fn search_cmd_keeps_read_only_results_when_active_writer_blocks_autoindex() {
26631 let dir = setup_graph_index();
26632 std::thread::sleep(std::time::Duration::from_millis(50));
26633 std::fs::write(
26634 dir.path().join("main.rs"),
26635 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26636 )
26637 .unwrap();
26638 let _lock = hold_writer_lock(&dir.path().join(".tsift/index.lock"));
26639
26640 let result = cmd_search(
26641 "helper".to_string(),
26642 Some(dir.path().to_path_buf()),
26643 5,
26644 Some("lexical".to_string()),
26645 None,
26646 false,
26647 false,
26648 true,
26649 0,
26650 false,
26651 false,
26652 false,
26653 false,
26654 false,
26655 false,
26656 false,
26657 );
26658
26659 assert!(result.is_ok());
26660
26661 let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
26662 let summary = db.compute_changes(dir.path()).unwrap();
26663 assert_eq!(summary.modified, 1);
26664 }
26665
26666 #[test]
26667 fn search_cmd_autoindex_reports_lock_diagnostics_when_rollback_journal_blocks_writer() {
26668 let dir = setup_graph_index();
26669 std::thread::sleep(std::time::Duration::from_millis(50));
26670 std::fs::write(
26671 dir.path().join("main.rs"),
26672 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26673 )
26674 .unwrap();
26675 let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/index.db"));
26676
26677 let err = cmd_search(
26678 "helper".to_string(),
26679 Some(dir.path().to_path_buf()),
26680 5,
26681 Some("lexical".to_string()),
26682 None,
26683 false,
26684 false,
26685 true,
26686 0,
26687 false,
26688 false,
26689 false,
26690 false,
26691 false,
26692 false,
26693 false,
26694 )
26695 .unwrap_err();
26696
26697 let msg = err.to_string();
26698 assert!(msg.contains("autoindexing index"));
26699 assert!(msg.contains("lock diagnostics:"));
26700 assert!(msg.contains("journal: present"));
26701 assert!(msg.contains("next: inspect the host for a wedged rollback-journal writer"));
26702 }
26703
26704 #[test]
26705 fn search_cmd_uses_ancestor_project_root_for_nested_paths() {
26706 let dir = setup_graph_index();
26707 let nested = dir.path().join("src/nested");
26708 std::fs::create_dir_all(&nested).unwrap();
26709
26710 let result = cmd_search(
26711 "helper".to_string(),
26712 Some(nested.clone()),
26713 5,
26714 Some("lexical".to_string()),
26715 None,
26716 false,
26717 false,
26718 true,
26719 0,
26720 false,
26721 false,
26722 false,
26723 false,
26724 false,
26725 false,
26726 false,
26727 );
26728
26729 assert!(result.is_ok());
26730 assert!(!nested.join(".tsift/index.db").exists());
26731 }
26732
26733 #[test]
26734 fn exact_search_returns_literal_matches() {
26735 let dir = tempfile::tempdir().unwrap();
26736 std::fs::write(dir.path().join("notes.txt"), "alpha\nclaudescore-3\nbeta\n").unwrap();
26737
26738 let response = run_exact_search_with_timeout(dir.path(), "claudescore-3", 5, 0).unwrap();
26739
26740 assert_eq!(response.strategy, "exact");
26741 assert_eq!(response.hits.len(), 1);
26742 assert!(response.hits[0].path.ends_with("notes.txt"));
26743 assert_eq!(response.hits[0].location.as_deref(), Some("line 2"));
26744 assert!(response.hits[0].snippet.contains("claudescore-3"));
26745 }
26746
26747 #[test]
26748 fn exact_search_skips_stale_index_precheck() {
26749 let dir = setup_graph_index();
26750 std::thread::sleep(std::time::Duration::from_millis(50));
26751 std::fs::write(
26752 dir.path().join("main.rs"),
26753 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n",
26754 )
26755 .unwrap();
26756
26757 let result = cmd_search(
26758 "println!(\"updated\")".to_string(),
26759 Some(dir.path().to_path_buf()),
26760 5,
26761 Some("exact".to_string()),
26762 None,
26763 false,
26764 false,
26765 false,
26766 0,
26767 false,
26768 false,
26769 false,
26770 false,
26771 false,
26772 false,
26773 false,
26774 );
26775
26776 assert!(result.is_ok());
26777 }
26778
26779 #[test]
26780 fn workspace_exact_search_does_not_require_shared_root_index() {
26781 let dir = setup_workspace();
26782 cmd_index(
26783 dir.path(),
26784 false,
26785 false,
26786 false,
26787 false,
26788 false,
26789 true,
26790 None,
26791 false,
26792 false,
26793 false,
26794 false,
26795 false,
26796 false,
26797 )
26798 .unwrap();
26799
26800 let result = cmd_search(
26801 "alpha_helper".to_string(),
26802 Some(dir.path().to_path_buf()),
26803 5,
26804 Some("exact".to_string()),
26805 None,
26806 false,
26807 false,
26808 false,
26809 0,
26810 false,
26811 false,
26812 false,
26813 false,
26814 false,
26815 false,
26816 false,
26817 );
26818
26819 assert!(result.is_ok());
26820 assert!(!dir.path().join(".tsift/index.db").exists());
26821 }
26822
26823 #[test]
26824 fn identifier_like_query_prefers_exact_search() {
26825 assert!(query_prefers_exact_search("claudescore-3"));
26826 assert!(query_prefers_exact_search("alpha_helper"));
26827 assert!(query_prefers_exact_search("src/main.rs"));
26828 assert!(query_prefers_exact_search("crate::module"));
26829 assert!(!query_prefers_exact_search("authenticate"));
26830 assert!(!query_prefers_exact_search("fn main"));
26831 assert!(!query_prefers_exact_search("."));
26832 }
26833
26834 #[test]
26835 fn resolve_search_strategy_auto_promotes_identifier_like_queries() {
26836 assert_eq!(resolve_search_strategy("claudescore-3", None), "exact");
26837 assert_eq!(resolve_search_strategy("authenticate", None), "lexical");
26838 assert_eq!(
26839 resolve_search_strategy("claudescore-3", Some("hybrid".to_string())),
26840 "hybrid"
26841 );
26842 }
26843
26844 #[test]
26845 fn workspace_identifier_like_search_auto_uses_exact_backend() {
26846 let dir = setup_workspace();
26847 cmd_index(
26848 dir.path(),
26849 false,
26850 false,
26851 false,
26852 false,
26853 false,
26854 true,
26855 None,
26856 false,
26857 false,
26858 false,
26859 false,
26860 false,
26861 false,
26862 )
26863 .unwrap();
26864
26865 let result = cmd_search(
26866 "alpha_helper".to_string(),
26867 Some(dir.path().to_path_buf()),
26868 5,
26869 None,
26870 None,
26871 false,
26872 false,
26873 false,
26874 0,
26875 false,
26876 false,
26877 false,
26878 false,
26879 false,
26880 false,
26881 false,
26882 );
26883
26884 assert!(result.is_ok());
26885 assert!(!dir.path().join(".tsift/index.db").exists());
26886 }
26887
26888 #[test]
26889 fn index_cmd_uses_ancestor_project_root_for_nested_paths() {
26890 let dir = setup_graph_index();
26891 let nested = dir.path().join("src/nested");
26892 std::fs::create_dir_all(&nested).unwrap();
26893 std::fs::write(nested.join("extra.rs"), "fn nested_helper() {}\n").unwrap();
26894
26895 let result = cmd_index(
26896 &nested, false, false, false, false, false, false, None, false, false, false, false,
26897 false, false,
26898 );
26899
26900 assert!(result.is_ok());
26901 assert!(dir.path().join(".tsift/index.db").exists());
26902 assert!(!nested.join(".tsift/index.db").exists());
26903 }
26904
26905 #[test]
26906 fn workspace_index_cmd_uses_ancestor_project_root_for_nested_paths() {
26907 let dir = setup_workspace();
26908 let nested = dir.path().join("docs/nested");
26909 std::fs::create_dir_all(&nested).unwrap();
26910
26911 let result = cmd_index(
26912 &nested, false, false, false, false, false, true, None, false, false, false, false,
26913 false, false,
26914 );
26915
26916 let cfg = config::Config::load(dir.path()).unwrap();
26917
26918 assert!(result.is_ok());
26919 assert!(cfg.db_path_for(dir.path(), "alpha").exists());
26920 assert!(cfg.db_path_for(dir.path(), "beta").exists());
26921 }
26922
26923 #[test]
26924 fn status_cmd_autoindexes_missing_workspace_scopes() {
26925 let dir = setup_workspace();
26926 let cfg = config::Config::load(dir.path()).unwrap();
26927 let alpha = config::Config::resolve_submodule(dir.path(), "alpha").unwrap();
26928 let alpha_db_path = cfg.db_path_for(dir.path(), &alpha.id);
26929 let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();
26930 alpha_db.apply_changes(&alpha.source_root).unwrap();
26931
26932 let beta_db_path = cfg.db_path_for(dir.path(), "beta");
26933 assert!(!beta_db_path.exists());
26934
26935 cmd_status(
26936 dir.path(),
26937 StatusCommandOptions {
26938 fix: false,
26939 no_fix: false,
26940 json_output: true,
26941 compact: false,
26942 pretty: false,
26943 terse: false,
26944 schema: false,
26945 },
26946 )
26947 .unwrap();
26948
26949 assert!(beta_db_path.exists());
26950 let report = status::check_status(dir.path()).unwrap();
26951 assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
26952 }
26953
26954 #[test]
26955 fn status_cmd_autoindexes_workspace_when_all_scopes_are_missing() {
26956 let dir = setup_workspace();
26957 let cfg = config::Config::load(dir.path()).unwrap();
26958
26959 cmd_status(
26960 dir.path(),
26961 StatusCommandOptions {
26962 fix: false,
26963 no_fix: false,
26964 json_output: true,
26965 compact: false,
26966 pretty: false,
26967 terse: false,
26968 schema: false,
26969 },
26970 )
26971 .unwrap();
26972
26973 assert!(cfg.db_path_for(dir.path(), "alpha").exists());
26974 assert!(cfg.db_path_for(dir.path(), "beta").exists());
26975 let report = status::check_status(dir.path()).unwrap();
26976 assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
26977 }
26978
26979 #[test]
26980 fn status_cmd_fix_refreshes_stale_index() {
26981 let dir = setup_graph_index();
26982 std::thread::sleep(std::time::Duration::from_millis(50));
26983 std::fs::write(
26984 dir.path().join("main.rs"),
26985 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
26986 )
26987 .unwrap();
26988
26989 let report = status::check_status(dir.path()).unwrap();
26990 assert!(matches!(report.index, status::IndexStatus::Stale { .. }));
26991
26992 cmd_status(
26993 dir.path(),
26994 StatusCommandOptions {
26995 fix: false,
26996 no_fix: false,
26997 json_output: true,
26998 compact: false,
26999 pretty: false,
27000 terse: false,
27001 schema: false,
27002 },
27003 )
27004 .unwrap();
27005
27006 let report = status::check_status(dir.path()).unwrap();
27007 assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
27008 }
27009
27010 #[test]
27011 fn status_cmd_reports_wal_snapshot_recovery_without_tsift_writer_lock() {
27012 let dir = setup_graph_index();
27013 let db_path = dir.path().join(".tsift/index.db");
27014 let _lock = hold_wal_database_lock(&db_path);
27015
27016 cmd_status(
27017 dir.path(),
27018 StatusCommandOptions {
27019 fix: false,
27020 no_fix: false,
27021 json_output: true,
27022 compact: false,
27023 pretty: false,
27024 terse: false,
27025 schema: false,
27026 },
27027 )
27028 .unwrap();
27029
27030 let report = status::check_status(dir.path()).unwrap();
27031 assert!(matches!(
27032 report.index,
27033 status::IndexStatus::Fresh {
27034 recovery: Some(index::ReadOnlyRecovery::SnapshotFallbackWal),
27035 ..
27036 }
27037 ));
27038 let locks = status::check_locks(dir.path(), None, None).unwrap();
27039 assert!(matches!(
27040 locks.writer_lock,
27041 status::WriterLockStatus::Absent { .. }
27042 ));
27043 assert!(locks.wal_sidecar.present || locks.shared_memory_sidecar.present);
27044 assert!(
27045 locks
27046 .recommended_action
27047 .contains("wedged writer holding live WAL sidecars")
27048 );
27049 }
27050
27051 #[test]
27052 fn locks_report_uses_ancestor_project_root_for_nested_paths() {
27053 let dir = setup_graph_index();
27054 let nested = dir.path().join("src/nested");
27055 std::fs::create_dir_all(&nested).unwrap();
27056
27057 let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
27058 let report = status::check_locks(&root, Some(&nested), None).unwrap();
27059
27060 assert_eq!(report.source_root, dir.path());
27061 assert_eq!(report.db_path, dir.path().join(".tsift/index.db"));
27062 }
27063
27064 #[test]
27065 fn workspace_locks_report_infers_scope_from_nested_path() {
27066 let dir = setup_workspace();
27067 cmd_index(
27068 dir.path(),
27069 false,
27070 false,
27071 false,
27072 false,
27073 false,
27074 true,
27075 None,
27076 false,
27077 false,
27078 false,
27079 false,
27080 false,
27081 false,
27082 )
27083 .unwrap();
27084 let nested = dir.path().join("src/alpha/nested");
27085 std::fs::create_dir_all(&nested).unwrap();
27086
27087 let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
27088 let report = status::check_locks(&root, Some(&nested), None).unwrap();
27089 let cfg = config::Config::load(dir.path()).unwrap();
27090
27091 assert_eq!(report.label, "submodule `alpha` index");
27092 assert_eq!(report.source_root, dir.path().join("src/alpha"));
27093 assert_eq!(report.db_path, cfg.db_path_for(dir.path(), "alpha"));
27094 assert_eq!(
27095 report.reindex_command,
27096 format!("tsift index --submodule alpha {}", dir.path().display())
27097 );
27098 }
27099
27100 #[test]
27101 fn scoped_search_cmd_autoindexes_stale_submodule_index_by_default() {
27102 let dir = setup_workspace();
27103 cmd_index(
27104 dir.path(),
27105 false,
27106 false,
27107 false,
27108 false,
27109 false,
27110 true,
27111 None,
27112 false,
27113 false,
27114 false,
27115 false,
27116 false,
27117 false,
27118 )
27119 .unwrap();
27120
27121 let alpha = dir.path().join("src/alpha/lib.rs");
27122 std::thread::sleep(std::time::Duration::from_millis(50));
27123 std::fs::write(
27124 &alpha,
27125 "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27126 )
27127 .unwrap();
27128
27129 let result = cmd_search(
27130 "alpha_helper".to_string(),
27131 Some(dir.path().to_path_buf()),
27132 5,
27133 Some("lexical".to_string()),
27134 Some("alpha".to_string()),
27135 false,
27136 false,
27137 true,
27138 0,
27139 false,
27140 false,
27141 false,
27142 false,
27143 false,
27144 false,
27145 false,
27146 );
27147
27148 assert!(result.is_ok());
27149
27150 let cfg = config::Config::load(dir.path()).unwrap();
27151 let db = index::IndexDb::open_read_only(&cfg.db_path_for(dir.path(), "alpha")).unwrap();
27152 let summary = db.compute_changes(&dir.path().join("src/alpha")).unwrap();
27153 assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27154 }
27155
27156 #[test]
27157 fn scoped_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
27158 let dir = setup_workspace();
27159 cmd_index(
27160 dir.path(),
27161 false,
27162 false,
27163 false,
27164 false,
27165 false,
27166 true,
27167 None,
27168 false,
27169 false,
27170 false,
27171 false,
27172 false,
27173 false,
27174 )
27175 .unwrap();
27176
27177 let alpha = dir.path().join("src/alpha/lib.rs");
27178 std::thread::sleep(std::time::Duration::from_millis(50));
27179 std::fs::write(
27180 &alpha,
27181 "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27182 )
27183 .unwrap();
27184
27185 let cfg = config::Config::load(dir.path()).unwrap();
27186 let _lock = hold_rollback_journal_lock(&cfg.db_path_for(dir.path(), "alpha"));
27187
27188 let err = cmd_search(
27189 "alpha_helper".to_string(),
27190 Some(dir.path().to_path_buf()),
27191 5,
27192 Some("lexical".to_string()),
27193 Some("alpha".to_string()),
27194 false,
27195 false,
27196 false,
27197 0,
27198 false,
27199 false,
27200 false,
27201 false,
27202 false,
27203 false,
27204 false,
27205 )
27206 .unwrap_err();
27207
27208 assert!(err.to_string().contains("search aborted"));
27209 assert!(err.to_string().contains("submodule `alpha` index"));
27210 assert!(!err.to_string().contains("database is locked"));
27211 }
27212
27213 #[test]
27214 fn federated_search_cmd_autoindexes_stale_indexes_by_default() {
27215 let dir = setup_workspace();
27216 cmd_index(
27217 dir.path(),
27218 false,
27219 false,
27220 false,
27221 false,
27222 false,
27223 true,
27224 None,
27225 false,
27226 false,
27227 false,
27228 false,
27229 false,
27230 false,
27231 )
27232 .unwrap();
27233
27234 let alpha = dir.path().join("src/alpha/lib.rs");
27235 std::thread::sleep(std::time::Duration::from_millis(50));
27236 std::fs::write(
27237 &alpha,
27238 "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27239 )
27240 .unwrap();
27241
27242 let result = cmd_search(
27243 "alpha_helper".to_string(),
27244 Some(dir.path().to_path_buf()),
27245 5,
27246 Some("lexical".to_string()),
27247 None,
27248 true,
27249 false,
27250 true,
27251 0,
27252 false,
27253 false,
27254 false,
27255 false,
27256 false,
27257 false,
27258 false,
27259 );
27260
27261 assert!(result.is_ok());
27262
27263 let cfg = config::Config::load(dir.path()).unwrap();
27264 let db = index::IndexDb::open_read_only(&cfg.db_path_for(dir.path(), "alpha")).unwrap();
27265 let summary = db.compute_changes(&dir.path().join("src/alpha")).unwrap();
27266 assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27267 }
27268
27269 #[test]
27270 fn federated_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
27271 let dir = setup_workspace();
27272 cmd_index(
27273 dir.path(),
27274 false,
27275 false,
27276 false,
27277 false,
27278 false,
27279 true,
27280 None,
27281 false,
27282 false,
27283 false,
27284 false,
27285 false,
27286 false,
27287 )
27288 .unwrap();
27289
27290 let alpha = dir.path().join("src/alpha/lib.rs");
27291 std::thread::sleep(std::time::Duration::from_millis(50));
27292 std::fs::write(
27293 &alpha,
27294 "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27295 )
27296 .unwrap();
27297
27298 let cfg = config::Config::load(dir.path()).unwrap();
27299 let _lock = hold_rollback_journal_lock(&cfg.db_path_for(dir.path(), "alpha"));
27300
27301 let err = cmd_search(
27302 "alpha_helper".to_string(),
27303 Some(dir.path().to_path_buf()),
27304 5,
27305 Some("lexical".to_string()),
27306 None,
27307 true,
27308 false,
27309 false,
27310 30,
27311 false,
27312 false,
27313 false,
27314 false,
27315 false,
27316 false,
27317 false,
27318 )
27319 .unwrap_err();
27320
27321 assert!(err.to_string().contains("stale"));
27322 assert!(err.to_string().contains("submodule `alpha` index"));
27323 assert!(!err.to_string().contains("database is locked"));
27324 }
27325
27326 #[test]
27327 fn workspace_search_cmd_requires_explicit_target_without_shared_root_index() {
27328 let dir = setup_workspace();
27329 cmd_index(
27330 dir.path(),
27331 false,
27332 false,
27333 false,
27334 false,
27335 false,
27336 true,
27337 None,
27338 false,
27339 false,
27340 false,
27341 false,
27342 false,
27343 false,
27344 )
27345 .unwrap();
27346
27347 let err = cmd_search(
27348 "alpha_helper".to_string(),
27349 Some(dir.path().to_path_buf()),
27350 5,
27351 Some("lexical".to_string()),
27352 None,
27353 false,
27354 false,
27355 true,
27356 0,
27357 false,
27358 false,
27359 false,
27360 false,
27361 false,
27362 false,
27363 false,
27364 )
27365 .unwrap_err();
27366
27367 assert_workspace_search_requires_explicit_target(err);
27368 assert!(!dir.path().join(".tsift/index.db").exists());
27369 }
27370
27371 #[test]
27372 fn workspace_search_cmd_infers_scope_from_nested_path() {
27373 let dir = setup_workspace();
27374 cmd_index(
27375 dir.path(),
27376 false,
27377 false,
27378 false,
27379 false,
27380 false,
27381 true,
27382 None,
27383 false,
27384 false,
27385 false,
27386 false,
27387 false,
27388 false,
27389 )
27390 .unwrap();
27391 let nested = dir.path().join("src/alpha/nested");
27392 std::fs::create_dir_all(&nested).unwrap();
27393
27394 let result = cmd_search(
27395 "alpha_helper".to_string(),
27396 Some(nested),
27397 5,
27398 Some("lexical".to_string()),
27399 None,
27400 false,
27401 false,
27402 false,
27403 0,
27404 false,
27405 false,
27406 false,
27407 false,
27408 false,
27409 false,
27410 false,
27411 );
27412
27413 assert!(result.is_ok());
27414 }
27415
27416 #[test]
27417 fn resolve_query_db_path_infers_matching_duplicate_leaf_scope_from_nested_path() {
27418 let dir = setup_workspace_with_duplicate_leaf_names();
27419 cmd_index(
27420 dir.path(),
27421 false,
27422 false,
27423 false,
27424 false,
27425 false,
27426 true,
27427 None,
27428 false,
27429 false,
27430 false,
27431 false,
27432 false,
27433 false,
27434 )
27435 .unwrap();
27436 let nested = dir.path().join("vendor/foo/nested");
27437 std::fs::create_dir_all(&nested).unwrap();
27438
27439 let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
27440 let db_path = resolve_query_db_path(&root, &nested, None).unwrap();
27441 let cfg = config::Config::load(dir.path()).unwrap();
27442
27443 assert_eq!(db_path, cfg.db_path_for(dir.path(), "vendor/foo"));
27444 }
27445
27446 #[test]
27447 fn graph_cmd_succeeds_while_writer_lock_is_held() {
27448 let dir = setup_graph_index();
27449 let db_path = dir.path().join(".tsift/index.db");
27450 let _lock = hold_write_lock(&db_path);
27451
27452 let result = cmd_graph(
27453 "main",
27454 dir.path(),
27455 false,
27456 false,
27457 None,
27458 20,
27459 false,
27460 true,
27461 false,
27462 false,
27463 false,
27464 false,
27465 false,
27466 TagpathSearchOpts::default(),
27467 );
27468
27469 assert!(result.is_ok());
27470 }
27471
27472 #[test]
27473 fn graph_cmd_autoindexes_stale_index_by_default() {
27474 let dir = setup_graph_index();
27475 std::thread::sleep(std::time::Duration::from_millis(50));
27476 std::fs::write(
27477 dir.path().join("main.rs"),
27478 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
27479 )
27480 .unwrap();
27481
27482 let result = cmd_graph(
27483 "helper",
27484 dir.path(),
27485 true,
27486 false,
27487 None,
27488 20,
27489 false,
27490 true,
27491 false,
27492 false,
27493 false,
27494 false,
27495 false,
27496 TagpathSearchOpts::default(),
27497 );
27498
27499 assert!(result.is_ok());
27500 let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
27501 let summary = db.compute_changes(dir.path()).unwrap();
27502 assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27503 }
27504
27505 #[test]
27506 fn graph_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
27507 let dir = setup_graph_index();
27508 let db_path = dir.path().join(".tsift/index.db");
27509 let _lock = hold_rollback_journal_lock(&db_path);
27510
27511 let result = cmd_graph(
27512 "main",
27513 dir.path(),
27514 false,
27515 false,
27516 None,
27517 20,
27518 false,
27519 true,
27520 false,
27521 false,
27522 false,
27523 false,
27524 false,
27525 TagpathSearchOpts::default(),
27526 );
27527
27528 assert!(result.is_ok());
27529 }
27530
27531 #[test]
27532 fn graph_cmd_uses_ancestor_project_root_for_nested_paths() {
27533 let dir = setup_graph_index();
27534 let nested = dir.path().join("src/nested");
27535 std::fs::create_dir_all(&nested).unwrap();
27536
27537 let result = cmd_graph(
27538 "helper",
27539 &nested,
27540 true,
27541 false,
27542 None,
27543 20,
27544 false,
27545 false,
27546 false,
27547 false,
27548 false,
27549 false,
27550 false,
27551 TagpathSearchOpts::default(),
27552 );
27553
27554 assert!(result.is_ok());
27555 }
27556
27557 #[test]
27558 fn communities_cmd_succeeds_while_writer_lock_is_held() {
27559 let dir = setup_graph_index();
27560 let _lock = hold_writer_lock(&dir.path().join(".tsift/index.lock"));
27561
27562 let result = cmd_communities(
27563 dir.path(),
27564 None,
27565 1,
27566 10,
27567 false,
27568 false,
27569 false,
27570 false,
27571 false,
27572 false,
27573 TagpathSearchOpts::default(),
27574 );
27575
27576 assert!(result.is_ok());
27577 }
27578
27579 #[test]
27580 fn communities_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
27581 let dir = setup_graph_index();
27582 let db_path = dir.path().join(".tsift/index.db");
27583 let _lock = hold_rollback_journal_lock(&db_path);
27584
27585 let result = cmd_communities(
27586 dir.path(),
27587 None,
27588 1,
27589 10,
27590 false,
27591 false,
27592 false,
27593 false,
27594 false,
27595 false,
27596 TagpathSearchOpts::default(),
27597 );
27598
27599 assert!(result.is_ok());
27600 }
27601
27602 #[test]
27603 fn lint_finds_entities_from_project_root_index_db() {
27604 let dir = tempfile::tempdir().unwrap();
27605 std::fs::write(dir.path().join("main.rs"), "fn alpha_helper() {}\n").unwrap();
27606 std::fs::write(
27607 dir.path().join("README.md"),
27608 "alpha_helper should be backticked.\n",
27609 )
27610 .unwrap();
27611 cmd_index(
27612 dir.path(),
27613 false,
27614 false,
27615 false,
27616 false,
27617 false,
27618 false,
27619 None,
27620 false,
27621 false,
27622 false,
27623 false,
27624 false,
27625 false,
27626 )
27627 .unwrap();
27628
27629 let root = lint::find_project_root_for_path(&dir.path().join("README.md"))
27630 .unwrap()
27631 .unwrap();
27632 let entities = lint::collect_entities_from_index_path(&root).unwrap();
27633 let result = lint::lint_markdown(&dir.path().join("README.md"), &entities).unwrap();
27634
27635 assert!(
27636 result
27637 .annotations
27638 .iter()
27639 .any(|ann| ann.text == "alpha_helper")
27640 );
27641 }
27642
27643 #[test]
27646 fn search_direct_runs_ok() {
27647 let dir = tempfile::tempdir().unwrap();
27648 let search_dir = dir.path().to_path_buf();
27649 let cache_dir = search_dir.join(".tsift/search-cache");
27650 std::fs::write(search_dir.join("test.rs"), "fn main() {}").unwrap();
27651 let result = run_sift_search(&search_dir, &cache_dir, "main", 1, "lexical");
27652 assert!(result.is_ok(), "direct search should succeed");
27653 assert!(
27654 cache_dir.exists(),
27655 "search should create the configured cache dir"
27656 );
27657 }
27658
27659 #[test]
27660 fn search_timeout_zero_disables_timeout() {
27661 let dir = tempfile::tempdir().unwrap();
27662 let search_dir = dir.path().to_path_buf();
27663 let cache_dir = search_dir.join(".tsift/search-cache");
27664 std::fs::write(search_dir.join("test.rs"), "fn main() {}").unwrap();
27665 let result = run_search_with_timeout(&search_dir, &cache_dir, "main", 1, 0, "lexical", &[]);
27666 assert!(result.is_ok(), "timeout=0 should still work (no timeout)");
27667 assert!(
27668 cache_dir.exists(),
27669 "timeout=0 should keep using the stable search cache dir"
27670 );
27671 }
27672
27673 #[test]
27674 fn search_timeout_message_reports_missing_index_as_rebuild_needed() {
27675 let dir = tempfile::tempdir().unwrap();
27676 std::fs::write(dir.path().join("main.rs"), "fn main() {}\n").unwrap();
27677 cmd_index(
27678 dir.path(),
27679 false,
27680 false,
27681 false,
27682 false,
27683 false,
27684 false,
27685 None,
27686 false,
27687 false,
27688 false,
27689 false,
27690 false,
27691 false,
27692 )
27693 .unwrap();
27694 let db_path = dir.path().join(".tsift/index.db");
27695 std::fs::remove_file(&db_path).unwrap();
27696 let search_target = SearchIndexTarget {
27697 label: "index".to_string(),
27698 db_path,
27699 source_root: dir.path().to_path_buf(),
27700 scope_name: None,
27701 reindex_cmd: format!("tsift index {}", dir.path().display()),
27702 };
27703
27704 let message = search_timeout_message(1, "lexical", &[search_target]).unwrap();
27705
27706 assert!(message.contains("timed out after 1s"));
27707 assert!(message.contains("index is missing"));
27708 assert!(message.contains("Run `tsift index"));
27709 assert!(!message.contains("search root looks fresh"));
27710 }
27711
27712 #[test]
27713 fn search_worker_output_path_uses_json_suffix() {
27714 let path = next_search_worker_output_path();
27715 assert!(path.extension().is_some_and(|ext| ext == "json"));
27716 }
27717
27718 #[test]
27721 fn index_quiet_suppresses_file_list() {
27722 let dir = setup_graph_index();
27723 let result = cmd_index(
27724 dir.path(),
27725 false,
27726 true,
27727 false,
27728 false,
27729 true,
27730 false,
27731 None,
27732 false,
27733 false,
27734 false,
27735 false,
27736 false,
27737 false,
27738 );
27739 assert!(result.is_ok());
27740 }
27741
27742 #[test]
27743 fn index_exit_code_implies_quiet() {
27744 let dir = setup_graph_index();
27745 let result = cmd_index(
27746 dir.path(),
27747 false,
27748 true,
27749 false,
27750 false,
27751 false,
27752 false,
27753 None,
27754 false,
27755 false,
27756 false,
27757 false,
27758 false,
27759 false,
27760 );
27761 assert!(result.is_ok());
27762 }
27763
27764 #[test]
27765 fn index_quiet_json_omits_changes() {
27766 let dir = setup_graph_index();
27767 let result = cmd_index(
27768 dir.path(),
27769 false,
27770 true,
27771 false,
27772 false,
27773 true,
27774 false,
27775 None,
27776 true,
27777 false,
27778 false,
27779 false,
27780 false,
27781 false,
27782 );
27783 assert!(result.is_ok());
27784 }
27785
27786 #[test]
27787 fn cli_workflow_defaults_to_search_topic() {
27788 let cli = parse_cli(["tsift", "workflow"]);
27789 match cli.command {
27790 Some(Commands::Workflow { topic, json }) => {
27791 assert_eq!(topic, "search");
27792 assert!(!json);
27793 }
27794 _ => panic!("expected Workflow command"),
27795 }
27796 }
27797
27798 #[test]
27799 fn search_workflow_recipe_preserves_handles_across_expansions() {
27800 let recipe = workflow::search_workflow_recipe();
27801 let step_names: Vec<&str> = recipe.steps.iter().map(|step| step.name).collect();
27802 assert_eq!(
27803 step_names,
27804 vec![
27805 "exact-anchor",
27806 "semantic-search",
27807 "explain-symbol",
27808 "summarize-selection",
27809 "digest-expansion"
27810 ]
27811 );
27812 assert!(
27813 recipe
27814 .handle_contract
27815 .iter()
27816 .any(|item| item.contains("originating command"))
27817 );
27818 assert!(
27819 recipe.steps[1]
27820 .preserves
27821 .iter()
27822 .any(|item| item.contains("sfam-*"))
27823 );
27824 assert!(
27825 recipe.steps[2]
27826 .preserves
27827 .iter()
27828 .any(|item| item.contains("ecall-*"))
27829 );
27830 assert!(
27831 recipe.steps[4]
27832 .preserves
27833 .iter()
27834 .any(|item| item.contains("artifact handles"))
27835 );
27836 }
27837
27838 #[test]
27841 fn to_json_compact_default() {
27842 let val = serde_json::json!({"a": 1, "b": [2, 3]});
27843 let compact = to_json(&val, false, false).unwrap();
27844 assert!(!compact.contains('\n'));
27845 assert!(
27846 compact.contains("\"a\":1")
27847 || compact.contains("\"a\": 1")
27848 || compact.contains("\"a\":")
27849 );
27850 }
27851
27852 #[test]
27853 fn to_json_pretty_indents() {
27854 let val = serde_json::json!({"a": 1, "b": [2, 3]});
27855 let pretty = to_json(&val, true, false).unwrap();
27856 assert!(pretty.contains('\n'));
27857 assert!(pretty.contains(" "));
27858 }
27859
27860 #[test]
27861 fn to_json_compact_is_shorter() {
27862 let val =
27863 serde_json::json!({"name": "test", "items": [1, 2, 3], "nested": {"key": "value"}});
27864 let compact = to_json(&val, false, false).unwrap();
27865 let pretty = to_json(&val, true, false).unwrap();
27866 assert!(compact.len() < pretty.len());
27867 }
27868
27869 #[test]
27870 fn terse_renames_keys() {
27871 let val =
27872 serde_json::json!({"caller_file": "a.rs", "caller_name": "main", "call_site_line": 10});
27873 let result = to_json(&val, false, true).unwrap();
27874 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27875 assert!(parsed["_s"].is_object());
27876 let d = &parsed["d"];
27877 assert_eq!(d["cf"], "a.rs");
27878 assert_eq!(d["cn"], "main");
27879 assert_eq!(d["csl"], 10);
27880 }
27881
27882 #[test]
27883 fn terse_schema_only_includes_used_keys() {
27884 let val = serde_json::json!({"name": "test", "score": 0.5});
27885 let result = to_json(&val, false, true).unwrap();
27886 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27887 let schema = parsed["_s"].as_object().unwrap();
27888 assert_eq!(schema["n"], "name");
27889 assert_eq!(schema["sc"], "score");
27890 assert!(!schema.contains_key("cf"));
27891 }
27892
27893 #[test]
27894 fn terse_nested_arrays() {
27895 let val = serde_json::json!({"callers": [{"caller_name": "a", "caller_file": "b.rs", "caller_line": 1, "callee_name": "c", "call_site_line": 2}]});
27896 let result = to_json(&val, false, true).unwrap();
27897 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27898 let d = &parsed["d"];
27899 assert_eq!(d["crs"][0]["cn"], "a");
27900 assert_eq!(d["crs"][0]["cf"], "b.rs");
27901 }
27902
27903 #[test]
27904 fn terse_preserves_unknown_keys() {
27905 let val = serde_json::json!({"custom_field": "value", "name": "test"});
27906 let result = to_json(&val, false, true).unwrap();
27907 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27908 let d = &parsed["d"];
27909 assert_eq!(d["custom_field"], "value");
27910 assert_eq!(d["n"], "test");
27911 }
27912
27913 #[test]
27916 fn ultra_terse_strips_properties_from_graph_nodes() {
27917 let val = serde_json::json!({
27918 "nodes": [{"id": "fn:main", "kind": "fn", "name": "main", "properties": {"line": "10"}}]
27919 });
27920 let result = to_json_schema(&val, false, true, true, false).unwrap();
27921 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27922 let node = &parsed["d"]["nodes"][0];
27923 assert_eq!(node["id"], "fn:main");
27924 assert_eq!(node["k"], "fn");
27925 assert_eq!(node["n"], "main");
27926 assert!(node.get("properties").is_none());
27927 }
27928
27929 #[test]
27930 fn ultra_terse_strips_properties_from_graph_edges() {
27931 let val = serde_json::json!({
27932 "edges": [{"from_id": "a", "to_id": "b", "kind": "calls", "properties": {"weight": "2"}}]
27933 });
27934 let result = to_json_schema(&val, false, true, true, false).unwrap();
27935 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27936 let edge = &parsed["d"]["edges"][0];
27937 assert_eq!(edge["from_id"], "a");
27938 assert_eq!(edge["to_id"], "b");
27939 assert_eq!(edge["k"], "c");
27940 assert!(edge.get("properties").is_none());
27941 }
27942
27943 #[test]
27944 fn ultra_terse_abbreviates_edge_kinds() {
27945 let val = serde_json::json!({
27946 "edges": [
27947 {"from_id": "a", "to_id": "b", "kind": "defines"},
27948 {"from_id": "a", "to_id": "c", "kind": "contains"},
27949 {"from_id": "a", "to_id": "d", "kind": "imports"},
27950 {"from_id": "a", "to_id": "e", "kind": "mentions"},
27951 {"from_id": "a", "to_id": "f", "kind": "semantic_relation"},
27952 {"from_id": "a", "to_id": "g", "kind": "belongs_to"},
27953 {"from_id": "a", "to_id": "h", "kind": "scopes_context"},
27954 {"from_id": "a", "to_id": "i", "kind": "uses"},
27955 {"from_id": "a", "to_id": "j", "kind": "parent"},
27956 {"from_id": "a", "to_id": "k", "kind": "unknown_edge"},
27957 ]
27958 });
27959 let result = to_json_schema(&val, false, true, true, false).unwrap();
27960 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27961 let edges = &parsed["d"]["edges"].as_array().unwrap();
27962 assert_eq!(edges[0]["k"], "d");
27963 assert_eq!(edges[1]["k"], "ct");
27964 assert_eq!(edges[2]["k"], "i");
27965 assert_eq!(edges[3]["k"], "m");
27966 assert_eq!(edges[4]["k"], "sr");
27967 assert_eq!(edges[5]["k"], "bt");
27968 assert_eq!(edges[6]["k"], "sctx");
27969 assert_eq!(edges[7]["k"], "u");
27970 assert_eq!(edges[8]["k"], "p");
27971 assert_eq!(edges[9]["k"], "unknown_edge");
27972 }
27973
27974 #[test]
27975 fn ultra_terse_strips_provenance_freshness_from_edges() {
27976 let val = serde_json::json!({
27977 "edges": [{"from_id": "a", "to_id": "b", "kind": "calls", "provenance": [{"source": "tsift"}], "freshness": {"observed_at_unix": 1234567890}}]
27978 });
27979 let result = to_json_schema(&val, false, true, true, false).unwrap();
27980 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27981 let edge = &parsed["d"]["edges"][0];
27982 assert!(edge.get("provenance").is_none());
27983 assert!(edge.get("freshness").is_none());
27984 assert_eq!(edge["k"], "c");
27985 }
27986
27987 #[test]
27988 fn ultra_terse_truncates_snippets() {
27989 let long_snippet = "x".repeat(120);
27990 let val = serde_json::json!({"snippet": long_snippet});
27991 let result = to_json_schema(&val, false, true, true, false).unwrap();
27992 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27993 let snipped = parsed["d"]["sn"].as_str().unwrap();
27994 assert_eq!(snipped.len(), 80);
27995 assert!(snipped.ends_with("..."));
27996 }
27997
27998 #[test]
27999 fn ultra_terse_truncates_abbreviated_snippet_key() {
28000 let long_snippet = "y".repeat(100);
28001 let val = serde_json::json!({"snippet": long_snippet});
28002 let result = to_json_schema(&val, false, true, true, false).unwrap();
28003 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28004 let snipped = parsed["d"]["sn"].as_str().unwrap();
28005 assert_eq!(snipped.len(), 80);
28006 assert!(snipped.ends_with("..."));
28007 }
28008
28009 #[test]
28010 fn ultra_terse_compacts_coverage_snapshot() {
28011 let val = serde_json::json!({
28012 "mode": "incremental",
28013 "total_sector_count": 10,
28014 "dirty_sector_count": 2,
28015 "active_rebuild": Some("rebuild-1"),
28016 "completed_dirty_sector_count": 1,
28017 "mounted_sector_count": 8,
28018 "rebuilding_sector_count": 1,
28019 "resumed_sector_count": 3,
28020 "reused_sector_count": 5
28021 });
28022 let result = to_json_schema(&val, false, true, true, false).unwrap();
28023 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28024 let d = &parsed["d"];
28025 assert_eq!(d["mode"], "incremental");
28026 assert_eq!(d["total_sector_count"], 10);
28027 assert_eq!(d["dirty_sector_count"], 2);
28028 assert!(d.get("active_rebuild").is_none());
28029 assert!(d.get("completed_dirty_sector_count").is_none());
28030 assert!(d.get("mounted_sector_count").is_none());
28031 assert!(d.get("rebuilding_sector_count").is_none());
28032 assert!(d.get("resumed_sector_count").is_none());
28033 assert!(d.get("reused_sector_count").is_none());
28034 }
28035
28036 #[test]
28037 fn ultra_terse_short_snippet_unchanged() {
28038 let val = serde_json::json!({"snippet": "short text"});
28039 let result = to_json_schema(&val, false, true, true, false).unwrap();
28040 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28041 assert_eq!(parsed["d"]["sn"], "short text");
28042 }
28043
28044 #[test]
28045 fn ultra_terse_non_graph_object_properties_preserved() {
28046 let val = serde_json::json!({"config": {"properties": {"a": "1"}}});
28047 let result = to_json_schema(&val, false, true, true, false).unwrap();
28048 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28049 assert!(parsed["d"]["config"]["properties"].is_object());
28050 }
28051
28052 #[test]
28055 fn schema_converts_homogeneous_arrays() {
28056 let val = serde_json::json!({"symbols": [
28057 {"name": "foo", "kind": "fn", "line": 10},
28058 {"name": "bar", "kind": "fn", "line": 20}
28059 ]});
28060 let result = to_json_schema(&val, false, false, false, true).unwrap();
28061 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28062 let syms = &parsed["symbols"];
28063 let columns = syms["_c"]
28064 .as_array()
28065 .unwrap()
28066 .iter()
28067 .map(|value| value.as_str().unwrap())
28068 .collect::<Vec<_>>();
28069 let row0 = syms["_r"][0].as_array().unwrap();
28070 let row1 = syms["_r"][1].as_array().unwrap();
28071 let name_index = columns.iter().position(|column| *column == "name").unwrap();
28072 let kind_index = columns.iter().position(|column| *column == "kind").unwrap();
28073 let line_index = columns.iter().position(|column| *column == "line").unwrap();
28074 assert_eq!(row0[name_index], "foo");
28075 assert_eq!(row0[kind_index], "fn");
28076 assert_eq!(row0[line_index], 10);
28077 assert_eq!(row1[name_index], "bar");
28078 assert_eq!(row1[kind_index], "fn");
28079 assert_eq!(row1[line_index], 20);
28080 }
28081
28082 #[test]
28083 fn schema_skips_short_arrays() {
28084 let val = serde_json::json!({"items": [{"name": "only"}]});
28085 let result = to_json_schema(&val, false, false, false, true).unwrap();
28086 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28087 assert!(parsed["items"].is_array());
28088 assert_eq!(parsed["items"][0]["name"], "only");
28089 }
28090
28091 #[test]
28092 fn schema_skips_heterogeneous_arrays() {
28093 let val = serde_json::json!({"items": [{"a": 1}, {"b": 2}]});
28094 let result = to_json_schema(&val, false, false, false, true).unwrap();
28095 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28096 assert!(parsed["items"].is_array());
28097 assert_eq!(parsed["items"][0]["a"], 1);
28098 }
28099
28100 #[test]
28101 fn schema_with_terse_combines() {
28102 let val = serde_json::json!({"callers": [
28103 {"caller_name": "a", "caller_file": "x.rs"},
28104 {"caller_name": "b", "caller_file": "y.rs"}
28105 ]});
28106 let result = to_json_schema(&val, false, true, false, true).unwrap();
28107 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28108 assert!(parsed["_s"].is_object());
28109 let d = &parsed["d"];
28110 let crs = &d["crs"];
28111 assert!(crs["_c"].is_array());
28112 assert!(crs["_r"].is_array());
28113 let columns = crs["_c"]
28114 .as_array()
28115 .unwrap()
28116 .iter()
28117 .map(|value| value.as_str().unwrap())
28118 .collect::<Vec<_>>();
28119 let row = crs["_r"][0].as_array().unwrap();
28120 let name_index = columns.iter().position(|column| *column == "cn").unwrap();
28121 let file_index = columns.iter().position(|column| *column == "cf").unwrap();
28122 assert_eq!(row[name_index], "a");
28123 assert_eq!(row[file_index], "x.rs");
28124 }
28125
28126 #[test]
28127 fn schema_preserves_non_object_arrays() {
28128 let val = serde_json::json!({"tags": ["a", "b", "c"]});
28129 let result = to_json_schema(&val, false, false, false, true).unwrap();
28130 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28131 assert_eq!(parsed["tags"], serde_json::json!(["a", "b", "c"]));
28132 }
28133
28134 #[test]
28135 fn cli_accepts_global_schema_flag() {
28136 let cli = parse_cli(["tsift", "--schema", "search", "test"]);
28137 assert!(cli.schema);
28138 assert!(matches!(cli.command, Some(Commands::Search { .. })));
28139 }
28140
28141 #[test]
28142 fn cli_accepts_global_envelope_flag() {
28143 let cli = parse_cli([
28144 "tsift",
28145 "--envelope",
28146 "context-pack",
28147 "tasks/software/tsift.md",
28148 ]);
28149 assert!(cli.envelope);
28150 assert!(matches!(cli.command, Some(Commands::ContextPack { .. })));
28151 }
28152
28153 #[test]
28154 fn cli_accepts_locks_command() {
28155 let cli = parse_cli(["tsift", "locks"]);
28156 assert!(matches!(cli.command, Some(Commands::Locks { .. })));
28157 }
28158
28159 #[test]
28160 fn cli_parses_memory_budget_guard_command() {
28161 let cli = parse_cli([
28162 "tsift",
28163 "memory",
28164 "budget-guard",
28165 "--file",
28166 "tool.log",
28167 "--budget-tokens",
28168 "1000",
28169 "--json",
28170 ]);
28171 match cli.command {
28172 Some(Commands::Memory {
28173 command:
28174 crate::cli::MemoryCommand::BudgetGuard {
28175 file,
28176 budget_tokens,
28177 json,
28178 ..
28179 },
28180 }) => {
28181 assert_eq!(file.as_deref(), Some(std::path::Path::new("tool.log")));
28182 assert_eq!(budget_tokens, 1000);
28183 assert!(json);
28184 }
28185 _ => panic!("expected memory budget-guard command"),
28186 }
28187 }
28188
28189 #[test]
28190 fn cli_parses_memory_capture_agent_doc_closeout_command() {
28191 let cli = parse_cli([
28192 "tsift",
28193 "memory",
28194 "capture-agent-doc-closeout",
28195 ".",
28196 "--session-path",
28197 "tasks/software/tsift.md",
28198 "--prompt-target",
28199 "do [#tsiftmemhooks]",
28200 "--response-summary",
28201 "wired closeout capture",
28202 "--commit-hash",
28203 "abc123",
28204 "--session-check-status",
28205 "clean",
28206 "--json",
28207 ]);
28208 match cli.command {
28209 Some(Commands::Memory {
28210 command:
28211 crate::cli::MemoryCommand::CaptureAgentDocCloseout {
28212 path,
28213 session_path,
28214 prompt_target,
28215 response_summary,
28216 commit_hash,
28217 session_check_status,
28218 json,
28219 },
28220 }) => {
28221 assert_eq!(path, std::path::PathBuf::from("."));
28222 assert_eq!(
28223 session_path,
28224 std::path::PathBuf::from("tasks/software/tsift.md")
28225 );
28226 assert_eq!(prompt_target, "do [#tsiftmemhooks]");
28227 assert_eq!(response_summary, "wired closeout capture");
28228 assert_eq!(commit_hash.as_deref(), Some("abc123"));
28229 assert_eq!(session_check_status, "clean");
28230 assert!(json);
28231 }
28232 _ => panic!("expected memory capture-agent-doc-closeout command"),
28233 }
28234 }
28235
28236 #[test]
28237 fn cli_locks_accepts_scope_flag() {
28238 let cli = parse_cli(["tsift", "locks", "--scope", "alpha"]);
28239 match cli.command {
28240 Some(Commands::Locks { scope, .. }) => {
28241 assert_eq!(scope.as_deref(), Some("alpha"));
28242 }
28243 _ => panic!("expected Locks command"),
28244 }
28245 }
28246
28247 #[test]
28248 fn cli_search_accepts_autoindex_flag() {
28249 let cli = parse_cli(["tsift", "search", "test", "--autoindex"]);
28250 match cli.command {
28251 Some(Commands::Search {
28252 autoindex,
28253 no_autoindex,
28254 ..
28255 }) => {
28256 assert!(autoindex);
28257 assert!(!no_autoindex);
28258 }
28259 _ => panic!("expected Search command"),
28260 }
28261 }
28262
28263 #[test]
28264 fn cli_search_accepts_exact_flag() {
28265 let cli = parse_cli(["tsift", "search", "test", "--exact"]);
28266 match cli.command {
28267 Some(Commands::Search {
28268 exact, strategy, ..
28269 }) => {
28270 assert!(exact);
28271 assert!(strategy.is_none());
28272 }
28273 _ => panic!("expected Search command"),
28274 }
28275 }
28276
28277 #[test]
28278 fn cli_parses_diff_digest_command() {
28279 let cli = parse_cli(["tsift", "diff-digest", "--json", "."]);
28280 match cli.command {
28281 Some(Commands::DiffDigest {
28282 json,
28283 path,
28284 cached,
28285 revision,
28286 max_parsed_files,
28287 }) => {
28288 assert!(json);
28289 assert_eq!(path, PathBuf::from("."));
28290 assert!(!cached);
28291 assert!(revision.is_none());
28292 assert_eq!(max_parsed_files, 25);
28293 }
28294 _ => panic!("expected DiffDigest command"),
28295 }
28296 }
28297
28298 #[test]
28299 fn cli_rejects_conflicting_diff_digest_modes() {
28300 match try_parse_cli([
28301 "tsift",
28302 "diff-digest",
28303 "--cached",
28304 "--revision",
28305 "HEAD",
28306 ".",
28307 ]) {
28308 Ok(_) => panic!("expected conflicting diff-digest modes to fail"),
28309 Err(err) => {
28310 assert!(err.to_string().contains("--cached"));
28311 assert!(err.to_string().contains("--revision"));
28312 }
28313 }
28314 }
28315
28316 #[test]
28317 fn cli_parses_test_digest_command() {
28318 let cli = parse_cli([
28319 "tsift",
28320 "test-digest",
28321 "--path",
28322 ".",
28323 "--input",
28324 "target/test.log",
28325 "--runner",
28326 "cargo",
28327 "--json",
28328 ]);
28329 match cli.command {
28330 Some(Commands::TestDigest {
28331 json,
28332 path,
28333 input,
28334 runner,
28335 }) => {
28336 assert!(json);
28337 assert_eq!(path, PathBuf::from("."));
28338 assert_eq!(input, Some(PathBuf::from("target/test.log")));
28339 assert_eq!(runner.as_deref(), Some("cargo"));
28340 }
28341 _ => panic!("expected TestDigest command"),
28342 }
28343 }
28344
28345 #[test]
28346 fn cli_parses_log_digest_command() {
28347 let cli = parse_cli([
28348 "tsift",
28349 "log-digest",
28350 "--path",
28351 ".",
28352 "--input",
28353 "target/build.log",
28354 "--json",
28355 ]);
28356 match cli.command {
28357 Some(Commands::LogDigest { json, path, input }) => {
28358 assert!(json);
28359 assert_eq!(path, PathBuf::from("."));
28360 assert_eq!(input, Some(PathBuf::from("target/build.log")));
28361 }
28362 _ => panic!("expected LogDigest command"),
28363 }
28364 }
28365
28366 #[test]
28367 fn cli_parses_metric_digest_command() {
28368 let cli = parse_cli([
28369 "tsift",
28370 "metric-digest",
28371 "--input",
28372 "target/runs.json",
28373 "--baseline",
28374 "target/prior.json",
28375 "--metric",
28376 "session_mae",
28377 "--lower-is-better",
28378 "session_mae",
28379 "--history",
28380 "4",
28381 "--top",
28382 "2",
28383 "--json",
28384 ]);
28385 match cli.command {
28386 Some(Commands::MetricDigest {
28387 input,
28388 baseline,
28389 metrics,
28390 lower_is_better,
28391 history,
28392 top,
28393 json,
28394 ..
28395 }) => {
28396 assert!(json);
28397 assert_eq!(input, Some(PathBuf::from("target/runs.json")));
28398 assert_eq!(baseline, Some(PathBuf::from("target/prior.json")));
28399 assert_eq!(metrics, vec!["session_mae"]);
28400 assert_eq!(lower_is_better, vec!["session_mae"]);
28401 assert_eq!(history, 4);
28402 assert_eq!(top, 2);
28403 }
28404 _ => panic!("expected MetricDigest command"),
28405 }
28406 }
28407
28408 #[test]
28409 fn cli_parses_dci_benchmark_command() {
28410 let cli = parse_cli([
28411 "tsift",
28412 "dci-benchmark",
28413 "--fixture",
28414 "fixtures/dci-search-benchmark.json",
28415 "--json",
28416 ]);
28417 match cli.command {
28418 Some(Commands::DciBenchmark { fixture, json }) => {
28419 assert!(json);
28420 assert_eq!(fixture, PathBuf::from("fixtures/dci-search-benchmark.json"));
28421 }
28422 _ => panic!("expected DciBenchmark command"),
28423 }
28424 }
28425
28426 #[test]
28427 fn cli_parses_session_digest_command() {
28428 let cli = parse_cli([
28429 "tsift",
28430 "session-digest",
28431 "--path",
28432 ".",
28433 "--input",
28434 "target/session.md",
28435 "--source",
28436 "markdown",
28437 "--json",
28438 ]);
28439 match cli.command {
28440 Some(Commands::SessionDigest {
28441 json,
28442 path,
28443 input,
28444 source,
28445 }) => {
28446 assert!(json);
28447 assert_eq!(path, PathBuf::from("."));
28448 assert_eq!(input, Some(PathBuf::from("target/session.md")));
28449 assert_eq!(source.as_deref(), Some("markdown"));
28450 }
28451 _ => panic!("expected SessionDigest command"),
28452 }
28453 }
28454
28455 #[test]
28456 fn cli_parses_session_cost_command() {
28457 let cli = parse_cli([
28458 "tsift",
28459 "session-cost",
28460 "--input",
28461 "target/session.jsonl",
28462 "--source",
28463 "codex-jsonl",
28464 "--json",
28465 ]);
28466 match cli.command {
28467 Some(Commands::SessionCost {
28468 json,
28469 input,
28470 source,
28471 }) => {
28472 assert!(json);
28473 assert_eq!(input, Some(PathBuf::from("target/session.jsonl")));
28474 assert_eq!(source.as_deref(), Some("codex-jsonl"));
28475 }
28476 _ => panic!("expected SessionCost command"),
28477 }
28478 }
28479
28480 #[test]
28481 fn cli_parses_session_review_command() {
28482 let cli = parse_cli([
28483 "tsift",
28484 "session-review",
28485 "tasks/software/tsift.md",
28486 "--next-context",
28487 "--json",
28488 ]);
28489 match cli.command {
28490 Some(Commands::SessionReview {
28491 json,
28492 next_context,
28493 path,
28494 ..
28495 }) => {
28496 assert!(json);
28497 assert!(next_context);
28498 assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
28499 }
28500 _ => panic!("expected SessionReview command"),
28501 }
28502 }
28503
28504 #[test]
28505 fn cli_search_accepts_budget_flags() {
28506 let cli = parse_cli([
28507 "tsift",
28508 "search",
28509 "alpha_helper",
28510 "--max-items",
28511 "3",
28512 "--max-bytes",
28513 "96",
28514 ]);
28515 match cli.command {
28516 Some(Commands::Search {
28517 max_items,
28518 max_bytes,
28519 ..
28520 }) => {
28521 assert_eq!(max_items, Some(3));
28522 assert_eq!(max_bytes, Some(96));
28523 }
28524 _ => panic!("expected Search command"),
28525 }
28526 }
28527
28528 #[test]
28529 fn cli_search_accepts_budget_preset() {
28530 let cli = parse_cli(["tsift", "search", "alpha_helper", "--budget", "small"]);
28531 match cli.command {
28532 Some(Commands::Search { budget, .. }) => {
28533 assert_eq!(budget, Some(ResponseBudgetPreset::Small));
28534 }
28535 _ => panic!("expected Search command"),
28536 }
28537 }
28538
28539 #[test]
28540 fn cli_search_accepts_ast_facet_filters() {
28541 let cli = parse_cli([
28542 "tsift",
28543 "search",
28544 "setup",
28545 "--lang",
28546 "markdown",
28547 "--kind",
28548 "list_item",
28549 "--node-kind",
28550 "list_item",
28551 "--section",
28552 "Install",
28553 "--parent",
28554 "Run setup.",
28555 "--child",
28556 "Confirm setup.",
28557 "--fence-language",
28558 "rust",
28559 "--list-depth",
28560 "1",
28561 "--heading-level",
28562 "2",
28563 ]);
28564 match cli.command {
28565 Some(Commands::Search {
28566 lang,
28567 kind,
28568 node_kind,
28569 section,
28570 parent,
28571 child,
28572 fence_language,
28573 list_depth,
28574 heading_level,
28575 ..
28576 }) => {
28577 assert_eq!(lang, vec!["markdown"]);
28578 assert_eq!(kind, vec!["list_item"]);
28579 assert_eq!(node_kind, vec!["list_item"]);
28580 assert_eq!(section, vec!["Install"]);
28581 assert_eq!(parent, vec!["Run setup."]);
28582 assert_eq!(child, vec!["Confirm setup."]);
28583 assert_eq!(fence_language, vec!["rust"]);
28584 assert_eq!(list_depth, vec![1]);
28585 assert_eq!(heading_level, vec![2]);
28586 }
28587 _ => panic!("expected Search command"),
28588 }
28589 }
28590
28591 #[test]
28592 fn response_budget_presets_fill_defaults_and_preserve_explicit_caps() {
28593 let small = ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Small), false);
28594 assert_eq!(small.preview_items(), 3);
28595 assert_eq!(small.preview_bytes(), 120);
28596 assert_eq!(small.follow_up_items(), 4);
28597
28598 let overridden =
28599 ResponseBudget::from_cli(Some(7), None, Some(ResponseBudgetPreset::Small), false);
28600 assert_eq!(overridden.preview_items(), 7);
28601 assert_eq!(overridden.preview_bytes(), 120);
28602 assert_eq!(overridden.follow_up_items(), 7);
28603
28604 let envelope_default = ResponseBudget::from_cli(None, None, None, true);
28605 assert!(envelope_default.is_active());
28606 }
28607
28608 #[test]
28609 fn cli_explain_accepts_budget_flags() {
28610 let cli = parse_cli([
28611 "tsift",
28612 "explain",
28613 "alpha_helper",
28614 "--max-items",
28615 "2",
28616 "--max-bytes",
28617 "80",
28618 ]);
28619 match cli.command {
28620 Some(Commands::Explain {
28621 max_items,
28622 max_bytes,
28623 ..
28624 }) => {
28625 assert_eq!(max_items, Some(2));
28626 assert_eq!(max_bytes, Some(80));
28627 }
28628 _ => panic!("expected Explain command"),
28629 }
28630 }
28631
28632 #[test]
28633 fn cli_session_review_accepts_budget_flags() {
28634 let cli = parse_cli([
28635 "tsift",
28636 "session-review",
28637 "tasks/software/tsift.md",
28638 "--max-items",
28639 "4",
28640 "--max-bytes",
28641 "120",
28642 ]);
28643 match cli.command {
28644 Some(Commands::SessionReview {
28645 max_items,
28646 max_bytes,
28647 ..
28648 }) => {
28649 assert_eq!(max_items, Some(4));
28650 assert_eq!(max_bytes, Some(120));
28651 }
28652 _ => panic!("expected SessionReview command"),
28653 }
28654 }
28655
28656 #[test]
28657 fn cli_parses_context_pack_command() {
28658 let cli = parse_cli([
28659 "tsift",
28660 "context-pack",
28661 "tasks/software/tsift.md",
28662 "--test-input",
28663 "target/test.log",
28664 "--runner",
28665 "cargo",
28666 "--log-input",
28667 "target/build.log",
28668 "--max-items",
28669 "3",
28670 "--max-bytes",
28671 "96",
28672 "--json",
28673 ]);
28674 match cli.command {
28675 Some(Commands::ContextPack {
28676 path,
28677 test_input,
28678 runner,
28679 log_input,
28680 json,
28681 max_items,
28682 max_bytes,
28683 budget,
28684 convex_snapshot,
28685 }) => {
28686 assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
28687 assert_eq!(test_input, Some(PathBuf::from("target/test.log")));
28688 assert_eq!(runner.as_deref(), Some("cargo"));
28689 assert_eq!(log_input, Some(PathBuf::from("target/build.log")));
28690 assert!(json);
28691 assert_eq!(max_items, Some(3));
28692 assert_eq!(max_bytes, Some(96));
28693 assert!(budget.is_none());
28694 assert!(convex_snapshot.is_none());
28695 }
28696 _ => panic!("expected ContextPack command"),
28697 }
28698 }
28699
28700 #[test]
28701 fn cli_parses_token_savings_command() {
28702 let cli = parse_cli([
28703 "tsift",
28704 "token-savings",
28705 "--fixture",
28706 "fixtures/tsift-token-savings.json",
28707 "--fail-under",
28708 "--json",
28709 ]);
28710 match cli.command {
28711 Some(Commands::TokenSavings {
28712 fixture,
28713 fail_under,
28714 json,
28715 }) => {
28716 assert_eq!(fixture, PathBuf::from("fixtures/tsift-token-savings.json"));
28717 assert!(fail_under);
28718 assert!(json);
28719 }
28720 _ => panic!("expected TokenSavings command"),
28721 }
28722 }
28723
28724 #[test]
28725 fn token_savings_report_records_fixture_thresholds() {
28726 let raw_symbols = [
28727 "validate_user",
28728 "validateUser",
28729 "ValidateUser",
28730 "validate-user",
28731 "VALIDATE_USER",
28732 "Validate_User",
28733 "raw_symbol",
28734 "rawSymbol",
28735 "RawSymbol",
28736 "raw-symbol",
28737 "RAW_SYMBOL",
28738 "Raw_Symbol",
28739 ]
28740 .iter()
28741 .enumerate()
28742 .map(|(idx, identifier)| TokenSavingsRawSymbol {
28743 identifier: (*identifier).to_string(),
28744 file: format!("src/example_{idx}.rs"),
28745 line: (idx + 1) as u64,
28746 context: "function".to_string(),
28747 })
28748 .collect();
28749 let fixture = TokenSavingsFixture {
28750 schema_version: 1,
28751 description: "fixture".to_string(),
28752 token_estimate: "ceil(utf8_bytes / 4)".to_string(),
28753 cases: vec![TokenSavingsFixtureCase {
28754 name: "search-preview".to_string(),
28755 surface: "search".to_string(),
28756 minimum_savings_percent: 40.0,
28757 raw_symbols,
28758 tagpath_families: vec![
28759 TokenSavingsFamily {
28760 canonical: "validate_user".to_string(),
28761 count: 6,
28762 aliases: BTreeMap::new(),
28763 },
28764 TokenSavingsFamily {
28765 canonical: "raw_symbol".to_string(),
28766 count: 6,
28767 aliases: BTreeMap::new(),
28768 },
28769 ],
28770 context_pack_inputs: None,
28771 session_review_inputs: None,
28772 source_read_inputs: None,
28773 markdown_projection_inputs: None,
28774 }],
28775 };
28776
28777 let report = build_token_savings_report(&fixture).unwrap();
28778
28779 assert!(report.pass);
28780 assert_eq!(report.cases[0].raw_symbol_count, 12);
28781 assert_eq!(report.cases[0].family_count, 2);
28782 assert_eq!(report.cases[0].status, "pass");
28783 assert!(report.cases[0].byte_delta > 0);
28784 assert!(report.cases[0].raw_estimated_tokens > report.cases[0].envelope_estimated_tokens);
28785 assert!(report.cases[0].savings_percent >= 40.0);
28786 }
28787
28788 #[test]
28789 fn token_savings_source_read_inputs_preserve_required_anchors() {
28790 let fixture = TokenSavingsFixture {
28791 schema_version: 1,
28792 description: "fixture".to_string(),
28793 token_estimate: "ceil(utf8_bytes / 4)".to_string(),
28794 cases: vec![TokenSavingsFixtureCase {
28795 name: "source-read".to_string(),
28796 surface: "source-read".to_string(),
28797 minimum_savings_percent: 40.0,
28798 raw_symbols: Vec::new(),
28799 tagpath_families: Vec::new(),
28800 context_pack_inputs: None,
28801 session_review_inputs: None,
28802 source_read_inputs: Some(TokenSavingsSourceReadInputs {
28803 reads: vec![TokenSavingsSourceReadInput {
28804 command: "sed -n '40,160p' src/main.rs".to_string(),
28805 file: "src/main.rs".to_string(),
28806 raw_start: 40,
28807 raw_lines: 121,
28808 raw_excerpt: "line 40\n".repeat(121),
28809 envelope_start: 40,
28810 envelope_lines: 121,
28811 required_line_anchors: vec![40, 120, 160],
28812 }],
28813 }),
28814 markdown_projection_inputs: None,
28815 }],
28816 };
28817
28818 let report = build_token_savings_report(&fixture).unwrap();
28819
28820 assert!(report.pass);
28821 assert_eq!(report.cases[0].surface, "source-read");
28822 assert!(report.cases[0].savings_percent >= 40.0);
28823 }
28824
28825 #[test]
28826 fn token_savings_source_read_inputs_fail_when_anchor_is_hidden() {
28827 let fixture = TokenSavingsFixture {
28828 schema_version: 1,
28829 description: "fixture".to_string(),
28830 token_estimate: "ceil(utf8_bytes / 4)".to_string(),
28831 cases: vec![TokenSavingsFixtureCase {
28832 name: "source-read".to_string(),
28833 surface: "source-read".to_string(),
28834 minimum_savings_percent: 40.0,
28835 raw_symbols: Vec::new(),
28836 tagpath_families: Vec::new(),
28837 context_pack_inputs: None,
28838 session_review_inputs: None,
28839 source_read_inputs: Some(TokenSavingsSourceReadInputs {
28840 reads: vec![TokenSavingsSourceReadInput {
28841 command: "cat src/main.rs".to_string(),
28842 file: "src/main.rs".to_string(),
28843 raw_start: 1,
28844 raw_lines: 200,
28845 raw_excerpt: "line\n".repeat(200),
28846 envelope_start: 1,
28847 envelope_lines: 80,
28848 required_line_anchors: vec![120],
28849 }],
28850 }),
28851 markdown_projection_inputs: None,
28852 }],
28853 };
28854
28855 let err = match build_token_savings_report(&fixture) {
28856 Ok(_) => panic!("hidden anchor should fail the source-read fixture"),
28857 Err(err) => err,
28858 };
28859
28860 assert!(err.to_string().contains("hides required line anchor 120"));
28861 }
28862
28863 #[test]
28864 fn token_savings_markdown_projection_inputs_require_outline_and_selected_nodes() {
28865 let fixture = TokenSavingsFixture {
28866 schema_version: 1,
28867 description: "fixture".to_string(),
28868 token_estimate: "ceil(utf8_bytes / 4)".to_string(),
28869 cases: vec![TokenSavingsFixtureCase {
28870 name: "markdown-projection".to_string(),
28871 surface: "context-pack".to_string(),
28872 minimum_savings_percent: 40.0,
28873 raw_symbols: Vec::new(),
28874 tagpath_families: Vec::new(),
28875 context_pack_inputs: None,
28876 session_review_inputs: None,
28877 source_read_inputs: None,
28878 markdown_projection_inputs: Some(TokenSavingsMarkdownProjectionInputs {
28879 documents: vec![TokenSavingsMarkdownProjectionInput {
28880 command: "context-pack markdown body".to_string(),
28881 file: "tasks/software/tsift.md".to_string(),
28882 raw_markdown: "# Heading\n\n".repeat(120),
28883 outline_nodes: vec!["Heading".to_string(), "Details".to_string()],
28884 selected_nodes: vec!["mdast-selected".to_string()],
28885 expand:
28886 "tsift --envelope markdown-ast tasks/software/tsift.md --node mdast-selected --budget normal"
28887 .to_string(),
28888 }],
28889 }),
28890 }],
28891 };
28892
28893 let report = build_token_savings_report(&fixture).unwrap();
28894
28895 assert!(report.pass);
28896 assert_eq!(report.cases[0].surface, "context-pack");
28897 assert!(report.cases[0].savings_percent >= 40.0);
28898 }
28899
28900 #[test]
28901 fn markdown_ast_projection_cache_reuses_large_document_section_and_block_lookups() {
28902 let mut content = String::from("# Cache Root\n\n");
28903 for idx in 0..96 {
28904 content.push_str(&format!(
28905 "## Section {idx}\n\n- Item {idx}\n\n```rust\nfn sample_{idx}() {{}}\n```\n\n"
28906 ));
28907 }
28908
28909 let first = markdown_ast_projection("semantic-edit", content.as_bytes()).unwrap();
28910 assert!(!first.cache_hit);
28911 assert!(first.nodes.len() > 200);
28912
28913 let sections = markdown_section_spans(&content).unwrap();
28914 let list_items = markdown_block_spans(&content, "list_item").unwrap();
28915 let code_blocks = markdown_block_spans(&content, "code_block").unwrap();
28916 let second = markdown_ast_projection("semantic-edit", content.as_bytes()).unwrap();
28917
28918 assert!(second.cache_hit);
28919 assert_eq!(second.nodes.len(), first.nodes.len());
28920 assert_eq!(sections.len(), 97);
28921 assert_eq!(list_items.len(), 96);
28922 assert_eq!(code_blocks.len(), 96);
28923 let first_code = first
28924 .nodes
28925 .iter()
28926 .find(|node| node.kind == "code_block")
28927 .expect("expected a Markdown code block");
28928 let first_code_node = markdown_ast_node(
28929 Path::new("/repo"),
28930 "semantic-edit",
28931 first_code,
28932 content.as_bytes(),
28933 &first.nodes,
28934 8,
28935 );
28936 assert_eq!(first_code_node.metadata.embedded_symbols.len(), 1);
28937 assert_eq!(
28938 first_code_node.metadata.embedded_symbols[0].name,
28939 "sample_0"
28940 );
28941 assert_eq!(
28942 first_code_node.metadata.embedded_symbols[0].language,
28943 "rust"
28944 );
28945 }
28946
28947 #[test]
28948 fn search_budget_report_truncates_symbol_preview_and_emits_stable_handle() {
28949 let response = empty_search_response(Path::new("/repo"), "lexical");
28950 let symbol_hits = vec![index::SymbolHit {
28951 name: "alpha_helper_with_a_long_name".to_string(),
28952 kind: "function".to_string(),
28953 language: "rust".to_string(),
28954 file: "/repo/src/lib.rs".to_string(),
28955 line: 12,
28956 end_line: None,
28957 node_kind: None,
28958 start_byte: None,
28959 end_byte: None,
28960 body_start_byte: None,
28961 body_end_byte: None,
28962 tags: None,
28963 score: 0.98,
28964 match_type: "exact_name".to_string(),
28965 tagpath_handle: None,
28966 }];
28967
28968 let report = build_relative_search_budget_report(
28969 "alpha_helper_with_a_long_name",
28970 "lexical",
28971 Path::new("/repo"),
28972 &response,
28973 &symbol_hits,
28974 ResponseBudget::new(Some(1), Some(12)),
28975 &SearchFacetFilters::default(),
28976 );
28977
28978 assert_eq!(report.symbols.len(), 1);
28979 assert!(report.symbols[0].handle.starts_with("sfam-"));
28980 assert_eq!(report.symbols[0].tag_alias.as_deref(), Some("alpha/hel..."));
28981 assert_eq!(report.symbols[0].name, "alpha_hel...");
28982 assert_eq!(report.symbols[0].file, "src/lib.rs");
28983 assert!(report.symbols[0].expand.contains("tsift search"));
28984 }
28985
28986 #[test]
28987 fn search_budget_report_promotes_ast_span_artifacts_for_symbols() {
28988 let dir = tempfile::tempdir().unwrap();
28989 let src_dir = dir.path().join("src");
28990 fs::create_dir_all(&src_dir).unwrap();
28991 let source = "fn alpha_helper() {\n beta();\n}\n";
28992 let file = src_dir.join("lib.rs");
28993 fs::write(&file, source).unwrap();
28994 let body_start = source.find("{\n").unwrap() + 1;
28995 let body_end = source.rfind("\n}").unwrap() + 1;
28996
28997 let response = empty_search_response(dir.path(), "lexical");
28998 let symbol_hits = vec![index::SymbolHit {
28999 name: "alpha_helper".to_string(),
29000 kind: "function".to_string(),
29001 language: "rust".to_string(),
29002 file: file.to_string_lossy().to_string(),
29003 line: 0,
29004 end_line: Some(2),
29005 node_kind: Some("function_item".to_string()),
29006 start_byte: Some(0),
29007 end_byte: Some(i64::try_from(source.len()).unwrap()),
29008 body_start_byte: Some(i64::try_from(body_start).unwrap()),
29009 body_end_byte: Some(i64::try_from(body_end).unwrap()),
29010 tags: Some("alpha,helper".to_string()),
29011 score: 0.98,
29012 match_type: "exact_name".to_string(),
29013 tagpath_handle: None,
29014 }];
29015
29016 let report = build_relative_search_budget_report(
29017 "alpha helper",
29018 "lexical",
29019 dir.path(),
29020 &response,
29021 &symbol_hits,
29022 ResponseBudget::new(Some(5), Some(96)),
29023 &SearchFacetFilters::default(),
29024 );
29025
29026 let symbol = &report.symbols[0];
29027 assert_eq!(symbol.language, "rust");
29028 assert_eq!(symbol.end_line, Some(2));
29029 let ast = symbol
29030 .ast
29031 .as_ref()
29032 .expect("search symbol preview should expose an AST span artifact");
29033 assert_eq!(ast.artifact_kind, "ast_span");
29034 assert!(ast.span.handle.starts_with("span-"));
29035 assert_eq!(ast.span.node_kind, "function_item");
29036 assert_eq!(ast.span.start_byte, 0);
29037 assert_eq!(ast.span.end_byte, source.len());
29038 assert_eq!(ast.span.body_start_byte, Some(body_start));
29039 assert_eq!(ast.span.body_end_byte, Some(body_end));
29040 assert!(ast.expand.source_window.contains("source-read"));
29041 assert!(
29042 ast.expand
29043 .source_body
29044 .as_ref()
29045 .unwrap()
29046 .contains("source-read")
29047 );
29048 assert!(ast.expand.symbol_read.contains("symbol-read"));
29049 assert!(ast.expand.markdown_ast.is_none());
29050 }
29051
29052 #[test]
29053 fn search_budget_report_links_markdown_spans_to_markdown_ast_expansion() {
29054 let dir = tempfile::tempdir().unwrap();
29055 let source = "# Guide\n\n## Install\n\n- Run setup.\n";
29056 let file = dir.path().join("README.md");
29057 fs::write(&file, source).unwrap();
29058 let heading_start = source.find("## Install").unwrap();
29059 let heading_end = source.len();
29060
29061 let response = empty_search_response(dir.path(), "lexical");
29062 let symbol_hits = vec![index::SymbolHit {
29063 name: "Install".to_string(),
29064 kind: "heading".to_string(),
29065 language: "markdown".to_string(),
29066 file: file.to_string_lossy().to_string(),
29067 line: 2,
29068 end_line: Some(4),
29069 node_kind: Some("atx_heading".to_string()),
29070 start_byte: Some(i64::try_from(heading_start).unwrap()),
29071 end_byte: Some(i64::try_from(heading_end).unwrap()),
29072 body_start_byte: Some(i64::try_from(source.find("- Run setup.").unwrap()).unwrap()),
29073 body_end_byte: Some(i64::try_from(heading_end).unwrap()),
29074 tags: Some("install".to_string()),
29075 score: 1.0,
29076 match_type: "exact_name".to_string(),
29077 tagpath_handle: None,
29078 }];
29079
29080 let report = build_relative_search_budget_report(
29081 "Install",
29082 "lexical",
29083 dir.path(),
29084 &response,
29085 &symbol_hits,
29086 ResponseBudget::new(Some(5), Some(96)),
29087 &SearchFacetFilters::default(),
29088 );
29089
29090 let ast = report.symbols[0]
29091 .ast
29092 .as_ref()
29093 .expect("Markdown search symbol should expose an AST span artifact");
29094 assert_eq!(ast.span.node_kind, "atx_heading");
29095 assert_eq!(ast.span.markdown.as_ref().unwrap().heading_level, Some(2));
29096 let markdown_ast = ast
29097 .expand
29098 .markdown_ast
29099 .as_ref()
29100 .expect("Markdown symbols should include markdown-ast expansion");
29101 assert!(markdown_ast.contains("markdown-ast"), "{markdown_ast}");
29102 assert!(markdown_ast.contains("--node"), "{markdown_ast}");
29103 assert!(markdown_ast.contains(&ast.span.handle), "{markdown_ast}");
29104 assert!(ast.expand.source_window.contains("source-read"));
29105 assert!(ast.expand.symbol_read.contains("symbol-read"));
29106 }
29107
29108 #[test]
29109 fn search_budget_report_exposes_markdown_embedded_code_symbols() {
29110 let dir = tempfile::tempdir().unwrap();
29111 let source = "# Guide\n\n```rust\nfn sample() {}\n```\n";
29112 let file = dir.path().join("README.md");
29113 fs::write(&file, source).unwrap();
29114 let fence_start = source.find("```rust").unwrap();
29115 let body_start = source.find("fn sample").unwrap();
29116 let body_end = body_start + "fn sample() {}\n".len();
29117
29118 let response = empty_search_response(dir.path(), "lexical");
29119 let symbol_hits = vec![index::SymbolHit {
29120 name: "rust".to_string(),
29121 kind: "code_block".to_string(),
29122 language: "markdown".to_string(),
29123 file: file.to_string_lossy().to_string(),
29124 line: 2,
29125 end_line: Some(4),
29126 node_kind: Some("fenced_code_block".to_string()),
29127 start_byte: Some(i64::try_from(fence_start).unwrap()),
29128 end_byte: Some(i64::try_from(source.len()).unwrap()),
29129 body_start_byte: Some(i64::try_from(body_start).unwrap()),
29130 body_end_byte: Some(i64::try_from(body_end).unwrap()),
29131 tags: Some("rust".to_string()),
29132 score: 1.0,
29133 match_type: "exact_name".to_string(),
29134 tagpath_handle: None,
29135 }];
29136
29137 let report = build_relative_search_budget_report(
29138 "rust",
29139 "lexical",
29140 dir.path(),
29141 &response,
29142 &symbol_hits,
29143 ResponseBudget::new(Some(5), Some(96)),
29144 &SearchFacetFilters::default(),
29145 );
29146
29147 let embedded = &report.symbols[0]
29148 .ast
29149 .as_ref()
29150 .unwrap()
29151 .span
29152 .markdown
29153 .as_ref()
29154 .unwrap()
29155 .embedded_symbols;
29156 assert_eq!(embedded.len(), 1);
29157 assert_eq!(embedded[0].name, "sample");
29158 assert_eq!(embedded[0].kind, "function");
29159 assert_eq!(embedded[0].language, "rust");
29160 assert_eq!(embedded[0].node_kind, "function_item");
29161 assert!(embedded[0].handle.starts_with("span-"));
29162 assert_eq!(embedded[0].start_byte, body_start);
29163 assert_eq!(embedded[0].start_line, 4);
29164 }
29165
29166 fn test_lexical_search_hit(
29167 path: &Path,
29168 rank: usize,
29169 score: f64,
29170 snippet: &str,
29171 ) -> sift::SearchHit {
29172 sift::SearchHit {
29173 artifact_id: format!("hit-{rank}"),
29174 artifact_kind: sift::ContextArtifactKind::File,
29175 budget: sift::ArtifactBudget::from_text(snippet, 1),
29176 confidence: sift::ScoreConfidence::High,
29177 freshness: sift::ArtifactFreshness {
29178 modified_unix_secs: None,
29179 observed_unix_secs: 0,
29180 },
29181 location: Some("line 1".to_string()),
29182 path: path.to_string_lossy().to_string(),
29183 provenance: sift::ArtifactProvenance {
29184 adapter: sift::AcquisitionAdapterKind::FileSystem,
29185 source: "test lexical hit".to_string(),
29186 synthetic: false,
29187 },
29188 rank,
29189 score,
29190 snippet: snippet.to_string(),
29191 }
29192 }
29193
29194 fn test_summary(symbol_name: &str, file_path: &str, summary: &str) -> summarize::Summary {
29195 summarize::Summary {
29196 id: 0,
29197 symbol_name: symbol_name.to_string(),
29198 file_path: file_path.to_string(),
29199 content_hash: "hash".to_string(),
29200 summary: summary.to_string(),
29201 entities: None,
29202 relationships: None,
29203 concept_labels: None,
29204 extracted_at: "2026-06-02T00:00:00Z".to_string(),
29205 model: "test".to_string(),
29206 tokens_input: None,
29207 tokens_output: None,
29208 }
29209 }
29210
29211 #[test]
29212 fn search_budget_ranked_preview_prioritizes_precise_ast_span_over_broad_file_hit() {
29213 let dir = tempfile::tempdir().unwrap();
29214 let src_dir = dir.path().join("src");
29215 fs::create_dir_all(&src_dir).unwrap();
29216 let source = "fn alpha_helper() {}\n";
29217 let file = src_dir.join("lib.rs");
29218 let broad_file = dir.path().join("README.md");
29219 fs::write(&file, source).unwrap();
29220 fs::write(
29221 &broad_file,
29222 "alpha helper alpha helper alpha helper in prose\n",
29223 )
29224 .unwrap();
29225
29226 let mut response = empty_search_response(dir.path(), "lexical");
29227 response.hits.push(test_lexical_search_hit(
29228 &broad_file,
29229 1,
29230 240.0,
29231 "alpha helper alpha helper alpha helper in prose",
29232 ));
29233 let symbol_hits = vec![index::SymbolHit {
29234 name: "alpha_helper".to_string(),
29235 kind: "function".to_string(),
29236 language: "rust".to_string(),
29237 file: file.to_string_lossy().to_string(),
29238 line: 0,
29239 end_line: Some(0),
29240 node_kind: Some("function_item".to_string()),
29241 start_byte: Some(0),
29242 end_byte: Some(i64::try_from(source.len()).unwrap()),
29243 body_start_byte: Some(i64::try_from(source.find("{}").unwrap() + 1).unwrap()),
29244 body_end_byte: Some(i64::try_from(source.find("{}").unwrap() + 1).unwrap()),
29245 tags: Some("alpha,helper".to_string()),
29246 score: 0.8,
29247 match_type: "all_tags".to_string(),
29248 tagpath_handle: None,
29249 }];
29250
29251 let report = build_relative_search_budget_report(
29252 "alpha helper",
29253 "lexical",
29254 dir.path(),
29255 &response,
29256 &symbol_hits,
29257 ResponseBudget::new(Some(5), Some(128)),
29258 &SearchFacetFilters::default(),
29259 );
29260
29261 assert_eq!(report.ranked[0].source, "symbol_span");
29262 assert_eq!(report.ranked[0].name.as_deref(), Some("alpha_helper"));
29263 assert!(report.ranked[0].score > report.ranked[1].score);
29264 assert_eq!(report.ranked[1].source, "lexical_file");
29265 }
29266
29267 #[test]
29268 fn search_budget_ranked_preview_includes_summary_and_graph_evidence() {
29269 let dir = tempfile::tempdir().unwrap();
29270 let source = "# Guide\n\n```rust\nfn sample() {}\n```\n";
29271 let file = dir.path().join("README.md");
29272 fs::write(&file, source).unwrap();
29273 let summary_db =
29274 summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
29275 summary_db
29276 .insert(&test_summary(
29277 "rust",
29278 "README.md",
29279 "Rust fence contains a sample function.",
29280 ))
29281 .unwrap();
29282
29283 let fence_start = source.find("```rust").unwrap();
29284 let body_start = source.find("fn sample").unwrap();
29285 let body_end = body_start + "fn sample() {}\n".len();
29286 let response = empty_search_response(dir.path(), "lexical");
29287 let symbol_hits = vec![index::SymbolHit {
29288 name: "rust".to_string(),
29289 kind: "code_block".to_string(),
29290 language: "markdown".to_string(),
29291 file: file.to_string_lossy().to_string(),
29292 line: 2,
29293 end_line: Some(4),
29294 node_kind: Some("fenced_code_block".to_string()),
29295 start_byte: Some(i64::try_from(fence_start).unwrap()),
29296 end_byte: Some(i64::try_from(source.len()).unwrap()),
29297 body_start_byte: Some(i64::try_from(body_start).unwrap()),
29298 body_end_byte: Some(i64::try_from(body_end).unwrap()),
29299 tags: Some("rust".to_string()),
29300 score: 1.0,
29301 match_type: "exact_name".to_string(),
29302 tagpath_handle: None,
29303 }];
29304
29305 let report = build_relative_search_budget_report(
29306 "rust",
29307 "lexical",
29308 dir.path(),
29309 &response,
29310 &symbol_hits,
29311 ResponseBudget::new(Some(5), Some(128)),
29312 &SearchFacetFilters::default(),
29313 );
29314
29315 let symbol = &report.symbols[0];
29316 assert_eq!(symbol.summary_refs, 1);
29317 assert_eq!(symbol.graph_neighbors, 1);
29318 assert!(
29319 report.ranked[0]
29320 .reasons
29321 .iter()
29322 .any(|reason| reason == "summary_refs:1")
29323 );
29324 assert!(
29325 report.ranked[0]
29326 .reasons
29327 .iter()
29328 .any(|reason| reason == "graph_neighbors:1")
29329 );
29330 }
29331
29332 fn markdown_search_facet_fixture() -> tempfile::TempDir {
29333 let dir = tempfile::tempdir().unwrap();
29334 let source = r#"# Guide
29335
29336## Install
29337
29338- Run setup.
29339 - Confirm setup.
29340
29341```rust
29342fn sample() {}
29343```
29344"#;
29345 fs::write(dir.path().join("README.md"), source).unwrap();
29346 let index_dir = dir.path().join(".tsift");
29347 fs::create_dir_all(&index_dir).unwrap();
29348 run_index_update(
29349 &index_dir.join("index.db"),
29350 dir.path(),
29351 "indexing markdown search facet fixture".to_string(),
29352 dir.path(),
29353 None,
29354 false,
29355 false,
29356 )
29357 .unwrap();
29358 dir
29359 }
29360
29361 fn markdown_search_facet_hits(root: &Path, query: &str) -> Vec<index::SymbolHit> {
29362 let db = index::IndexDb::open_read_only_resilient(&root.join(".tsift/index.db")).unwrap();
29363 db.symbol_search(query, 20).unwrap()
29364 }
29365
29366 #[test]
29367 fn search_facet_filters_match_scalar_symbol_fields() {
29368 let dir = tempfile::tempdir().unwrap();
29369 let hits = vec![
29370 index::SymbolHit {
29371 name: "alpha_helper".to_string(),
29372 kind: "function".to_string(),
29373 language: "rust".to_string(),
29374 file: dir.path().join("src/lib.rs").to_string_lossy().to_string(),
29375 line: 0,
29376 end_line: None,
29377 node_kind: Some("function_item".to_string()),
29378 start_byte: None,
29379 end_byte: None,
29380 body_start_byte: None,
29381 body_end_byte: None,
29382 tags: None,
29383 score: 1.0,
29384 match_type: "exact_name".to_string(),
29385 tagpath_handle: None,
29386 },
29387 index::SymbolHit {
29388 name: "Install".to_string(),
29389 kind: "heading".to_string(),
29390 language: "markdown".to_string(),
29391 file: dir.path().join("README.md").to_string_lossy().to_string(),
29392 line: 0,
29393 end_line: None,
29394 node_kind: Some("atx_heading".to_string()),
29395 start_byte: None,
29396 end_byte: None,
29397 body_start_byte: None,
29398 body_end_byte: None,
29399 tags: None,
29400 score: 0.9,
29401 match_type: "exact_name".to_string(),
29402 tagpath_handle: None,
29403 },
29404 ];
29405
29406 let filtered = apply_search_facet_filters(
29407 dir.path(),
29408 hits,
29409 &SearchFacetFilters {
29410 languages: vec!["rust".to_string()],
29411 kinds: vec!["function".to_string()],
29412 node_kinds: vec!["function_item".to_string()],
29413 ..SearchFacetFilters::default()
29414 },
29415 );
29416
29417 assert_eq!(filtered.len(), 1);
29418 assert_eq!(filtered[0].name, "alpha_helper");
29419 }
29420
29421 #[test]
29422 fn search_facet_filters_match_markdown_sections_and_block_metadata() {
29423 let dir = markdown_search_facet_fixture();
29424
29425 let nested_list = apply_search_facet_filters(
29426 dir.path(),
29427 markdown_search_facet_hits(dir.path(), "setup"),
29428 &SearchFacetFilters {
29429 sections: vec!["Install".to_string()],
29430 parents: vec!["Run setup.".to_string()],
29431 list_depths: vec![1],
29432 ..SearchFacetFilters::default()
29433 },
29434 );
29435 assert_eq!(nested_list.len(), 1);
29436 assert_eq!(nested_list[0].name, "Confirm setup.");
29437
29438 let parent_list = apply_search_facet_filters(
29439 dir.path(),
29440 markdown_search_facet_hits(dir.path(), "setup"),
29441 &SearchFacetFilters {
29442 children: vec!["Confirm setup.".to_string()],
29443 ..SearchFacetFilters::default()
29444 },
29445 );
29446 assert_eq!(parent_list.len(), 1);
29447 assert_eq!(parent_list[0].name, "Run setup.");
29448
29449 let heading = apply_search_facet_filters(
29450 dir.path(),
29451 markdown_search_facet_hits(dir.path(), "Install"),
29452 &SearchFacetFilters {
29453 heading_levels: vec![2],
29454 node_kinds: vec!["atx_heading".to_string()],
29455 ..SearchFacetFilters::default()
29456 },
29457 );
29458 assert_eq!(heading.len(), 1);
29459 assert_eq!(heading[0].name, "Install");
29460
29461 let fence = apply_search_facet_filters(
29462 dir.path(),
29463 markdown_search_facet_hits(dir.path(), "rust"),
29464 &SearchFacetFilters {
29465 fence_languages: vec!["rust".to_string()],
29466 kinds: vec!["code_block".to_string()],
29467 ..SearchFacetFilters::default()
29468 },
29469 );
29470 assert_eq!(fence.len(), 1);
29471 assert_eq!(fence[0].kind, "code_block");
29472
29473 let embedded_child = apply_search_facet_filters(
29474 dir.path(),
29475 markdown_search_facet_hits(dir.path(), "rust"),
29476 &SearchFacetFilters {
29477 children: vec!["sample".to_string()],
29478 kinds: vec!["code_block".to_string()],
29479 ..SearchFacetFilters::default()
29480 },
29481 );
29482 assert_eq!(embedded_child.len(), 1);
29483 assert_eq!(embedded_child[0].name, "rust");
29484 }
29485
29486 #[test]
29487 fn search_budget_report_groups_repeated_symbols_by_canonical_tag_family() {
29488 let response = empty_search_response(Path::new("/repo"), "lexical");
29489 let symbol_hits = vec![
29490 index::SymbolHit {
29491 name: "alpha_helper".to_string(),
29492 kind: "function".to_string(),
29493 language: "rust".to_string(),
29494 file: "/repo/src/lib.rs".to_string(),
29495 line: 12,
29496 end_line: None,
29497 node_kind: None,
29498 start_byte: None,
29499 end_byte: None,
29500 body_start_byte: None,
29501 body_end_byte: None,
29502 tags: Some("alpha,helper".to_string()),
29503 score: 0.98,
29504 match_type: "exact_name".to_string(),
29505 tagpath_handle: None,
29506 },
29507 index::SymbolHit {
29508 name: "alphaHelper".to_string(),
29509 kind: "method".to_string(),
29510 language: "rust".to_string(),
29511 file: "/repo/src/main.rs".to_string(),
29512 line: 34,
29513 end_line: None,
29514 node_kind: None,
29515 start_byte: None,
29516 end_byte: None,
29517 body_start_byte: None,
29518 body_end_byte: None,
29519 tags: Some("alpha,helper".to_string()),
29520 score: 0.93,
29521 match_type: "tag_overlap".to_string(),
29522 tagpath_handle: None,
29523 },
29524 index::SymbolHit {
29525 name: "alpha_helper".to_string(),
29526 kind: "function".to_string(),
29527 language: "rust".to_string(),
29528 file: "/repo/src/worker.rs".to_string(),
29529 line: 56,
29530 end_line: None,
29531 node_kind: None,
29532 start_byte: None,
29533 end_byte: None,
29534 body_start_byte: None,
29535 body_end_byte: None,
29536 tags: Some("alpha,helper".to_string()),
29537 score: 0.91,
29538 match_type: "tag_overlap".to_string(),
29539 tagpath_handle: None,
29540 },
29541 ];
29542
29543 let report = build_relative_search_budget_report(
29544 "alpha helper",
29545 "lexical",
29546 Path::new("/repo"),
29547 &response,
29548 &symbol_hits,
29549 ResponseBudget::new(Some(5), Some(48)),
29550 &SearchFacetFilters::default(),
29551 );
29552
29553 assert_eq!(report.symbol_total, 1);
29554 assert_eq!(report.raw_symbol_total, 3);
29555 assert_eq!(report.symbols.len(), 1);
29556 assert_eq!(report.symbols[0].tag_alias.as_deref(), Some("alpha/helper"));
29557 assert_eq!(report.symbols[0].match_count, 3);
29558 assert_eq!(report.symbols[0].surface_count, 2);
29559 assert_eq!(report.symbols[0].file_count, 3);
29560 assert_eq!(
29561 report.symbols[0].surface_examples,
29562 vec!["alpha_helper".to_string(), "alphaHelper".to_string()]
29563 );
29564 assert!(report.symbols[0].name.contains("(+1 variant)"));
29565 assert!(report.symbols[0].file.contains("(+2 files)"));
29566 assert!(report.symbols[0].expand.contains("tsift search"));
29567 assert!(report.symbols[0].expand.contains("alpha helper"));
29568 }
29569
29570 #[test]
29571 fn search_budget_report_carries_active_filters() {
29572 let response = empty_search_response(Path::new("/repo"), "lexical");
29573 let symbol_hits = vec![index::SymbolHit {
29574 name: "alpha_helper".to_string(),
29575 kind: "function".to_string(),
29576 language: "rust".to_string(),
29577 file: "/repo/src/lib.rs".to_string(),
29578 line: 12,
29579 end_line: None,
29580 node_kind: Some("function_item".to_string()),
29581 start_byte: None,
29582 end_byte: None,
29583 body_start_byte: None,
29584 body_end_byte: None,
29585 tags: Some("alpha,helper".to_string()),
29586 score: 0.98,
29587 match_type: "exact_name".to_string(),
29588 tagpath_handle: None,
29589 }];
29590 let filters = SearchFacetFilters {
29591 languages: vec!["rust".to_string()],
29592 kinds: vec!["function".to_string()],
29593 node_kinds: vec!["function_item".to_string()],
29594 ..SearchFacetFilters::default()
29595 };
29596
29597 let report = build_relative_search_budget_report(
29598 "alpha helper",
29599 "lexical",
29600 Path::new("/repo"),
29601 &response,
29602 &symbol_hits,
29603 ResponseBudget::new(Some(5), Some(48)),
29604 &filters,
29605 );
29606
29607 assert_eq!(report.filters, filters);
29608 assert_eq!(
29609 search_facet_filters_summary(&report.filters),
29610 "lang=rust kind=function node-kind=function_item"
29611 );
29612 }
29613
29614 #[test]
29615 fn search_budget_report_warns_on_broad_preview_and_lists_narrowing_commands() {
29616 let mut response = empty_search_response(Path::new("/repo"), "lexical");
29617 response.indexed_artifacts = 450;
29618 let symbol_hits = vec![
29619 index::SymbolHit {
29620 name: "alpha_helper".to_string(),
29621 kind: "function".to_string(),
29622 language: "rust".to_string(),
29623 file: "/repo/src/lib.rs".to_string(),
29624 line: 12,
29625 end_line: None,
29626 node_kind: None,
29627 start_byte: None,
29628 end_byte: None,
29629 body_start_byte: None,
29630 body_end_byte: None,
29631 tags: Some("alpha,helper".to_string()),
29632 score: 0.98,
29633 match_type: "exact_name".to_string(),
29634 tagpath_handle: None,
29635 },
29636 index::SymbolHit {
29637 name: "beta_helper".to_string(),
29638 kind: "function".to_string(),
29639 language: "rust".to_string(),
29640 file: "/repo/src/beta.rs".to_string(),
29641 line: 21,
29642 end_line: None,
29643 node_kind: None,
29644 start_byte: None,
29645 end_byte: None,
29646 body_start_byte: None,
29647 body_end_byte: None,
29648 tags: Some("beta,helper".to_string()),
29649 score: 0.92,
29650 match_type: "tag_overlap".to_string(),
29651 tagpath_handle: None,
29652 },
29653 ];
29654
29655 let report = build_relative_search_budget_report(
29656 "helper",
29657 "lexical",
29658 Path::new("/repo"),
29659 &response,
29660 &symbol_hits,
29661 ResponseBudget::new(Some(1), Some(64)),
29662 &SearchFacetFilters::default(),
29663 );
29664
29665 let guard = report
29666 .scale_guard
29667 .as_ref()
29668 .expect("broad previews should emit a scale guard");
29669 assert_eq!(guard.level, "high-hit");
29670 assert_eq!(guard.signals.indexed_artifacts, 450);
29671 assert_eq!(guard.signals.raw_symbol_matches, 2);
29672 assert!(
29673 guard
29674 .narrow_commands
29675 .iter()
29676 .any(|command| command.contains("--exact"))
29677 );
29678 assert!(
29679 guard
29680 .narrow_commands
29681 .iter()
29682 .any(|command| command.contains("alpha helper"))
29683 );
29684 assert!(
29685 guard
29686 .narrow_commands
29687 .last()
29688 .unwrap()
29689 .contains("workflow search")
29690 );
29691 }
29692
/// Feeds one definition, two callers and a three-member community into
/// `build_explain_budget_report` with a budget of `ResponseBudget::new(Some(1), Some(24))`
/// and checks that callers and community members are cut down to one entry each, that the
/// report is flagged as truncated, and that handles / tag aliases are attached.
#[test]
fn explain_budget_report_limits_edges_and_members() {
    // The single definition the explain report is built around.
    let definition = index::StoredSymbol {
        name: String::from("alpha_helper"),
        kind: String::from("function"),
        language: String::from("rust"),
        file: String::from("src/lib.rs"),
        line: 10,
        signature: None,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        parent_module: None,
        visibility: None,
        tags: None,
        tagpath_handle: None,
    };
    let symbols = vec![definition];

    // Both call edges target `alpha_helper`; only the caller side differs.
    let edge_into_alpha = |file: &str, caller: &str, caller_line, call_site_line| index::StoredEdge {
        caller_file: file.to_owned(),
        caller_name: caller.to_owned(),
        caller_line,
        callee_name: String::from("alpha_helper"),
        call_site_line,
        tagpath_handle: None,
    };
    let callers = vec![
        edge_into_alpha("src/main.rs", "main", 1, 3),
        edge_into_alpha("src/worker.rs", "worker", 5, 8),
    ];

    let community = graph::Community {
        id: 1,
        members: ["alpha_helper", "main", "worker"]
            .iter()
            .map(|member| graph::CommunityMember::new(*member))
            .collect(),
        modularity_contribution: 0.5,
    };

    let budget = ResponseBudget::new(Some(1), Some(24));
    let report = build_explain_budget_report(
        "alpha_helper",
        Path::new("/repo"),
        &symbols,
        &callers,
        2,
        false,
        &[],
        0,
        false,
        Some(&community),
        budget,
    );

    // List sections are trimmed to the one-item budget and the trim is reported.
    assert_eq!(report.definitions.len(), 1);
    assert_eq!(report.callers.len(), 1);
    assert!(report.truncated);
    let trimmed_community = report.community.as_ref().unwrap();
    assert_eq!(trimmed_community.members.len(), 1);

    // Surviving entries still carry their alias and handle metadata.
    let kept_definition = &report.definitions[0];
    assert_eq!(kept_definition.tag_alias.as_deref(), Some("alpha/helper"));
    let kept_caller = &report.callers[0];
    assert!(kept_caller.handle.starts_with("ecall-"));
    assert_eq!(kept_caller.tag_alias.as_deref(), Some("main"));
}
29766
/// Builds a `SessionReviewReport` by hand and runs it through
/// `build_session_review_next_context_budget_report` twice.
///
/// With `ResponseBudget::new(Some(1), Some(12))` the prompt targets, touched files and
/// token actions are limited to one entry, while all four digest commands are kept
/// verbatim. With `ResponseBudget::new(Some(4), Some(120))` every guardrail shows up as a
/// token action, with `prompt_budget` first, and that first action carries the compact,
/// restart and context-pack commands for the target document.
#[test]
fn session_review_next_context_budget_limits_lists() {
    let target = "tasks/software/tsift.md";
    let prompts = ["do one", "do two"];

    // The two cost summaries share every token count; only scope and ratio vary.
    let cost_summary = |scope: &str, ratio| session_review::SessionReviewCostSummary {
        scope: scope.to_owned(),
        sessions: 1,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(ratio),
        largest_turn_total_tokens: 240,
    };

    // Every guardrail in this fixture uses the "warn" severity.
    let warn_guardrail = |kind: &str, message: &str, guidance: &str| {
        session_cost::SessionCostGuardrail {
            kind: kind.to_owned(),
            severity: String::from("warn"),
            message: message.to_owned(),
            guidance: guidance.to_owned(),
        }
    };
    let guardrails = vec![
        warn_guardrail(
            "cache_resend",
            "cached input ratio was high",
            "compact or restart the session",
        ),
        warn_guardrail(
            "prompt_budget",
            "largest prompt turn reached 999999 tokens",
            "compact the session before another large turn",
        ),
        warn_guardrail("restart_loop", "restart churn detected", "restart cleanly"),
        warn_guardrail(
            "noop_closeout",
            "commit_already_current appeared 8 times",
            "avoid reopening without new edits",
        ),
    ];

    let prompt_targets = prompts
        .iter()
        .map(|text| session_review::SessionReviewPromptTarget {
            text: (*text).to_owned(),
            occurrences: 1,
        })
        .collect();

    let matched_session = session_review::SessionReviewSession {
        source: String::from("claude_jsonl"),
        path: String::from("/tmp/session.jsonl"),
        matched_by: vec![String::from("path")],
        modified_unix_secs: None,
        prompt_target_count: 2,
        command_groups: 0,
        file_groups: 2,
        symbol_groups: 1,
        failure_groups: 1,
        runtime_event_groups: 0,
        restart_churn_groups: 0,
        closeout_groups: 0,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        largest_turn_total_tokens: 240,
    };

    let timeout_failure = session_review::SessionReviewFailure {
        kind: String::from("timeout"),
        message: String::from("search timed out"),
        occurrences: 1,
        command: None,
        session_path: None,
    };

    let next_context = session_review::SessionReviewNextContext {
        target: target.to_owned(),
        active_prompt_targets: prompts.iter().map(|text| (*text).to_owned()).collect(),
        last_verification: session_review::SessionReviewVerificationState {
            status: String::from("green"),
            detail: String::from("cargo test"),
        },
        touched_files: vec![String::from("src/lib.rs"), String::from("src/main.rs")],
        touched_symbols: vec![String::from("alpha_helper"), String::from("main")],
        unresolved_failures: vec![timeout_failure],
        next_digest_commands: vec![
            String::from("tsift session-review --next-context tasks/software/tsift.md"),
            String::from("tsift diff-digest ."),
            String::from("tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log"),
            String::from("tsift log-digest --path . < target/very-long-build-output-file-name-that-must-remain-executable.log"),
        ],
    };

    let report = session_review::SessionReviewReport {
        root: String::from("/repo"),
        target: target.to_owned(),
        target_kind: String::from("file"),
        sessions_considered: 1,
        sessions_matched: 1,
        claude_sessions: 1,
        codex_sessions: 0,
        agent_doc_logs: 0,
        prompt_target_count: 2,
        command_groups: 0,
        file_groups: 2,
        symbol_groups: 1,
        failure_groups: 1,
        runtime_event_groups: 0,
        restart_churn_groups: 0,
        closeout_groups: 0,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(40.0),
        largest_turn_total_tokens: 240,
        aggregate_cost: cost_summary("bounded_matched_sessions", 40.0),
        latest_session_cost: Some(cost_summary("latest_matched_session", 66.67)),
        guardrails,
        loop_clusters: Vec::new(),
        file_read_diagnostics: Vec::new(),
        prompt_targets,
        commands: Vec::new(),
        touched_files: Vec::new(),
        touched_symbols: Vec::new(),
        failures: Vec::new(),
        runtime_events: Vec::new(),
        restart_churn: Vec::new(),
        closeout: Vec::new(),
        largest_turns: Vec::new(),
        sessions: vec![matched_session],
        next_context,
        warnings: Vec::new(),
    };

    // Tight budget: one item per list, twelve-character text cap.
    let tight_budget = ResponseBudget::new(Some(1), Some(12));
    let budget_report = build_session_review_next_context_budget_report(&report, tight_budget, None);

    assert!(budget_report.truncated);
    assert_eq!(budget_report.prompt_targets, vec!["do one"]);
    assert_eq!(budget_report.touched_files, vec!["src/lib.rs"]);
    let first_symbol_ref = &budget_report.touched_symbol_refs[0];
    assert!(first_symbol_ref.handle.starts_with("ncsym-"));
    assert_eq!(first_symbol_ref.tag_alias.as_deref(), Some("alpha/helper"));
    assert!(
        budget_report.unresolved_failures[0]
            .handle
            .starts_with("snf-")
    );
    // Digest commands come back complete and unshortened despite the tight budget.
    assert_eq!(budget_report.next_digest_commands.len(), 4);
    assert_eq!(
        budget_report.next_digest_commands[2],
        "tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log"
    );
    assert_eq!(budget_report.next_token_actions.len(), 1);
    assert_eq!(budget_report.next_token_actions[0].kind, "prompt_budget");

    // Roomy budget: all four guardrails become token actions.
    let roomy_budget = ResponseBudget::new(Some(4), Some(120));
    let full_action_report =
        build_session_review_next_context_budget_report(&report, roomy_budget, None);
    let action_kinds = full_action_report
        .next_token_actions
        .iter()
        .map(|action| action.kind.as_str())
        .collect::<Vec<_>>();
    assert_eq!(
        action_kinds,
        vec![
            "prompt_budget",
            "cache_resend",
            "restart_loop",
            "noop_closeout"
        ]
    );

    let leading_action = &full_action_report.next_token_actions[0];
    assert_eq!(
        leading_action.compact_command.as_deref(),
        Some("agent-doc compact \"tasks/software/tsift.md\" --commit")
    );
    assert_eq!(
        leading_action.restart_command.as_deref(),
        Some("agent-doc start \"tasks/software/tsift.md\"")
    );
    assert!(
        leading_action
            .digest_commands
            .iter()
            .any(|digest| digest
                == "tsift --envelope context-pack \"tasks/software/tsift.md\" --budget normal")
    );
}
29984
/// Runs `build_context_pack_diff_preview` over a two-file diff digest with
/// `ResponseBudget::new(Some(1), Some(11))` and checks that only the first file survives,
/// that symbol names, aliases and summaries are shortened with a trailing `...`, and that
/// the symbol / summary refs carry `cdsym-` / `cdsum-` handles plus an expand command.
#[test]
fn context_pack_diff_preview_limits_files_and_symbols() {
    // Modified file with two touched symbols, one cached summary and a warning.
    let lib_file = diff_digest::DiffDigestFile {
        path: String::from("src/lib.rs"),
        status: diff_digest::DiffDigestFileStatus::Modified,
        touched_symbols: vec![String::from("alpha_helper"), String::from("beta_helper")],
        summary_state: diff_digest::DiffDigestSummaryState::Current,
        current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
            symbol: String::from("alpha_helper"),
            summary: String::from("alpha helper handles the main alpha workflow"),
        }],
        added_call_edges: vec![String::from("alpha->beta")],
        removed_call_edges: Vec::new(),
        warnings: vec![String::from("stale parse")],
    };
    // Newly added file without any cached summary.
    let main_file = diff_digest::DiffDigestFile {
        path: String::from("src/main.rs"),
        status: diff_digest::DiffDigestFileStatus::Added,
        touched_symbols: vec![String::from("main")],
        summary_state: diff_digest::DiffDigestSummaryState::Missing,
        current_summaries: Vec::new(),
        added_call_edges: Vec::new(),
        removed_call_edges: Vec::new(),
        warnings: Vec::new(),
    };
    let report = diff_digest::DiffDigestReport {
        root: String::from("/repo"),
        mode: diff_digest::DiffDigestMode::WorkingTree,
        revision: None,
        files_changed: 2,
        files_with_current_summaries: 1,
        symbols_touched: 3,
        call_edges_added: 1,
        call_edges_removed: 0,
        files: vec![lib_file, main_file],
    };

    let budget = ResponseBudget::new(Some(1), Some(11));
    let preview = build_context_pack_diff_preview(&report, budget, None);

    assert!(preview.truncated);
    assert_eq!(preview.files.len(), 1);

    let kept_file = &preview.files[0];
    assert_eq!(kept_file.path, "src/lib.rs");
    assert_eq!(kept_file.touched_symbols, vec!["alpha_he..."]);
    assert!(kept_file.touched_symbol_refs[0].handle.starts_with("cdsym-"));
    assert_eq!(
        kept_file.touched_symbol_refs[0].tag_alias.as_deref(),
        Some("alpha/he...")
    );
    assert!(kept_file.summary_refs[0].handle.starts_with("cdsum-"));
    assert_eq!(
        kept_file.summary_refs[0].tag_alias.as_deref(),
        Some("alpha/he...")
    );
    assert_eq!(kept_file.summary_refs[0].summary, "alpha he...");
    assert_eq!(
        kept_file.summary_refs[0].expand,
        "tsift summarize --file \"src/lib.rs\""
    );
    assert_eq!(kept_file.warnings, vec!["stale parse"]);
}
30055
/// After `main.rs` in an indexed fixture project is overwritten,
/// `context_pack_status_reminders` must return exactly one reminder that mentions both
/// "index stale" and the `tsift index .` command.
#[test]
fn context_pack_status_reminders_include_stale_index_state() {
    let project = setup_graph_index();
    let main_rs = project.path().join("main.rs");

    // NOTE(review): the pause presumably guarantees a newer modification time than the
    // indexed copy — confirm against the staleness check.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let edited_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n";
    std::fs::write(main_rs, edited_source).unwrap();

    let reminders = context_pack_status_reminders(project.path());

    assert_eq!(reminders.len(), 1);
    let only_reminder = &reminders[0];
    assert!(only_reminder.contains("index stale"));
    assert!(only_reminder.contains("tsift index ."));
}
30072
/// While an `index::InspectScopeGuard` is alive, building a context-pack report must
/// record at least one inspect miss and at least one inspect hit in
/// `index::inspect_scope_stats`.
#[test]
fn build_context_pack_reuses_inspect_within_scope() {
    let project = setup_graph_index();
    init_git_repo(project.path());

    // Keep the guard bound for the whole test so the scope stays open.
    let _scope = index::InspectScopeGuard::new();

    // Only the side effect on the scope statistics matters; the report is discarded.
    let budget = ResponseBudget::new(Some(2), Some(96));
    drop(build_context_pack_report(project.path(), None, None, None, budget).unwrap());

    let (hits, misses) = index::inspect_scope_stats();
    assert!(
        hits >= 1,
        "expected at least one cached inspect within scope (hits={hits}, misses={misses})"
    );
    assert!(
        misses >= 1,
        "expected at least one initial inspect miss (hits={hits}, misses={misses})"
    );
}
30102
/// Without any inspect scope guard, two consecutive `IndexDb::inspect_read_only` calls
/// must leave `index::inspect_scope_stats` at zero hits (and zero misses after the first
/// call).
#[test]
fn inspect_read_only_outside_scope_does_not_cache() {
    let project = setup_graph_index();
    let root = project.path();
    let db_path = root.join(".tsift/index.db");

    let _initial = index::IndexDb::inspect_read_only(&db_path, root, false).unwrap();
    let stats_after_initial = index::inspect_scope_stats();
    assert_eq!(
        stats_after_initial,
        (0, 0),
        "no scope guard => no hits/misses recorded"
    );

    let _repeat = index::IndexDb::inspect_read_only(&db_path, root, false).unwrap();
    let hits_after_repeat = index::inspect_scope_stats().0;
    assert_eq!(
        hits_after_repeat, 0,
        "must not reuse inspection outside of any scope"
    );
}
30122
/// When the index is stale at the time a context pack is built, the report must contain
/// an "index refreshed" / "context-pack handoff" reminder, must not contain an
/// "index stale" reminder, and the on-disk index must afterwards show no pending changes.
#[test]
fn context_pack_refreshes_stale_index_before_handoff() {
    let project = setup_graph_index();
    let root = project.path();
    init_git_repo(root);

    // Rewrite a source file after indexing so the index is out of date.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let edited_source = "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n";
    std::fs::write(root.join("main.rs"), edited_source).unwrap();

    let budget = ResponseBudget::new(Some(2), Some(96));
    let report = build_context_pack_report(root, None, None, None, budget).unwrap();

    let mentions_refresh = report.status_reminders.iter().any(|text| {
        text.contains("index refreshed") && text.contains("context-pack handoff")
    });
    assert!(
        mentions_refresh,
        "expected context-pack refresh diagnostic, got {:?}",
        report.status_reminders
    );

    let none_stale = report
        .status_reminders
        .iter()
        .all(|text| !text.contains("index stale"));
    assert!(
        none_stale,
        "stale reminder should be gone after refresh: {:?}",
        report.status_reminders
    );

    // The refreshed index must agree with the working tree.
    let db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let summary = db.compute_changes(root).unwrap();
    let pending_changes = summary.new + summary.modified + summary.deleted;
    assert_eq!(pending_changes, 0);
}
30165
/// `materialize_context_pack_exploration_packet` must hand back the packet with its window
/// handle unchanged and leave a `.tsift/graph.db` store containing one `source_handle`
/// node, one `worker_context` node, the `touches_symbol` relation and a `scopes_source`
/// edge from the worker context.
#[test]
fn context_pack_materializes_source_handles_into_graph_store() {
    let workspace = tempfile::tempdir().unwrap();

    let relation = ExplorationRelation {
        from: String::from("file:main.rs"),
        relation: String::from("touches_symbol"),
        to: String::from("symbol:helper"),
        label: Some(String::from("modified diff")),
    };
    let window = ExplorationSourceWindow {
        handle: String::from("xwin-test"),
        file: String::from("main.rs"),
        start: 1,
        end: 32,
        reason: String::from("changed file"),
        expand: String::from("tsift source-read main.rs --path . --start 1 --lines 32"),
    };
    let worker = ExplorationWorkerContext {
        handle: String::from("xwrk-test"),
        target: String::from("tasks/software/tsift.md"),
        summary: String::from("do #kgnv"),
        expand: String::from(
            "tsift --envelope context-pack tasks/software/tsift.md --budget normal",
        ),
    };
    let seed_packet = ExplorationPacket {
        budget: exploration_budget_for_counts(2, 1),
        relationship_map: vec![relation],
        source_windows: vec![window],
        worker_context: vec![worker],
        no_reread_guidance: String::from("use windows"),
    };

    let materialized =
        materialize_context_pack_exploration_packet(workspace.path(), seed_packet).unwrap();
    assert_eq!(materialized.source_windows[0].handle, "xwin-test");

    // Inspect what was persisted to the graph store.
    let store = SqliteGraphStore::open(&workspace.path().join(".tsift/graph.db")).unwrap();

    let source_handles = store.nodes_by_kind("source_handle").unwrap();
    assert_eq!(source_handles.len(), 1);
    assert_eq!(
        source_handles[0].properties.get("file"),
        Some(&"main.rs".to_string())
    );

    let touches_edges = store
        .outgoing_edges(&exploration_ref_id("file:main.rs"), Some("touches_symbol"))
        .unwrap();
    assert_eq!(touches_edges.len(), 1);

    let worker_nodes = store.nodes_by_kind("worker_context").unwrap();
    assert_eq!(worker_nodes.len(), 1);

    let scope_edges = store
        .outgoing_edges("xwrk-test", Some("scopes_source"))
        .unwrap();
    assert_eq!(scope_edges.len(), 1);
}
30222
/// Builds a context pack for the session document of the traversal fixture and checks the
/// `graph_orchestration` section: contract version, "current" projection freshness,
/// non-empty projection hashes, a fail-closed "blocked" readiness with reason
/// `summary_cache_empty`, the `tsift summarize --extract .` follow-up, no `gevd-` evidence
/// ids, no conflict-matrix follow-up command, and at least one worker ownership block.
#[test]
fn context_pack_records_graph_orchestration_observability() {
    let project = setup_traversal_project();
    init_git_repo(project.path());
    let session = project.path().join("tasks/software/tsift.md");
    refresh_traversal_graph_store(project.path(), &session, None).unwrap();

    let budget = ResponseBudget::new(Some(4), Some(160));
    let report = build_context_pack_report(&session, None, None, None, budget).unwrap();
    let orchestration = &report.graph_orchestration;
    let readiness = &orchestration.readiness;

    assert_eq!(
        orchestration.contract_version,
        CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION
    );
    assert_eq!(orchestration.projection_freshness.status.as_str(), "current");
    assert!(!orchestration.projection_hashes.is_empty());

    // Readiness is blocked because no summaries have been extracted yet.
    assert_eq!(readiness.status, "blocked");
    assert_eq!(readiness.reason, "summary_cache_empty");
    assert!(readiness.fail_closed);
    assert!(
        readiness
            .next_commands
            .iter()
            .any(|next| next == "tsift summarize --extract ."),
        "{:?}",
        readiness.next_commands
    );

    let has_evidence_id = orchestration
        .evidence_packet_ids
        .iter()
        .any(|id| id.starts_with("gevd-"));
    assert!(
        !has_evidence_id,
        "evidence packet ids should be empty when readiness is blocked: {:?}",
        orchestration.evidence_packet_ids
    );

    assert!(
        orchestration
            .conflict_matrix_decisions
            .iter()
            .any(|note| note.contains("readiness blocked")),
        "conflict-matrix decisions should reference readiness block: {:?}",
        orchestration.conflict_matrix_decisions
    );

    let no_conflict_matrix_follow_up = orchestration
        .follow_up_commands
        .iter()
        .all(|follow_up| !follow_up.contains("conflict-matrix"));
    assert!(
        no_conflict_matrix_follow_up,
        "conflict-matrix command should not appear when readiness is blocked: {:?}",
        orchestration.follow_up_commands
    );
    assert!(
        orchestration
            .follow_up_commands
            .iter()
            .any(|follow_up| follow_up == "tsift summarize --extract ."),
        "{:?}",
        orchestration.follow_up_commands
    );

    assert!(!orchestration.worker_ownership_blocks.is_empty());
}
30311
/// Compares the local traversal projection against a doctored snapshot (one extra node,
/// all real edges replaced by one bogus edge) with a chunk size of 2 and checks that the
/// sync report is stale and fail-closed, tombstones the extra node and edge, upserts the
/// local edges, plans edge deletions first, and splits edge upserts into chunks of at most
/// two rows.
#[test]
fn convex_sync_report_chunks_upserts_and_tombstones() {
    let project = setup_traversal_project();
    let root = project.path();
    let source_graph = build_traversal_graph_source(root, root, None).unwrap();
    let projection = traversal_projection_from_graph(root, None, &source_graph).unwrap();

    // Rows that exist only in the snapshot and must therefore be tombstoned.
    let stale_node = ConvexNodeRow {
        external_id: String::from("stale-node"),
        kind: String::from("backlog"),
        label: String::from("stale"),
        properties: BTreeMap::new(),
        provenance: Vec::new(),
        freshness: None,
    };
    let stale_edge = ConvexEdgeRow {
        edge_key: String::from("stale-edge"),
        from_external_id: String::from("stale-node"),
        to_external_id: String::from("stale-node"),
        kind: String::from("mentions"),
        properties: BTreeMap::new(),
        provenance: Vec::new(),
        freshness: None,
    };

    let mut snapshot = projection.to_convex_rows();
    snapshot.nodes.push(stale_node);
    // Drop every real edge so the local edges all count as missing remotely.
    snapshot.edges.clear();
    snapshot.edges.push(stale_edge);

    let snapshot_path = root.join("convex-snapshot.json");
    let encoded_snapshot = serde_json::to_string(&snapshot).unwrap();
    fs::write(&snapshot_path, encoded_snapshot).unwrap();

    let report = build_convex_sync_report(root, None, Some(&snapshot_path), 2).unwrap();

    assert_eq!(report.freshness.status, "stale");
    assert!(report.freshness.fail_closed);
    assert_eq!(report.node_tombstones, vec!["stale-node".to_string()]);
    assert!(
        report.edge_upserts.len() > 1,
        "snapshot without edges should upsert local edges"
    );
    assert_eq!(report.edge_tombstones, vec!["stale-edge".to_string()]);

    let first_operation = report.chunks.first().map(|chunk| chunk.operation.as_str());
    assert_eq!(
        first_operation,
        Some("delete_edges"),
        "edge tombstones should be planned before node tombstones"
    );

    let has_bounded_edge_chunk = report
        .chunks
        .iter()
        .any(|chunk| chunk.operation == "upsert_edges" && chunk.count <= 2);
    assert!(
        has_bounded_edge_chunk,
        "expected chunked edge upserts, got {:?}",
        report.chunks
    );
}
30363
/// Verifying a default (empty) `ConvexProjectionRows` snapshot against a project with a
/// built traversal graph must fail with an error mentioning
/// "Convex graph projection is not current".
#[test]
fn convex_snapshot_validation_fails_closed_when_stale() {
    let project = setup_traversal_project();
    let root = project.path();
    build_traversal_graph(root, root, None).unwrap();

    // Persist an empty snapshot to compare against.
    let snapshot_path = root.join("empty-convex-snapshot.json");
    let empty_snapshot = ConvexProjectionRows::default();
    let encoded_snapshot = serde_json::to_string(&empty_snapshot).unwrap();
    fs::write(&snapshot_path, encoded_snapshot).unwrap();

    let err = verify_convex_projection_snapshot(root, None, &snapshot_path).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("Convex graph projection is not current"),
        "{err}"
    );
}
30379
/// A sync report built with the final flag of `build_convex_sync_report_with_snapshot` set
/// to `false` must not be marked as a dry run, must not carry a "dry-run only" diagnostic,
/// and must still list an `upsert_nodes` chunk.
#[test]
fn convex_sync_report_marks_live_apply_mode_without_network() {
    let project = setup_traversal_project();
    let report =
        build_convex_sync_report_with_snapshot(project.path(), None, None, 100, false).unwrap();

    assert!(!report.dry_run);

    let no_dry_run_note = report
        .diagnostics
        .iter()
        .all(|note| !note.contains("dry-run only"));
    assert!(
        no_dry_run_note,
        "apply-mode report should not claim dry-run diagnostics"
    );

    let plans_node_upserts = report
        .chunks
        .iter()
        .any(|planned| planned.operation == "upsert_nodes");
    assert!(
        plans_node_upserts,
        "live apply mode should still expose chunked idempotent operations"
    );
}
30402
/// Runs `cmd_convex_sync` in apply mode against a minimal in-process HTTP server bound to
/// an ephemeral localhost port and checks that both `upsert_nodes` and `upsert_edges`
/// operations were posted.
///
/// The server thread accepts exactly as many connections as the sync report has chunks,
/// reads one `POST` request per connection, records the JSON body's `operation` field and
/// answers with a fixed `200 OK` JSON payload.
#[test]
fn convex_sync_apply_round_trips_with_http_backend() {
    use std::net::TcpListener;
    use std::sync::{Arc, Mutex};

    let dir = setup_traversal_project();
    let report =
        build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
    // One request is expected per planned chunk.
    let expected_chunks = report.chunks.len();
    assert!(expected_chunks > 0);

    // Port 0 lets the OS pick a free port, so parallel tests cannot collide.
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let endpoint = format!("http://{}", listener.local_addr().unwrap());
    let operations = Arc::new(Mutex::new(Vec::<String>::new()));
    let server_operations = Arc::clone(&operations);
    let server = std::thread::spawn(move || {
        for _ in 0..expected_chunks {
            let (mut stream, _) = listener.accept().unwrap();
            let mut reader = BufReader::new(stream.try_clone().unwrap());
            let mut request_line = String::new();
            reader.read_line(&mut request_line).unwrap();
            assert!(request_line.starts_with("POST "));

            // Consume headers up to the blank line, remembering the body length.
            let mut content_length = 0usize;
            loop {
                let mut line = String::new();
                let bytes_read = reader.read_line(&mut line).unwrap();
                // `read_line` reports EOF as `Ok(0)` and leaves `line` empty, which never
                // equals "\r\n"; fail fast instead of spinning forever when the client
                // disconnects before finishing its headers.
                assert!(
                    bytes_read > 0,
                    "connection closed before the end of the request headers"
                );
                if line == "\r\n" {
                    break;
                }
                if let Some(value) = line.to_ascii_lowercase().strip_prefix("content-length:") {
                    content_length = value.trim().parse().unwrap();
                }
            }

            let mut body = vec![0u8; content_length];
            reader.read_exact(&mut body).unwrap();
            let request: serde_json::Value = serde_json::from_slice(&body).unwrap();
            server_operations
                .lock()
                .unwrap()
                .push(request["operation"].as_str().unwrap().to_string());

            let response = br#"{"status":"ok","message":"accepted"}"#;
            write!(
                stream,
                "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
                response.len()
            )
            .unwrap();
            stream.write_all(response).unwrap();
        }
    });

    cmd_convex_sync(
        ConvexSyncOptions {
            path: dir.path(),
            scope: None,
            snapshot: None,
            chunk_size: 100,
            remote_snapshot: false,
            apply: true,
            endpoint: Some(&endpoint),
            auth_token_env: "TSIFT_TEST_CONVEX_AUTH_TOKEN",
        },
        OutputFormat {
            json_output: false,
            compact: true,
            pretty: false,
            terse: false,
            ultra_terse: false,
            schema: false,
            envelope: false,
        },
    )
    .unwrap();
    // Joining surfaces any assertion failure raised inside the server thread.
    server.join().unwrap();

    let operations = operations.lock().unwrap().clone();
    assert!(operations.contains(&"upsert_nodes".to_string()));
    assert!(operations.contains(&"upsert_edges".to_string()));
}
30485
/// With a tag ontology loaded from `.naming/tags/alpha.md`, the diff preview must attach
/// an ontology ref (handle prefix `tont-`, tag, path, title and domain taken from the
/// fixture front matter) to the touched symbol and to its summary ref.
#[test]
fn context_pack_diff_preview_attaches_tag_ontology_refs() {
    let root = tempfile::tempdir().unwrap();

    // Write a single tag definition for "alpha".
    let tags_dir = root.path().join(".naming/tags");
    fs::create_dir_all(&tags_dir).unwrap();
    fs::write(
        root.path().join(".naming/tags/alpha.md"),
        "+++\ntag = \"alpha\"\ntitle = \"Alpha Domain\"\ndomain = \"fixture\"\n+++\n\nAlpha definition.\n",
    )
    .unwrap();
    let ontology = load_tag_ontology_preview_context(root.path()).unwrap();

    let changed_file = diff_digest::DiffDigestFile {
        path: String::from("src/lib.rs"),
        status: diff_digest::DiffDigestFileStatus::Modified,
        touched_symbols: vec![String::from("alpha_helper")],
        summary_state: diff_digest::DiffDigestSummaryState::Current,
        current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
            symbol: String::from("alpha_helper"),
            summary: String::from("alpha helper summary"),
        }],
        added_call_edges: Vec::new(),
        removed_call_edges: Vec::new(),
        warnings: Vec::new(),
    };
    let report = diff_digest::DiffDigestReport {
        root: root.path().display().to_string(),
        mode: diff_digest::DiffDigestMode::WorkingTree,
        revision: None,
        files_changed: 1,
        files_with_current_summaries: 1,
        symbols_touched: 1,
        call_edges_added: 0,
        call_edges_removed: 0,
        files: vec![changed_file],
    };

    let budget = ResponseBudget::new(Some(1), Some(80));
    let preview = build_context_pack_diff_preview(&report, budget, Some(&ontology));

    let previewed_file = &preview.files[0];
    let symbol_ref = &previewed_file.touched_symbol_refs[0].ontology_refs[0];
    assert!(symbol_ref.handle.starts_with("tont-"));
    assert_eq!(symbol_ref.tag, "alpha");
    assert_eq!(symbol_ref.path, ".naming/tags/alpha.md");
    assert_eq!(symbol_ref.title.as_deref(), Some("Alpha Domain"));
    assert_eq!(symbol_ref.domain.as_deref(), Some("fixture"));

    let summary_ontology_ref = &previewed_file.summary_refs[0].ontology_refs[0];
    assert_eq!(summary_ontology_ref.path, ".naming/tags/alpha.md");
}
30537
/// Runs `build_context_pack_test_preview` over a two-failure test digest with
/// `ResponseBudget::new(Some(1), Some(14))` and checks that only the first failure group
/// is kept, that test names and messages are shortened with a trailing `...`, that the
/// summary ref has a `ctsum-` handle and expand command, and that warnings pass through.
#[test]
fn context_pack_test_preview_limits_failure_groups() {
    // Failure backed by a cached summary.
    let alpha_failure = test_digest::TestDigestFailure {
        tests: vec![String::from("suite::alpha_failure")],
        message: String::from("assertion failed"),
        path: Some(String::from("src/lib.rs")),
        line: Some(42),
        column: None,
        occurrences: 1,
        summary_state: test_digest::TestDigestSummaryState::Current,
        current_summaries: vec![test_digest::TestDigestSummarySnippet {
            symbol: String::from("alpha_failure"),
            summary: String::from("failure summary for alpha test"),
        }],
    };
    // Failure with no cached summary.
    let beta_failure = test_digest::TestDigestFailure {
        tests: vec![String::from("suite::beta_failure")],
        message: String::from("panic"),
        path: Some(String::from("src/main.rs")),
        line: Some(7),
        column: None,
        occurrences: 1,
        summary_state: test_digest::TestDigestSummaryState::Missing,
        current_summaries: Vec::new(),
    };
    let report = test_digest::TestDigestReport {
        root: String::from("/repo"),
        runner: String::from("cargo"),
        failures: 2,
        grouped_failures: 2,
        counts: test_digest::TestDigestCounts {
            passed: Some(8),
            failed: Some(2),
            skipped: Some(1),
        },
        failure_groups: vec![alpha_failure, beta_failure],
        warnings: vec![String::from("warning text")],
    };

    let budget = ResponseBudget::new(Some(1), Some(14));
    let preview = build_context_pack_test_preview(&report, budget, None);

    assert!(preview.truncated);
    assert_eq!(preview.failure_groups.len(), 1);

    let kept_group = &preview.failure_groups[0];
    assert_eq!(kept_group.tests, vec!["suite::alph..."]);
    assert_eq!(kept_group.message, "assertion f...");
    assert!(kept_group.summary_refs[0].handle.starts_with("ctsum-"));
    assert_eq!(
        kept_group.summary_refs[0].expand,
        "tsift summarize --file \"src/lib.rs\""
    );
    assert_eq!(preview.warnings, vec!["warning text"]);
}
30596
/// Runs `build_context_pack_log_preview` over a log digest with two entries per section
/// using `ResponseBudget::new(Some(1), Some(14))` and checks that signals and file refs
/// are cut to one entry, long text is shortened with a trailing `...`, summary refs carry
/// the `clsum-` / `clfsum-` / `clssum-` handle prefixes, and warnings pass through.
#[test]
fn context_pack_log_preview_limits_signals_and_refs() {
    // Every cached summary in this fixture belongs to `alpha_helper`.
    let alpha_summary = |text: &str| log_digest::LogDigestSummarySnippet {
        symbol: String::from("alpha_helper"),
        summary: text.to_owned(),
    };

    let signals = vec![
        log_digest::LogDigestSignal {
            severity: String::from("error"),
            message: String::from("src/lib.rs:42 boom"),
            path: Some(String::from("src/lib.rs")),
            line: Some(42),
            column: None,
            occurrences: 2,
            summary_state: log_digest::LogDigestSummaryState::Current,
            current_summaries: vec![alpha_summary("alpha helper cached log summary")],
        },
        log_digest::LogDigestSignal {
            severity: String::from("warn"),
            message: String::from("slow path"),
            path: None,
            line: None,
            column: None,
            occurrences: 1,
            summary_state: log_digest::LogDigestSummaryState::Unavailable,
            current_summaries: Vec::new(),
        },
    ];

    let repeated_lines = vec![
        log_digest::LogDigestRepeatedLine {
            line: String::from("retrying work item alpha"),
            occurrences: 3,
        },
        log_digest::LogDigestRepeatedLine {
            line: String::from("retrying work item beta"),
            occurrences: 2,
        },
    ];

    let file_refs = vec![
        log_digest::LogDigestFileRef {
            path: String::from("src/lib.rs"),
            line: Some(42),
            column: None,
            occurrences: 2,
            summary_state: log_digest::LogDigestSummaryState::Current,
            current_summaries: vec![alpha_summary("alpha helper cached file summary")],
        },
        log_digest::LogDigestFileRef {
            path: String::from("src/main.rs"),
            line: Some(7),
            column: None,
            occurrences: 1,
            summary_state: log_digest::LogDigestSummaryState::Missing,
            current_summaries: Vec::new(),
        },
    ];

    let symbol_refs = vec![
        log_digest::LogDigestSymbolRef {
            symbol: String::from("alpha_helper"),
            occurrences: 2,
            summary_state: log_digest::LogDigestSummaryState::Current,
            current_summaries: vec![alpha_summary("alpha helper cached symbol summary")],
        },
        log_digest::LogDigestSymbolRef {
            symbol: String::from("beta_helper"),
            occurrences: 1,
            summary_state: log_digest::LogDigestSummaryState::Missing,
            current_summaries: Vec::new(),
        },
    ];

    let report = log_digest::LogDigestReport {
        root: String::from("/repo"),
        total_lines: 12,
        non_empty_lines: 10,
        signal_groups: 2,
        repeated_line_groups: 2,
        repeated_line_occurrences: 3,
        file_ref_groups: 2,
        symbol_ref_groups: 2,
        stack_groups: 1,
        signals,
        repeated_lines,
        file_refs,
        symbol_refs,
        stack_traces: vec![log_digest::LogDigestStackGroup {
            frames: vec![String::from("frame one")],
            occurrences: 1,
        }],
        warnings: vec![String::from("warning text")],
    };

    let budget = ResponseBudget::new(Some(1), Some(14));
    let preview = build_context_pack_log_preview(&report, budget, None);

    // Section sizes and shortened text.
    assert!(preview.truncated);
    assert_eq!(preview.signals.len(), 1);
    assert_eq!(preview.signals[0].message, "src/lib.rs:...");
    assert_eq!(preview.repeated_lines[0].line, "retrying wo...");
    assert_eq!(preview.file_refs.len(), 1);
    assert_eq!(preview.symbol_refs[0].symbol, "alpha_helper");

    // Each section uses its own summary-handle prefix.
    assert!(preview.signals[0].summary_refs[0].handle.starts_with("clsum-"));
    assert!(preview.file_refs[0].summary_refs[0].handle.starts_with("clfsum-"));
    let symbol_summary_ref = &preview.symbol_refs[0].summary_refs[0];
    assert!(symbol_summary_ref.handle.starts_with("clssum-"));
    assert_eq!(symbol_summary_ref.tag_alias.as_deref(), Some("alpha/helper"));
    assert_eq!(symbol_summary_ref.expand, "tsift summarize \"alpha_helper\"");
    assert_eq!(preview.warnings, vec!["warning text"]);
}
30723
/// Passing `--exact` together with `--strategy` to `search` must be a parse error.
#[test]
fn cli_search_rejects_exact_with_strategy_flag() {
    let argv = ["tsift", "search", "test", "--exact", "--strategy", "lexical"];
    let cli = try_parse_cli(argv);
    assert!(cli.is_err());
}
30736
/// With neither autoindex flag given, both raw flags parse as `false`, so the
/// combined expression `autoindex || !no_autoindex` evaluates to `true`.
#[test]
fn cli_search_autoindexes_by_default() {
    let cli = parse_cli(["tsift", "search", "test"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = cli.command
    else {
        panic!("expected Search command");
    };
    assert!(!autoindex);
    assert!(!no_autoindex);
    // Redundant with the two checks above, but spells out the combined expression.
    assert!(autoindex || !no_autoindex);
}
30753
/// `--no-autoindex` sets only the opt-out flag; `autoindex` itself stays `false`.
#[test]
fn cli_search_accepts_no_autoindex_flag() {
    let cli = parse_cli(["tsift", "search", "test", "--no-autoindex"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = cli.command
    else {
        panic!("expected Search command");
    };
    assert!(!autoindex);
    assert!(no_autoindex);
}
30769
/// `--autoindex` and `--no-autoindex` given together must be rejected by the parser.
#[test]
fn cli_search_rejects_conflicting_autoindex_flags() {
    let argv = ["tsift", "search", "test", "--autoindex", "--no-autoindex"];
    let cli = try_parse_cli(argv);
    assert!(cli.is_err());
}
30775
/// `--absolute` is accepted ahead of the subcommand and recorded on the parsed CLI.
#[test]
fn cli_accepts_global_absolute_flag() {
    let argv = ["tsift", "--absolute", "status"];
    let cli = parse_cli(argv);
    assert!(cli.absolute);
    assert!(matches!(cli.command, Some(Commands::Status { .. })));
}
30784
/// `--tabular` is accepted ahead of `search` and recorded on the parsed CLI.
#[test]
fn cli_accepts_global_tabular_flag() {
    let argv = ["tsift", "--tabular", "search", "test"];
    let cli = parse_cli(argv);
    assert!(cli.tabular);
    assert!(matches!(cli.command, Some(Commands::Search { .. })));
}
30791
/// `--tabular` combines with the `graph` subcommand.
#[test]
fn cli_tabular_with_graph() {
    let argv = ["tsift", "--tabular", "graph", "main"];
    let cli = parse_cli(argv);
    assert!(cli.tabular);
    assert!(matches!(cli.command, Some(Commands::Graph { .. })));
}
30798
/// `--tabular` combines with the `communities` subcommand.
#[test]
fn cli_tabular_with_communities() {
    let argv = ["tsift", "--tabular", "communities"];
    let cli = parse_cli(argv);
    assert!(cli.tabular);
    assert!(matches!(cli.command, Some(Commands::Communities { .. })));
}
30805
/// `--tabular` combines with the `explain` subcommand.
#[test]
fn cli_tabular_with_explain() {
    let argv = ["tsift", "--tabular", "explain", "main"];
    let cli = parse_cli(argv);
    assert!(cli.tabular);
    assert!(matches!(cli.command, Some(Commands::Explain { .. })));
}
30812
/// `traverse` parses its start node, `--to` target, `--path`, and `--format html`.
#[test]
fn cli_traverse_accepts_path_target_and_html_format() {
    let argv = [
        "tsift", "traverse", "#kgnv", "--to", "main", "--path", ".", "--format", "html",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::Traverse {
        node,
        to,
        path,
        format,
        ..
    }) = cli.command
    else {
        panic!("expected Traverse command");
    };
    assert_eq!(node.as_deref(), Some("#kgnv"));
    assert_eq!(to.as_deref(), Some("main"));
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(format, TraverseFormat::Html);
}
30834
/// `semantic` parses its query plus `--path`, `--kind`, `--limit`, and `--json`.
#[test]
fn cli_parses_semantic_related_command() {
    let argv = [
        "tsift",
        "semantic",
        "graph navigation",
        "--path",
        ".",
        "--kind",
        "all",
        "--limit",
        "3",
        "--json",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::Semantic {
        query,
        path,
        kind,
        limit,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected Semantic command");
    };
    assert_eq!(query, "graph navigation");
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(kind, SemanticRelatedKind::All);
    assert_eq!(limit, 3);
    assert!(json);
}
30867
/// `convex-sync` parses its path plus `--snapshot`, `--chunk-size`, and `--json`.
#[test]
fn cli_parses_convex_sync_command() {
    let argv = [
        "tsift",
        "convex-sync",
        ".",
        "--snapshot",
        "rows.json",
        "--chunk-size",
        "25",
        "--json",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::ConvexSync {
        path,
        snapshot,
        chunk_size,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected ConvexSync command");
    };
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(snapshot, Some(PathBuf::from("rows.json")));
    assert_eq!(chunk_size, 25);
    assert!(json);
}
30896
/// `convex-sync` parses `--remote-snapshot`, `--apply`, `--endpoint`, and
/// `--auth-token-env`.
#[test]
fn cli_parses_convex_sync_live_flags() {
    let argv = [
        "tsift",
        "convex-sync",
        ".",
        "--remote-snapshot",
        "--apply",
        "--endpoint",
        "https://example.test/convex-graph",
        "--auth-token-env",
        "TSIFT_TEST_TOKEN",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::ConvexSync {
        remote_snapshot,
        apply,
        endpoint,
        auth_token_env,
        ..
    }) = cli.command
    else {
        panic!("expected ConvexSync command");
    };
    assert!(remote_snapshot);
    assert!(apply);
    assert_eq!(
        endpoint.as_deref(),
        Some("https://example.test/convex-graph")
    );
    assert_eq!(auth_token_env, "TSIFT_TEST_TOKEN");
}
30929
/// `graph-db neighborhood` parses the backend options given before the
/// subcommand and the query options (depth, edge kind, property filter,
/// cursor, limit) given after it.
#[test]
fn cli_parses_graph_db_query() {
    let argv = [
        "tsift",
        "graph-db",
        "--backend",
        "convex-snapshot",
        "--convex-snapshot",
        "rows.json",
        "--json",
        "neighborhood",
        "gbak-kgnv",
        "--depth",
        "2",
        "--edge-kind",
        "mentions",
        "--property",
        "path=tasks/software/tsift.md",
        "--cursor",
        "gbak-old",
        "--limit",
        "10",
    ];
    let cli = parse_cli(argv);

    // Outer layer: the graph-db command and its backend selection.
    let Some(Commands::GraphDb {
        backend,
        convex_snapshot,
        json,
        query,
        ..
    }) = cli.command
    else {
        panic!("expected GraphDb command");
    };
    assert_eq!(backend, GraphDbBackend::ConvexSnapshot);
    assert_eq!(convex_snapshot, Some(PathBuf::from("rows.json")));
    assert!(json);

    // Inner layer: the neighborhood query and its filters.
    let GraphDbQuery::Neighborhood {
        id,
        depth,
        edge_kind,
        cursor,
        limit,
        property_filters,
    } = query
    else {
        panic!("expected graph-db neighborhood query");
    };
    assert_eq!(id, "gbak-kgnv");
    assert_eq!(depth, 2);
    assert_eq!(edge_kind.as_deref(), Some("mentions"));
    assert_eq!(cursor.as_deref(), Some("gbak-old"));
    assert_eq!(limit, Some(10));
    assert_eq!(
        property_filters,
        vec![String::from("path=tasks/software/tsift.md")]
    );
}
30989
/// `graph-db backend-eval` collects `--candidate` and `--target` values and
/// parses `--full-projection`.
#[test]
fn cli_parses_graph_db_backend_eval_surrealdb_candidate() {
    let argv = [
        "tsift",
        "graph-db",
        "--json",
        "backend-eval",
        "--candidate",
        "surrealdb",
        "--target",
        "gval",
        "--full-projection",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb { json, query, .. }) = cli.command else {
        panic!("expected GraphDb command");
    };
    assert!(json);

    let GraphDbQuery::BackendEval {
        candidates,
        targets,
        full_projection,
    } = query
    else {
        panic!("expected graph-db backend-eval query");
    };
    assert_eq!(candidates, vec![String::from("surrealdb")]);
    assert_eq!(targets, vec![String::from("gval")]);
    assert!(full_projection);
}
31022
/// `graph-db --backend tokensave node <id>` selects the Tokensave backend and a
/// node query.
#[test]
fn cli_parses_graph_db_tokensave_backend() {
    let argv = [
        "tsift",
        "graph-db",
        "--backend",
        "tokensave",
        "--json",
        "node",
        "fn:main",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb {
        backend,
        json,
        query,
        ..
    }) = cli.command
    else {
        panic!("expected GraphDb command");
    };
    assert_eq!(backend, GraphDbBackend::Tokensave);
    assert!(json);

    let GraphDbQuery::Node { id } = query else {
        panic!("expected graph-db node query");
    };
    assert_eq!(id, "fn:main");
}
31051
/// `analyze` parses its path, `--scope`, repeated `--entry` values, `--limit`,
/// and `--json`.
#[test]
fn cli_parses_analyze_command() {
    let argv = [
        "tsift", "analyze", ".", "--scope", "core", "--entry", "main", "--entry", "run",
        "--limit", "7", "--json",
    ];
    let cli = parse_cli(argv);
    // The pattern names every field without `..`, as the original did.
    let Some(Commands::Analyze {
        path,
        scope,
        entry_points,
        limit,
        json,
    }) = cli.command
    else {
        panic!("expected Analyze command");
    };
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(scope.as_deref(), Some("core"));
    assert_eq!(
        entry_points,
        vec![String::from("main"), String::from("run")]
    );
    assert_eq!(limit, 7);
    assert!(json);
}
31075
/// `graph-db related` parses the query text plus `--kind`, `--depth`,
/// `--seed-limit`, and `--limit`.
#[test]
fn cli_parses_graph_db_related_query() {
    let argv = [
        "tsift",
        "graph-db",
        "--json",
        "related",
        "voice avatar memory retrieval",
        "--kind",
        "all",
        "--depth",
        "3",
        "--seed-limit",
        "4",
        "--limit",
        "12",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb { json, query, .. }) = cli.command else {
        panic!("expected GraphDb command");
    };
    assert!(json);

    // The inner `query` (the search text) shadows the outer `GraphDbQuery` value.
    let GraphDbQuery::Related {
        query,
        kind,
        depth,
        seed_limit,
        limit,
    } = query
    else {
        panic!("expected graph-db related query");
    };
    assert_eq!(query, "voice avatar memory retrieval");
    assert_eq!(kind, SemanticRelatedKind::All);
    assert_eq!(depth, 3);
    assert_eq!(seed_limit, 4);
    assert_eq!(limit, 12);
}
31116
/// `graph-db compact` parses `--apply`, `--prune-tombstones`, and
/// `--confirmed-convex-reconciled`.
#[test]
fn cli_parses_graph_db_compact_query() {
    let argv = [
        "tsift",
        "graph-db",
        "--path",
        ".",
        "compact",
        "--apply",
        "--prune-tombstones",
        "--confirmed-convex-reconciled",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::GraphDb { query, .. }) = cli.command else {
        panic!("expected GraphDb command");
    };
    let GraphDbQuery::Compact {
        apply,
        prune_tombstones,
        confirmed_convex_reconciled,
    } = query
    else {
        panic!("expected graph-db compact query");
    };
    assert!(apply);
    assert!(prune_tombstones);
    assert!(confirmed_convex_reconciled);
}
31145
/// `impact` parses its path, `--cached`, and `--limit`.
#[test]
fn cli_parses_impact_command() {
    let argv = ["tsift", "impact", ".", "--cached", "--limit", "5"];
    let cli = parse_cli(argv);
    let Some(Commands::Impact {
        path,
        cached,
        limit,
        ..
    }) = cli.command
    else {
        panic!("expected Impact command");
    };
    assert_eq!(path, PathBuf::from("."));
    assert!(cached);
    assert_eq!(limit, 5);
}
31163
/// `conflict-matrix` parses its options and collects the trailing positional
/// targets in order.
#[test]
fn cli_parses_conflict_matrix_command() {
    let argv = [
        "tsift",
        "conflict-matrix",
        "--path",
        "tasks/software/tsift.md",
        "--depth",
        "4",
        "--limit",
        "12",
        "--impact-limit",
        "6",
        "--json",
        "pwcm",
        "#g6kf",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::ConflictMatrix {
        targets,
        path,
        depth,
        limit,
        impact_limit,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected ConflictMatrix command");
    };
    assert_eq!(targets, vec![String::from("pwcm"), String::from("#g6kf")]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(depth, 4);
    assert_eq!(limit, 12);
    assert_eq!(impact_limit, 6);
    assert!(json);
}
31201
/// `dispatch-trace` parses `--path`, `--format html`, `--depth`, and the
/// trailing positional targets.
#[test]
fn cli_parses_dispatch_trace_command() {
    let argv = [
        "tsift",
        "dispatch-trace",
        "--path",
        "tasks/software/tsift.md",
        "--format",
        "html",
        "--depth",
        "4",
        "pwcm",
        "#g6kf",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::DispatchTrace {
        targets,
        path,
        format,
        depth,
        ..
    }) = cli.command
    else {
        panic!("expected DispatchTrace command");
    };
    assert_eq!(targets, vec![String::from("pwcm"), String::from("#g6kf")]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(format, DispatchTraceFormat::Html);
    assert_eq!(depth, 4);
}
31232
/// `dependency-dag` parses `--path`, `--depth`, `--limit`, `--json`, and the
/// trailing positional targets.
#[test]
fn cli_parses_dependency_dag_command() {
    let argv = [
        "tsift",
        "dependency-dag",
        "--path",
        "tasks/software/tsift.md",
        "--depth",
        "5",
        "--limit",
        "20",
        "--json",
        "alpha",
        "#beta",
    ];
    let cli = parse_cli(argv);
    let Some(Commands::DependencyDag {
        targets,
        path,
        depth,
        limit,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected DependencyDag command");
    };
    assert_eq!(targets, vec![String::from("alpha"), String::from("#beta")]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(depth, 5);
    assert_eq!(limit, 20);
    assert!(json);
}
31266
/// A path under the root is returned relative to that root.
#[test]
fn relativize_strips_root_prefix() {
    let project_root = std::path::Path::new("/home/user/project");
    let relative = relativize("/home/user/project/src/main.rs", project_root);
    assert_eq!(relative, "src/main.rs");
}
31275
/// A path outside the root is returned unchanged.
#[test]
fn relativize_leaves_non_matching_path() {
    let project_root = std::path::Path::new("/home/user/project");
    let unchanged = relativize("/other/path/file.rs", project_root);
    assert_eq!(unchanged, "/other/path/file.rs");
}
31284
/// An already-relative path is returned unchanged.
#[test]
fn relativize_leaves_already_relative() {
    let project_root = std::path::Path::new("/home/user/project");
    let unchanged = relativize("src/main.rs", project_root);
    assert_eq!(unchanged, "src/main.rs");
}
31290
/// The `Path`-based variant strips the root prefix and yields a `PathBuf`.
#[test]
fn relativize_pathbuf_strips_prefix() {
    let project_root = std::path::Path::new("/home/user/project");
    let absolute = std::path::Path::new("/home/user/project/src/lib.rs");
    let relative = relativize_pathbuf(absolute, project_root);
    assert_eq!(relative, PathBuf::from("src/lib.rs"));
}
31297
/// `relativize_edges` rewrites each edge's `caller_file` in place, relative to
/// the root.
#[test]
fn relativize_edges_strips_caller_file() {
    let root = std::path::Path::new("/tmp/proj");
    let edge = index::StoredEdge {
        caller_file: String::from("/tmp/proj/src/main.rs"),
        caller_name: String::from("main"),
        caller_line: 1,
        callee_name: String::from("helper"),
        call_site_line: 5,
        tagpath_handle: None,
    };
    let mut edges = vec![edge];
    relativize_edges(&mut edges, root);
    assert_eq!(edges[0].caller_file, "src/main.rs");
}
31312
/// The `file` and `path` values, including one nested inside an array, are
/// relativized; the `name` value is left alone even though it starts with the
/// root.
#[test]
fn relativize_json_paths_strips_known_keys() {
    let root = std::path::Path::new("/tmp/proj");
    let mut payload = serde_json::json!({
        "file": "/tmp/proj/src/main.rs",
        "path": "/tmp/proj/test.rs",
        "name": "/tmp/proj/not-a-path",
        "hits": [{"path": "/tmp/proj/nested.rs", "score": 1.0}]
    });

    relativize_json_paths(&mut payload, root);

    assert_eq!(payload["file"], "src/main.rs");
    assert_eq!(payload["path"], "test.rs");
    assert_eq!(payload["name"], "/tmp/proj/not-a-path");
    assert_eq!(payload["hits"][0]["path"], "nested.rs");
}
31328
/// `graph --limit 5` parses the explicit limit.
#[test]
fn cli_graph_accepts_limit_flag() {
    let cli = parse_cli(["tsift", "graph", "main", "--limit", "5"]);
    let Some(Commands::Graph { limit, .. }) = cli.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 5);
}
31339
/// Without `--limit`, `graph` parses a limit of 20.
#[test]
fn cli_graph_default_limit_is_20() {
    let cli = parse_cli(["tsift", "graph", "main"]);
    let Some(Commands::Graph { limit, .. }) = cli.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 20);
}
31348
/// `communities --limit 3` parses the explicit limit.
#[test]
fn cli_communities_accepts_limit_flag() {
    let cli = parse_cli(["tsift", "communities", "--limit", "3"]);
    let Some(Commands::Communities { limit, .. }) = cli.command else {
        panic!("expected Communities command");
    };
    assert_eq!(limit, 3);
}
31357
/// Without `--limit`, `communities` parses a limit of 10.
#[test]
fn cli_communities_default_limit_is_10() {
    let cli = parse_cli(["tsift", "communities"]);
    let Some(Commands::Communities { limit, .. }) = cli.command else {
        panic!("expected Communities command");
    };
    assert_eq!(limit, 10);
}
31366
/// `explain --limit 7` parses the explicit limit.
#[test]
fn cli_explain_accepts_limit_flag() {
    let cli = parse_cli(["tsift", "explain", "main", "--limit", "7"]);
    let Some(Commands::Explain { limit, .. }) = cli.command else {
        panic!("expected Explain command");
    };
    assert_eq!(limit, 7);
}
31375
/// Without `--limit`, `explain` parses a limit of 15.
#[test]
fn cli_explain_default_limit_is_15() {
    let cli = parse_cli(["tsift", "explain", "main"]);
    let Some(Commands::Explain { limit, .. }) = cli.command else {
        panic!("expected Explain command");
    };
    assert_eq!(limit, 15);
}
31384
/// `--limit 0` is accepted and parsed as `0`.
///
/// NOTE(review): this only checks parsing; that `0` means "unlimited" is
/// implied by the test name, not asserted here.
#[test]
fn cli_limit_zero_means_unlimited() {
    let cli = parse_cli(["tsift", "graph", "main", "--limit", "0"]);
    let Some(Commands::Graph { limit, .. }) = cli.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 0);
}
31393
/// Smoke test: `cmd_graph` succeeds against the fixture index with a limit of 1.
///
/// On failure the underlying error is printed, rather than the bare
/// `assertion failed: result.is_ok()` the previous form produced.
#[test]
fn graph_cmd_limit_runs_ok() {
    let dir = setup_graph_index();
    let result = cmd_graph(
        "main",
        dir.path(),
        false,
        false,
        None,
        1,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    if let Err(err) = result {
        panic!("cmd_graph with limit 1 should succeed, got error: {err:?}");
    }
}
31415
/// Smoke test: `cmd_graph` succeeds against the fixture index with a limit of 0.
///
/// On failure the underlying error is printed, rather than the bare
/// `assertion failed: result.is_ok()` the previous form produced.
#[test]
fn graph_cmd_unlimited_runs_ok() {
    let dir = setup_graph_index();
    let result = cmd_graph(
        "main",
        dir.path(),
        false,
        false,
        None,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    if let Err(err) = result {
        panic!("cmd_graph with limit 0 should succeed, got error: {err:?}");
    }
}
31437
/// Smoke test: `cmd_graph` succeeds against the fixture index with the single
/// `true` flag this test sets.
///
/// NOTE(review): that flag is presumably the tabular switch, judging by the
/// test name; confirm against the `cmd_graph` signature.
///
/// On failure the underlying error is printed, rather than the bare
/// `assertion failed: result.is_ok()` the previous form produced.
#[test]
fn graph_cmd_tabular_runs_ok() {
    let dir = setup_graph_index();
    let result = cmd_graph(
        "main",
        dir.path(),
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        true,
        false,
        TagpathSearchOpts::default(),
    );
    if let Err(err) = result {
        panic!("cmd_graph in tabular mode should succeed, got error: {err:?}");
    }
}
31459
/// Smoke test: `cmd_communities` succeeds against the fixture index with the
/// single `true` flag this test sets.
///
/// NOTE(review): that flag is presumably the tabular switch, judging by the
/// test name; confirm against the `cmd_communities` signature.
///
/// On failure the underlying error is printed, rather than the bare
/// `assertion failed: result.is_ok()` the previous form produced.
#[test]
fn communities_cmd_tabular_runs_ok() {
    let dir = setup_graph_index();
    let result = cmd_communities(
        dir.path(),
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        true,
        false,
        TagpathSearchOpts::default(),
    );
    if let Err(err) = result {
        panic!("cmd_communities in tabular mode should succeed, got error: {err:?}");
    }
}
31478
/// Smoke test: `cmd_explain` succeeds against the fixture index with the
/// single `true` flag this test sets.
///
/// NOTE(review): that flag is presumably the tabular switch, judging by the
/// test name; confirm against the `cmd_explain` signature.
///
/// On failure the underlying error is printed, rather than the bare
/// `assertion failed: result.is_ok()` the previous form produced.
#[test]
fn explain_cmd_tabular_runs_ok() {
    let dir = setup_graph_index();
    let result = cmd_explain(
        "main",
        dir.path(),
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        true,
        false,
        false,
    );
    if let Err(err) = result {
        panic!("cmd_explain in tabular mode should succeed, got error: {err:?}");
    }
}
31498
/// Paths with a `.agent-doc` component are classified as generated artifacts;
/// look-alike names (`agent-doc/`, `.agent-doc-helper.rs`) are not.
#[test]
fn traversal_excludes_agent_doc_runtime_paths_from_source_watermark() {
    let excluded = [
        ".agent-doc",
        ".agent-doc/snapshots/abc.md",
        ".agent-doc/baselines/abc.md",
        ".agent-doc/archives/2026.md",
        ".agent-doc/runtime/run.jsonl",
        "src/foo/.agent-doc",
        "src/foo/.agent-doc/snapshots/x.md",
        "./.agent-doc/snapshots/x.md",
    ];
    let included = [
        "src/main.rs",
        "tests/perf_gate.rs",
        "fixtures/x.json",
        "agent-doc/src/lib.rs",
        "src/.agent-doc-helper.rs",
    ];

    for path in excluded {
        assert!(
            traversal_relative_path_is_generated_artifact(path),
            "expected `{path}` to be excluded from source watermark"
        );
    }
    for path in included {
        assert!(
            !traversal_relative_path_is_generated_artifact(path),
            "expected `{path}` to be included in source watermark"
        );
    }
}
31535
/// Paths with a `.tsift` or `target` component are classified as generated
/// artifacts; names that merely contain those strings (`a__target`,
/// `targeting.rs`, `.tsiftrc`, `tsift/`) are not.
#[test]
fn traversal_excludes_tsift_and_target_runtime_paths_from_source_watermark() {
    let excluded = [
        ".tsift",
        ".tsift/index.db",
        ".tsift/indexes/foo/index.db",
        ".tsift/conflict-matrix-cache/inputs/abc.json",
        ".tsift/summaries.db",
        "src/foo/.tsift",
        "src/foo/.tsift/graph.db",
        "./.tsift/index.db",
        "target",
        "target/debug/build/x",
        "target/release/tsift",
        "src/foo/target/debug/x",
        "./target/release/x",
    ];
    let included = [
        "src/ctx-core-dev/lib/a__target/CHANGELOG.md",
        "src/ctx-core-dev/lib/a__target/A__Target/index.d.ts",
        "src/tsift-extras/lib.rs",
        "tsift/README.md",
        "src/targeting.rs",
        "src/.tsiftrc",
        "src/agent-doc-helper.rs",
    ];

    for path in excluded {
        assert!(
            traversal_relative_path_is_generated_artifact(path),
            "expected `{path}` to be excluded from source watermark"
        );
    }
    for path in included {
        assert!(
            !traversal_relative_path_is_generated_artifact(path),
            "expected `{path}` to be included in source watermark"
        );
    }
}
31582
/// The watermark is unchanged across repeated calls and across writes under
/// `.tsift/` and `target/`, but changes once the hinted markdown file is
/// edited.
#[test]
fn traversal_source_watermark_is_stable_across_invocations_on_quiescent_root() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let hint = root.join("README.md");
    let index_db = root.join(".tsift/index.db");
    let build_marker = root.join("target/debug/marker");

    // Lay out one source file, the hinted markdown, and two generated-artifact files.
    std::fs::create_dir_all(root.join("src")).unwrap();
    std::fs::write(root.join("src/main.rs"), "fn main() {}\n").unwrap();
    std::fs::write(&hint, "# stable\n").unwrap();
    std::fs::create_dir_all(root.join(".tsift")).unwrap();
    std::fs::write(&index_db, b"placeholder").unwrap();
    std::fs::create_dir_all(root.join("target/debug")).unwrap();
    std::fs::write(&build_marker, b"placeholder").unwrap();

    let baseline = traversal_source_watermark(root, &hint, None, true)
        .expect("first watermark call must succeed")
        .expect("first watermark must produce a hash for hinted markdown");
    let repeat = traversal_source_watermark(root, &hint, None, true)
        .expect("second watermark call must succeed")
        .expect("second watermark must produce a hash for hinted markdown");
    assert_eq!(
        baseline, repeat,
        "watermark must be identical across back-to-back invocations on a quiescent root"
    );

    // Writes confined to the generated-artifact directories.
    std::fs::write(&index_db, b"changed").unwrap();
    std::fs::write(&build_marker, b"changed").unwrap();
    let after_artifact_churn = traversal_source_watermark(root, &hint, None, true)
        .expect("third watermark call must succeed")
        .expect("third watermark must produce a hash for hinted markdown");
    assert_eq!(
        baseline, after_artifact_churn,
        "watermark must ignore mutations under .tsift/ and target/"
    );

    // Pause before rewriting the hint with different, longer content.
    std::thread::sleep(std::time::Duration::from_millis(20));
    std::fs::write(&hint, "# stable edited with longer content\n").unwrap();
    let after_hint_edit = traversal_source_watermark(root, &hint, None, true)
        .expect("fourth watermark call must succeed")
        .expect("fourth watermark must produce a hash for hinted markdown");
    assert_ne!(
        baseline, after_hint_edit,
        "watermark must invalidate when the hinted markdown file changes"
    );
}
31641
/// Toggling `user_version` on `summaries.db` and back must not change the
/// watermark, while replacing a summary row with different entities must.
#[test]
fn traversal_source_watermark_uses_summary_rows_not_summaries_db_metadata() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let hint = root.join("README.md");
    std::fs::write(&hint, "# stable\n").unwrap();
    let summaries_db_path = root.join(".tsift/summaries.db");

    // Builds a "type" entity; only the name and description vary in this test.
    let type_entity = |name: &str, description: &str| summarize::Entity {
        name: name.to_owned(),
        kind: "type".to_owned(),
        description: description.to_owned(),
    };

    let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
    let mut summary = summarize::Summary {
        id: 0,
        symbol_name: "main".to_owned(),
        file_path: "src/main.rs".to_owned(),
        content_hash: "hash-main".to_owned(),
        summary: "main wires the CLI".to_owned(),
        entities: Some(vec![type_entity("Cli", "Command-line interface")]),
        relationships: None,
        concept_labels: Some(vec!["cli".to_owned()]),
        extracted_at: "1700000000".to_owned(),
        model: "test-model".to_owned(),
        tokens_input: Some(10),
        tokens_output: Some(5),
    };
    summary_db.insert(&summary).unwrap();
    drop(summary_db);

    let baseline = traversal_source_watermark(root, &hint, None, true)
        .expect("first watermark call must succeed")
        .expect("first watermark must produce a hash");

    // Flip `user_version` to 1 and back to 0 without touching any summary row.
    std::thread::sleep(std::time::Duration::from_millis(20));
    {
        let conn = Connection::open(&summaries_db_path).unwrap();
        conn.pragma_update(None, "user_version", 1).unwrap();
        conn.pragma_update(None, "user_version", 0).unwrap();
    }

    let after_metadata_churn = traversal_source_watermark(root, &hint, None, true)
        .expect("second watermark call must succeed")
        .expect("second watermark must produce a hash");
    assert_eq!(
        baseline, after_metadata_churn,
        "metadata-only summaries.db churn must not invalidate the source watermark"
    );

    // Replace the stored row with one whose entities differ.
    summary.entities = Some(vec![type_entity(
        "GraphCache",
        "Stable full-projection cache input",
    )]);
    {
        let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
        summary_db.delete_by_file("src/main.rs").unwrap();
        summary_db.insert(&summary).unwrap();
    }

    let after_row_change = traversal_source_watermark(root, &hint, None, true)
        .expect("third watermark call must succeed")
        .expect("third watermark must produce a hash");
    assert_ne!(
        baseline, after_row_change,
        "semantic summary row changes must invalidate the source watermark"
    );
}
31710
/// Rewriting a source file with identical bytes and re-applying index changes
/// must leave the full-projection source watermark unchanged.
#[test]
fn full_projection_source_watermark_ignores_source_mtime_when_index_rows_unchanged() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::create_dir_all(root.join("src")).unwrap();
    std::fs::create_dir_all(root.join(".tsift")).unwrap();
    let source = root.join("src/lib.rs");
    let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
    std::fs::write(&source, source_body).unwrap();
    let index_db_path = root.join(".tsift/index.db");
    {
        let db = index::IndexDb::open(&index_db_path).unwrap();
        db.rebuild(root).unwrap();
    }

    let before = graph_db_backend_eval_full_projection_source_watermark(root, None)
        .unwrap()
        .value;

    // Same bytes, written again after a short pause.
    std::thread::sleep(std::time::Duration::from_millis(20));
    std::fs::write(&source, source_body).unwrap();
    {
        let db = index::IndexDb::open(&index_db_path).unwrap();
        db.apply_changes(root).unwrap();
    }

    let after = graph_db_backend_eval_full_projection_source_watermark(root, None)
        .unwrap()
        .value;
    assert_eq!(
        before, after,
        "mtime-only source index churn must not invalidate the full-projection cache"
    );
}
31744
/// Editing a session markdown document after the index is built must leave the
/// full-projection source watermark unchanged.
#[test]
fn full_projection_source_watermark_ignores_session_markdown_churn() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    for sub_dir in ["src", "tasks/software", ".tsift"] {
        std::fs::create_dir_all(root.join(sub_dir)).unwrap();
    }
    std::fs::write(root.join("src/lib.rs"), "pub fn alpha() {}\n").unwrap();
    let task_doc = root.join("tasks/software/tsift.md");
    std::fs::write(
        &task_doc,
        "---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Initial item\n",
    )
    .unwrap();
    {
        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
        db.rebuild(root).unwrap();
    }

    let before = graph_db_backend_eval_full_projection_source_watermark(root, None)
        .unwrap()
        .value;

    // Only the backlog item text changes; the index is not rebuilt.
    std::fs::write(
        &task_doc,
        "---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Edited item\n",
    )
    .unwrap();

    let after = graph_db_backend_eval_full_projection_source_watermark(root, None)
        .unwrap()
        .value;
    assert_eq!(
        before, after,
        "session markdown churn must not invalidate the full-projection code/summary cache"
    );
}
31783
/// The first full-projection run misses the cache. A second run, after the
/// source is rewritten with identical bytes, hits it and reports zero duration
/// for the `source_graph_build` and `projection_rows` phases.
#[test]
fn full_projection_cache_hit_skips_provider_neutral_rebuild_after_mtime_churn() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::create_dir_all(root.join("src")).unwrap();
    std::fs::create_dir_all(root.join(".tsift")).unwrap();
    let source = root.join("src/lib.rs");
    let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
    std::fs::write(&source, source_body).unwrap();
    let index_db_path = root.join(".tsift/index.db");
    {
        let db = index::IndexDb::open(&index_db_path).unwrap();
        db.rebuild(root).unwrap();
    }

    // Unused results stay bound to named `_` variables so they live to the end of the test.
    let (_cold_projection, _cold_warnings, _cold_phases, cold_stats) =
        graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
    assert!(
        !cold_stats.hit,
        "the first full-projection run should populate the cache"
    );

    // Same bytes, written again after a short pause.
    std::thread::sleep(std::time::Duration::from_millis(20));
    std::fs::write(&source, source_body).unwrap();
    {
        let db = index::IndexDb::open(&index_db_path).unwrap();
        db.apply_changes(root).unwrap();
    }

    let (_warm_projection, _warm_warnings, phases, warm_stats) =
        graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
    assert!(warm_stats.hit, "mtime-only churn should still cache-hit");

    let source_graph_build = phases
        .iter()
        .find(|candidate| candidate.name == "full_projection.source_graph_build")
        .expect("cache hit must report source_graph_build");
    let projection_rows = phases
        .iter()
        .find(|candidate| candidate.name == "full_projection.projection_rows")
        .expect("cache hit must report projection_rows");
    assert_eq!(source_graph_build.duration_micros, 0);
    assert_eq!(projection_rows.duration_micros, 0);
}
31827
/// A three-line body that fits the token cap is returned whole and not marked capped.
#[test]
fn build_token_capped_preview_within_cap() {
    let lines = vec!["fn foo() {", " 1 + 2", "}"];
    let capped = build_token_capped_preview(&lines, 1, 3, 160, 1000);
    assert!(!capped.was_capped);
    assert_eq!(capped.preview.len(), 3);
    assert_eq!(capped.capped_end, 3);
}
31836
/// A 200-line body under a 100-token cap is truncated but still yields a
/// non-empty preview.
#[test]
fn build_token_capped_preview_truncates_long_body() {
    let owned: Vec<String> = (0..200)
        .map(|i| format!(" let line_{i} = {i};"))
        .collect();
    let lines: Vec<&str> = owned.iter().map(String::as_str).collect();
    let capped = build_token_capped_preview(&lines, 1, 200, 160, 100);
    assert!(capped.was_capped);
    assert!(capped.preview.len() < 200);
    assert!(capped.capped_end < 200);
    assert!(!capped.preview.is_empty());
}
31847
/// Starting at line 50, the preview begins there and the capped end falls in
/// `50..100`.
#[test]
fn build_token_capped_preview_respects_start_offset() {
    let owned: Vec<String> = (0..100).map(|i| format!("line {i}")).collect();
    let lines: Vec<&str> = owned.iter().map(String::as_str).collect();
    let capped = build_token_capped_preview(&lines, 50, 100, 160, 50);
    assert!(capped.was_capped);
    assert!(capped.capped_end >= 50);
    assert!(capped.capped_end < 100);
    assert_eq!(capped.preview[0].line, 50);
}
31858
31859 #[test]
31860 fn response_budget_body_token_cap_defaults() {
31861 let budget = ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Normal), true);
31862 assert_eq!(budget.body_token_cap(), 1500);
31863
31864 let budget = ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Small), true);
31865 assert_eq!(budget.body_token_cap(), 500);
31866
31867 let budget = ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Deep), true);
31868 assert_eq!(budget.body_token_cap(), 3000);
31869 }
31870
31871 #[test]
31872 fn build_token_capped_preview_empty_input() {
31873 let lines: Vec<&str> = vec![];
31874 let capped = build_token_capped_preview(&lines, 1, 0, 160, 1000);
31875 assert!(!capped.was_capped);
31876 assert!(capped.preview.is_empty());
31877 }
31878
31879 #[test]
31880 fn build_token_capped_preview_single_long_line_fits() {
31881 let lines: Vec<&str> = vec!["short"];
31882 let capped = build_token_capped_preview(&lines, 1, 1, 160, 100);
31883 assert!(!capped.was_capped);
31884 assert_eq!(capped.preview.len(), 1);
31885 assert_eq!(capped.capped_end, 1);
31886 }
31887
31888 #[test]
31889 fn edge_index_replaces_from_id_to_id_with_positions() {
31890 let input = serde_json::json!({
31891 "nodes": [
31892 {"id": "symbol:src/lib.rs:foo"},
31893 {"id": "symbol:src/lib.rs:bar"},
31894 {"id": "symbol:src/lib.rs:baz"}
31895 ],
31896 "edges": [
31897 {"from_id": "symbol:src/lib.rs:foo", "to_id": "symbol:src/lib.rs:bar", "k": "calls"},
31898 {"from_id": "symbol:src/lib.rs:bar", "to_id": "symbol:src/lib.rs:baz", "k": "calls"}
31899 ]
31900 });
31901 let result = edge_index_transform(input);
31902 let edges = result.get("edges").unwrap().as_array().unwrap();
31903 assert_eq!(edges.len(), 2);
31904 assert_eq!(edges[0]["from"], 0);
31905 assert_eq!(edges[0]["to"], 1);
31906 assert_eq!(edges[1]["from"], 1);
31907 assert_eq!(edges[1]["to"], 2);
31908 assert!(edges[0].get("from_id").is_none());
31909 assert!(edges[0].get("to_id").is_none());
31910 }
31911
31912 #[test]
31913 fn edge_index_preserves_unresolved_ids_as_strings() {
31914 let input = serde_json::json!({
31915 "nodes": [{"id": "symbol:src/lib.rs:foo"}],
31916 "edges": [
31917 {"from_id": "symbol:src/lib.rs:foo", "to_id": "symbol:other.rs:missing", "k": "ref"}
31918 ]
31919 });
31920 let result = edge_index_transform(input);
31921 let edge = &result["edges"][0];
31922 assert_eq!(edge["from"], 0);
31923 assert_eq!(edge["to_id"], "symbol:other.rs:missing");
31924 }
31925
31926 #[test]
31927 fn edge_index_noop_without_nodes_and_edges() {
31928 let input = serde_json::json!({"report": {"entries": [{"from_id": "a", "to_id": "b"}]}});
31929 let result = edge_index_transform(input);
31930 assert_eq!(result["report"]["entries"][0]["from_id"], "a");
31931 }
31932}
31933
31934#[derive(Serialize)]
31937struct TableInfo {
31938 name: String,
31939 columns: Vec<ColumnInfo>,
31940 row_count: i64,
31941}
31942
31943#[derive(Serialize)]
31944struct ColumnInfo {
31945 name: String,
31946 #[serde(rename = "type")]
31947 col_type: String,
31948 notnull: bool,
31949 pk: bool,
31950 #[serde(skip_serializing_if = "Option::is_none")]
31951 default_value: Option<String>,
31952}
31953
31954pub(crate) fn open_db(path: &std::path::Path) -> Result<Connection> {
31956 let conn = Connection::open_with_flags(
31957 path,
31958 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
31959 )
31960 .with_context(|| format!("opening database: {}", path.display()))?;
31961 Ok(conn)
31962}
31963
31964pub(crate) fn schema_overview(conn: &Connection) -> Result<Vec<TableInfo>> {
31966 let mut stmt = conn.prepare(
31967 "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name",
31968 )?;
31969 let table_names: Vec<String> = stmt
31970 .query_map([], |row| row.get(0))?
31971 .collect::<std::result::Result<Vec<_>, _>>()?;
31972
31973 let mut tables = Vec::new();
31974 for tbl in table_names {
31975 let columns = table_columns(conn, &tbl)?;
31976 let row_count: i64 =
31977 conn.query_row(&format!("SELECT COUNT(*) FROM \"{}\"", tbl), [], |row| {
31978 row.get(0)
31979 })?;
31980 tables.push(TableInfo {
31981 name: tbl,
31982 columns,
31983 row_count,
31984 });
31985 }
31986 Ok(tables)
31987}
31988
31989pub(crate) fn table_columns(conn: &Connection, table: &str) -> Result<Vec<ColumnInfo>> {
31991 let mut stmt = conn.prepare(&format!("PRAGMA table_info(\"{}\")", table))?;
31992 let cols = stmt
31993 .query_map([], |row| {
31994 Ok(ColumnInfo {
31995 name: row.get(1)?,
31996 col_type: row.get::<_, String>(2).unwrap_or_default(),
31997 notnull: row.get::<_, bool>(3).unwrap_or(false),
31998 pk: row.get::<_, i32>(5).unwrap_or(0) > 0,
31999 default_value: row.get(4)?,
32000 })
32001 })?
32002 .collect::<std::result::Result<Vec<_>, _>>()?;
32003 Ok(cols)
32004}
32005
32006pub(crate) fn execute_query(
32008 conn: &Connection,
32009 sql: &str,
32010) -> Result<(Vec<String>, Vec<Vec<serde_json::Value>>)> {
32011 let mut stmt = conn.prepare(sql).context("preparing SQL query")?;
32012 let col_names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
32013 let col_count = col_names.len();
32014
32015 let mut rows = Vec::new();
32016 let mut query_rows = stmt.query([])?;
32017 while let Some(row) = query_rows.next()? {
32018 let mut vals = Vec::with_capacity(col_count);
32019 for i in 0..col_count {
32020 let val = match row.get_ref(i)? {
32021 rusqlite::types::ValueRef::Null => serde_json::Value::Null,
32022 rusqlite::types::ValueRef::Integer(n) => serde_json::json!(n),
32023 rusqlite::types::ValueRef::Real(f) => serde_json::json!(f),
32024 rusqlite::types::ValueRef::Text(s) => {
32025 serde_json::Value::String(String::from_utf8_lossy(s).into_owned())
32026 }
32027 rusqlite::types::ValueRef::Blob(b) => {
32028 serde_json::Value::String(format!("<blob {} bytes>", b.len()))
32029 }
32030 };
32031 vals.push(val);
32032 }
32033 rows.push(vals);
32034 }
32035 Ok((col_names, rows))
32036}
32037
32038
/// Kind of digest runner; the textual forms are `test` and `log`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DigestRunnerKind {
    /// Spelled `test`.
    Test,
    /// Spelled `log`.
    Log,
}
32044
32045impl DigestRunnerKind {
32046 fn parse(raw: &str) -> Result<Self> {
32047 match raw.trim().to_ascii_lowercase().as_str() {
32048 "test" => Ok(Self::Test),
32049 "log" => Ok(Self::Log),
32050 other => bail!("unsupported digest runner kind `{other}`; expected test or log"),
32051 }
32052 }
32053
32054 fn as_str(self) -> &'static str {
32055 match self {
32056 Self::Test => "test",
32057 Self::Log => "log",
32058 }
32059 }
32060}
32061
/// Splits `s` into whitespace-separated tokens, keeping quoted runs intact.
///
/// A token that starts with `"` or `'` extends through the matching closing
/// quote (or to the end of the input when unterminated) and keeps its quotes.
/// Any other token ends at the next ASCII whitespace byte. No escape
/// processing is performed.
pub(crate) fn shell_split(s: &str) -> Vec<&str> {
    let bytes = s.as_bytes();
    let len = bytes.len();
    let mut tokens = Vec::new();
    let mut cursor = 0;
    loop {
        // Skip the whitespace run in front of the next token.
        cursor += bytes[cursor..]
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count();
        if cursor == len {
            break;
        }
        let begin = cursor;
        let end = match bytes[begin] {
            opener @ (b'"' | b'\'') => bytes[begin + 1..]
                .iter()
                .position(|&byte| byte == opener)
                // Include the closing quote in the token.
                .map_or(len, |offset| begin + offset + 2),
            _ => bytes[begin..]
                .iter()
                .position(|byte| byte.is_ascii_whitespace())
                .map_or(len, |offset| begin + offset),
        };
        tokens.push(&s[begin..end]);
        cursor = end;
    }
    tokens
}
32094
/// Wraps `s` in double quotes, escaping embedded backslashes and double quotes.
///
/// If `s` is already enclosed in a matching pair of `"` or `'`, that outer
/// pair is removed first so the value is not quoted twice. A lone quote
/// character is treated as ordinary content rather than as an empty quoted
/// string, which previously caused an out-of-range slice panic.
///
/// NOTE(review): `$` and backticks are left unescaped, so the result is not
/// safe to hand to a shell when `s` is untrusted — confirm against callers.
pub(crate) fn shell_quote(s: &str) -> String {
    // `strip_prefix` followed by `strip_suffix` needs two distinct quote
    // characters, so a single-character input can never match.
    let unquoted = ['"', '\'']
        .into_iter()
        .find_map(|quote| s.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(s);

    format!(
        "\"{}\"",
        unquoted.replace('\\', "\\\\").replace('"', "\\\"")
    )
}
32117
32118fn empty_search_coverage() -> sift::SearchCoverageSnapshot {
32119 sift::SearchCoverageSnapshot {
32120 mode: sift::SearchCoverageMode::Sealed,
32121 total_sector_count: 0,
32122 mounted_sector_count: 0,
32123 reused_sector_count: 0,
32124 dirty_sector_count: 0,
32125 completed_dirty_sector_count: 0,
32126 rebuilding_sector_count: 0,
32127 resumed_sector_count: 0,
32128 active_rebuild: None,
32129 }
32130}
32131
32132fn aggregate_search_coverage(responses: &[sift::SearchResponse]) -> sift::SearchCoverageSnapshot {
32133 let total_sector_count = responses
32134 .iter()
32135 .map(|response| response.coverage.total_sector_count)
32136 .sum();
32137 let mounted_sector_count = responses
32138 .iter()
32139 .map(|response| response.coverage.mounted_sector_count)
32140 .sum();
32141 let reused_sector_count = responses
32142 .iter()
32143 .map(|response| response.coverage.reused_sector_count)
32144 .sum();
32145 let dirty_sector_count = responses
32146 .iter()
32147 .map(|response| response.coverage.dirty_sector_count)
32148 .sum();
32149 let completed_dirty_sector_count = responses
32150 .iter()
32151 .map(|response| response.coverage.completed_dirty_sector_count)
32152 .sum();
32153 let rebuilding_sector_count = responses
32154 .iter()
32155 .map(|response| response.coverage.rebuilding_sector_count)
32156 .sum();
32157 let resumed_sector_count = responses
32158 .iter()
32159 .map(|response| response.coverage.resumed_sector_count)
32160 .sum();
32161
32162 let mode = if dirty_sector_count == 0 && rebuilding_sector_count == 0 {
32163 sift::SearchCoverageMode::Sealed
32164 } else if completed_dirty_sector_count > 0
32165 || rebuilding_sector_count > 0
32166 || resumed_sector_count > 0
32167 {
32168 sift::SearchCoverageMode::Converging
32169 } else {
32170 sift::SearchCoverageMode::Frontier
32171 };
32172
32173 sift::SearchCoverageSnapshot {
32174 mode,
32175 total_sector_count,
32176 mounted_sector_count,
32177 reused_sector_count,
32178 dirty_sector_count,
32179 completed_dirty_sector_count,
32180 rebuilding_sector_count,
32181 resumed_sector_count,
32182 active_rebuild: responses
32183 .iter()
32184 .find_map(|response| response.coverage.active_rebuild.clone()),
32185 }
32186}
32187
32188fn empty_search_response(root: &Path, strategy: &str) -> sift::SearchResponse {
32189 sift::SearchResponse {
32190 strategy: strategy.to_string(),
32191 root: root.display().to_string(),
32192 indexed_artifacts: 0,
32193 skipped_artifacts: 0,
32194 coverage: empty_search_coverage(),
32195 hits: Vec::new(),
32196 }
32197}
32198
32199fn absolutize_search_hit_paths(response: &mut sift::SearchResponse, search_root: &Path) {
32200 for hit in &mut response.hits {
32201 let path = Path::new(&hit.path);
32202 if path.is_relative() {
32203 hit.path = search_root.join(path).display().to_string();
32204 }
32205 }
32206}
32207
32208fn merge_search_responses(
32209 root: &Path,
32210 strategy: &str,
32211 limit: usize,
32212 responses: Vec<sift::SearchResponse>,
32213) -> sift::SearchResponse {
32214 let indexed_artifacts = responses
32215 .iter()
32216 .map(|response| response.indexed_artifacts)
32217 .sum();
32218 let skipped_artifacts = responses
32219 .iter()
32220 .map(|response| response.skipped_artifacts)
32221 .sum();
32222 let coverage = if responses.is_empty() {
32223 empty_search_coverage()
32224 } else {
32225 aggregate_search_coverage(&responses)
32226 };
32227 let mut hits: Vec<sift::SearchHit> = responses
32228 .into_iter()
32229 .flat_map(|response| response.hits)
32230 .collect();
32231 hits.sort_by(|left, right| {
32232 right
32233 .score
32234 .partial_cmp(&left.score)
32235 .unwrap_or(Ordering::Equal)
32236 .then_with(|| left.path.cmp(&right.path))
32237 .then_with(|| left.location.cmp(&right.location))
32238 });
32239 hits.truncate(limit);
32240 for (rank, hit) in hits.iter_mut().enumerate() {
32241 hit.rank = rank + 1;
32242 }
32243
32244 sift::SearchResponse {
32245 strategy: strategy.to_string(),
32246 root: root.display().to_string(),
32247 indexed_artifacts,
32248 skipped_artifacts,
32249 coverage,
32250 hits,
32251 }
32252}
32253
32254pub(crate) fn federated_sift_search(
32255 root: &Path,
32256 cache_dir: &Path,
32257 query: &str,
32258 limit: usize,
32259 timeout_secs: u64,
32260 strategy: &str,
32261) -> Result<sift::SearchResponse> {
32262 let targets = resolve_search_index_targets(root, root, None, true)?;
32263 if targets.is_empty() {
32264 if config::Config::submodule_dirs(root)?.is_empty() {
32265 return run_search_with_timeout(
32266 root,
32267 cache_dir,
32268 query,
32269 limit,
32270 timeout_secs,
32271 strategy,
32272 &[],
32273 );
32274 }
32275 return Ok(empty_search_response(root, strategy));
32276 }
32277
32278 let mut responses = Vec::with_capacity(targets.len());
32279 for target in &targets {
32280 let mut response = run_search_with_timeout(
32281 &target.source_root,
32282 cache_dir,
32283 query,
32284 limit,
32285 timeout_secs,
32286 strategy,
32287 std::slice::from_ref(target),
32288 )?;
32289 absolutize_search_hit_paths(&mut response, &target.source_root);
32290 response.root = root.display().to_string();
32291 responses.push(response);
32292 }
32293
32294 Ok(merge_search_responses(root, strategy, limit, responses))
32295}
32296
32297pub(crate) fn federated_symbol_search(
32305 root: &std::path::Path,
32306 query: &str,
32307 limit: usize,
32308 tagpath_opts: &TagpathSearchOpts,
32309) -> Result<(Vec<index::SymbolHit>, TagpathAnnotationDiagnostic)> {
32310 let cfg = config::Config::load(root)?;
32311 let submodules = config::Config::submodule_dirs(root)?;
32312 let mut all_hits: Vec<index::SymbolHit> = Vec::new();
32313 let mut combined = TagpathAnnotationDiagnostic::default();
32314 for scope in &submodules {
32315 if !cfg.federation_for_scope(scope) {
32316 continue;
32317 }
32318 let db_path = cfg.db_path_for(root, &scope.id);
32319 if !db_path.exists() {
32320 continue;
32321 }
32322 let db = index::IndexDb::open_read_only(&db_path)?;
32323 let mut hits = db.symbol_search(query, limit)?;
32324 let diag = annotate_hits_with_tagpath(&mut hits, &scope.source_root, tagpath_opts)?;
32325 combined.loaded |= diag.loaded;
32326 if diag.stale && !combined.stale {
32327 combined.stale = true;
32328 combined.reason = diag.reason;
32329 }
32330 all_hits.append(&mut hits);
32331 }
32332 all_hits.sort_by(|a, b| {
32333 b.score
32334 .partial_cmp(&a.score)
32335 .unwrap_or(std::cmp::Ordering::Equal)
32336 });
32337 all_hits.truncate(limit);
32338 Ok((all_hits, combined))
32339}
32340
32341#[derive(Debug, Deserialize)]
32342#[serde(tag = "type", rename_all = "lowercase")]
32343enum RipgrepJsonEvent {
32344 Match {
32345 data: RipgrepMatchData,
32346 },
32347 #[serde(other)]
32348 Other,
32349}
32350
32351#[derive(Debug, Deserialize)]
32352struct RipgrepMatchData {
32353 path: RipgrepTextField,
32354 lines: RipgrepTextField,
32355 line_number: Option<usize>,
32356}
32357
32358#[derive(Debug, Deserialize)]
32359struct RipgrepTextField {
32360 text: Option<String>,
32361}
32362
32363pub(crate) fn federated_exact_search(
32364 root: &Path,
32365 query: &str,
32366 limit: usize,
32367 timeout_secs: u64,
32368) -> Result<sift::SearchResponse> {
32369 let cfg = config::Config::load(root)?;
32370 let mut responses = Vec::new();
32371 for scope in config::Config::submodule_dirs(root)? {
32372 if !cfg.federation_for_scope(&scope) {
32373 continue;
32374 }
32375 let mut response =
32376 run_exact_search_with_timeout(&scope.source_root, query, limit, timeout_secs)?;
32377 absolutize_search_hit_paths(&mut response, &scope.source_root);
32378 response.root = root.display().to_string();
32379 responses.push(response);
32380 }
32381
32382 Ok(merge_search_responses(root, "exact", limit, responses))
32383}
32384
32385pub(crate) fn run_sift_search(
32386 search_path: &Path,
32387 cache_dir: &Path,
32388 query: &str,
32389 limit: usize,
32390 strategy: &str,
32391) -> Result<sift::SearchResponse> {
32392 let engine = Sift::builder().with_cache_dir(cache_dir).build();
32393 let options = SearchOptions::default()
32394 .with_limit(limit)
32395 .with_strategy(strategy.to_string());
32396 let input = SearchInput::new(search_path, query).with_options(options);
32397 engine.search(input).context("sift search failed")
32398}
32399
/// Formats the error shown when an exact search exceeds `timeout_secs`.
fn exact_search_timeout_message(timeout_secs: u64) -> String {
    let advice =
        "Re-run with `--timeout 0` to disable the timeout or narrow `--path` / `--scope`.";
    format!("tsift search timed out after {timeout_secs}s (strategy: exact). {advice}")
}
32407
/// Builds the `rg` invocation used for exact search: JSON output, fixed-string
/// matching, line numbers, hidden files included, with `--` separating the
/// options from the query and the search path.
fn exact_search_command(search_path: &Path, query: &str) -> Command {
    let mut rg = Command::new("rg");
    rg.args([
        "--json",
        "--fixed-strings",
        "--line-number",
        "--hidden",
        // End of options, so a query starting with `-` is not parsed as a flag.
        "--",
    ]);
    rg.arg(query);
    rg.arg(search_path);
    rg
}
32420
32421fn exact_search_file_timestamp(path: &Path) -> sift::ArtifactFreshness {
32422 let observed_unix_secs = SystemTime::now()
32423 .duration_since(UNIX_EPOCH)
32424 .unwrap_or_default()
32425 .as_secs() as i64;
32426 let modified_unix_secs = fs::metadata(path)
32427 .ok()
32428 .and_then(|metadata| metadata.modified().ok())
32429 .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
32430 .map(|duration| duration.as_secs() as i64);
32431 sift::ArtifactFreshness {
32432 observed_unix_secs,
32433 modified_unix_secs,
32434 }
32435}
32436
32437fn parse_exact_search_output(
32438 search_path: &Path,
32439 limit: usize,
32440 raw: &str,
32441) -> Result<sift::SearchResponse> {
32442 if limit == 0 {
32443 return Ok(sift::SearchResponse {
32444 strategy: "exact".to_string(),
32445 root: search_path.display().to_string(),
32446 indexed_artifacts: 0,
32447 skipped_artifacts: 0,
32448 coverage: empty_search_coverage(),
32449 hits: Vec::new(),
32450 });
32451 }
32452
32453 let mut hits = Vec::new();
32454 for line in raw.lines() {
32455 let event: RipgrepJsonEvent =
32456 serde_json::from_str(line).context("parsing ripgrep exact-search output")?;
32457 let RipgrepJsonEvent::Match { data } = event else {
32458 continue;
32459 };
32460 let Some(path_text) = data.path.text else {
32461 continue;
32462 };
32463 let Some(lines_text) = data.lines.text else {
32464 continue;
32465 };
32466 let path = PathBuf::from(path_text);
32467 let snippet = lines_text.trim_end_matches(['\r', '\n']).to_string();
32468 let rank = hits.len() + 1;
32469 hits.push(sift::SearchHit {
32470 artifact_id: format!(
32471 "exact:{}:{}:{}",
32472 path.display(),
32473 data.line_number.unwrap_or(0),
32474 rank
32475 ),
32476 artifact_kind: sift::ContextArtifactKind::File,
32477 path: path.display().to_string(),
32478 rank,
32479 score: (limit.saturating_sub(rank).saturating_add(1)) as f64,
32480 confidence: sift::ScoreConfidence::High,
32481 location: data.line_number.map(|line| format!("line {}", line)),
32482 snippet: snippet.clone(),
32483 provenance: sift::ArtifactProvenance {
32484 adapter: sift::AcquisitionAdapterKind::FileSystem,
32485 source: "ripgrep -F".to_string(),
32486 synthetic: false,
32487 },
32488 freshness: exact_search_file_timestamp(&path),
32489 budget: sift::ArtifactBudget::from_text(&snippet, 1),
32490 });
32491 if hits.len() >= limit {
32492 break;
32493 }
32494 }
32495
32496 Ok(sift::SearchResponse {
32497 strategy: "exact".to_string(),
32498 root: search_path.display().to_string(),
32499 indexed_artifacts: hits.len(),
32500 skipped_artifacts: 0,
32501 coverage: empty_search_coverage(),
32502 hits,
32503 })
32504}
32505
32506fn exact_search_response_from_process(
32507 search_path: &Path,
32508 limit: usize,
32509 status: std::process::ExitStatus,
32510 stdout: &[u8],
32511 stderr: &[u8],
32512) -> Result<sift::SearchResponse> {
32513 if !status.success() && status.code() != Some(1) {
32514 let message = String::from_utf8_lossy(stderr);
32515 let trimmed = message.trim();
32516 if trimmed.is_empty() {
32517 bail!("ripgrep exact search exited with status {}", status);
32518 }
32519 bail!("{}", trimmed);
32520 }
32521
32522 let raw = String::from_utf8(stdout.to_vec()).context("decoding ripgrep exact-search output")?;
32523 parse_exact_search_output(search_path, limit, &raw)
32524}
32525
32526fn run_exact_search(search_path: &Path, query: &str, limit: usize) -> Result<sift::SearchResponse> {
32527 let output = exact_search_command(search_path, query)
32528 .output()
32529 .context("running exact search with ripgrep")?;
32530 exact_search_response_from_process(
32531 search_path,
32532 limit,
32533 output.status,
32534 &output.stdout,
32535 &output.stderr,
32536 )
32537}
32538
32539pub(crate) fn run_exact_search_with_timeout(
32540 search_path: &Path,
32541 query: &str,
32542 limit: usize,
32543 timeout_secs: u64,
32544) -> Result<sift::SearchResponse> {
32545 if timeout_secs == 0 {
32546 return run_exact_search(search_path, query, limit);
32547 }
32548
32549 let mut child = exact_search_command(search_path, query)
32550 .stdin(Stdio::null())
32551 .stdout(Stdio::piped())
32552 .stderr(Stdio::piped())
32553 .spawn()
32554 .context("spawning timed exact search worker")?;
32555
32556 let timeout = Duration::from_secs(timeout_secs);
32557 let status = wait_for_child_exit(&mut child, timeout)
32558 .context("waiting for timed exact search worker")?;
32559 if status.is_none() {
32560 let _ = child.kill();
32561 let _ = child.wait();
32562 bail!("{}", exact_search_timeout_message(timeout_secs));
32563 }
32564
32565 let status = status.unwrap();
32566 let stdout = read_child_stdout(&mut child)?;
32567 let stderr = read_child_stderr(&mut child)?;
32568 exact_search_response_from_process(
32569 search_path,
32570 limit,
32571 status,
32572 stdout.as_bytes(),
32573 stderr.as_bytes(),
32574 )
32575}
32576
32577pub(crate) fn run_search_with_timeout(
32578 search_path: &Path,
32579 cache_dir: &Path,
32580 query: &str,
32581 limit: usize,
32582 timeout_secs: u64,
32583 strategy: &str,
32584 search_targets: &[SearchIndexTarget],
32585) -> Result<sift::SearchResponse> {
32586 if timeout_secs == 0 {
32587 return run_sift_search(search_path, cache_dir, query, limit, strategy);
32588 }
32589
32590 let output_path = next_search_worker_output_path();
32591 let mut child = Command::new(
32592 std::env::current_exe().context("resolving tsift executable for timed search")?,
32593 )
32594 .arg("__search-worker")
32595 .arg("--path")
32596 .arg(search_path)
32597 .arg("--cache-dir")
32598 .arg(cache_dir)
32599 .arg("--query")
32600 .arg(query)
32601 .arg("--limit")
32602 .arg(limit.to_string())
32603 .arg("--strategy")
32604 .arg(strategy)
32605 .arg("--output")
32606 .arg(&output_path)
32607 .stdin(Stdio::null())
32608 .stdout(Stdio::null())
32609 .stderr(Stdio::piped())
32610 .spawn()
32611 .context("spawning timed sift search worker")?;
32612
32613 let timeout = Duration::from_secs(timeout_secs);
32614 let status =
32615 wait_for_child_exit(&mut child, timeout).context("waiting for timed sift search worker")?;
32616 if status.is_none() {
32617 let _ = child.kill();
32618 let _ = child.wait();
32619 let _ = fs::remove_file(&output_path);
32620 bail!(
32621 "{}",
32622 search_timeout_message(timeout_secs, strategy, search_targets)?
32623 );
32624 }
32625
32626 let status = status.unwrap();
32627 let stderr = read_child_stderr(&mut child)?;
32628 if !status.success() {
32629 let _ = fs::remove_file(&output_path);
32630 let message = stderr.trim();
32631 if message.is_empty() {
32632 bail!("sift search worker exited with status {}", status);
32633 }
32634 bail!("{}", message);
32635 }
32636
32637 let raw = fs::read_to_string(&output_path)
32638 .with_context(|| format!("reading search worker output: {}", output_path.display()))?;
32639 let _ = fs::remove_file(&output_path);
32640 serde_json::from_str(&raw).context("parsing search worker output")
32641}
32642
/// Picks a temp-dir path for a search worker's JSON output, named after the
/// current process id and the current time in nanoseconds since the epoch.
fn next_search_worker_output_path() -> PathBuf {
    let pid = std::process::id();
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        // A clock set before the epoch falls back to a zero stamp.
        Err(_) => 0,
    };
    let file_name = format!("tsift-search-{}-{}.json", pid, nanos);
    std::env::temp_dir().join(file_name)
}
32654
32655fn wait_for_child_exit(
32656 child: &mut std::process::Child,
32657 timeout: Duration,
32658) -> Result<Option<std::process::ExitStatus>> {
32659 let started = Instant::now();
32660 loop {
32661 if let Some(status) = child.try_wait()? {
32662 return Ok(Some(status));
32663 }
32664 if started.elapsed() >= timeout {
32665 return Ok(None);
32666 }
32667 let remaining = timeout.saturating_sub(started.elapsed());
32668 std::thread::sleep(remaining.min(Duration::from_millis(10)));
32669 }
32670}
32671
32672fn read_child_stderr(child: &mut std::process::Child) -> Result<String> {
32673 let mut stderr = String::new();
32674 if let Some(mut pipe) = child.stderr.take() {
32675 pipe.read_to_string(&mut stderr)
32676 .context("reading search worker stderr")?;
32677 }
32678 Ok(stderr)
32679}
32680
32681fn read_child_stdout(child: &mut std::process::Child) -> Result<String> {
32682 let mut stdout = String::new();
32683 if let Some(mut pipe) = child.stdout.take() {
32684 pipe.read_to_string(&mut stdout)
32685 .context("reading search worker stdout")?;
32686 }
32687 Ok(stdout)
32688}
32689
32690pub(crate) fn maybe_apply_search_worker_test_hooks() -> Result<()> {
32691 if let Ok(path) = std::env::var("TSIFT_TEST_SEARCH_WORKER_PID_FILE") {
32692 fs::write(&path, std::process::id().to_string())
32693 .with_context(|| format!("writing search worker pid file: {path}"))?;
32694 }
32695 if let Ok(ms) = std::env::var("TSIFT_TEST_SEARCH_WORKER_SLEEP_MS") {
32696 let delay_ms = ms
32697 .parse::<u64>()
32698 .with_context(|| format!("parsing TSIFT_TEST_SEARCH_WORKER_SLEEP_MS={ms}"))?;
32699 std::thread::sleep(Duration::from_millis(delay_ms));
32700 }
32701 Ok(())
32702}
32703
#[cfg(test)]
thread_local! {
    /// Per-thread slot holding the pending post-precheck lock hook, if one is
    /// installed. Starts out empty.
    static SEARCH_POST_PRECHECK_LOCK_HOOK: RefCell<Option<SearchPostPrecheckLockHook>> =
        const { RefCell::new(None) };
}
32708
/// Journal mode under which the test hook takes its exclusive lock.
#[cfg(test)]
enum SearchPostPrecheckLockMode {
    /// Lock with `journal_mode=DELETE`.
    RollbackJournal,
    /// Lock with `journal_mode=WAL`.
    Wal,
}
32714
/// A pending request to lock a database right after the search precheck.
#[cfg(test)]
struct SearchPostPrecheckLockHook {
    /// Database file to lock.
    db_path: PathBuf,
    /// Journal mode to lock it under.
    mode: SearchPostPrecheckLockMode,
}
32720
/// Guard returned when a lock hook is installed; dropping it clears the
/// current thread's hook slot.
#[cfg(test)]
struct SearchPostPrecheckLockGuard;
32723
#[cfg(test)]
impl Drop for SearchPostPrecheckLockGuard {
    /// Clears any hook still installed on this thread so it cannot leak into a
    /// later test.
    fn drop(&mut self) {
        SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
            let _discarded = slot.borrow_mut().take();
        });
    }
}
32732
/// Installs a rollback-journal lock hook for `db_path` on the current thread.
#[cfg(test)]
fn install_search_post_precheck_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::RollbackJournal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
32737
/// Installs a WAL-mode lock hook for `db_path` on the current thread.
#[cfg(test)]
fn install_search_post_precheck_wal_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::Wal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
32742
/// Stores a lock hook in the current thread's slot and returns the guard that
/// clears it again.
///
/// # Panics
/// Panics if a hook is already installed on this thread.
#[cfg(test)]
fn install_search_post_precheck_lock_hook(
    db_path: PathBuf,
    mode: SearchPostPrecheckLockMode,
) -> SearchPostPrecheckLockGuard {
    SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
        let mut installed = slot.borrow_mut();
        assert!(
            installed.is_none(),
            "search post-precheck lock hook already installed"
        );
        *installed = Some(SearchPostPrecheckLockHook { db_path, mode });
    });
    SearchPostPrecheckLockGuard
}
32757
/// Test-build hook: if a lock hook is installed on this thread, consumes it
/// and has a background thread take an exclusive lock on the database.
///
/// The background thread opens the database, acquires the lock in the
/// requested journal mode, signals readiness, holds the lock for 200 ms, then
/// drops the connection and removes the rollback-journal marker. This function
/// returns as soon as readiness is signalled, waiting at most one second.
///
/// # Errors
/// Fails when the background thread does not signal readiness in time.
#[cfg(test)]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
    let pending = SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| slot.borrow_mut().take());
    let Some(SearchPostPrecheckLockHook { db_path, mode }) = pending else {
        return Ok(());
    };

    let (ready_tx, ready_rx) = std::sync::mpsc::sync_channel(1);
    std::thread::spawn(move || {
        let conn = Connection::open(&db_path).expect("opening db for search lock hook");
        match mode {
            SearchPostPrecheckLockMode::Wal => {
                conn.execute_batch(
                    "PRAGMA journal_mode=WAL;
                     PRAGMA wal_autocheckpoint=0;
                     CREATE TABLE IF NOT EXISTS search_wal_lock_probe (id INTEGER PRIMARY KEY);
                     INSERT INTO search_wal_lock_probe DEFAULT VALUES;
                     PRAGMA locking_mode=EXCLUSIVE;
                     BEGIN EXCLUSIVE;",
                )
                .expect("acquiring WAL hook lock");
                assert!(substrate::wal_sidecar_path(&db_path).exists());
            }
            SearchPostPrecheckLockMode::RollbackJournal => {
                conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
                    .expect("acquiring rollback-journal hook lock");
                // Leave a marker where the rollback journal would live.
                fs::write(substrate::rollback_journal_path(&db_path), "locked")
                    .expect("writing rollback journal marker");
            }
        }
        ready_tx.send(()).expect("signaling search lock hook");
        // Hold the lock long enough for the caller to run into it.
        std::thread::sleep(Duration::from_millis(200));
        drop(conn);
        let _ = fs::remove_file(substrate::rollback_journal_path(&db_path));
    });

    ready_rx
        .recv_timeout(Duration::from_secs(1))
        .context("waiting for search post-precheck lock hook")
}
32796
32797#[cfg(not(test))]
32798pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
32799 Ok(())
32800}