1mod cli;
2mod commands;
3mod output;
4
5use anyhow::{Context, Result, bail};
6use clap::Parser;
7use cli::{Cli, Commands, DispatchTraceFormat, GraphDbQuery, SemanticRelatedKind};
8#[cfg(test)]
9use cli::{GraphDbBackend, TraverseFormat};
10use commands::digests::{
11 cmd_context_pack, cmd_diff_digest, cmd_log_digest, cmd_metric_digest, cmd_session_cost,
12 cmd_session_digest, cmd_session_review_with_budget, cmd_test_digest,
13};
14#[cfg(test)]
15use commands::graph::cmd_explain;
16use commands::graph::{
17 cmd_analyze, cmd_communities, cmd_explain_with_budget, cmd_graph, cmd_path, cmd_traverse,
18};
19#[cfg(test)]
20use commands::index_search::cmd_search;
21use commands::index_search::{cmd_index, cmd_search_with_budget, cmd_search_worker};
22use commands::infra::{
23 cmd_convex_sync, cmd_edit, cmd_graph_db, cmd_init, cmd_locks, cmd_rewrite, cmd_route, cmd_sql,
24 cmd_status,
25};
26use commands::quality::{cmd_audit, cmd_audit_tagpath, cmd_lint};
27use commands::summarize::cmd_summarize;
28use flate2::{Compression, read::GzDecoder, write::GzEncoder};
29use output::tagpath::{
30 CommunityMemberAmbiguityDiagnostic, TagpathAnnotationDiagnostic, TagpathSearchOpts,
31 annotate_communities_with_tagpath, annotate_hits_with_tagpath,
32 annotate_path_nodes_with_tagpath, annotate_stored_edges_with_tagpath,
33 annotate_stored_symbols_with_tagpath,
34};
35use output::{
36 OutputFormat, ResponseBudget, ResponseBudgetPreset, ToolEnvelope, ToolEnvelopeMetric,
37 ToolEnvelopeSummary, TranscriptArtifactRef,
38};
39use rusqlite::{Connection, OptionalExtension};
40use serde::{Deserialize, Serialize};
41use sift::{SearchInput, SearchOptions, Sift};
42#[cfg(test)]
43use std::cell::RefCell;
44use std::cmp::Ordering;
45use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
46use std::env;
47use std::fs;
48use std::io::{BufRead as _, BufReader, Read as _, Write as _};
49use std::path::{Path, PathBuf};
50use std::process::{Command, Stdio};
51use std::sync::{Mutex, OnceLock};
52use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
53use substrate::{
54 ConvexEdgeRow, ConvexNodeRow, ConvexProjectionRows, GraphEdge as SubstrateGraphEdge,
55 GraphFreshness, GraphNode as SubstrateGraphNode, GraphProjection, GraphPropertyFilter,
56 GraphProvenance, GraphQueryOptions, GraphQueryPage, GraphStore, SQLITE_GRAPH_SCHEMA_VERSION,
57 SqliteGraphStore, SqliteProjectionRefresh,
58};
59use tagpath::{family as tagpath_family, ontology as tagpath_ontology};
60use tempfile::NamedTempFile;
61#[cfg(test)]
62use tsift_agent_doc::session_cost;
63use tsift_agent_doc::{session_digest, session_review};
64use tsift_digest::{diff_digest, log_digest, metric_digest, test_digest};
65use tsift_graph as graph;
66use tsift_index::{config, index, init, walk};
67use tsift_quality::{dci_benchmark, lint, perf_gate};
68use tsift_resolution as resolution;
69use tsift_search::{impact, sift, tagpath_adapter};
70use tsift_sqlite as substrate;
71use tsift_status::status;
72use tsift_summarize::summarize;
73use tsift_tokensave::TokensaveDb;
74
/// Candidate graph-database backends accepted by the backend-eval path.
///
/// Every variant is described by the methods on this type as a read-only
/// prototype; `promotion_gate` reports each one as held until a native
/// adapter exists.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize)]
pub(crate) enum GraphDbExperimentalBackend {
    /// DuckDB with the DuckPGQ extension (`"duckdb-duckpgq"`).
    DuckdbDuckpgq,
    /// FalkorDB (`"falkordb"`).
    Falkordb,
    /// Ladybug (`"ladybug"`).
    Ladybug,
    /// Kuzu, labelled as the Vela-Engineering/kuzu fork (`"kuzu"`).
    Kuzu,
}
82
/// Serializable description of what a candidate backend must prove before it
/// can be promoted out of backend-eval.
#[derive(Serialize)]
struct GraphDbBackendPromotionGate {
    /// Machine-readable gate status, e.g. `"hold_native_adapter_required"`.
    status: String,
    /// Whether promotion is blocked on a native adapter being written.
    native_adapter_required: bool,
    /// Snake-case identifiers of the checks that must pass.
    required_checks: Vec<String>,
}
89
90impl GraphDbExperimentalBackend {
91 fn name(self) -> &'static str {
92 match self {
93 Self::DuckdbDuckpgq => "duckdb-duckpgq",
94 Self::Falkordb => "falkordb",
95 Self::Ladybug => "ladybug",
96 Self::Kuzu => "kuzu",
97 }
98 }
99
100 fn adapter_label(self) -> &'static str {
101 match self {
102 Self::DuckdbDuckpgq => "DuckDB/DuckPGQ read-only prototype",
103 Self::Falkordb => "FalkorDB read-only prototype",
104 Self::Ladybug => "Ladybug read-only prototype",
105 Self::Kuzu => "Kuzu (Vela-Engineering/kuzu) read-only prototype",
106 }
107 }
108
109 fn projection_load(self) -> &'static str {
110 match self {
111 Self::Falkordb => {
112 "provider-neutral rows loaded into a FalkorDB-shaped read snapshot for parity and timing only; production FalkorDB storage remains behind backend-eval until a real adapter passes the full-projection gate"
113 }
114 Self::Kuzu => {
115 "provider-neutral rows loaded into a Kuzu-compatible in-process read snapshot for parity and performance gates; production Vela-Engineering/kuzu storage remains behind a future optional adapter"
116 }
117 _ => {
118 "provider-neutral rows loaded into a dependency-free in-process read snapshot for parity and performance gates"
119 }
120 }
121 }
122
123 fn lock_behavior(self) -> &'static str {
124 match self {
125 Self::Falkordb => {
126 "read-only FalkorDB prototype snapshot; production promotion must prove multi-process writer behavior and local fallback semantics before replacing SQLite"
127 }
128 Self::Kuzu => {
129 "read-only Kuzu prototype snapshot; no SQLite writer lock is taken during benchmarks, and production Vela-Engineering/kuzu promotion must prove concurrent writer semantics before replacing SQLite"
130 }
131 _ => "read-only snapshot/row adapter; no writer lock is taken during query benchmarks",
132 }
133 }
134
135 fn install_portability(self) -> &'static str {
136 match self {
137 Self::Falkordb => {
138 "prototype is dependency-free in this binary; production FalkorDB promotion must keep install optional and preserve cargo build/install without a service"
139 }
140 Self::Kuzu => {
141 "prototype is dependency-free in this binary; production Vela-Engineering/kuzu integration must stay optional so cargo build/install works without a native Kuzu toolchain"
142 }
143 _ => {
144 "prototype is dependency-free in this binary; a production engine adapter must remain optional before promotion"
145 }
146 }
147 }
148
149 fn prototype_hold_reason(self) -> Option<&'static str> {
150 match self {
151 Self::DuckdbDuckpgq => Some(
152 "DuckDB/DuckPGQ remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior",
153 ),
154 Self::Falkordb => Some(
155 "FalkorDB remains behind backend-eval until a production adapter beats SQLite on full_projection conflict-matrix, evidence, dispatch-trace, path tiers, install portability, and lock behavior",
156 ),
157 Self::Ladybug => Some(
158 "Ladybug remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior",
159 ),
160 Self::Kuzu => Some(
161 "Kuzu remains behind backend-eval until a native optional adapter proves projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior",
162 ),
163 }
164 }
165
166 fn promotion_gate(self) -> GraphDbBackendPromotionGate {
167 match self {
168 Self::DuckdbDuckpgq => GraphDbBackendPromotionGate {
169 status: "hold_native_adapter_required".to_string(),
170 native_adapter_required: true,
171 required_checks: vec![
172 "native_duckdb_duckpgq_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
173 .to_string(),
174 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
175 .to_string(),
176 "embedded_or_service_lock_behavior_match_or_beat_sqlite".to_string(),
177 "operator_install_cost_keeps_cargo_build_install_duckdb_extension_free_by_default"
178 .to_string(),
179 ],
180 },
181 Self::Falkordb => GraphDbBackendPromotionGate {
182 status: "hold_native_adapter_required".to_string(),
183 native_adapter_required: true,
184 required_checks: vec![
185 "native_falkordb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
186 .to_string(),
187 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
188 .to_string(),
189 "multi_process_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
190 .to_string(),
191 "operator_install_cost_keeps_cargo_build_install_service_free_by_default"
192 .to_string(),
193 ],
194 },
195 Self::Ladybug => GraphDbBackendPromotionGate {
196 status: "hold_native_adapter_required".to_string(),
197 native_adapter_required: true,
198 required_checks: vec![
199 "native_ladybug_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
200 .to_string(),
201 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
202 .to_string(),
203 "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
204 .to_string(),
205 "operator_install_cost_keeps_cargo_build_install_ladybug_free_by_default"
206 .to_string(),
207 ],
208 },
209 Self::Kuzu => GraphDbBackendPromotionGate {
210 status: "hold_native_adapter_required".to_string(),
211 native_adapter_required: true,
212 required_checks: vec![
213 "native_kuzu_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
214 .to_string(),
215 "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
216 .to_string(),
217 "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
218 .to_string(),
219 "operator_install_cost_keeps_cargo_build_install_native_kuzu_free_by_default"
220 .to_string(),
221 ],
222 },
223 }
224 }
225
226 fn parse(raw: &str) -> Result<Self> {
227 match raw {
228 "duckdb-duckpgq" | "duckdb" | "duckpgq" => Ok(Self::DuckdbDuckpgq),
229 "falkordb" | "falkor" => Ok(Self::Falkordb),
230 "ladybug" => Ok(Self::Ladybug),
231 "kuzu" | "vela-kuzu" => Ok(Self::Kuzu),
232 _ => {
233 bail!(
234 "unknown backend-eval candidate {raw:?}; expected duckdb-duckpgq, falkordb, ladybug, or kuzu"
235 )
236 }
237 }
238 }
239}
240
/// Deserialized batch of edit operations.
#[derive(Deserialize)]
pub(crate) struct EditBatch {
    /// The individual edit operations contained in the batch.
    edits: Vec<EditOp>,
}
245
/// One deserialized edit operation within an [`EditBatch`].
// NOTE(review): field meanings below are inferred from names; confirm against
// the code that consumes `EditOp`.
#[derive(Deserialize)]
struct EditOp {
    /// Path of the file the edit applies to.
    file: PathBuf,
    /// Presumably the existing text to look for.
    old: String,
    /// Presumably the replacement text.
    new: String,
    /// Defaults to `false` when the field is absent from the input.
    #[serde(default)]
    replace_all: bool,
}
258
/// Borrowed option bundle handed to `cmd_metric_digest` by `run`.
pub(crate) struct MetricDigestOptions<'a> {
    /// Optional input path (`--input` on the CLI side, as wired in `run`).
    input_path: Option<&'a Path>,
    /// Optional baseline path.
    baseline_path: Option<&'a Path>,
    /// Metric names supplied by the caller.
    metrics: &'a [String],
    /// Names listed as lower-is-better; presumably controls regression direction.
    lower_is_better: &'a [String],
    /// Names listed as higher-is-better; presumably controls regression direction.
    higher_is_better: &'a [String],
    /// History size; exact semantics are defined by the consumer.
    history: usize,
    /// Top-N limit; exact semantics are defined by the consumer.
    top: usize,
}
268
/// Serialized outcome for a single file touched by an edit request.
#[derive(Serialize)]
pub(crate) struct EditResult {
    /// File the result refers to.
    file: PathBuf,
    /// Whether the edit was applied or skipped.
    status: EditStatus,
    /// Error text; omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
    /// Replacement count; omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    replacements: Option<usize>,
}
278
/// Status of an [`EditResult`]; serialized in lowercase (`"ok"` / `"skipped"`).
#[derive(Serialize)]
#[serde(rename_all = "lowercase")]
pub(crate) enum EditStatus {
    /// Serialized as `"ok"`.
    Ok,
    /// Serialized as `"skipped"`.
    Skipped,
}
285
/// An edit whose resulting file content has been computed in memory.
// NOTE(review): the planning/staging/applying pipeline is not visible in this
// chunk; descriptions are inferred from field names — confirm.
struct PlannedEdit {
    /// Presumably the position of the originating operation in the batch.
    index: usize,
    /// Target file path.
    file: PathBuf,
    /// Full new content for the file.
    new_content: String,
    /// Number of replacements counted for this edit.
    replacements: usize,
}
292
/// An edit paired with a temporary file holding staged data.
// NOTE(review): how and when the staged file is promoted is not visible in
// this chunk — confirm against the edit command implementation.
struct StagedEdit {
    /// Presumably the position of the originating operation in the batch.
    index: usize,
    /// Target file path.
    file: PathBuf,
    /// Number of replacements counted for this edit.
    replacements: usize,
    /// Temporary file associated with this edit.
    staged_file: NamedTempFile,
}
299
/// An edit recorded together with the path of a backup.
// NOTE(review): presumably the backup allows rollback of an applied edit;
// the code using it is outside this chunk — confirm.
struct AppliedEdit {
    /// Presumably the position of the originating operation in the batch.
    index: usize,
    /// Target file path.
    file: PathBuf,
    /// Number of replacements counted for this edit.
    replacements: usize,
    /// Path of the backup associated with this edit.
    backup_path: PathBuf,
}
306
307pub fn run() -> Result<()> {
308 let cli = Cli::parse();
309 let compact = cli.compact;
310 let pretty = cli.pretty;
311 let terse = cli.terse;
312 let absolute = cli.absolute;
313 let tabular = cli.tabular;
314 let schema = cli.schema;
315 let envelope = cli.envelope;
316 match cli.command {
317 Some(Commands::Search {
318 query,
319 path,
320 limit,
321 strategy,
322 exact,
323 scope,
324 federated,
325 json,
326 autoindex,
327 no_autoindex,
328 timeout,
329 max_items,
330 max_bytes,
331 budget,
332 no_tagpath,
333 tagpath_strict,
334 }) => cmd_search_with_budget(
335 query,
336 path,
337 limit,
338 if exact {
339 Some("exact".to_string())
340 } else {
341 strategy
342 },
343 scope,
344 federated,
345 json || terse || schema || envelope,
346 autoindex || !no_autoindex,
347 timeout,
348 compact,
349 pretty,
350 terse,
351 absolute,
352 tabular,
353 schema,
354 envelope,
355 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
356 TagpathSearchOpts {
357 no_tagpath,
358 strict: tagpath_strict,
359 },
360 ),
361 Some(Commands::SearchWorker {
362 path,
363 cache_dir,
364 query,
365 limit,
366 strategy,
367 output,
368 }) => cmd_search_worker(&path, &cache_dir, &query, limit, &strategy, &output),
369 Some(Commands::DigestRunner {
370 kind,
371 path,
372 runner,
373 shell_command,
374 json,
375 }) => cmd_digest_runner(
376 &kind,
377 &path,
378 runner.as_deref(),
379 &shell_command,
380 OutputFormat {
381 json_output: json || terse || schema || envelope,
382 compact,
383 pretty,
384 terse,
385 schema,
386 envelope,
387 },
388 ),
389 Some(Commands::Edit { dry_run, file }) => {
390 cmd_edit(dry_run, file, compact, pretty, terse, schema)
391 }
392 Some(Commands::Index {
393 path,
394 rebuild,
395 check,
396 exit_code,
397 prune,
398 quiet,
399 workspace,
400 submodule,
401 json,
402 }) => cmd_index(
403 &path,
404 rebuild,
405 check,
406 exit_code,
407 prune,
408 quiet,
409 workspace,
410 submodule.as_deref(),
411 json || terse || schema || envelope,
412 compact,
413 pretty,
414 terse,
415 absolute,
416 schema,
417 ),
418 Some(Commands::Rewrite { command, run }) => cmd_rewrite(
419 &command,
420 run,
421 OutputFormat {
422 json_output: terse || schema || envelope,
423 compact,
424 pretty,
425 terse,
426 schema,
427 envelope,
428 },
429 ),
430 Some(Commands::Route { task, id }) => cmd_route(&task, id),
431 Some(Commands::Graph {
432 symbol,
433 path,
434 callers,
435 callees,
436 scope,
437 limit,
438 json,
439 no_tagpath,
440 tagpath_strict,
441 }) => cmd_graph(
442 &symbol,
443 &path,
444 callers,
445 callees,
446 scope.as_deref(),
447 limit,
448 json || terse || schema || envelope,
449 compact,
450 pretty,
451 terse,
452 absolute,
453 tabular,
454 schema,
455 TagpathSearchOpts {
456 no_tagpath,
457 strict: tagpath_strict,
458 },
459 ),
460 Some(Commands::Sql {
461 db,
462 query,
463 table,
464 json,
465 }) => cmd_sql(
466 &db,
467 query,
468 table,
469 json || terse || schema || envelope,
470 compact,
471 pretty,
472 terse,
473 schema,
474 ),
475 Some(Commands::Communities {
476 path,
477 scope,
478 min_size,
479 limit,
480 json,
481 no_tagpath,
482 tagpath_strict,
483 }) => cmd_communities(
484 &path,
485 scope.as_deref(),
486 min_size,
487 limit,
488 json || terse || schema || envelope,
489 compact,
490 pretty,
491 terse,
492 tabular,
493 schema,
494 TagpathSearchOpts {
495 no_tagpath,
496 strict: tagpath_strict,
497 },
498 ),
499 Some(Commands::Analyze {
500 path,
501 scope,
502 entry_points,
503 limit,
504 json,
505 }) => cmd_analyze(
506 &path,
507 scope.as_deref(),
508 &entry_points,
509 limit,
510 OutputFormat {
511 json_output: json || terse || schema || envelope,
512 compact,
513 pretty,
514 terse,
515 schema,
516 envelope,
517 },
518 ),
519 Some(Commands::Path {
520 from,
521 to,
522 path,
523 scope,
524 json,
525 no_tagpath,
526 tagpath_strict,
527 }) => cmd_path(
528 &from,
529 &to,
530 &path,
531 scope.as_deref(),
532 json || terse || schema || envelope,
533 compact,
534 pretty,
535 terse,
536 schema,
537 TagpathSearchOpts {
538 no_tagpath,
539 strict: tagpath_strict,
540 },
541 ),
542 Some(Commands::Explain {
543 symbol,
544 path,
545 scope,
546 limit,
547 json,
548 max_items,
549 max_bytes,
550 budget,
551 no_tagpath,
552 tagpath_strict,
553 }) => cmd_explain_with_budget(
554 &symbol,
555 &path,
556 scope.as_deref(),
557 limit,
558 json || terse || schema || envelope,
559 compact,
560 pretty,
561 terse,
562 absolute,
563 tabular,
564 schema,
565 envelope,
566 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
567 TagpathSearchOpts {
568 no_tagpath,
569 strict: tagpath_strict,
570 },
571 ),
572 Some(Commands::Traverse {
573 node,
574 to,
575 path,
576 scope,
577 depth,
578 limit,
579 format,
580 convex_snapshot,
581 }) => cmd_traverse(
582 node.as_deref(),
583 to.as_deref(),
584 &path,
585 scope.as_deref(),
586 depth,
587 limit,
588 format,
589 pretty,
590 terse,
591 schema,
592 convex_snapshot.as_deref(),
593 ),
594 Some(Commands::ConvexSync {
595 path,
596 scope,
597 snapshot,
598 chunk_size,
599 remote_snapshot,
600 apply,
601 endpoint,
602 auth_token_env,
603 json,
604 }) => cmd_convex_sync(
605 ConvexSyncOptions {
606 path: &path,
607 scope: scope.as_deref(),
608 snapshot: snapshot.as_deref(),
609 chunk_size,
610 remote_snapshot,
611 apply,
612 endpoint: endpoint.as_deref(),
613 auth_token_env: &auth_token_env,
614 },
615 OutputFormat {
616 json_output: json || terse || schema || envelope,
617 compact,
618 pretty,
619 terse,
620 schema,
621 envelope,
622 },
623 ),
624 Some(Commands::GraphDb {
625 path,
626 scope,
627 backend,
628 convex_snapshot,
629 json,
630 query,
631 }) => cmd_graph_db(
632 &path,
633 scope.as_deref(),
634 backend,
635 convex_snapshot.as_deref(),
636 query,
637 OutputFormat {
638 json_output: json || terse || schema || envelope,
639 compact,
640 pretty,
641 terse,
642 schema,
643 envelope,
644 },
645 ),
646 Some(Commands::SourceRead {
647 file,
648 path,
649 start,
650 lines,
651 end,
652 scope,
653 json,
654 max_items,
655 max_bytes,
656 budget,
657 }) => cmd_source_read(
658 &file,
659 &path,
660 start,
661 lines,
662 end,
663 scope.as_deref(),
664 OutputFormat {
665 json_output: json || terse || schema || envelope,
666 compact,
667 pretty,
668 terse,
669 schema,
670 envelope,
671 },
672 absolute,
673 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
674 ),
675 Some(Commands::Audit {
676 skills_dir,
677 manifest,
678 usage,
679 cleanup,
680 report,
681 json,
682 }) => cmd_audit(
683 &skills_dir,
684 manifest,
685 usage,
686 cleanup,
687 report,
688 json || terse || schema || envelope,
689 compact,
690 pretty,
691 terse,
692 schema,
693 ),
694 Some(Commands::AuditTagpath { path, scope, json }) => cmd_audit_tagpath(
695 &path,
696 scope.as_deref(),
697 json || terse || schema || envelope,
698 pretty,
699 terse,
700 schema,
701 ),
702 Some(Commands::Init {
703 path,
704 codex,
705 opencode,
706 workspace,
707 }) => cmd_init(&path, codex, opencode, workspace),
708 Some(Commands::Lint {
709 file,
710 index,
711 entities_from,
712 json,
713 }) => cmd_lint(
714 &file,
715 index,
716 entities_from,
717 json || terse || schema || envelope,
718 compact,
719 pretty,
720 terse,
721 schema,
722 ),
723 Some(Commands::Summarize {
724 symbol,
725 file,
726 extract,
727 diff,
728 stats,
729 path,
730 json,
731 }) => cmd_summarize(
732 symbol,
733 file,
734 extract,
735 diff,
736 stats,
737 &path,
738 json || terse || schema || envelope,
739 compact,
740 pretty,
741 terse,
742 schema,
743 ),
744 Some(Commands::Semantic {
745 query,
746 path,
747 scope,
748 limit,
749 kind,
750 json,
751 }) => cmd_semantic_related(
752 &query,
753 &path,
754 scope.as_deref(),
755 limit,
756 kind,
757 json || terse || schema || envelope,
758 compact,
759 pretty,
760 terse,
761 schema,
762 ),
763 Some(Commands::DiffDigest {
764 path,
765 cached,
766 revision,
767 json,
768 }) => cmd_diff_digest(
769 &path,
770 cached,
771 revision.as_deref(),
772 OutputFormat {
773 json_output: json || terse || schema || envelope,
774 compact,
775 pretty,
776 terse,
777 schema,
778 envelope,
779 },
780 ),
781 Some(Commands::Impact {
782 path,
783 cached,
784 revision,
785 scope,
786 limit,
787 json,
788 }) => cmd_impact(
789 &path,
790 cached,
791 revision.as_deref(),
792 scope.as_deref(),
793 limit,
794 OutputFormat {
795 json_output: json || terse || schema || envelope,
796 compact,
797 pretty,
798 terse,
799 schema,
800 envelope,
801 },
802 ),
803 Some(Commands::TestDigest {
804 path,
805 input,
806 runner,
807 json,
808 }) => cmd_test_digest(
809 &path,
810 input.as_deref(),
811 runner.as_deref(),
812 OutputFormat {
813 json_output: json || terse || schema || envelope,
814 compact,
815 pretty,
816 terse,
817 schema,
818 envelope,
819 },
820 ),
821 Some(Commands::LogDigest { path, input, json }) => cmd_log_digest(
822 &path,
823 input.as_deref(),
824 OutputFormat {
825 json_output: json || terse || schema || envelope,
826 compact,
827 pretty,
828 terse,
829 schema,
830 envelope,
831 },
832 ),
833 Some(Commands::ContextPack {
834 path,
835 test_input,
836 runner,
837 log_input,
838 json,
839 max_items,
840 max_bytes,
841 budget,
842 convex_snapshot,
843 }) => cmd_context_pack(
844 &path,
845 test_input.as_deref(),
846 runner.as_deref(),
847 log_input.as_deref(),
848 OutputFormat {
849 json_output: json || terse || schema || envelope,
850 compact,
851 pretty,
852 terse,
853 schema,
854 envelope,
855 },
856 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
857 convex_snapshot.as_deref(),
858 ),
859 Some(Commands::ConflictMatrix {
860 targets,
861 path,
862 scope,
863 depth,
864 limit,
865 impact_limit,
866 json,
867 }) => cmd_conflict_matrix(
868 &path,
869 scope.as_deref(),
870 &targets,
871 depth,
872 limit,
873 impact_limit,
874 OutputFormat {
875 json_output: json || terse || schema || envelope,
876 compact,
877 pretty,
878 terse,
879 schema,
880 envelope,
881 },
882 ),
883 Some(Commands::DispatchTrace {
884 targets,
885 path,
886 scope,
887 depth,
888 limit,
889 impact_limit,
890 format,
891 json,
892 }) => cmd_dispatch_trace(
893 DispatchTraceOptions {
894 path: &path,
895 scope: scope.as_deref(),
896 raw_targets: &targets,
897 depth,
898 limit,
899 impact_limit,
900 trace_format: if json {
901 DispatchTraceFormat::Json
902 } else {
903 format
904 },
905 },
906 OutputFormat {
907 json_output: json || terse || schema || envelope,
908 compact,
909 pretty,
910 terse,
911 schema,
912 envelope,
913 },
914 ),
915 Some(Commands::DependencyDag {
916 targets,
917 path,
918 scope,
919 depth,
920 limit,
921 json,
922 }) => cmd_dependency_dag(
923 &path,
924 scope.as_deref(),
925 &targets,
926 depth,
927 limit,
928 OutputFormat {
929 json_output: json || terse || schema || envelope,
930 compact,
931 pretty,
932 terse,
933 schema,
934 envelope,
935 },
936 ),
937 Some(Commands::TokenSavings {
938 fixture,
939 fail_under,
940 json,
941 }) => cmd_token_savings(
942 &fixture,
943 fail_under,
944 OutputFormat {
945 json_output: json || terse || schema || envelope,
946 compact,
947 pretty,
948 terse,
949 schema,
950 envelope,
951 },
952 ),
953 Some(Commands::MetricDigest {
954 input,
955 baseline,
956 metrics,
957 lower_is_better,
958 higher_is_better,
959 history,
960 top,
961 json,
962 }) => cmd_metric_digest(
963 MetricDigestOptions {
964 input_path: input.as_deref(),
965 baseline_path: baseline.as_deref(),
966 metrics: &metrics,
967 lower_is_better: &lower_is_better,
968 higher_is_better: &higher_is_better,
969 history,
970 top,
971 },
972 OutputFormat {
973 json_output: json || terse || schema || envelope,
974 compact,
975 pretty,
976 terse,
977 schema,
978 envelope,
979 },
980 ),
981 Some(Commands::DciBenchmark { fixture, json }) => cmd_dci_benchmark(
982 &fixture,
983 OutputFormat {
984 json_output: json || terse || schema || envelope,
985 compact,
986 pretty,
987 terse,
988 schema,
989 envelope,
990 },
991 ),
992 Some(Commands::Workflow { topic, json }) => cmd_workflow(
993 &topic,
994 OutputFormat {
995 json_output: json || terse || schema || envelope,
996 compact,
997 pretty,
998 terse,
999 schema,
1000 envelope,
1001 },
1002 ),
1003 Some(Commands::SessionDigest {
1004 path,
1005 input,
1006 source,
1007 json,
1008 }) => cmd_session_digest(
1009 &path,
1010 input.as_deref(),
1011 source.as_deref(),
1012 OutputFormat {
1013 json_output: json || terse || schema || envelope,
1014 compact,
1015 pretty,
1016 terse,
1017 schema,
1018 envelope,
1019 },
1020 ),
1021 Some(Commands::SessionCost {
1022 input,
1023 source,
1024 json,
1025 }) => cmd_session_cost(
1026 input.as_deref(),
1027 source.as_deref(),
1028 OutputFormat {
1029 json_output: json || terse || schema || envelope,
1030 compact,
1031 pretty,
1032 terse,
1033 schema,
1034 envelope,
1035 },
1036 ),
1037 Some(Commands::SessionReview {
1038 path,
1039 next_context,
1040 json,
1041 max_items,
1042 max_bytes,
1043 budget,
1044 }) => cmd_session_review_with_budget(
1045 &path,
1046 next_context,
1047 OutputFormat {
1048 json_output: json || terse || schema || envelope,
1049 compact,
1050 pretty,
1051 terse,
1052 schema,
1053 envelope,
1054 },
1055 ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
1056 ),
1057 Some(Commands::Status { path, fix, json }) => cmd_status(
1058 &path,
1059 fix,
1060 json || terse || schema || envelope,
1061 compact,
1062 pretty,
1063 terse,
1064 schema,
1065 ),
1066 Some(Commands::Locks { path, scope, json }) => cmd_locks(
1067 &path,
1068 scope.as_deref(),
1069 json || terse || schema || envelope,
1070 compact,
1071 pretty,
1072 terse,
1073 schema,
1074 ),
1075 None => {
1076 println!("tsift v{}", env!("CARGO_PKG_VERSION"));
1077 println!("Run `tsift --help` for usage.");
1078 Ok(())
1079 }
1080 }
1081}
1082
/// Picks a model tier for a free-text task description.
///
/// Returns `(tier, model_id)`. The task is lowercased and checked for
/// substring signals: reasoning-style signals win first ("opus"), then
/// editing-style signals ("sonnet"); anything else falls back to "haiku".
pub fn classify_task(task: &str) -> (&'static str, &'static str) {
    const REASONING_SIGNALS: &[&str] = &[
        "architect",
        "architecture",
        "design",
        "plan",
        "strateg",
        "analy",
        "review",
        "evaluate",
        "assess",
    ];
    // "add " keeps its trailing space so it only matches "add" followed by a space.
    const EDITING_SIGNALS: &[&str] = &[
        "edit",
        "write",
        "fix",
        "change",
        "update",
        "create",
        "add ",
        "remove",
        "delete",
        "modify",
        "refactor",
        "implement",
        "build",
    ];

    let normalized = task.to_lowercase();
    let mentions_any =
        |signals: &[&str]| signals.iter().any(|signal| normalized.contains(*signal));

    if mentions_any(REASONING_SIGNALS) {
        ("opus", "claude-opus-4-6")
    } else if mentions_any(EDITING_SIGNALS) {
        ("sonnet", "claude-sonnet-4-6")
    } else {
        ("haiku", "claude-haiku-4-5-20251001")
    }
}
1126
/// Test-only shorthand for [`to_json_schema`] with the schema transform off.
#[cfg(test)]
fn to_json<T>(val: &T, pretty: bool, terse: bool) -> anyhow::Result<String>
where
    T: serde::Serialize,
{
    let schema = false;
    to_json_schema(val, pretty, terse, schema)
}
1131
1132pub(crate) fn inject_tagpath_stale_into_json(
1139 value: &mut serde_json::Value,
1140 stale: bool,
1141 reason: Option<&str>,
1142) {
1143 if !stale {
1144 return;
1145 }
1146 if let Some(obj) = value.as_object_mut() {
1147 obj.insert(
1148 "tagpath_index_stale".to_string(),
1149 serde_json::Value::Bool(true),
1150 );
1151 if let Some(reason) = reason {
1152 obj.insert(
1153 "tagpath_stale_reason".to_string(),
1154 serde_json::Value::String(reason.to_string()),
1155 );
1156 }
1157 }
1158}
1159
1160pub(crate) fn to_json_schema<T: serde::Serialize>(
1161 val: &T,
1162 pretty: bool,
1163 terse: bool,
1164 schema: bool,
1165) -> anyhow::Result<String> {
1166 if terse || schema {
1167 let value = serde_json::to_value(val)?;
1168 let mut transformed = if terse { terse_transform(value) } else { value };
1169 if schema {
1170 transformed = schema_transform(transformed);
1171 }
1172 if terse {
1173 let terse_schema = terse_schema_for(&transformed);
1174 let wrapped = serde_json::json!({"_s": terse_schema, "d": transformed});
1175 if pretty {
1176 Ok(serde_json::to_string_pretty(&wrapped)?)
1177 } else {
1178 Ok(serde_json::to_string(&wrapped)?)
1179 }
1180 } else if pretty {
1181 Ok(serde_json::to_string_pretty(&transformed)?)
1182 } else {
1183 Ok(serde_json::to_string(&transformed)?)
1184 }
1185 } else if pretty {
1186 Ok(serde_json::to_string_pretty(val)?)
1187 } else {
1188 Ok(serde_json::to_string(val)?)
1189 }
1190}
1191
1192pub(crate) fn envelope_metric(label: &str, value: impl ToString) -> ToolEnvelopeMetric {
1193 ToolEnvelopeMetric {
1194 label: label.to_string(),
1195 value: value.to_string(),
1196 }
1197}
1198
/// Removes duplicate strings, keeping the first occurrence of each value and
/// the original relative order.
pub(crate) fn dedupe_preserve_order(values: Vec<String>) -> Vec<String> {
    let mut already_seen = HashSet::new();
    values
        .into_iter()
        // `insert` returns false for values that were already present.
        .filter(|candidate| already_seen.insert(candidate.clone()))
        .collect()
}
1209
1210pub(crate) fn print_json_or_envelope<T: Serialize>(
1211 report: &T,
1212 format: &OutputFormat,
1213 tool: &str,
1214 view: &str,
1215 summary: ToolEnvelopeSummary,
1216 truncated: bool,
1217 follow_up: Vec<String>,
1218) -> Result<()> {
1219 if format.envelope {
1220 let envelope = ToolEnvelope {
1221 tool,
1222 view,
1223 summary,
1224 truncated,
1225 follow_up: dedupe_preserve_order(follow_up),
1226 report,
1227 };
1228 println!(
1229 "{}",
1230 to_json_schema(&envelope, format.pretty, format.terse, format.schema)?
1231 );
1232 } else {
1233 println!(
1234 "{}",
1235 to_json_schema(report, format.pretty, format.terse, format.schema)?
1236 );
1237 }
1238 Ok(())
1239}
1240
/// One step of a [`WorkflowRecipe`], built entirely from static strings.
#[derive(Serialize)]
struct WorkflowStep {
    /// Short step identifier, e.g. `"exact-anchor"`.
    name: &'static str,
    /// One-sentence description of what the step is for.
    goal: &'static str,
    /// Example command line for the step.
    command: &'static str,
    /// Handles or fields the step's output keeps available.
    preserves: Vec<&'static str>,
    /// Guidance on what to run after this step.
    next: Vec<&'static str>,
}
1249
/// A named, ordered recipe of [`WorkflowStep`]s shown by the `workflow` command.
#[derive(Serialize)]
struct WorkflowRecipe {
    /// Recipe topic, e.g. `"search"`.
    topic: &'static str,
    /// One-sentence summary of the recipe.
    summary: &'static str,
    /// Rules for carrying handles between steps.
    handle_contract: Vec<&'static str>,
    /// Steps in the order they are printed.
    steps: Vec<WorkflowStep>,
}
1257
1258fn search_workflow_recipe() -> WorkflowRecipe {
1259 WorkflowRecipe {
1260 topic: "search",
1261 summary: "Chain exact search, semantic search, explain, summarize, and digest commands without dropping the stable handles emitted by each envelope.",
1262 handle_contract: vec![
1263 "Keep every handle with its originating command, query, path, and strategy.",
1264 "Use each step's expand command for deeper context, but cite the parent handle in notes and follow-up prompts.",
1265 "Prefer --envelope plus --budget normal when handing results to an agent so handles, follow_up commands, and truncation state stay machine-readable.",
1266 ],
1267 steps: vec![
1268 WorkflowStep {
1269 name: "exact-anchor",
1270 goal: "Start from a literal identifier, file path, error text, or prior handle label.",
1271 command: "tsift --envelope search \"<literal>\" --exact --path . --budget normal",
1272 preserves: vec![
1273 "summary.handle",
1274 "report.symbols[].handle",
1275 "report.hits[].handle",
1276 ],
1277 next: vec![
1278 "Run the matching report.symbols[].expand or report.hits[].expand command before broadening the query.",
1279 ],
1280 },
1281 WorkflowStep {
1282 name: "semantic-search",
1283 goal: "Broaden from the exact anchor to lexical, vector, or hybrid retrieval while keeping search-family handles.",
1284 command: "tsift --envelope search \"<concept>\" --path . --strategy hybrid --budget normal",
1285 preserves: vec![
1286 "sfam-* symbol-family handles",
1287 "shit-* content-hit handles",
1288 "follow_up[]",
1289 ],
1290 next: vec![
1291 "Use a symbol-family expand command for more search results, or pass the selected symbol name to explain.",
1292 ],
1293 },
1294 WorkflowStep {
1295 name: "explain-symbol",
1296 goal: "Expand a selected symbol into definitions, callers, callees, and community context.",
1297 command: "tsift --envelope explain \"<symbol>\" --path . --budget normal",
1298 preserves: vec![
1299 "edef-* definition handles",
1300 "ecall-* caller handles",
1301 "eces-* callee handles",
1302 ],
1303 next: vec![
1304 "Run edge expand commands for neighboring symbols, or summarize the selected symbol/file when the cache is available.",
1305 ],
1306 },
1307 WorkflowStep {
1308 name: "summarize-selection",
1309 goal: "Read cached summaries for the selected symbol or file without mutating the summary cache.",
1310 command: "tsift summarize \"<symbol>\" --path . --json",
1311 preserves: vec![
1312 "summary refs emitted by search, explain, test-digest, log-digest, diff-digest, and context-pack",
1313 ],
1314 next: vec![
1315 "If summaries are missing, run the status-recommended summarize --extract command outside the read-only query path.",
1316 ],
1317 },
1318 WorkflowStep {
1319 name: "digest-expansion",
1320 goal: "Expand from code navigation into changed files, tests, logs, or session context while retaining digest artifact handles.",
1321 command: "tsift --envelope context-pack <path> --test-input test.log --log-input build.log --budget normal",
1322 preserves: vec![
1323 "artifact handles",
1324 "touched symbol handles",
1325 "digest summary handles",
1326 "resume_commands[]",
1327 ],
1328 next: vec![
1329 "Use resume_commands[] or each digest entry's expand command, and carry forward the original search/explain handle that motivated the digest.",
1330 ],
1331 },
1332 ],
1333 }
1334}
1335
1336fn workflow_recipe(topic: &str) -> Result<WorkflowRecipe> {
1337 match topic {
1338 "search" | "search-handles" | "search-workflow" => Ok(search_workflow_recipe()),
1339 other => bail!("unknown workflow `{other}`; available workflows: search"),
1340 }
1341}
1342
1343fn print_workflow_human(recipe: &WorkflowRecipe, compact: bool) {
1344 if compact {
1345 println!("workflow:{} steps:{}", recipe.topic, recipe.steps.len());
1346 for step in &recipe.steps {
1347 println!(" {} cmd:{}", step.name, step.command);
1348 }
1349 return;
1350 }
1351
1352 println!("Workflow: {}", recipe.topic);
1353 println!("{}", recipe.summary);
1354 println!();
1355 println!("Handle contract:");
1356 for item in &recipe.handle_contract {
1357 println!(" - {item}");
1358 }
1359 println!();
1360 println!("Steps:");
1361 for (index, step) in recipe.steps.iter().enumerate() {
1362 println!(" {}. {} - {}", index + 1, step.name, step.goal);
1363 println!(" cmd: {}", step.command);
1364 println!(" preserves: {}", step.preserves.join(", "));
1365 println!(" next: {}", step.next.join(" "));
1366 }
1367}
1368
1369fn cmd_workflow(topic: &str, format: OutputFormat) -> Result<()> {
1370 let recipe = workflow_recipe(topic)?;
1371 if format.json_output {
1372 print_json_or_envelope(
1373 &recipe,
1374 &format,
1375 "workflow",
1376 recipe.topic,
1377 ToolEnvelopeSummary {
1378 text: recipe.summary.to_string(),
1379 metrics: vec![envelope_metric("steps", recipe.steps.len())],
1380 },
1381 false,
1382 recipe
1383 .steps
1384 .iter()
1385 .map(|step| step.command.to_string())
1386 .collect(),
1387 )
1388 } else {
1389 print_workflow_human(&recipe, format.compact);
1390 Ok(())
1391 }
1392}
1393
/// Top-level token-savings fixture, parsed from the JSON file given to
/// `cmd_token_savings`.
#[derive(Deserialize, Serialize)]
struct TokenSavingsFixture {
    /// Copied unchanged into the generated report.
    schema_version: u64,
    /// Optional in the fixture file; defaults to an empty string when absent.
    #[serde(default)]
    description: String,
    /// Copied unchanged into the generated report.
    token_estimate: String,
    /// Cases to measure; each one yields one row in the report.
    cases: Vec<TokenSavingsFixtureCase>,
}
1402
/// One measured case in a token-savings fixture.
#[derive(Deserialize, Serialize)]
struct TokenSavingsFixtureCase {
    /// Case name; used in report rows and as part of every stable handle key.
    name: String,
    /// Selects the expand command: `explain`, `session-review`, and
    /// `context-pack` have dedicated forms, anything else falls back to search.
    surface: String,
    /// A case passes when its computed savings percent is at least this value.
    minimum_savings_percent: f64,
    /// Serialized to JSON to obtain the baseline raw byte count.
    raw_symbols: Vec<TokenSavingsRawSymbol>,
    /// Families that are turned into compact envelope rows.
    tagpath_families: Vec<TokenSavingsFamily>,
    /// Optional; when present, adds session-review bytes to both sides.
    #[serde(default)]
    session_review_inputs: Option<TokenSavingsSessionReviewInputs>,
    /// Optional; when present, adds context-pack bytes to both sides.
    #[serde(default)]
    context_pack_inputs: Option<TokenSavingsContextPackInputs>,
    /// Optional; when present, adds source-read bytes to both sides.
    #[serde(default)]
    source_read_inputs: Option<TokenSavingsSourceReadInputs>,
}
1417
/// One raw symbol row of a fixture case. The JSON serialization of all rows
/// is what the report counts as the case's baseline raw bytes.
#[derive(Deserialize, Serialize)]
struct TokenSavingsRawSymbol {
    identifier: String,
    file: String,
    line: u64,
    context: String,
}
1425
/// A tagpath family listed in a fixture case.
#[derive(Deserialize, Serialize)]
struct TokenSavingsFamily {
    /// Canonical family name. Underscores become `/` in the envelope's tag
    /// alias and spaces in the fallback search query.
    canonical: String,
    /// Copied unchanged into the envelope row.
    count: usize,
    /// Optional in the fixture file; defaults to an empty map when absent.
    #[serde(default)]
    aliases: BTreeMap<String, String>,
}
1433
/// Raw session-review payload for a fixture case.
///
/// The whole struct is serialized to JSON to measure raw bytes; in the compact
/// envelope each list is represented only by its length.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSessionReviewInputs {
    prompt_targets: Vec<serde_json::Value>,
    sessions: Vec<serde_json::Value>,
    commands: Vec<serde_json::Value>,
    /// Reported under the `files` section of the envelope.
    touched_files: Vec<serde_json::Value>,
    /// Reported under the `symbols` section of the envelope.
    touched_symbols: Vec<serde_json::Value>,
    failures: Vec<serde_json::Value>,
    guardrails: Vec<serde_json::Value>,
    largest_turns: Vec<serde_json::Value>,
}
1445
/// Raw context-pack payload for a fixture case.
///
/// The whole struct is serialized to JSON to measure raw bytes; in the compact
/// envelope each list is represented only by its length.
#[derive(Deserialize, Serialize)]
struct TokenSavingsContextPackInputs {
    next_context: Vec<serde_json::Value>,
    diff: Vec<serde_json::Value>,
    test: Vec<serde_json::Value>,
    log: Vec<serde_json::Value>,
}
1453
/// Source-read payload for a fixture case.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSourceReadInputs {
    /// Individual reads; the JSON serialization of this list is the raw byte
    /// count for the source-read portion of a case.
    reads: Vec<TokenSavingsSourceReadInput>,
}
1458
/// One source read in a fixture, describing both the raw read and the
/// narrower window that the envelope keeps.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSourceReadInput {
    /// Combined with the case name to form the stable handle key; also named
    /// in validation errors.
    command: String,
    /// Path placed in the envelope row and shell-quoted into its expand command.
    file: String,
    raw_start: u64,
    raw_lines: u64,
    raw_excerpt: String,
    /// First line of the envelope window.
    envelope_start: u64,
    /// Number of lines in the envelope window; zero is rejected as an error.
    envelope_lines: u64,
    /// Every anchor must lie within
    /// `envelope_start..=envelope_start + envelope_lines - 1`, otherwise
    /// building the envelope fails.
    required_line_anchors: Vec<u64>,
}
1470
/// Compact envelope row produced for one tagpath family.
#[derive(Serialize)]
struct TokenSavingsEnvelopeFamily {
    /// Stable handle with the `tfam` prefix, keyed by `surface:case:canonical`.
    handle: String,
    /// The family's canonical name with `_` replaced by `/`.
    tag_alias: String,
    count: usize,
    /// Command text for expanding this family.
    expand: String,
}
1478
/// Compact envelope row standing in for one non-empty session-review section.
#[derive(Serialize)]
struct TokenSavingsSessionReviewEnvelope<'a> {
    /// Section label, e.g. `prompt_targets` or `files`.
    section: &'a str,
    /// Stable handle with the `tsr` prefix, keyed by `case:section`.
    handle: String,
    /// Number of entries the section held in the raw inputs.
    count: usize,
    /// Command text for expanding this section.
    expand: String,
}
1486
/// Compact envelope row standing in for one non-empty context-pack section.
#[derive(Serialize)]
struct TokenSavingsContextPackEnvelope<'a> {
    /// Section label: `next_context`, `diff`, `test`, or `log`.
    section: &'a str,
    /// Stable handle with the `tcp` prefix, keyed by `case:section`.
    handle: String,
    /// Number of entries the section held in the raw inputs.
    count: usize,
    /// Command text for expanding this section.
    expand: String,
}
1494
/// Compact envelope row for one validated source read.
#[derive(Serialize)]
struct TokenSavingsSourceReadEnvelope {
    /// Stable handle with the `tsrc` prefix, keyed by `case:command`.
    handle: String,
    file: String,
    /// First line of the envelope window (the fixture's `envelope_start`).
    start: u64,
    /// Length of the envelope window (the fixture's `envelope_lines`).
    lines: u64,
    /// Anchors copied from the fixture after checking they fall in the window.
    required_line_anchors: Vec<u64>,
    /// `source-read` command text that reproduces this window.
    expand: String,
}
1504
/// Per-case row of the token-savings report.
#[derive(Serialize)]
struct TokenSavingsCaseReport {
    name: String,
    surface: String,
    /// Number of raw symbols in the fixture case.
    raw_symbol_count: usize,
    /// Number of tagpath families in the fixture case.
    family_count: usize,
    /// Total bytes of the serialized raw inputs.
    raw_bytes: usize,
    /// Total bytes of the serialized compact envelopes.
    envelope_bytes: usize,
    /// `raw_bytes - envelope_bytes`, clamped at zero.
    byte_delta: usize,
    /// Byte counts divided by four, rounded up.
    raw_estimated_tokens: usize,
    envelope_estimated_tokens: usize,
    /// Raw minus envelope estimated tokens, clamped at zero.
    estimated_token_delta: usize,
    /// Percentage of raw bytes saved; `0.0` when nothing was saved.
    savings_percent: f64,
    /// Threshold copied from the fixture case.
    minimum_savings_percent: f64,
    /// `"pass"` when `savings_percent >= minimum_savings_percent`, else `"fail"`.
    status: String,
}
1521
/// Aggregate figures across every case of a token-savings report.
#[derive(Serialize)]
struct TokenSavingsTotals {
    /// Number of case rows in the report.
    cases: usize,
    /// Sum of the per-case raw byte counts.
    raw_bytes: usize,
    /// Sum of the per-case envelope byte counts.
    envelope_bytes: usize,
    /// `raw_bytes - envelope_bytes`, clamped at zero.
    byte_delta: usize,
    /// Token estimates derived from the summed byte counts (bytes / 4, rounded up).
    raw_estimated_tokens: usize,
    envelope_estimated_tokens: usize,
    /// Raw minus envelope estimated tokens, clamped at zero.
    estimated_token_delta: usize,
    /// Savings percentage computed from the summed byte counts.
    savings_percent: f64,
}
1533
/// Full token-savings report built from a fixture.
#[derive(Serialize)]
struct TokenSavingsReport {
    /// Copied from the fixture.
    schema_version: u64,
    /// Copied from the fixture.
    token_estimate: String,
    /// `true` only when every case row has status `"pass"`.
    pass: bool,
    totals: TokenSavingsTotals,
    cases: Vec<TokenSavingsCaseReport>,
}
1542
/// Estimates a token count from a byte count at four bytes per token,
/// rounding any partial token up.
fn estimated_tokens_from_bytes(bytes: usize) -> usize {
    let whole_tokens = bytes / 4;
    // A non-zero remainder means one more, partially filled, token.
    let partial_token = usize::from(bytes % 4 != 0);
    whole_tokens + partial_token
}
1546
/// Returns the percentage of `raw_bytes` saved by the envelope.
///
/// Yields `0.0` when there are no raw bytes or when the envelope is not
/// strictly smaller than the raw payload.
fn savings_percent(raw_bytes: usize, envelope_bytes: usize) -> f64 {
    match raw_bytes.checked_sub(envelope_bytes) {
        // A positive difference implies `raw_bytes > 0`, so the division is safe.
        Some(saved) if saved > 0 => (saved as f64 / raw_bytes as f64) * 100.0,
        _ => 0.0,
    }
}
1554
1555fn token_savings_expand_command(surface: &str, canonical: &str) -> String {
1556 let query = canonical.replace('_', " ");
1557 match surface {
1558 "explain" => format!(
1559 "tsift --envelope explain {} --budget normal",
1560 shell_quote(canonical)
1561 ),
1562 "session-review" => format!("tsift summarize {}", shell_quote(canonical)),
1563 "context-pack" => {
1564 "tsift --envelope context-pack <target> --test-input <test.log> --log-input <build.log> --budget normal"
1565 .to_string()
1566 }
1567 _ => format!(
1568 "tsift --envelope search {} --budget normal",
1569 shell_quote(&query)
1570 ),
1571 }
1572}
1573
1574fn token_savings_envelope_families(
1575 case: &TokenSavingsFixtureCase,
1576) -> Vec<TokenSavingsEnvelopeFamily> {
1577 case.tagpath_families
1578 .iter()
1579 .map(|family| {
1580 let key = format!("{}:{}:{}", case.surface, case.name, family.canonical);
1581 TokenSavingsEnvelopeFamily {
1582 handle: stable_handle("tfam", &key),
1583 tag_alias: family.canonical.replace('_', "/"),
1584 count: family.count,
1585 expand: token_savings_expand_command(&case.surface, &family.canonical),
1586 }
1587 })
1588 .collect()
1589}
1590
1591fn token_savings_context_pack_raw_bytes(inputs: &TokenSavingsContextPackInputs) -> Result<usize> {
1592 Ok(serde_json::to_vec(inputs)?.len())
1593}
1594
1595fn token_savings_session_review_raw_bytes(
1596 inputs: &TokenSavingsSessionReviewInputs,
1597) -> Result<usize> {
1598 Ok(serde_json::to_vec(inputs)?.len())
1599}
1600
1601fn token_savings_source_read_raw_bytes(inputs: &TokenSavingsSourceReadInputs) -> Result<usize> {
1602 Ok(serde_json::to_vec(&inputs.reads)?.len())
1603}
1604
1605fn token_savings_session_review_envelope(
1606 case: &TokenSavingsFixtureCase,
1607 inputs: &TokenSavingsSessionReviewInputs,
1608) -> Vec<TokenSavingsSessionReviewEnvelope<'static>> {
1609 let mut rows = vec![
1610 TokenSavingsSessionReviewEnvelope {
1611 section: "prompt_targets",
1612 handle: stable_handle("tsr", &format!("{}:prompt_targets", case.name)),
1613 count: inputs.prompt_targets.len(),
1614 expand: "tsift session-review <target> --json".to_string(),
1615 },
1616 TokenSavingsSessionReviewEnvelope {
1617 section: "sessions",
1618 handle: stable_handle("tsr", &format!("{}:sessions", case.name)),
1619 count: inputs.sessions.len(),
1620 expand: "tsift session-review <target> --json".to_string(),
1621 },
1622 TokenSavingsSessionReviewEnvelope {
1623 section: "commands",
1624 handle: stable_handle("tsr", &format!("{}:commands", case.name)),
1625 count: inputs.commands.len(),
1626 expand: "tsift session-digest --source auto --input <transcript> --json".to_string(),
1627 },
1628 TokenSavingsSessionReviewEnvelope {
1629 section: "files",
1630 handle: stable_handle("tsr", &format!("{}:files", case.name)),
1631 count: inputs.touched_files.len(),
1632 expand: "tsift session-review <target> --json".to_string(),
1633 },
1634 TokenSavingsSessionReviewEnvelope {
1635 section: "symbols",
1636 handle: stable_handle("tsr", &format!("{}:symbols", case.name)),
1637 count: inputs.touched_symbols.len(),
1638 expand: "tsift --envelope search <symbol> --budget normal".to_string(),
1639 },
1640 TokenSavingsSessionReviewEnvelope {
1641 section: "failures",
1642 handle: stable_handle("tsr", &format!("{}:failures", case.name)),
1643 count: inputs.failures.len(),
1644 expand: "tsift session-review <target> --json".to_string(),
1645 },
1646 TokenSavingsSessionReviewEnvelope {
1647 section: "guardrails",
1648 handle: stable_handle("tsr", &format!("{}:guardrails", case.name)),
1649 count: inputs.guardrails.len(),
1650 expand: "tsift session-cost --input <transcript> --json".to_string(),
1651 },
1652 TokenSavingsSessionReviewEnvelope {
1653 section: "largest_turns",
1654 handle: stable_handle("tsr", &format!("{}:largest_turns", case.name)),
1655 count: inputs.largest_turns.len(),
1656 expand: "tsift session-cost --input <transcript> --json".to_string(),
1657 },
1658 ];
1659 rows.retain(|row| row.count > 0);
1660 rows
1661}
1662
1663fn token_savings_context_pack_envelope(
1664 case: &TokenSavingsFixtureCase,
1665 inputs: &TokenSavingsContextPackInputs,
1666) -> Vec<TokenSavingsContextPackEnvelope<'static>> {
1667 let mut rows = vec![
1668 TokenSavingsContextPackEnvelope {
1669 section: "next_context",
1670 handle: stable_handle("tcp", &format!("{}:next_context", case.name)),
1671 count: inputs.next_context.len(),
1672 expand: "tsift session-review --next-context <target> --json".to_string(),
1673 },
1674 TokenSavingsContextPackEnvelope {
1675 section: "diff",
1676 handle: stable_handle("tcp", &format!("{}:diff", case.name)),
1677 count: inputs.diff.len(),
1678 expand: "tsift diff-digest . --json".to_string(),
1679 },
1680 TokenSavingsContextPackEnvelope {
1681 section: "test",
1682 handle: stable_handle("tcp", &format!("{}:test", case.name)),
1683 count: inputs.test.len(),
1684 expand: "tsift test-digest --path . < test.log".to_string(),
1685 },
1686 TokenSavingsContextPackEnvelope {
1687 section: "log",
1688 handle: stable_handle("tcp", &format!("{}:log", case.name)),
1689 count: inputs.log.len(),
1690 expand: "tsift log-digest --path . < build.log".to_string(),
1691 },
1692 ];
1693 rows.retain(|row| row.count > 0);
1694 rows
1695}
1696
1697fn token_savings_source_read_envelope(
1698 case: &TokenSavingsFixtureCase,
1699 inputs: &TokenSavingsSourceReadInputs,
1700) -> Result<Vec<TokenSavingsSourceReadEnvelope>> {
1701 inputs
1702 .reads
1703 .iter()
1704 .map(|read| {
1705 if read.envelope_lines == 0 {
1706 bail!(
1707 "source-read fixture {} has an empty envelope window for {}",
1708 case.name,
1709 read.command
1710 );
1711 }
1712 let envelope_end = read
1713 .envelope_start
1714 .saturating_add(read.envelope_lines)
1715 .saturating_sub(1);
1716 for anchor in &read.required_line_anchors {
1717 if *anchor < read.envelope_start || *anchor > envelope_end {
1718 bail!(
1719 "source-read fixture {} hides required line anchor {} for {} outside {}-{}",
1720 case.name,
1721 anchor,
1722 read.command,
1723 read.envelope_start,
1724 envelope_end
1725 );
1726 }
1727 }
1728 Ok(TokenSavingsSourceReadEnvelope {
1729 handle: stable_handle("tsrc", &format!("{}:{}", case.name, read.command)),
1730 file: read.file.clone(),
1731 start: read.envelope_start,
1732 lines: read.envelope_lines,
1733 required_line_anchors: read.required_line_anchors.clone(),
1734 expand: format!(
1735 "tsift --envelope source-read {} --start {} --lines {} --budget normal",
1736 shell_quote(&read.file),
1737 read.envelope_start,
1738 read.envelope_lines
1739 ),
1740 })
1741 })
1742 .collect()
1743}
1744
1745fn build_token_savings_report(fixture: &TokenSavingsFixture) -> Result<TokenSavingsReport> {
1746 let mut cases = Vec::new();
1747 let mut total_raw_bytes = 0;
1748 let mut total_envelope_bytes = 0;
1749
1750 for case in &fixture.cases {
1751 let mut raw_bytes = serde_json::to_vec(&case.raw_symbols)?.len();
1752 let envelope = token_savings_envelope_families(case);
1753 let mut envelope_bytes = serde_json::to_vec(&envelope)?.len();
1754 if let Some(inputs) = &case.session_review_inputs {
1755 raw_bytes += token_savings_session_review_raw_bytes(inputs)?;
1756 envelope_bytes +=
1757 serde_json::to_vec(&token_savings_session_review_envelope(case, inputs))?.len();
1758 }
1759 if let Some(inputs) = &case.context_pack_inputs {
1760 raw_bytes += token_savings_context_pack_raw_bytes(inputs)?;
1761 envelope_bytes +=
1762 serde_json::to_vec(&token_savings_context_pack_envelope(case, inputs))?.len();
1763 }
1764 if let Some(inputs) = &case.source_read_inputs {
1765 raw_bytes += token_savings_source_read_raw_bytes(inputs)?;
1766 envelope_bytes +=
1767 serde_json::to_vec(&token_savings_source_read_envelope(case, inputs)?)?.len();
1768 }
1769 let byte_delta = raw_bytes.saturating_sub(envelope_bytes);
1770 let raw_estimated_tokens = estimated_tokens_from_bytes(raw_bytes);
1771 let envelope_estimated_tokens = estimated_tokens_from_bytes(envelope_bytes);
1772 let estimated_token_delta = raw_estimated_tokens.saturating_sub(envelope_estimated_tokens);
1773 let savings_percent = savings_percent(raw_bytes, envelope_bytes);
1774 let pass = savings_percent >= case.minimum_savings_percent;
1775
1776 total_raw_bytes += raw_bytes;
1777 total_envelope_bytes += envelope_bytes;
1778 cases.push(TokenSavingsCaseReport {
1779 name: case.name.clone(),
1780 surface: case.surface.clone(),
1781 raw_symbol_count: case.raw_symbols.len(),
1782 family_count: case.tagpath_families.len(),
1783 raw_bytes,
1784 envelope_bytes,
1785 byte_delta,
1786 raw_estimated_tokens,
1787 envelope_estimated_tokens,
1788 estimated_token_delta,
1789 savings_percent,
1790 minimum_savings_percent: case.minimum_savings_percent,
1791 status: if pass { "pass" } else { "fail" }.to_string(),
1792 });
1793 }
1794
1795 let total_byte_delta = total_raw_bytes.saturating_sub(total_envelope_bytes);
1796 let total_raw_estimated_tokens = estimated_tokens_from_bytes(total_raw_bytes);
1797 let total_envelope_estimated_tokens = estimated_tokens_from_bytes(total_envelope_bytes);
1798 let total_estimated_token_delta =
1799 total_raw_estimated_tokens.saturating_sub(total_envelope_estimated_tokens);
1800 let pass = cases.iter().all(|case| case.status == "pass");
1801
1802 Ok(TokenSavingsReport {
1803 schema_version: fixture.schema_version,
1804 token_estimate: fixture.token_estimate.clone(),
1805 pass,
1806 totals: TokenSavingsTotals {
1807 cases: cases.len(),
1808 raw_bytes: total_raw_bytes,
1809 envelope_bytes: total_envelope_bytes,
1810 byte_delta: total_byte_delta,
1811 raw_estimated_tokens: total_raw_estimated_tokens,
1812 envelope_estimated_tokens: total_envelope_estimated_tokens,
1813 estimated_token_delta: total_estimated_token_delta,
1814 savings_percent: savings_percent(total_raw_bytes, total_envelope_bytes),
1815 },
1816 cases,
1817 })
1818}
1819
1820fn print_token_savings_human(report: &TokenSavingsReport) {
1821 println!(
1822 "surface\tcase\traw_bytes\tenvelope_bytes\tbyte_delta\traw_tokens\tenvelope_tokens\ttoken_delta\tsavings_percent\tminimum_percent\tstatus"
1823 );
1824 for case in &report.cases {
1825 println!(
1826 "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{:.1}\t{:.1}\t{}",
1827 case.surface,
1828 case.name,
1829 case.raw_bytes,
1830 case.envelope_bytes,
1831 case.byte_delta,
1832 case.raw_estimated_tokens,
1833 case.envelope_estimated_tokens,
1834 case.estimated_token_delta,
1835 case.savings_percent,
1836 case.minimum_savings_percent,
1837 case.status
1838 );
1839 }
1840 println!(
1841 "total\tall\t{}\t{}\t{}\t{}\t{}\t{}\t{:.1}\t-\t{}",
1842 report.totals.raw_bytes,
1843 report.totals.envelope_bytes,
1844 report.totals.byte_delta,
1845 report.totals.raw_estimated_tokens,
1846 report.totals.envelope_estimated_tokens,
1847 report.totals.estimated_token_delta,
1848 report.totals.savings_percent,
1849 if report.pass { "pass" } else { "fail" }
1850 );
1851}
1852
1853fn cmd_token_savings(fixture_path: &Path, fail_under: bool, format: OutputFormat) -> Result<()> {
1854 let fixture_body = fs::read_to_string(fixture_path)
1855 .with_context(|| format!("reading token-savings fixture: {}", fixture_path.display()))?;
1856 let fixture: TokenSavingsFixture = serde_json::from_str(&fixture_body)
1857 .with_context(|| format!("parsing token-savings fixture: {}", fixture_path.display()))?;
1858 let report = build_token_savings_report(&fixture)?;
1859
1860 if format.json_output {
1861 print_json_or_envelope(
1862 &report,
1863 &format,
1864 "token-savings",
1865 "report",
1866 ToolEnvelopeSummary {
1867 text: "token-savings report".to_string(),
1868 metrics: vec![
1869 envelope_metric("cases", report.totals.cases),
1870 envelope_metric("raw_tokens", report.totals.raw_estimated_tokens),
1871 envelope_metric("envelope_tokens", report.totals.envelope_estimated_tokens),
1872 envelope_metric("token_delta", report.totals.estimated_token_delta),
1873 envelope_metric(
1874 "savings_percent",
1875 format!("{:.1}", report.totals.savings_percent),
1876 ),
1877 ],
1878 },
1879 false,
1880 vec![],
1881 )?;
1882 } else {
1883 print_token_savings_human(&report);
1884 }
1885
1886 if fail_under && !report.pass {
1887 bail!("token-savings threshold failed");
1888 }
1889 Ok(())
1890}
1891
1892fn persist_transcript_artifact(
1893 root: &Path,
1894 prefix: &str,
1895 suffix: &str,
1896 key: &str,
1897 body: &str,
1898 expand: String,
1899) -> Result<TranscriptArtifactRef> {
1900 let handle = stable_handle(prefix, key);
1901 let artifacts_dir = root.join(".tsift/artifacts");
1902 fs::create_dir_all(&artifacts_dir).with_context(|| {
1903 format!(
1904 "creating transcript artifacts dir: {}",
1905 artifacts_dir.display()
1906 )
1907 })?;
1908 let file_name = format!("{handle}.{suffix}");
1909 let artifact_path = artifacts_dir.join(file_name);
1910 fs::write(&artifact_path, body)
1911 .with_context(|| format!("writing transcript artifact: {}", artifact_path.display()))?;
1912 let rel_path = relativize_pathbuf(&artifact_path, root);
1913 Ok(TranscriptArtifactRef {
1914 handle,
1915 path: rel_path.display().to_string(),
1916 bytes: body.len(),
1917 lines: body.lines().count(),
1918 expand,
1919 })
1920}
1921
/// Maps a long JSON key to its terse abbreviation.
///
/// Keys without an abbreviation are returned unchanged. The table holds the
/// same pairs, in the same order, as the file-level `TERSE_PAIRS` constant;
/// the two must be kept in sync.
fn terse_key(key: &str) -> &str {
    // (long key, abbreviation); the first entry whose long key matches wins.
    const ABBREVIATIONS: &[(&str, &str)] = &[
        ("name", "n"),
        ("kind", "k"),
        ("file", "f"),
        ("line", "l"),
        ("path", "p"),
        ("from", "fr"),
        ("type", "ty"),
        ("text", "tx"),
        ("new", "nw"),
        ("run", "r"),
        ("use", "u"),
        ("score", "sc"),
        ("language", "la"),
        ("status", "st"),
        ("state", "stt"),
        ("error", "err"),
        ("errors", "ers"),
        ("hops", "hp"),
        ("tags", "tg"),
        ("model", "ml"),
        ("skill", "sk"),
        ("count", "ct"),
        ("total", "tot"),
        ("column", "col"),
        ("description", "dsc"),
        ("end_line", "el"),
        ("signature", "sig"),
        ("parent_module", "pm"),
        ("visibility", "vis"),
        ("match_type", "mt"),
        ("caller_file", "cf"),
        ("caller_name", "cn"),
        ("caller_line", "cl"),
        ("callee_name", "en"),
        ("call_site_line", "csl"),
        ("members", "m"),
        ("refs", "refs"),
        ("role", "rl"),
        ("peer", "pr"),
        ("modularity", "q"),
        ("modularity_contribution", "mc"),
        ("iterations", "it"),
        ("node_count", "nc"),
        ("edge_count", "ec"),
        ("community_count", "cc"),
        ("communities", "cms"),
        ("community", "cm"),
        ("community_diagnostics", "cd"),
        ("cache_hit", "cah"),
        ("tagpath_state", "tps"),
        ("tagpath_stale_reason", "tsr"),
        ("annotated_community_count", "acc"),
        ("annotated_member_count", "amc"),
        ("ambiguous_member_count", "ambc"),
        ("ambiguous_members", "amb"),
        ("candidate_count", "cand"),
        ("tagpath_candidate_count", "tcand"),
        ("evidence", "ev"),
        ("chosen_file", "chf"),
        ("symbol", "s"),
        ("symbols", "sy"),
        ("definitions", "df"),
        ("callers", "crs"),
        ("callees", "ces"),
        ("total_tracked", "tt"),
        ("modified", "md"),
        ("deleted", "dl"),
        ("unchanged", "uc"),
        ("changes", "ch"),
        ("prune_stats", "ps"),
        ("hits", "h"),
        ("rank", "rk"),
        ("snippet", "sn"),
        ("confidence", "co"),
        ("index", "ix"),
        ("summaries", "sms"),
        ("recommendations", "rec"),
        ("total_files", "tf"),
        ("stale_files", "sf"),
        ("last_indexed_secs_ago", "age"),
        ("cached_files", "caf"),
        ("total_indexed_files", "tif"),
        ("coverage_pct", "cov"),
        ("symbol_name", "syn"),
        ("file_path", "fp"),
        ("content_hash", "hsh"),
        ("summary", "sum"),
        ("tool", "tl"),
        ("view", "vw"),
        ("truncated", "tr"),
        ("follow_up", "fu"),
        ("report", "rp"),
        ("metrics", "ms"),
        ("label", "lb"),
        ("value", "v"),
        ("command", "cmd"),
        ("exit_code", "xc"),
        ("success", "ok"),
        ("artifact", "art"),
        ("digest", "dg"),
        ("bytes", "bt"),
        ("lines", "lns"),
        ("expand", "xp"),
        ("entities", "ent"),
        ("relationships", "rel"),
        ("concept_labels", "cls"),
        ("extracted_at", "at"),
        ("tokens_input", "ti"),
        ("tokens_output", "tout"),
        ("total_summaries", "ts"),
        ("stale_count", "stc"),
        ("total_tokens_input", "tti"),
        ("total_tokens_output", "tto"),
        ("estimated_tokens_saved", "ets"),
        ("files_processed", "fps"),
        ("symbols_extracted", "se"),
        ("skills_dir", "sd"),
        ("healthy", "ok"),
        ("broken", "brk"),
        ("skills", "sks"),
        ("manifest_diffs", "mdf"),
        ("similar_pairs", "sim"),
        ("usage", "usg"),
        ("cleanup", "cln"),
        ("has_skill_md", "hsm"),
        ("is_symlink", "isl"),
        ("issues", "iss"),
        ("invocation_count", "inv"),
        ("reasons", "rsn"),
        ("token_estimate", "te"),
        ("skill_a", "sa"),
        ("skill_b", "sb"),
        ("desc_a", "da"),
        ("desc_b", "db"),
        ("annotations", "ann"),
        ("entity", "ety"),
        ("suggestion", "sug"),
        ("columns", "cols"),
        ("row_count", "rc"),
        ("notnull", "nn"),
        ("default_value", "dv"),
        ("replace_all", "ra"),
    ];

    ABBREVIATIONS
        .iter()
        .find(|(long, _)| *long == key)
        .map_or(key, |&(_, short)| short)
}
2069
2070fn terse_transform(val: serde_json::Value) -> serde_json::Value {
2071 match val {
2072 serde_json::Value::Object(map) => {
2073 let mut new_map = serde_json::Map::new();
2074 for (k, v) in map {
2075 new_map.insert(terse_key(&k).to_string(), terse_transform(v));
2076 }
2077 serde_json::Value::Object(new_map)
2078 }
2079 serde_json::Value::Array(arr) => {
2080 serde_json::Value::Array(arr.into_iter().map(terse_transform).collect())
2081 }
2082 other => other,
2083 }
2084}
2085
2086fn terse_schema_for(val: &serde_json::Value) -> serde_json::Value {
2087 let mut keys = HashSet::new();
2088 collect_terse_keys(val, &mut keys);
2089 let mut schema = serde_json::Map::new();
2090 for (long, short) in TERSE_PAIRS {
2091 if keys.contains(*short) {
2092 schema.insert(
2093 short.to_string(),
2094 serde_json::Value::String(long.to_string()),
2095 );
2096 }
2097 }
2098 serde_json::Value::Object(schema)
2099}
2100
2101fn collect_terse_keys(val: &serde_json::Value, keys: &mut HashSet<String>) {
2102 match val {
2103 serde_json::Value::Object(map) => {
2104 for (k, v) in map {
2105 keys.insert(k.clone());
2106 collect_terse_keys(v, keys);
2107 }
2108 }
2109 serde_json::Value::Array(arr) => {
2110 for v in arr {
2111 collect_terse_keys(v, keys);
2112 }
2113 }
2114 _ => {}
2115 }
2116}
2117
2118fn schema_transform(val: serde_json::Value) -> serde_json::Value {
2119 match val {
2120 serde_json::Value::Array(arr) if arr.len() >= 2 => {
2121 if let Some(cols) = homogeneous_keys(&arr) {
2122 let rows: Vec<serde_json::Value> = arr
2123 .into_iter()
2124 .map(|item| {
2125 if let serde_json::Value::Object(map) = item {
2126 let vals: Vec<serde_json::Value> = cols
2127 .iter()
2128 .map(|c| map.get(c).cloned().unwrap_or(serde_json::Value::Null))
2129 .collect();
2130 serde_json::Value::Array(vals)
2131 } else {
2132 item
2133 }
2134 })
2135 .collect();
2136 let col_vals: Vec<serde_json::Value> =
2137 cols.into_iter().map(serde_json::Value::String).collect();
2138 serde_json::json!({"_c": col_vals, "_r": rows})
2139 } else {
2140 serde_json::Value::Array(arr.into_iter().map(schema_transform).collect())
2141 }
2142 }
2143 serde_json::Value::Array(arr) => {
2144 serde_json::Value::Array(arr.into_iter().map(schema_transform).collect())
2145 }
2146 serde_json::Value::Object(map) => {
2147 let new_map: serde_json::Map<String, serde_json::Value> = map
2148 .into_iter()
2149 .map(|(k, v)| (k, schema_transform(v)))
2150 .collect();
2151 serde_json::Value::Object(new_map)
2152 }
2153 other => other,
2154 }
2155}
2156
2157fn homogeneous_keys(arr: &[serde_json::Value]) -> Option<Vec<String>> {
2158 let first = arr.first()?.as_object()?;
2159 let keys: Vec<String> = first.keys().cloned().collect();
2160 for item in &arr[1..] {
2161 let obj = item.as_object()?;
2162 if obj.len() != keys.len() {
2163 return None;
2164 }
2165 for k in &keys {
2166 if !obj.contains_key(k) {
2167 return None;
2168 }
2169 }
2170 }
2171 Some(keys)
2172}
2173
/// `(long key, abbreviation)` pairs used by `terse_schema_for` to map
/// abbreviations back to long names.
///
/// The pairs mirror the arms of `terse_key`, in the same order; the two must
/// be kept in sync. Order matters: `success` and `healthy` both abbreviate to
/// `ok`, and `terse_schema_for` reports the long name of the later pair.
const TERSE_PAIRS: &[(&str, &str)] = &[
    ("name", "n"),
    ("kind", "k"),
    ("file", "f"),
    ("line", "l"),
    ("path", "p"),
    ("from", "fr"),
    ("type", "ty"),
    ("text", "tx"),
    ("new", "nw"),
    ("run", "r"),
    ("use", "u"),
    ("score", "sc"),
    ("language", "la"),
    ("status", "st"),
    ("state", "stt"),
    ("error", "err"),
    ("errors", "ers"),
    ("hops", "hp"),
    ("tags", "tg"),
    ("model", "ml"),
    ("skill", "sk"),
    ("count", "ct"),
    ("total", "tot"),
    ("column", "col"),
    ("description", "dsc"),
    ("end_line", "el"),
    ("signature", "sig"),
    ("parent_module", "pm"),
    ("visibility", "vis"),
    ("match_type", "mt"),
    ("caller_file", "cf"),
    ("caller_name", "cn"),
    ("caller_line", "cl"),
    ("callee_name", "en"),
    ("call_site_line", "csl"),
    ("members", "m"),
    ("refs", "refs"),
    ("role", "rl"),
    ("peer", "pr"),
    ("modularity", "q"),
    ("modularity_contribution", "mc"),
    ("iterations", "it"),
    ("node_count", "nc"),
    ("edge_count", "ec"),
    ("community_count", "cc"),
    ("communities", "cms"),
    ("community", "cm"),
    ("community_diagnostics", "cd"),
    ("cache_hit", "cah"),
    ("tagpath_state", "tps"),
    ("tagpath_stale_reason", "tsr"),
    ("annotated_community_count", "acc"),
    ("annotated_member_count", "amc"),
    ("ambiguous_member_count", "ambc"),
    ("ambiguous_members", "amb"),
    ("candidate_count", "cand"),
    ("tagpath_candidate_count", "tcand"),
    ("evidence", "ev"),
    ("chosen_file", "chf"),
    ("symbol", "s"),
    ("symbols", "sy"),
    ("definitions", "df"),
    ("callers", "crs"),
    ("callees", "ces"),
    ("total_tracked", "tt"),
    ("modified", "md"),
    ("deleted", "dl"),
    ("unchanged", "uc"),
    ("changes", "ch"),
    ("prune_stats", "ps"),
    ("hits", "h"),
    ("rank", "rk"),
    ("snippet", "sn"),
    ("confidence", "co"),
    ("index", "ix"),
    ("summaries", "sms"),
    ("recommendations", "rec"),
    ("total_files", "tf"),
    ("stale_files", "sf"),
    ("last_indexed_secs_ago", "age"),
    ("cached_files", "caf"),
    ("total_indexed_files", "tif"),
    ("coverage_pct", "cov"),
    ("symbol_name", "syn"),
    ("file_path", "fp"),
    ("content_hash", "hsh"),
    ("summary", "sum"),
    ("tool", "tl"),
    ("view", "vw"),
    ("truncated", "tr"),
    ("follow_up", "fu"),
    ("report", "rp"),
    ("metrics", "ms"),
    ("label", "lb"),
    ("value", "v"),
    ("command", "cmd"),
    ("exit_code", "xc"),
    ("success", "ok"),
    ("artifact", "art"),
    ("digest", "dg"),
    ("bytes", "bt"),
    ("lines", "lns"),
    ("expand", "xp"),
    ("entities", "ent"),
    ("relationships", "rel"),
    ("concept_labels", "cls"),
    ("extracted_at", "at"),
    ("tokens_input", "ti"),
    ("tokens_output", "tout"),
    ("total_summaries", "ts"),
    ("stale_count", "stc"),
    ("total_tokens_input", "tti"),
    ("total_tokens_output", "tto"),
    ("estimated_tokens_saved", "ets"),
    ("files_processed", "fps"),
    ("symbols_extracted", "se"),
    ("skills_dir", "sd"),
    ("healthy", "ok"),
    ("broken", "brk"),
    ("skills", "sks"),
    ("manifest_diffs", "mdf"),
    ("similar_pairs", "sim"),
    ("usage", "usg"),
    ("cleanup", "cln"),
    ("has_skill_md", "hsm"),
    ("is_symlink", "isl"),
    ("issues", "iss"),
    ("invocation_count", "inv"),
    ("reasons", "rsn"),
    ("token_estimate", "te"),
    ("skill_a", "sa"),
    ("skill_b", "sb"),
    ("desc_a", "da"),
    ("desc_b", "db"),
    ("annotations", "ann"),
    ("entity", "ety"),
    ("suggestion", "sug"),
    ("columns", "cols"),
    ("row_count", "rc"),
    ("notnull", "nn"),
    ("default_value", "dv"),
    ("replace_all", "ra"),
];
2318
/// Strips `root` (plus one `/` separator) from the front of `path`.
///
/// Trailing slashes on `root` are ignored. When `path` does not start with
/// `root` followed by `/`, it is returned unchanged.
pub(crate) fn relativize(path: &str, root: &std::path::Path) -> String {
    let root_text = root.to_string_lossy();
    let base = root_text.trim_end_matches('/');

    // Require the separator right after the root so that a sibling such as
    // `/a/bc` is not treated as living under `/a/b`.
    let relative = path
        .strip_prefix(base)
        .and_then(|rest| rest.strip_prefix('/'));

    relative.unwrap_or(path).to_owned()
}
2324
2325fn transcript_artifact_root(path: &Path) -> Result<PathBuf> {
2326 let canonical = path
2327 .canonicalize()
2328 .with_context(|| format!("canonicalizing {}", path.display()))?;
2329 let start = if canonical.is_dir() {
2330 canonical.clone()
2331 } else {
2332 canonical
2333 .parent()
2334 .map(Path::to_path_buf)
2335 .unwrap_or_else(|| canonical.clone())
2336 };
2337
2338 for ancestor in start.ancestors() {
2339 if ancestor.join(".git").exists() || ancestor.join(".gitmodules").is_file() {
2340 return Ok(ancestor.to_path_buf());
2341 }
2342 }
2343
2344 Ok(start)
2345}
2346
/// Returns `path` relative to `root`, or `path` itself when it does not lie
/// under `root`.
fn relativize_pathbuf(path: &std::path::Path, root: &std::path::Path) -> PathBuf {
    match path.strip_prefix(root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}
2352
2353pub(crate) fn relativize_edges(edges: &mut [index::StoredEdge], root: &std::path::Path) {
2354 for edge in edges {
2355 edge.caller_file = relativize(&edge.caller_file, root);
2356 }
2357}
2358
2359pub(crate) fn relativize_symbols(symbols: &mut [index::StoredSymbol], root: &std::path::Path) {
2360 for sym in symbols {
2361 sym.file = relativize(&sym.file, root);
2362 }
2363}
2364
2365pub(crate) fn relativize_symbol_hits(hits: &mut [index::SymbolHit], root: &std::path::Path) {
2366 for hit in hits {
2367 hit.file = relativize(&hit.file, root);
2368 }
2369}
2370
/// Version tag for the community-detection cache format.
// NOTE(review): presumably compared against `CommunityDetectionCacheEntry::version`
// to reject entries written by an older format; confirm at the use site.
const COMMUNITY_DETECTION_CACHE_VERSION: &str = "community-detection-cache-v1";

/// Process-wide map of community-detection results keyed by a string cache
/// key. It is created empty on first access through
/// `community_detection_cache` and guarded by a mutex.
static COMMUNITY_DETECTION_CACHE: OnceLock<Mutex<BTreeMap<String, graph::CommunityResult>>> =
    OnceLock::new();
2375
/// Serializable diagnostics carried next to a community-detection result in
/// `CommunityDetectionReport`.
#[derive(Debug, Clone, Serialize)]
struct CommunityDetectionDiagnostics {
    cache_hit: bool,
    edge_count: usize,
    iterations: usize,
    tagpath_state: String,
    tagpath_readiness: GraphEffectivenessReadiness,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tagpath_stale_reason: Option<String>,
    annotated_community_count: usize,
    annotated_member_count: usize,
    ambiguous_member_count: usize,
    /// Omitted from the serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    ambiguous_members: Vec<CommunityMemberAmbiguityDiagnostic>,
}
2391
/// A community-detection result bundled with its diagnostics.
#[derive(Debug, Clone)]
pub(crate) struct CommunityDetectionReport {
    result: graph::CommunityResult,
    diagnostics: CommunityDetectionDiagnostics,
}
2397
/// The tagpath contribution to a community detection cache key.
#[derive(Debug, Clone)]
struct CommunityTagpathCachePart {
    /// Tagpath index state; the constructors in this file produce `"fresh"`,
    /// `"disabled"`, `"stale"` or `"missing"`.
    state: String,
    /// Staleness reason, set only for the `"stale"` state.
    reason: Option<String>,
    /// String mixed into the community detection cache key.
    key: String,
}
2404
/// On-disk JSON representation of one cached community detection result.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CommunityDetectionCacheEntry {
    /// Cache format version; must equal `COMMUNITY_DETECTION_CACHE_VERSION` to be used.
    version: String,
    /// Cache key the entry was written for; must equal the key being looked up.
    key: String,
    /// The cached detection result.
    result: graph::CommunityResult,
}
2411
2412fn community_detection_cache() -> &'static Mutex<BTreeMap<String, graph::CommunityResult>> {
2413 COMMUNITY_DETECTION_CACHE.get_or_init(|| Mutex::new(BTreeMap::new()))
2414}
2415
2416pub(crate) fn community_tagpath_cache_part_for_loaded(
2417 adapter: &tagpath_adapter::TagpathAdapter,
2418) -> CommunityTagpathCachePart {
2419 let index_path = tagpath::index::index_path(&adapter.project_root);
2420 let index_hash = fs::read(&index_path)
2421 .map(|bytes| hash_bytes_hex(&bytes))
2422 .unwrap_or_else(|err| hash_bytes_hex(format!("fresh-index-unreadable:{err:#}").as_bytes()));
2423 CommunityTagpathCachePart {
2424 state: "fresh".to_string(),
2425 reason: None,
2426 key: format!("fresh:{index_hash}"),
2427 }
2428}
2429
2430pub(crate) fn community_tagpath_cache_part(
2431 root: &std::path::Path,
2432 opts: &TagpathSearchOpts,
2433) -> Result<CommunityTagpathCachePart> {
2434 if opts.no_tagpath {
2435 return Ok(CommunityTagpathCachePart {
2436 state: "disabled".to_string(),
2437 reason: None,
2438 key: "disabled".to_string(),
2439 });
2440 }
2441 match tagpath_adapter::try_load(root) {
2442 tagpath_adapter::LoadResult::Loaded(adapter) => {
2443 Ok(community_tagpath_cache_part_for_loaded(&adapter))
2444 }
2445 tagpath_adapter::LoadResult::Stale { reason, .. } => {
2446 if opts.strict {
2447 anyhow::bail!(
2448 "tagpath index is stale (reason={reason}); rerun `tagpath index --update` or drop --tagpath-strict"
2449 );
2450 }
2451 Ok(CommunityTagpathCachePart {
2452 state: "stale".to_string(),
2453 key: format!("stale:{reason}"),
2454 reason: Some(reason),
2455 })
2456 }
2457 tagpath_adapter::LoadResult::Missing => Ok(CommunityTagpathCachePart {
2458 state: "missing".to_string(),
2459 reason: None,
2460 key: "missing".to_string(),
2461 }),
2462 }
2463}
2464
2465fn graph_effectiveness_ready(reason: impl Into<String>) -> GraphEffectivenessReadiness {
2466 GraphEffectivenessReadiness {
2467 status: "ready".to_string(),
2468 fail_closed: false,
2469 reason: reason.into(),
2470 diagnostics: Vec::new(),
2471 next_commands: Vec::new(),
2472 }
2473}
2474
2475fn graph_effectiveness_blocked(
2476 reason: impl Into<String>,
2477 diagnostics: Vec<String>,
2478 next_commands: Vec<String>,
2479) -> GraphEffectivenessReadiness {
2480 GraphEffectivenessReadiness {
2481 status: "blocked".to_string(),
2482 fail_closed: true,
2483 reason: reason.into(),
2484 diagnostics,
2485 next_commands,
2486 }
2487}
2488
2489fn tagpath_index_update_command(root: &std::path::Path) -> String {
2490 format!(
2491 "cd {} && tagpath index --update",
2492 shell_quote(root.to_string_lossy().as_ref())
2493 )
2494}
2495
2496fn graph_tagpath_readiness(
2497 root: &std::path::Path,
2498 tagpath: &CommunityTagpathCachePart,
2499) -> GraphEffectivenessReadiness {
2500 match tagpath.state.as_str() {
2501 "fresh" => graph_effectiveness_ready("tagpath_handles_available"),
2502 "disabled" => GraphEffectivenessReadiness {
2503 status: "disabled".to_string(),
2504 fail_closed: false,
2505 reason: "tagpath_lookup_disabled".to_string(),
2506 diagnostics: Vec::new(),
2507 next_commands: Vec::new(),
2508 },
2509 "stale" => graph_effectiveness_blocked(
2510 "tagpath_state_stale",
2511 vec![format!(
2512 "tagpath_state=stale{}: community members may miss stable tagpath_handle citations; rebuild the tagpath index before relying on handle coverage",
2513 tagpath
2514 .reason
2515 .as_ref()
2516 .map(|reason| format!(" (reason={reason})"))
2517 .unwrap_or_default()
2518 )],
2519 vec![tagpath_index_update_command(root)],
2520 ),
2521 "missing" => graph_effectiveness_blocked(
2522 "tagpath_state_missing",
2523 vec![format!(
2524 "tagpath_state=missing: community members cannot emit stable tagpath_handle citations; create .naming.toml if needed, then run tagpath indexing from {}",
2525 root.display()
2526 )],
2527 vec![tagpath_index_update_command(root)],
2528 ),
2529 state => graph_effectiveness_blocked(
2530 format!("tagpath_state_{state}"),
2531 vec![format!(
2532 "tagpath_state={state}: community tagpath_handle readiness is unknown"
2533 )],
2534 vec![tagpath_index_update_command(root)],
2535 ),
2536 }
2537}
2538
2539fn community_graph_watermark(db: &index::IndexDb) -> Result<String> {
2540 let source_snapshot = db.source_snapshot_parts()?;
2541 let edge_rows = db.edge_count()?;
2542 let symbol_rows = db.symbol_count()?;
2543 content_hash(&serde_json::json!({
2544 "source_snapshot": source_snapshot,
2545 "edge_rows": edge_rows,
2546 "symbol_rows": symbol_rows,
2547 }))
2548}
2549
2550fn community_detection_cache_key(
2551 root: &std::path::Path,
2552 scope: Option<&str>,
2553 graph_watermark: &str,
2554 tagpath: &CommunityTagpathCachePart,
2555) -> Result<String> {
2556 content_hash(&serde_json::json!({
2557 "version": COMMUNITY_DETECTION_CACHE_VERSION,
2558 "root": root.display().to_string(),
2559 "scope": scope.unwrap_or("root"),
2560 "graph_watermark": graph_watermark,
2561 "tagpath": tagpath.key,
2562 }))
2563}
2564
/// Returns the on-disk location of the cache entry for `key`:
/// `<root>/.tsift/community-cache/<scope>/<key>.json`, with `"root"` standing
/// in for a missing scope.
///
/// Only the normal components of `scope` are appended. `Path::join` replaces
/// the whole path when given an absolute path and keeps `..` segments, so a
/// scope such as `/etc` or `../../x` would otherwise place cache files
/// outside the cache directory. Scopes made of plain relative components map
/// to the same location as before.
fn community_detection_cache_path(
    root: &std::path::Path,
    scope: Option<&str>,
    key: &str,
) -> PathBuf {
    let mut path = root.join(".tsift/community-cache");
    let scope = scope.unwrap_or("root");
    for component in std::path::Path::new(scope).components() {
        // Skip root, prefix, `.` and `..` components so the result stays
        // inside the cache directory.
        if let std::path::Component::Normal(part) = component {
            path.push(part);
        }
    }
    path.push(format!("{key}.json"));
    path
}
2574
2575fn read_community_detection_cache(
2576 root: &std::path::Path,
2577 scope: Option<&str>,
2578 key: &str,
2579) -> Option<graph::CommunityResult> {
2580 let path = community_detection_cache_path(root, scope, key);
2581 let bytes = fs::read(path).ok()?;
2582 let entry: CommunityDetectionCacheEntry = serde_json::from_slice(&bytes).ok()?;
2583 if entry.version == COMMUNITY_DETECTION_CACHE_VERSION && entry.key == key {
2584 Some(entry.result)
2585 } else {
2586 None
2587 }
2588}
2589
2590fn write_community_detection_cache(
2591 root: &std::path::Path,
2592 scope: Option<&str>,
2593 key: &str,
2594 result: &graph::CommunityResult,
2595) {
2596 let path = community_detection_cache_path(root, scope, key);
2597 let Some(parent) = path.parent() else {
2598 return;
2599 };
2600 if fs::create_dir_all(parent).is_err() {
2601 return;
2602 }
2603 let entry = CommunityDetectionCacheEntry {
2604 version: COMMUNITY_DETECTION_CACHE_VERSION.to_string(),
2605 key: key.to_string(),
2606 result: result.clone(),
2607 };
2608 if let Ok(bytes) = serde_json::to_vec(&entry) {
2609 let _ = fs::write(path, bytes);
2610 }
2611}
2612
2613fn community_detection_diagnostics(
2614 cache_hit: bool,
2615 result: &graph::CommunityResult,
2616 tagpath: &CommunityTagpathCachePart,
2617 tagpath_root: &std::path::Path,
2618) -> CommunityDetectionDiagnostics {
2619 CommunityDetectionDiagnostics {
2620 cache_hit,
2621 edge_count: result.edge_count,
2622 iterations: result.iterations,
2623 tagpath_state: tagpath.state.clone(),
2624 tagpath_readiness: graph_tagpath_readiness(tagpath_root, tagpath),
2625 tagpath_stale_reason: tagpath.reason.clone(),
2626 annotated_community_count: 0,
2627 annotated_member_count: 0,
2628 ambiguous_member_count: 0,
2629 ambiguous_members: Vec::new(),
2630 }
2631}
2632
2633pub(crate) fn update_community_annotation_diagnostics(
2634 diagnostics: &mut CommunityDetectionDiagnostics,
2635 communities: &[graph::Community],
2636 annotation: Option<&TagpathAnnotationDiagnostic>,
2637) {
2638 diagnostics.annotated_community_count = communities
2639 .iter()
2640 .filter(|community| {
2641 community
2642 .members
2643 .iter()
2644 .any(|member| member.tagpath_handle.is_some())
2645 })
2646 .count();
2647 diagnostics.annotated_member_count = communities
2648 .iter()
2649 .flat_map(|community| community.members.iter())
2650 .filter(|member| member.tagpath_handle.is_some())
2651 .count();
2652 if let Some(annotation) = annotation {
2653 diagnostics.ambiguous_member_count = annotation.ambiguous_members.len();
2654 diagnostics.ambiguous_members = annotation.ambiguous_members.clone();
2655 } else {
2656 diagnostics.ambiguous_member_count = 0;
2657 diagnostics.ambiguous_members.clear();
2658 }
2659}
2660
2661pub(crate) fn detect_communities_cached(
2662 db: &index::IndexDb,
2663 root: &std::path::Path,
2664 scope: Option<&str>,
2665 tagpath: &CommunityTagpathCachePart,
2666 tagpath_root: &std::path::Path,
2667) -> Result<CommunityDetectionReport> {
2668 let graph_watermark = community_graph_watermark(db)?;
2669 let cache_key = community_detection_cache_key(root, scope, &graph_watermark, tagpath)?;
2670
2671 if let Some(result) = community_detection_cache()
2672 .lock()
2673 .ok()
2674 .and_then(|cache| cache.get(&cache_key).cloned())
2675 {
2676 return Ok(CommunityDetectionReport {
2677 diagnostics: community_detection_diagnostics(true, &result, tagpath, tagpath_root),
2678 result,
2679 });
2680 }
2681
2682 if let Some(result) = read_community_detection_cache(root, scope, &cache_key) {
2683 if let Ok(mut cache) = community_detection_cache().lock() {
2684 cache.insert(cache_key.clone(), result.clone());
2685 }
2686 return Ok(CommunityDetectionReport {
2687 diagnostics: community_detection_diagnostics(true, &result, tagpath, tagpath_root),
2688 result,
2689 });
2690 }
2691
2692 let edges = db.all_edges()?;
2693 let result = graph::detect_communities(&edges);
2694 write_community_detection_cache(root, scope, &cache_key, &result);
2695 if let Ok(mut cache) = community_detection_cache().lock() {
2696 cache.insert(cache_key, result.clone());
2697 }
2698 Ok(CommunityDetectionReport {
2699 diagnostics: community_detection_diagnostics(false, &result, tagpath, tagpath_root),
2700 result,
2701 })
2702}
2703
/// Resolves an index file entry to a path: absolute entries are returned
/// as-is, relative entries are joined onto `root`.
fn index_file_abs(file: &str, root: &std::path::Path) -> std::path::PathBuf {
    let candidate = std::path::Path::new(file);
    match candidate.is_absolute() {
        true => candidate.to_path_buf(),
        false => root.join(candidate),
    }
}
2711
/// Produces the lookup key for an index file entry.
///
/// Absolute paths under `root` are made relative to it; every other path is
/// kept as given. Backslashes in the result are replaced with forward slashes.
fn index_file_key(file: &str, root: &std::path::Path) -> String {
    let original = std::path::Path::new(file);
    let shown = match original.strip_prefix(root) {
        Ok(rest) if original.is_absolute() => rest,
        _ => original,
    };
    shown.to_string_lossy().replace('\\', "/")
}
2721
2722fn tagpath_handle_for_index_file(
2723 file: &str,
2724 name: &str,
2725 root: &std::path::Path,
2726 adapter: &tagpath_adapter::TagpathAdapter,
2727) -> Option<String> {
2728 adapter.handle_for_member(&index_file_abs(file, root), name)
2729}
2730
/// A symbol row for which the tagpath adapter returned a handle.
#[derive(Debug, Clone)]
struct TagpathHandleCandidate {
    /// Index file key of the symbol row (see `index_file_key`).
    file: String,
    /// Line of the symbol row.
    line: i64,
    /// Tagpath handle returned for the symbol in that file.
    handle: String,
}
2737
2738fn tagpath_handle_candidates_for_symbol_rows(
2739 name: &str,
2740 syms: &[index::StoredSymbol],
2741 root: &std::path::Path,
2742 adapter: &tagpath_adapter::TagpathAdapter,
2743) -> Vec<TagpathHandleCandidate> {
2744 syms.iter()
2745 .filter_map(|sym| {
2746 let handle = tagpath_handle_for_index_file(&sym.file, name, root, adapter)?;
2747 Some(TagpathHandleCandidate {
2748 file: index_file_key(&sym.file, root),
2749 line: sym.line,
2750 handle,
2751 })
2752 })
2753 .collect()
2754}
2755
2756pub(crate) fn file_communities_from_callers(
2757 db: &index::IndexDb,
2758 root: &std::path::Path,
2759 scope: Option<&str>,
2760 tagpath: &CommunityTagpathCachePart,
2761) -> Result<std::collections::HashMap<String, std::collections::HashSet<usize>>> {
2762 let community_report = detect_communities_cached(db, root, scope, tagpath, root)?;
2763 if community_report.result.communities.is_empty() {
2764 return Ok(std::collections::HashMap::new());
2765 }
2766 let mut community_by_symbol = std::collections::HashMap::new();
2767 for community in community_report.result.communities {
2768 for member in community.members {
2769 community_by_symbol.insert(member.name, community.id);
2770 }
2771 }
2772
2773 let mut communities_by_file: std::collections::HashMap<
2774 String,
2775 std::collections::HashSet<usize>,
2776 > = std::collections::HashMap::new();
2777 for sym in db.all_symbols()? {
2778 if let Some(community_id) = community_by_symbol.get(&sym.name) {
2779 communities_by_file
2780 .entry(index_file_key(&sym.file, root))
2781 .or_default()
2782 .insert(*community_id);
2783 }
2784 }
2785 for edge in db.all_stored_edges()? {
2786 if let Some(community_id) = community_by_symbol.get(&edge.caller_name) {
2787 communities_by_file
2788 .entry(index_file_key(&edge.caller_file, root))
2789 .or_default()
2790 .insert(*community_id);
2791 }
2792 }
2793 Ok(communities_by_file)
2794}
2795
2796pub(crate) fn resolve_tagpath_handle_for_callee_edge(
2797 edge: &index::StoredEdge,
2798 db: &index::IndexDb,
2799 root: &std::path::Path,
2800 adapter: &tagpath_adapter::TagpathAdapter,
2801 communities_by_file: &std::collections::HashMap<String, std::collections::HashSet<usize>>,
2802) -> Option<String> {
2803 let syms = db.symbol_info(&edge.callee_name).ok()?;
2804 let candidates =
2805 tagpath_handle_candidates_for_symbol_rows(&edge.callee_name, &syms, root, adapter);
2806 let caller_file = index_file_key(&edge.caller_file, root);
2807
2808 if let Some(candidate) = candidates
2809 .iter()
2810 .find(|candidate| candidate.file == caller_file)
2811 {
2812 return Some(candidate.handle.clone());
2813 }
2814
2815 if let Some(caller_communities) = communities_by_file.get(&caller_file) {
2816 for candidate in &candidates {
2817 if let Some(candidate_communities) = communities_by_file.get(&candidate.file)
2818 && !caller_communities.is_disjoint(candidate_communities)
2819 {
2820 return Some(candidate.handle.clone());
2821 }
2822 }
2823 }
2824
2825 candidates.first().map(|candidate| candidate.handle.clone())
2826}
2827
2828fn push_bounded_community_member_ref(
2829 refs_by_member: &mut HashMap<(usize, String), Vec<graph::CommunityMemberRef>>,
2830 community_id: usize,
2831 name: &str,
2832 reference: graph::CommunityMemberRef,
2833) {
2834 let refs = refs_by_member
2835 .entry((community_id, name.to_string()))
2836 .or_default();
2837 if refs.iter().any(|existing| {
2838 existing.file == reference.file
2839 && existing.line == reference.line
2840 && existing.role == reference.role
2841 && existing.peer == reference.peer
2842 }) {
2843 return;
2844 }
2845 if refs.len() < 6 {
2846 refs.push(reference);
2847 }
2848}
2849
2850fn choose_symbol_row_by_files<'a>(
2851 syms: &'a [index::StoredSymbol],
2852 files: &BTreeSet<String>,
2853 root: &std::path::Path,
2854) -> Option<(&'a index::StoredSymbol, &'static str)> {
2855 let matches: Vec<&index::StoredSymbol> = syms
2856 .iter()
2857 .filter(|sym| files.contains(&index_file_key(&sym.file, root)))
2858 .collect();
2859 if matches.len() == 1 {
2860 Some((matches[0], "edge_file"))
2861 } else {
2862 None
2863 }
2864}
2865
2866fn choose_tagpath_candidate_by_files<'a>(
2867 candidates: &'a [TagpathHandleCandidate],
2868 files: &BTreeSet<String>,
2869 evidence: &'static str,
2870) -> Option<(&'a TagpathHandleCandidate, &'static str)> {
2871 let matches: Vec<&TagpathHandleCandidate> = candidates
2872 .iter()
2873 .filter(|candidate| files.contains(&candidate.file))
2874 .collect();
2875 if matches.len() == 1 {
2876 Some((matches[0], evidence))
2877 } else {
2878 None
2879 }
2880}
2881
/// Fills in `file`, `line`, `tagpath_handle` and `refs` for every community
/// member, and reports the members whose location was ambiguous.
///
/// Each member's fields are reset first. References come from stored edges
/// whose caller and callee names both belong to the same community. A
/// location is then chosen by trying, in order:
///
/// 1. tagpath candidates (only when `adapter` is given): the sole candidate,
///    else the only candidate in a file the member has edge evidence for,
///    else the only candidate in a file seen anywhere in the community;
/// 2. symbol rows, using the same three steps, when step 1 selected nothing.
///
/// Only step 1 sets `tagpath_handle`. A diagnostic is emitted for every
/// member with more than one symbol row or more than one tagpath candidate,
/// whether or not a location was chosen.
///
/// # Errors
///
/// Propagates failures from `db.all_symbols()` and `db.all_stored_edges()`.
pub(crate) fn annotate_community_members_with_context(
    communities: &mut [graph::Community],
    db: &index::IndexDb,
    root: &std::path::Path,
    adapter: Option<&tagpath_adapter::TagpathAdapter>,
) -> Result<Vec<CommunityMemberAmbiguityDiagnostic>> {
    // Member name -> community id. A name present in several communities maps
    // to the last one inserted.
    let mut community_by_name = HashMap::<String, usize>::new();
    for community in communities.iter() {
        for member in &community.members {
            community_by_name.insert(member.name.clone(), community.id);
        }
    }

    // All symbol rows grouped by symbol name.
    let mut symbols_by_name = HashMap::<String, Vec<index::StoredSymbol>>::new();
    for sym in db.all_symbols()? {
        symbols_by_name
            .entry(sym.name.clone())
            .or_default()
            .push(sym);
    }

    let mut refs_by_member = HashMap::<(usize, String), Vec<graph::CommunityMemberRef>>::new();
    let mut evidence_files_by_member = HashMap::<(usize, String), BTreeSet<String>>::new();
    let mut context_files_by_community = HashMap::<usize, BTreeSet<String>>::new();

    // Gather evidence from edges that stay inside a single community.
    for edge in db.all_stored_edges()? {
        let Some(&caller_community) = community_by_name.get(&edge.caller_name) else {
            continue;
        };
        let Some(&callee_community) = community_by_name.get(&edge.callee_name) else {
            continue;
        };
        if caller_community != callee_community {
            continue;
        }

        // Both ends of the edge are attributed to the caller's file.
        let file = index_file_key(&edge.caller_file, root);
        context_files_by_community
            .entry(caller_community)
            .or_default()
            .insert(file.clone());

        evidence_files_by_member
            .entry((caller_community, edge.caller_name.clone()))
            .or_default()
            .insert(file.clone());
        push_bounded_community_member_ref(
            &mut refs_by_member,
            caller_community,
            &edge.caller_name,
            graph::CommunityMemberRef {
                file: file.clone(),
                line: edge.caller_line,
                role: "caller".to_string(),
                peer: edge.callee_name.clone(),
            },
        );

        evidence_files_by_member
            .entry((callee_community, edge.callee_name.clone()))
            .or_default()
            .insert(file.clone());
        push_bounded_community_member_ref(
            &mut refs_by_member,
            callee_community,
            &edge.callee_name,
            graph::CommunityMemberRef {
                file,
                line: edge.call_site_line,
                role: "callee".to_string(),
                peer: edge.caller_name.clone(),
            },
        );
    }

    let mut diagnostics = Vec::new();
    for community in communities.iter_mut() {
        let community_files = context_files_by_community
            .get(&community.id)
            .cloned()
            .unwrap_or_default();
        for member in community.members.iter_mut() {
            // Clear any earlier annotation before recomputing it.
            member.file = None;
            member.line = None;
            member.tagpath_handle = None;
            let key = (community.id, member.name.clone());
            member.refs = refs_by_member.remove(&key).unwrap_or_default();

            let syms = symbols_by_name
                .get(&member.name)
                .map(Vec::as_slice)
                .unwrap_or(&[]);
            let evidence_files = evidence_files_by_member
                .get(&key)
                .cloned()
                .unwrap_or_default();
            // With no adapter there are no tagpath candidates.
            let candidates = adapter
                .map(|adapter| {
                    tagpath_handle_candidates_for_symbol_rows(&member.name, syms, root, adapter)
                })
                .unwrap_or_default();

            let mut selected_file: Option<String> = None;
            let mut selected_line: Option<i64> = None;
            let mut selected_handle: Option<String> = None;
            let mut selected_evidence: Option<&'static str> = None;

            // Step 1: tagpath candidates, from strongest to weakest evidence.
            if let Some(candidate) = candidates.first().filter(|_| candidates.len() == 1) {
                selected_file = Some(candidate.file.clone());
                selected_line = Some(candidate.line);
                selected_handle = Some(candidate.handle.clone());
                selected_evidence = Some("unique_tagpath_handle");
            } else if let Some((candidate, evidence)) =
                choose_tagpath_candidate_by_files(&candidates, &evidence_files, "edge_file")
            {
                selected_file = Some(candidate.file.clone());
                selected_line = Some(candidate.line);
                selected_handle = Some(candidate.handle.clone());
                selected_evidence = Some(evidence);
            } else if let Some((candidate, evidence)) =
                choose_tagpath_candidate_by_files(&candidates, &community_files, "community_file")
            {
                selected_file = Some(candidate.file.clone());
                selected_line = Some(candidate.line);
                selected_handle = Some(candidate.handle.clone());
                selected_evidence = Some(evidence);
            }

            // Step 2: fall back to plain symbol rows; no handle is set here.
            if selected_file.is_none() {
                if let Some(sym) = syms.first().filter(|_| syms.len() == 1) {
                    selected_file = Some(index_file_key(&sym.file, root));
                    selected_line = Some(sym.line);
                    selected_evidence = Some("unique_symbol_row");
                } else if let Some((sym, evidence)) =
                    choose_symbol_row_by_files(syms, &evidence_files, root)
                {
                    selected_file = Some(index_file_key(&sym.file, root));
                    selected_line = Some(sym.line);
                    selected_evidence = Some(evidence);
                } else if let Some((sym, _)) =
                    choose_symbol_row_by_files(syms, &community_files, root)
                {
                    // The helper's own label is discarded in favour of
                    // "community_file" for this fallback.
                    selected_file = Some(index_file_key(&sym.file, root));
                    selected_line = Some(sym.line);
                    selected_evidence = Some("community_file");
                }
            }

            member.file = selected_file.clone();
            member.line = selected_line;
            member.tagpath_handle = selected_handle;

            // Report every member that had more than one possible location.
            if syms.len() > 1 || candidates.len() > 1 {
                diagnostics.push(CommunityMemberAmbiguityDiagnostic {
                    community_id: community.id,
                    name: member.name.clone(),
                    candidate_count: syms.len(),
                    tagpath_candidate_count: candidates.len(),
                    evidence: selected_evidence
                        .unwrap_or("ambiguous_no_evidence")
                        .to_string(),
                    chosen_file: selected_file,
                });
            }
        }
    }

    Ok(diagnostics)
}
3051
/// Selects one end of an edge.
// NOTE(review): presumably the caller or callee end of a call edge; no use of
// this enum is visible in this part of the file — confirm against its callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EdgeSide {
    /// The caller end.
    Caller,
    /// The callee end.
    Callee,
}
3059
/// Object keys whose string values `relativize_json_inner` treats as file
/// paths and strips the root prefix from.
const JSON_PATH_KEYS: &[&str] = &["file", "path", "caller_file", "file_path"];
3061
3062pub(crate) fn relativize_json_paths(val: &mut serde_json::Value, root: &std::path::Path) {
3063 let root_str = root.to_string_lossy();
3064 let prefix = format!("{}/", root_str.trim_end_matches('/'));
3065 relativize_json_inner(val, &prefix);
3066}
3067
3068fn relativize_json_inner(val: &mut serde_json::Value, prefix: &str) {
3069 match val {
3070 serde_json::Value::Array(arr) => {
3071 for v in arr {
3072 relativize_json_inner(v, prefix);
3073 }
3074 }
3075 serde_json::Value::Object(map) => {
3076 for (k, v) in map.iter_mut() {
3077 if JSON_PATH_KEYS.contains(&k.as_str())
3078 && let serde_json::Value::String(s) = v
3079 && let Some(rest) = s.strip_prefix(prefix)
3080 {
3081 *s = rest.to_string();
3082 }
3083 relativize_json_inner(v, prefix);
3084 }
3085 }
3086 _ => {}
3087 }
3088}
3089
/// Formats `score` with two decimal places in compact mode and four otherwise.
pub(crate) fn format_score(score: f64, compact: bool) -> String {
    let decimals: usize = if compact { 2 } else { 4 };
    format!("{:.*}", decimals, score)
}
3097
/// Trims `input` and shortens it to at most `max_chars` characters.
///
/// Text that already fits is returned unchanged. Longer text is cut to
/// `max_chars - 3` characters followed by `"..."`. When `max_chars` is three
/// or less there is no room for any text, so the result is `max_chars` dots;
/// this keeps the output within the limit, matching how `truncate_for_budget`
/// treats tiny byte budgets.
pub(crate) fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    let trimmed = input.trim();
    // A character at index `max_chars` exists only when the text is too long;
    // checking this way stops scanning as soon as the answer is known.
    if trimmed.chars().nth(max_chars).is_none() {
        return trimmed.to_string();
    }
    if max_chars <= 3 {
        return ".".repeat(max_chars);
    }
    let prefix: String = trimmed.chars().take(max_chars - 3).collect();
    format!("{prefix}...")
}
3107
3108pub(crate) fn compact_snippet(snippet: &str) -> Option<String> {
3109 snippet
3110 .lines()
3111 .find(|line| !line.trim().is_empty())
3112 .map(|line| truncate_for_compact(line, 100))
3113}
3114
3115pub(crate) fn compact_members(members: &[graph::CommunityMember], limit: usize) -> String {
3116 let names: Vec<&str> = members.iter().map(|m| m.name.as_str()).collect();
3117 if names.len() <= limit {
3118 return names.join(", ");
3119 }
3120 format!(
3121 "{} (+{} more)",
3122 names[..limit].join(", "),
3123 names.len() - limit
3124 )
3125}
3126
3127fn stable_handle(prefix: &str, key: &str) -> String {
3128 let mut hasher = blake3::Hasher::new();
3129 hasher.update(prefix.as_bytes());
3130 hasher.update(&[0]);
3131 hasher.update(key.as_bytes());
3132 let hex = hasher.finalize().to_hex();
3133 format!("{prefix}-{}", &hex[..10])
3134}
3135
/// The canonical name of a tag family together with its tag alias.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CanonicalTagFamily {
    /// Canonical name taken from the tagpath family.
    canonical: String,
    /// Alias built from the family's tags: dimensions separated by `/`, tags
    /// within a dimension separated by `.`.
    tag_alias: String,
}
3141
3142fn canonical_family_from_tagpath_family(
3143 family: tagpath_family::TagFamily,
3144) -> Option<CanonicalTagFamily> {
3145 let tag_alias = if family.dimensions.is_empty() {
3146 family.tags.join("/")
3147 } else {
3148 family
3149 .dimensions
3150 .iter()
3151 .filter(|dimension| !dimension.tags.is_empty())
3152 .map(|dimension| dimension.tags.join("."))
3153 .collect::<Vec<_>>()
3154 .join("/")
3155 };
3156
3157 if tag_alias.is_empty() {
3158 None
3159 } else {
3160 Some(CanonicalTagFamily {
3161 canonical: family.canonical,
3162 tag_alias,
3163 })
3164 }
3165}
3166
3167fn canonical_tag_family_from_name(name: &str) -> Option<CanonicalTagFamily> {
3168 let trimmed = name.trim();
3169 if trimmed.is_empty() {
3170 return None;
3171 }
3172
3173 canonical_family_from_tagpath_family(tagpath_family::generate_family(trimmed))
3174}
3175
3176fn canonical_tag_family_from_tags(tags: &str) -> Option<CanonicalTagFamily> {
3177 let canonical = tags
3178 .split(',')
3179 .map(str::trim)
3180 .filter(|tag| !tag.is_empty())
3181 .collect::<Vec<_>>()
3182 .join("_");
3183 if canonical.is_empty() {
3184 None
3185 } else {
3186 canonical_family_from_tagpath_family(tagpath_family::generate_family(&canonical))
3187 }
3188}
3189
3190fn canonical_tag_family_from_symbol(name: &str, tags: Option<&str>) -> Option<CanonicalTagFamily> {
3191 tags.and_then(canonical_tag_family_from_tags)
3192 .or_else(|| canonical_tag_family_from_name(name))
3193}
3194
3195fn tag_alias_from_name(name: &str) -> Option<String> {
3196 canonical_tag_family_from_name(name).map(|family| family.tag_alias)
3197}
3198
3199fn tag_alias_from_tags(name: &str, tags: Option<&str>) -> Option<String> {
3200 canonical_tag_family_from_symbol(name, tags).map(|family| family.tag_alias)
3201}
3202
/// Turns a tag alias into a space-separated query.
///
/// The alias is split on `/` and `.`, each piece is trimmed and blank pieces
/// are dropped. Returns `None` when nothing remains.
fn family_query_from_tag_alias(tag_alias: &str) -> Option<String> {
    let words: Vec<&str> = tag_alias
        .split(|ch: char| ch == '/' || ch == '.')
        .map(str::trim)
        .filter(|word| !word.is_empty())
        .collect();
    match words.is_empty() {
        true => None,
        false => Some(words.join(" ")),
    }
}
3212
/// Compact, serializable reference to one ontology tag.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactOntologyRefPreview {
    /// Stable handle derived from the tag and its relative path.
    handle: String,
    /// The ontology tag.
    tag: String,
    /// Path of the ontology entry, relative to the ontology project root when
    /// it lies under it.
    path: String,
    /// Optional title of the ontology tag; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    title: Option<String>,
    /// Optional domain of the ontology tag; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    domain: Option<String>,
}
3223
/// Loaded ontology data used to attach ontology references to symbol previews.
#[derive(Clone, Debug)]
struct TagOntologyPreviewContext {
    /// Project path reported by the ontology loader; used to relativize tag paths.
    project_root: PathBuf,
    /// Ontology tags keyed by their tag string.
    tags: BTreeMap<String, tagpath_ontology::OntologyTag>,
}
3229
/// Compact, serializable reference to a symbol.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactSymbolRefPreview {
    /// Stable handle derived from a prefix and key.
    handle: String,
    /// Symbol name, truncated to the caller's byte budget.
    name: String,
    /// Tag alias for the symbol, truncated to the same budget; omitted from
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Ontology references matched from the tag alias; omitted from output
    /// when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ontology_refs: Vec<CompactOntologyRefPreview>,
}
3239
3240fn build_compact_symbol_ref(
3241 prefix: &str,
3242 key: &str,
3243 name: &str,
3244 tags: Option<&str>,
3245 max_bytes: usize,
3246) -> CompactSymbolRefPreview {
3247 build_compact_symbol_ref_with_ontology(prefix, key, name, tags, max_bytes, None)
3248}
3249
3250fn build_compact_symbol_ref_with_ontology(
3251 prefix: &str,
3252 key: &str,
3253 name: &str,
3254 tags: Option<&str>,
3255 max_bytes: usize,
3256 ontology: Option<&TagOntologyPreviewContext>,
3257) -> CompactSymbolRefPreview {
3258 let tag_alias = tag_alias_from_tags(name, tags);
3259 let ontology_refs = tag_alias
3260 .as_deref()
3261 .map(|alias| ontology_refs_for_alias(ontology, alias))
3262 .unwrap_or_default();
3263 CompactSymbolRefPreview {
3264 handle: stable_handle(prefix, key),
3265 name: truncate_for_budget(name, max_bytes),
3266 tag_alias: tag_alias.map(|alias| truncate_for_budget(&alias, max_bytes)),
3267 ontology_refs,
3268 }
3269}
3270
3271fn load_tag_ontology_preview_context(root: &Path) -> Option<TagOntologyPreviewContext> {
3272 let report = tagpath_ontology::load_project(root).ok()?;
3273 if report.tags.is_empty() {
3274 return None;
3275 }
3276 Some(TagOntologyPreviewContext {
3277 project_root: report.project_path,
3278 tags: report
3279 .tags
3280 .into_iter()
3281 .map(|tag| (tag.tag.clone(), tag))
3282 .collect(),
3283 })
3284}
3285
3286fn ontology_refs_for_alias(
3287 ontology: Option<&TagOntologyPreviewContext>,
3288 alias: &str,
3289) -> Vec<CompactOntologyRefPreview> {
3290 let Some(ontology) = ontology else {
3291 return Vec::new();
3292 };
3293 let mut seen = BTreeSet::new();
3294 alias
3295 .split('/')
3296 .flat_map(|part| part.split('.'))
3297 .map(str::trim)
3298 .filter(|tag| !tag.is_empty())
3299 .filter_map(|tag| {
3300 let key = tag.to_ascii_lowercase();
3301 if !seen.insert(key.clone()) {
3302 return None;
3303 }
3304 let ontology_tag = ontology.tags.get(&key)?;
3305 let path = relativize_ontology_path(&ontology_tag.path, &ontology.project_root);
3306 Some(CompactOntologyRefPreview {
3307 handle: stable_handle("tont", &format!("{}:{path}", ontology_tag.tag)),
3308 tag: ontology_tag.tag.clone(),
3309 path,
3310 title: ontology_tag.title.clone(),
3311 domain: ontology_tag.domain.clone(),
3312 })
3313 })
3314 .collect()
3315}
3316
/// Renders `path` relative to `root` when it lies under it, otherwise as
/// given, with backslashes replaced by forward slashes.
fn relativize_ontology_path(path: &Path, root: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().replace('\\', "/")
}
3323
/// Formats a preview line as `<handle> <name>`, followed by ` tag:<alias>`
/// when an alias is present.
fn format_symbol_preview_line(handle: &str, name: &str, tag_alias: Option<&str>) -> String {
    let mut line = format!("{handle} {name}");
    if let Some(alias) = tag_alias {
        line.push_str(" tag:");
        line.push_str(alias);
    }
    line
}
3330
3331fn format_summary_ref_line(summary: &ContextPackSummaryRefPreview) -> String {
3332 match summary.tag_alias.as_deref() {
3333 Some(alias) => format!(
3334 "{} {} tag:{} expand:{}",
3335 summary.handle, summary.symbol, alias, summary.expand
3336 ),
3337 None => format!(
3338 "{} {} expand:{}",
3339 summary.handle, summary.symbol, summary.expand
3340 ),
3341 }
3342}
3343
3344fn compact_symbol_ref_token(symbol: &CompactSymbolRefPreview) -> String {
3345 match symbol.tag_alias.as_deref() {
3346 Some(alias) => format!("{}@{}", symbol.handle, alias),
3347 None => format!("{}@{}", symbol.handle, symbol.name),
3348 }
3349}
3350
/// Trims `input` and shortens it to at most `max_bytes` bytes without
/// splitting a UTF-8 character.
///
/// Text that already fits is returned unchanged. For budgets of three bytes
/// or fewer the result is that many dots. Otherwise the longest whole-character
/// prefix that fits in `max_bytes - 3` bytes is kept and `"..."` appended.
fn truncate_for_budget(input: &str, max_bytes: usize) -> String {
    let text = input.trim();
    if text.len() <= max_bytes {
        return text.to_owned();
    }
    if max_bytes <= 3 {
        return ".".repeat(max_bytes);
    }

    // Bytes available for text once the ellipsis is accounted for.
    let room = max_bytes - 3;
    let cut = text
        .char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
        .take_while(|&end| end <= room)
        .last()
        .unwrap_or(0);

    match cut {
        0 => "...".to_owned(),
        _ => format!("{}...", &text[..cut]),
    }
}
3375
/// Returns the short display form of a symbol kind, or the kind itself when
/// it has no shorter form.
pub(crate) fn abbreviate_kind(kind: &str) -> &str {
    // Only kinds whose abbreviation differs from the kind itself are listed.
    const ABBREVIATIONS: &[(&str, &str)] = &[
        ("function", "fn"),
        ("method", "meth"),
        ("module", "mod"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
    ];

    ABBREVIATIONS
        .iter()
        .find(|(long, _)| *long == kind)
        .map_or(kind, |&(_, short)| short)
}
3398
/// Returns the short display form of a match type: `exact_name` becomes
/// `exact`, `partial_tags` becomes `partial`, anything else is unchanged.
pub(crate) fn abbreviate_match_type(mt: &str) -> &str {
    if mt == "exact_name" {
        "exact"
    } else if mt == "partial_tags" {
        "partial"
    } else {
        mt
    }
}
3407
3408pub(crate) fn symbol_path_summary(path: &[graph::PathNode]) -> String {
3409 path.iter()
3410 .map(|n| n.name.as_str())
3411 .collect::<Vec<_>>()
3412 .join(" -> ")
3413}
3414
/// Maximum number of distinct sample snippets kept for each group of search
/// hits sharing a path.
const SEARCH_GROUP_SAMPLE_LIMIT: usize = 2;
3416
/// Search hits that share a path, summarised for collapsed display.
struct SearchHitGroup {
    /// Path shared by the grouped hits (absolute or relative, as requested).
    path: String,
    /// Lowest rank among the grouped hits.
    first_rank: usize,
    /// Highest score among the grouped hits.
    top_score: f64,
    /// Debug rendering of the confidence of the first hit seen for this path.
    confidence: String,
    /// Number of hits in the group.
    hits: usize,
    /// Up to `SEARCH_GROUP_SAMPLE_LIMIT` distinct sample lines from the hits.
    samples: Vec<String>,
}
3425
3426fn format_search_sample(hit: &sift::SearchHit) -> Option<String> {
3427 let snippet = compact_snippet(&hit.snippet)?;
3428 Some(match hit.location.as_deref() {
3429 Some(location) => format!("{location}: {snippet}"),
3430 None => snippet,
3431 })
3432}
3433
3434pub(crate) fn group_search_hits(
3435 hits: &[sift::SearchHit],
3436 root: &Path,
3437 absolute: bool,
3438) -> Vec<SearchHitGroup> {
3439 let mut positions = BTreeMap::new();
3440 let mut groups = Vec::new();
3441 for hit in hits {
3442 let path = if absolute {
3443 hit.path.clone()
3444 } else {
3445 relativize(&hit.path, root)
3446 };
3447 let entry = positions.entry(path.clone()).or_insert_with(|| {
3448 groups.push(SearchHitGroup {
3449 path: path.clone(),
3450 first_rank: hit.rank,
3451 top_score: hit.score,
3452 confidence: format!("{:?}", hit.confidence),
3453 hits: 0,
3454 samples: Vec::new(),
3455 });
3456 groups.len() - 1
3457 });
3458 let group = &mut groups[*entry];
3459 group.hits += 1;
3460 if hit.rank < group.first_rank {
3461 group.first_rank = hit.rank;
3462 }
3463 if hit.score > group.top_score {
3464 group.top_score = hit.score;
3465 }
3466 if let Some(sample) = format_search_sample(hit)
3467 && group.samples.len() < SEARCH_GROUP_SAMPLE_LIMIT
3468 && !group.samples.contains(&sample)
3469 {
3470 group.samples.push(sample);
3471 }
3472 }
3473 groups.sort_by_key(|group| group.first_rank);
3474 groups
3475}
3476
3477pub(crate) fn should_collapse_search_hits(
3478 hits: &[sift::SearchHit],
3479 root: &Path,
3480 absolute: bool,
3481) -> bool {
3482 let groups = group_search_hits(hits, root, absolute);
3483 let max_hits_per_file = groups.iter().map(|group| group.hits).max().unwrap_or(0);
3484 max_hits_per_file >= 3 || (hits.len() >= 6 && groups.len() < hits.len())
3485}
3486
3487pub(crate) fn format_edge_groups(edges: &[index::StoredEdge], use_callers: bool) -> Vec<String> {
3488 let mut grouped: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
3489 for edge in edges {
3490 let key = edge.caller_file.as_str();
3491 let name = if use_callers {
3492 edge.caller_name.as_str()
3493 } else {
3494 edge.callee_name.as_str()
3495 };
3496 let names = grouped.entry(key).or_default();
3497 if !names.contains(&name) {
3498 names.push(name);
3499 }
3500 }
3501
3502 grouped
3503 .into_iter()
3504 .map(|(file, names)| format!(" {} ({}): {}", file, names.len(), names.join(", ")))
3505 .collect()
3506}
3507
3508pub(crate) fn should_collapse_edge_groups(edges: &[index::StoredEdge]) -> bool {
3509 let mut grouped: BTreeMap<&str, usize> = BTreeMap::new();
3510 for edge in edges {
3511 *grouped.entry(edge.caller_file.as_str()).or_default() += 1;
3512 }
3513 let max_hits_per_file = grouped.values().copied().max().unwrap_or(0);
3514 max_hits_per_file >= 3 || (edges.len() >= 6 && grouped.len() < edges.len())
3515}
3516
3517pub(crate) fn apply_edit_op(content: &str, op: &EditOp) -> Result<(String, usize)> {
3519 if op.old == op.new {
3520 bail!("old and new strings are identical");
3521 }
3522 let count = content.matches(op.old.as_str()).count();
3523 if count == 0 {
3524 bail!("old_string not found");
3525 }
3526 if count > 1 && !op.replace_all {
3527 bail!(
3528 "old_string matches {} times (use replace_all or provide more context)",
3529 count
3530 );
3531 }
3532 let replaced = if op.replace_all {
3533 content.replace(op.old.as_str(), &op.new)
3534 } else {
3535 content.replacen(op.old.as_str(), &op.new, 1)
3536 };
3537 Ok((replaced, count))
3538}
3539
3540pub(crate) fn build_edit_plan(batch: &EditBatch) -> Result<Vec<PlannedEdit>> {
3541 let mut plan = Vec::with_capacity(batch.edits.len());
3542 for (i, op) in batch.edits.iter().enumerate() {
3543 let content = fs::read_to_string(&op.file)
3544 .with_context(|| format!("edit #{}: reading {}", i + 1, op.file.display()))?;
3545 let (replaced, count) = apply_edit_op(&content, op)
3546 .with_context(|| format!("edit #{}: {}", i + 1, op.file.display()))?;
3547 plan.push(PlannedEdit {
3548 index: i,
3549 file: op.file.clone(),
3550 new_content: replaced,
3551 replacements: count,
3552 });
3553 }
3554 Ok(plan)
3555}
3556
3557fn stage_edit_plan(plan: Vec<PlannedEdit>) -> Result<Vec<StagedEdit>> {
3558 let mut staged = Vec::with_capacity(plan.len());
3559 for planned in plan {
3560 let parent = planned.file.parent().unwrap_or_else(|| Path::new("."));
3561 let mut staged_file = NamedTempFile::new_in(parent)
3562 .with_context(|| format!("staging {}", planned.file.display()))?;
3563 staged_file
3564 .write_all(planned.new_content.as_bytes())
3565 .with_context(|| format!("staging {}", planned.file.display()))?;
3566 staged_file
3567 .as_file_mut()
3568 .sync_all()
3569 .with_context(|| format!("flushing staged edit for {}", planned.file.display()))?;
3570 staged.push(StagedEdit {
3571 index: planned.index,
3572 file: planned.file,
3573 replacements: planned.replacements,
3574 staged_file,
3575 });
3576 }
3577 Ok(staged)
3578}
3579
/// Builds the path of the hidden backup file used while swapping in an edit.
///
/// The backup sits next to `file` and is named
/// `.<file name>.tsift-edit-<nanos since epoch>-<pid>-<index>.bak`. A path
/// without a file name falls back to `edit-target`, and a path without a parent
/// falls back to `.`.
fn edit_backup_path(file: &Path, index: usize) -> PathBuf {
    let dir = match file.parent() {
        Some(parent) => parent,
        None => Path::new("."),
    };
    let name = match file.file_name() {
        Some(value) => value.to_string_lossy().into_owned(),
        None => "edit-target".to_string(),
    };
    // A clock set before the epoch yields a zero stamp rather than an error.
    let stamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_nanos();
    let pid = std::process::id();
    dir.join(format!(".{name}.tsift-edit-{stamp}-{pid}-{index}.bak"))
}
3595
3596fn rollback_applied_edits(applied: &[AppliedEdit]) -> Result<()> {
3597 let mut rollback_errors = Vec::new();
3598 for entry in applied.iter().rev() {
3599 if let Err(err) = fs::remove_file(&entry.file)
3600 && err.kind() != std::io::ErrorKind::NotFound
3601 {
3602 rollback_errors.push(format!(
3603 "removing {} during rollback: {}",
3604 entry.file.display(),
3605 err
3606 ));
3607 continue;
3608 }
3609 if let Err(err) = fs::rename(&entry.backup_path, &entry.file) {
3610 rollback_errors.push(format!(
3611 "restoring {} during rollback: {}",
3612 entry.file.display(),
3613 err
3614 ));
3615 }
3616 }
3617 if rollback_errors.is_empty() {
3618 Ok(())
3619 } else {
3620 bail!(rollback_errors.join("; "));
3621 }
3622}
3623
3624fn cleanup_edit_backups(applied: &[AppliedEdit]) {
3625 for entry in applied {
3626 let _ = fs::remove_file(&entry.backup_path);
3627 }
3628}
3629
3630fn ok_results_from_applied(applied: &[AppliedEdit]) -> Vec<EditResult> {
3631 applied
3632 .iter()
3633 .map(|entry| EditResult {
3634 file: entry.file.clone(),
3635 status: EditStatus::Ok,
3636 error: None,
3637 replacements: Some(entry.replacements),
3638 })
3639 .collect()
3640}
3641
3642pub(crate) fn apply_edit_plan_atomically(plan: Vec<PlannedEdit>) -> Result<Vec<EditResult>> {
3643 apply_edit_plan_atomically_inner(plan, |_, _| Ok(()))
3644}
3645
3646fn apply_edit_plan_atomically_inner<F>(
3647 plan: Vec<PlannedEdit>,
3648 mut before_swap: F,
3649) -> Result<Vec<EditResult>>
3650where
3651 F: FnMut(usize, &Path) -> Result<()>,
3652{
3653 let staged = stage_edit_plan(plan)?;
3654 let mut applied = Vec::with_capacity(staged.len());
3655
3656 for (commit_index, staged_edit) in staged.into_iter().enumerate() {
3657 if let Err(err) = before_swap(commit_index, &staged_edit.file) {
3658 match rollback_applied_edits(&applied) {
3659 Ok(()) => cleanup_edit_backups(&applied),
3660 Err(rollback_error) => {
3661 return Err(err.context(format!("rollback also failed: {rollback_error}")));
3662 }
3663 }
3664 return Err(err);
3665 }
3666
3667 let backup_path = edit_backup_path(&staged_edit.file, staged_edit.index);
3668 if let Err(err) = fs::rename(&staged_edit.file, &backup_path) {
3669 match rollback_applied_edits(&applied) {
3670 Ok(()) => cleanup_edit_backups(&applied),
3671 Err(rollback_error) => {
3672 bail!(
3673 "moving {} into backup slot failed: {}; rollback also failed: {}",
3674 staged_edit.file.display(),
3675 err,
3676 rollback_error
3677 );
3678 }
3679 }
3680 bail!(
3681 "moving {} into backup slot failed: {}",
3682 staged_edit.file.display(),
3683 err
3684 );
3685 }
3686 match staged_edit.staged_file.persist(&staged_edit.file) {
3687 Ok(_) => applied.push(AppliedEdit {
3688 index: staged_edit.index,
3689 file: staged_edit.file,
3690 replacements: staged_edit.replacements,
3691 backup_path,
3692 }),
3693 Err(err) => {
3694 let persist_error = err.error;
3695 drop(err.file);
3696 let restore_error = fs::rename(&backup_path, &staged_edit.file).err();
3697 let rollback_error = rollback_applied_edits(&applied).err();
3698 if rollback_error.is_none() {
3699 cleanup_edit_backups(&applied);
3700 }
3701 let mut message = format!(
3702 "committing {} failed: {}",
3703 staged_edit.file.display(),
3704 persist_error
3705 );
3706 if let Some(restore_error) = restore_error {
3707 message.push_str(&format!(
3708 "; restoring original {} failed: {}",
3709 staged_edit.file.display(),
3710 restore_error
3711 ));
3712 }
3713 if let Some(rollback_error) = rollback_error {
3714 message.push_str(&format!("; rollback also failed: {rollback_error}"));
3715 }
3716 bail!(message);
3717 }
3718 }
3719 }
3720
3721 applied.sort_by_key(|entry| entry.index);
3722 let results = ok_results_from_applied(&applied);
3723 cleanup_edit_backups(&applied);
3724 Ok(results)
3725}
3726
3727fn resolve_query_index_target(
3728 root: &Path,
3729 path_hint: &Path,
3730 scope: Option<&str>,
3731) -> Result<SearchIndexTarget> {
3732 let cfg = config::Config::load(root)?;
3733 if let Some(scope_name) = scope {
3734 let scope = config::Config::resolve_submodule(root, scope_name)?;
3735 return Ok(SearchIndexTarget {
3736 label: format!("submodule `{}` index", scope.id),
3737 db_path: cfg.db_path_for(root, &scope.id),
3738 source_root: scope.source_root.clone(),
3739 scope_name: Some(scope.id.clone()),
3740 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
3741 });
3742 }
3743
3744 if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
3745 return Ok(SearchIndexTarget {
3746 label: format!("submodule `{}` index", scope.id),
3747 db_path: cfg.db_path_for(root, &scope.id),
3748 source_root: scope.source_root.clone(),
3749 scope_name: Some(scope.id.clone()),
3750 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
3751 });
3752 }
3753
3754 if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
3755 return Ok(SearchIndexTarget {
3756 label: format!("submodule `{}` index", scope.id),
3757 db_path: cfg.db_path_for(root, &scope.id),
3758 source_root: scope.source_root.clone(),
3759 scope_name: Some(scope.id.clone()),
3760 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
3761 });
3762 }
3763
3764 let db_path = root.join(".tsift/index.db");
3765 if db_path.exists() {
3766 return Ok(SearchIndexTarget {
3767 label: "index".to_string(),
3768 db_path,
3769 source_root: root.to_path_buf(),
3770 scope_name: None,
3771 reindex_cmd: format!("tsift index {}", root.display()),
3772 });
3773 }
3774
3775 let scopes = config::Config::submodule_dirs(root)?;
3776 if scopes.is_empty() {
3777 return Ok(SearchIndexTarget {
3778 label: "index".to_string(),
3779 db_path,
3780 source_root: root.to_path_buf(),
3781 scope_name: None,
3782 reindex_cmd: format!("tsift index {}", root.display()),
3783 });
3784 }
3785
3786 let available_scopes = scopes
3787 .iter()
3788 .map(|scope| scope.id.as_str())
3789 .collect::<Vec<_>>()
3790 .join(", ");
3791 let indexed_scopes = scopes
3792 .iter()
3793 .filter(|scope| cfg.db_path_for(root, &scope.id).exists())
3794 .map(|scope| scope.id.as_str())
3795 .collect::<Vec<_>>();
3796 let indexed_label = if indexed_scopes.is_empty() {
3797 "none".to_string()
3798 } else {
3799 indexed_scopes.join(", ")
3800 };
3801
3802 bail!(
3803 "workspace root {} has no shared root index at {}. Read-only graph queries require `--scope <scope>` when the workspace is indexed into `.tsift/indexes/*/index.db`. Available scopes: {}. Indexed scopes: {}.",
3804 root.display(),
3805 db_path.display(),
3806 available_scopes,
3807 indexed_label
3808 );
3809}
3810
3811fn resolve_query_db_path(root: &Path, path_hint: &Path, scope: Option<&str>) -> Result<PathBuf> {
3812 Ok(resolve_query_index_target(root, path_hint, scope)?.db_path)
3813}
3814
3815fn ensure_query_index_current(root: &Path, target: &SearchIndexTarget) -> Result<()> {
3816 let state = inspect_search_index(target)?;
3817 let Some(reason) = index_reason_for_state(state) else {
3818 return Ok(());
3819 };
3820
3821 match apply_search_index_update(root, target) {
3822 Ok(_) => {
3823 index::inspect_scope_invalidate_all();
3824 Ok(())
3825 }
3826 Err(err) if is_active_writer_lock_error(&err) && target.db_path.exists() => {
3827 eprintln!(
3828 "note: active tsift writer detected; skipping graph-query autoindex because {}. \
3829 Continuing with the current read-only index snapshot; graph results may lag. \
3830 Retry `{}` after the active writer finishes for fresh graph results.",
3831 index_reason_detail(target, reason),
3832 target.reindex_cmd
3833 );
3834 Ok(())
3835 }
3836 Err(err) => Err(err),
3837 }
3838}
3839
3840pub(crate) fn open_index_db(path: &std::path::Path, scope: Option<&str>) -> Result<index::IndexDb> {
3841 let root = lint::resolve_project_root_or_canonical_path(path)?;
3842 let target = resolve_query_index_target(&root, path, scope)?;
3843 ensure_query_index_current(&root, &target)?;
3844 let db_path = target.db_path;
3845 if !db_path.exists() {
3846 bail!(
3847 "no index found at {}. Run `tsift index` first.",
3848 db_path.display()
3849 );
3850 }
3851 index::IndexDb::open_read_only_resilient(&db_path)
3852}
3853
3854pub(crate) fn query_tagpath_root(
3855 root: &std::path::Path,
3856 path_hint: &std::path::Path,
3857 scope: Option<&str>,
3858) -> Result<PathBuf> {
3859 if let Some(scope_name) = scope {
3860 return Ok(config::Config::resolve_submodule(root, scope_name)?.source_root);
3861 }
3862 if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
3863 return Ok(scope.source_root);
3864 }
3865 Ok(root.to_path_buf())
3866}
3867
3868#[derive(Clone, Debug, Serialize, PartialEq)]
3869struct TraversalNode {
3870 handle: String,
3871 kind: String,
3872 label: String,
3873 #[serde(skip_serializing_if = "Option::is_none")]
3874 ref_id: Option<String>,
3875 #[serde(skip_serializing_if = "Option::is_none")]
3876 path: Option<String>,
3877 #[serde(skip_serializing_if = "Option::is_none")]
3878 line: Option<i64>,
3879 #[serde(skip_serializing_if = "Option::is_none")]
3880 detail: Option<String>,
3881 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
3882 properties: BTreeMap<String, String>,
3883 expand: String,
3884}
3885
3886#[derive(Clone, Debug, Serialize, PartialEq)]
3887struct TraversalEdge {
3888 from: String,
3889 to: String,
3890 relation: String,
3891 #[serde(skip_serializing_if = "Option::is_none")]
3892 label: Option<String>,
3893 weight: usize,
3894}
3895
3896#[derive(Clone, Debug, Default)]
3897struct TraversalGraphBuild {
3898 nodes: BTreeMap<String, TraversalNode>,
3899 edges: Vec<TraversalEdge>,
3900 edge_keys: BTreeSet<(String, String, String)>,
3901 warnings: Vec<String>,
3902}
3903
/// Version tag mixed into the projection content hash.
pub(crate) const GRAPH_PROJECTION_VERSION: &str = "tsift-traversal-v1";
// Contract version identifiers. NOTE(review): presumably stamped onto the
// corresponding report payloads — confirm at the use sites.
const GRAPH_DB_EVIDENCE_CONTRACT_VERSION: &str = "graph-db-evidence-v1";
const WORKER_PROMPT_PACKET_CONTRACT_VERSION: &str = "worker-prompt-packet-v1";
const CONFLICT_MATRIX_CONTRACT_VERSION: &str = "conflict-matrix-v1";
const CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION: &str =
    "context-pack-graph-orchestration-v1";
const SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION: &str = "session-review-follow-up-v1";
const DISPATCH_TRACE_CONTRACT_VERSION: &str = "dispatch-trace-v1";
const DEPENDENCY_DAG_CONTRACT_VERSION: &str = "dependency-dag-v1";
/// Node kind of the projection metadata node that carries the `content_hash` property.
const GRAPH_PROJECTION_META_KIND: &str = "projection_meta";
// Graph-db query caps. NOTE(review): exact semantics live at the use sites.
const GRAPH_DB_RANKED_NEIGHBOR_CAP: usize = 12;
const GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP: usize = 16;
const GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP: usize = 64;
3917
3918#[derive(Debug, Serialize, PartialEq)]
3919struct TraversalTotals {
3920 nodes: usize,
3921 edges: usize,
3922}
3923
3924#[derive(Debug, Serialize, PartialEq)]
3925struct TraversalPathReport {
3926 from: TraversalNode,
3927 to: TraversalNode,
3928 hops: usize,
3929 nodes: Vec<TraversalNode>,
3930 edges: Vec<TraversalEdge>,
3931}
3932
3933#[derive(Debug, Serialize, PartialEq)]
3934struct TraversalRecommendation {
3935 handle: String,
3936 kind: String,
3937 label: String,
3938 reason: String,
3939 score: usize,
3940 expand: String,
3941}
3942
3943#[derive(Debug, Serialize, PartialEq)]
3944struct TraversalReport {
3945 root: String,
3946 #[serde(skip_serializing_if = "Option::is_none")]
3947 scope: Option<String>,
3948 mode: String,
3949 totals: TraversalTotals,
3950 #[serde(skip_serializing_if = "Option::is_none")]
3951 query: Option<String>,
3952 #[serde(skip_serializing_if = "Option::is_none")]
3953 target: Option<String>,
3954 nodes: Vec<TraversalNode>,
3955 edges: Vec<TraversalEdge>,
3956 #[serde(skip_serializing_if = "Option::is_none")]
3957 shortest_path: Option<TraversalPathReport>,
3958 recommendations: Vec<TraversalRecommendation>,
3959 exploration: ExplorationPacket,
3960 truncated: bool,
3961 #[serde(skip_serializing_if = "Vec::is_empty", default)]
3962 warnings: Vec<String>,
3963}
3964
3965#[derive(Debug, Serialize, PartialEq)]
3966struct SemanticRelatedReport {
3967 root: String,
3968 #[serde(skip_serializing_if = "Option::is_none")]
3969 scope: Option<String>,
3970 query: String,
3971 embedding_model: String,
3972 count: usize,
3973 items: Vec<SemanticRelatedItem>,
3974 #[serde(skip_serializing_if = "Vec::is_empty", default)]
3975 warnings: Vec<String>,
3976}
3977
3978#[derive(Clone, Debug, Serialize, PartialEq)]
3979struct SemanticRelatedItem {
3980 handle: String,
3981 kind: String,
3982 label: String,
3983 score: f64,
3984 #[serde(skip_serializing_if = "Option::is_none")]
3985 file_path: Option<String>,
3986 #[serde(skip_serializing_if = "Option::is_none")]
3987 source_symbol: Option<String>,
3988 #[serde(skip_serializing_if = "Option::is_none")]
3989 detail: Option<String>,
3990 expand: String,
3991}
3992
3993#[derive(Clone)]
3994struct TraversalSymbolIndexEntry {
3995 handle: String,
3996 node: TraversalNode,
3997 tokens: BTreeSet<String>,
3998}
3999
4000#[derive(Clone)]
4001struct TraversalFileIndexEntry {
4002 handle: String,
4003 node: TraversalNode,
4004 tokens: BTreeSet<String>,
4005}
4006
4007#[derive(Clone)]
4008struct TraversalRouteIndexEntry {
4009 handle: String,
4010 node: TraversalNode,
4011 tokens: BTreeSet<String>,
4012}
4013
4014struct TraversalCodeLookup<'a> {
4015 symbols: &'a [TraversalSymbolIndexEntry],
4016 files: &'a [TraversalFileIndexEntry],
4017 routes: &'a [TraversalRouteIndexEntry],
4018 symbol_index: HashMap<String, Vec<usize>>,
4019 file_index: HashMap<String, Vec<usize>>,
4020 route_index: HashMap<String, Vec<usize>>,
4021 file_path_index: HashMap<String, String>,
4022}
4023
4024#[derive(Clone, Debug, Serialize, PartialEq)]
4025struct ExplorationBudget {
4026 project_size: String,
4027 max_source_windows: usize,
4028 lines_per_window: usize,
4029 relationship_limit: usize,
4030}
4031
4032#[derive(Clone, Debug, Serialize, PartialEq)]
4033struct ExplorationRelation {
4034 from: String,
4035 relation: String,
4036 to: String,
4037 #[serde(skip_serializing_if = "Option::is_none")]
4038 label: Option<String>,
4039}
4040
4041#[derive(Clone, Debug, Serialize, PartialEq)]
4042struct ExplorationSourceWindow {
4043 handle: String,
4044 file: String,
4045 start: usize,
4046 end: usize,
4047 reason: String,
4048 expand: String,
4049}
4050
4051#[derive(Clone, Debug, Serialize, PartialEq)]
4052struct ExplorationWorkerContext {
4053 handle: String,
4054 target: String,
4055 summary: String,
4056 expand: String,
4057}
4058
4059#[derive(Clone, Debug, Serialize, PartialEq)]
4060struct ExplorationPacket {
4061 budget: ExplorationBudget,
4062 relationship_map: Vec<ExplorationRelation>,
4063 source_windows: Vec<ExplorationSourceWindow>,
4064 #[serde(skip_serializing_if = "Vec::is_empty", default)]
4065 worker_context: Vec<ExplorationWorkerContext>,
4066 no_reread_guidance: String,
4067}
4068
4069impl TraversalGraphBuild {
4070 fn add_node(&mut self, node: TraversalNode) {
4071 self.nodes.entry(node.handle.clone()).or_insert(node);
4072 }
4073
4074 fn add_edge(
4075 &mut self,
4076 from: &str,
4077 to: &str,
4078 relation: &str,
4079 label: Option<String>,
4080 weight: usize,
4081 ) {
4082 if from == to || !self.nodes.contains_key(from) || !self.nodes.contains_key(to) {
4083 return;
4084 }
4085 let key = (from.to_string(), to.to_string(), relation.to_string());
4086 if self.edge_keys.insert(key) {
4087 self.edges.push(TraversalEdge {
4088 from: from.to_string(),
4089 to: to.to_string(),
4090 relation: relation.to_string(),
4091 label,
4092 weight,
4093 });
4094 }
4095 }
4096}
4097
/// Returns the location of the graph database under `root`.
///
/// With a scope this is `.tsift/indexes/<scope>/graph.db`; without one it is
/// `.tsift/graph.db`.
pub(crate) fn graph_substrate_db_path(root: &Path, scope: Option<&str>) -> PathBuf {
    let Some(scope) = scope else {
        return root.join(".tsift/graph.db");
    };
    let mut db_path = root.join(".tsift/indexes");
    db_path.push(scope);
    db_path.push("graph.db");
    db_path
}
4104
/// Builds the id of the projection metadata node for `scope`, using `root`
/// when no scope is given.
fn graph_projection_meta_id(scope: Option<&str>) -> String {
    let scope_label = match scope {
        Some(name) => name,
        None => "root",
    };
    format!("projection:tsift-traversal:{scope_label}")
}
4108
4109fn content_hash<T: Serialize>(value: &T) -> Result<String> {
4110 let bytes = serde_json::to_vec(value)?;
4111 Ok(blake3::hash(&bytes).to_hex().to_string())
4112}
4113
4114fn node_with_content_freshness(mut node: SubstrateGraphNode) -> Result<SubstrateGraphNode> {
4115 let mut hashable = node.clone();
4116 hashable.freshness = None;
4117 node.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
4118 Ok(node)
4119}
4120
4121fn edge_with_content_freshness(mut edge: SubstrateGraphEdge) -> Result<SubstrateGraphEdge> {
4122 let mut hashable = edge.clone();
4123 hashable.freshness = None;
4124 edge.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
4125 Ok(edge)
4126}
4127
/// Number of components in a semantic embedding vector.
const SEMANTIC_EMBEDDING_DIM: usize = 32;
/// Model name recorded next to stored embeddings.
const SEMANTIC_EMBEDDING_MODEL: &str = "tsift-local-hash-v1";
4130
4131fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
4132 match kind {
4133 SemanticRelatedKind::Concept => "concept",
4134 SemanticRelatedKind::Entity => "entity",
4135 SemanticRelatedKind::All => "all",
4136 }
4137}
4138
4139fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
4140 format!(
4141 "tsift semantic {} --path {} --kind {} --limit 10",
4142 shell_quote(query),
4143 shell_quote(root.to_string_lossy().as_ref()),
4144 semantic_related_kind_name(kind)
4145 )
4146}
4147
4148fn semantic_embedding(input: &str) -> Vec<f64> {
4149 let mut vector = vec![0.0; SEMANTIC_EMBEDDING_DIM];
4150 let mut tokens = traversal_tokens(input);
4151 if tokens.is_empty() {
4152 let trimmed = input.trim().to_ascii_lowercase();
4153 if !trimmed.is_empty() {
4154 tokens.insert(trimmed);
4155 }
4156 }
4157
4158 for token in tokens {
4159 let hash = blake3::hash(token.as_bytes());
4160 let bytes = hash.as_bytes();
4161 let idx = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
4162 let sign = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
4163 vector[idx] += sign;
4164 }
4165
4166 let norm = vector.iter().map(|value| value * value).sum::<f64>().sqrt();
4167 if norm > 0.0 {
4168 for value in &mut vector {
4169 *value /= norm;
4170 }
4171 }
4172 vector
4173}
4174
4175fn semantic_embedding_property(input: &str) -> String {
4176 semantic_embedding(input)
4177 .iter()
4178 .map(|value| format!("{value:.6}"))
4179 .collect::<Vec<_>>()
4180 .join(",")
4181}
4182
4183fn parse_semantic_embedding_property(value: &str) -> Option<Vec<f64>> {
4184 let parsed = value
4185 .split(',')
4186 .map(str::trim)
4187 .map(str::parse::<f64>)
4188 .collect::<std::result::Result<Vec<_>, _>>()
4189 .ok()?;
4190 (parsed.len() == SEMANTIC_EMBEDDING_DIM).then_some(parsed)
4191}
4192
/// Returns the dot product of two vectors, or `0.0` when their lengths differ.
///
/// No normalization is performed here, so the result is a cosine similarity
/// only when both inputs are already unit-length.
fn semantic_cosine(left: &[f64], right: &[f64]) -> f64 {
    if left.len() == right.len() {
        left.iter().zip(right).map(|(a, b)| a * b).sum()
    } else {
        0.0
    }
}
4202
4203fn semantic_entity_handle(name: &str, kind: &str) -> String {
4204 stable_handle(
4205 "gent",
4206 &format!(
4207 "entity:{}:{}",
4208 kind.trim().to_ascii_lowercase(),
4209 name.trim().to_ascii_lowercase()
4210 ),
4211 )
4212}
4213
4214fn semantic_concept_handle(label: &str) -> String {
4215 stable_handle(
4216 "gcon",
4217 &format!("concept:{}", label.trim().to_ascii_lowercase()),
4218 )
4219}
4220
4221fn summary_source_handles(
4222 summary: &summarize::Summary,
4223 file_node_by_path: &BTreeMap<String, String>,
4224 symbol_node_by_file_label: &BTreeMap<(String, String), String>,
4225) -> Vec<String> {
4226 let mut handles = Vec::new();
4227 if let Some(handle) = file_node_by_path.get(&summary.file_path) {
4228 handles.push(handle.clone());
4229 }
4230 if let Some(handle) =
4231 symbol_node_by_file_label.get(&(summary.file_path.clone(), summary.symbol_name.clone()))
4232 && !handles.iter().any(|existing| existing == handle)
4233 {
4234 handles.push(handle.clone());
4235 }
4236 handles
4237}
4238
4239fn semantic_entity_node(
4240 root: &Path,
4241 summary: &summarize::Summary,
4242 name: &str,
4243 kind: &str,
4244 description: &str,
4245 provenance: &GraphProvenance,
4246) -> SubstrateGraphNode {
4247 let handle = semantic_entity_handle(name, kind);
4248 let detail = if description.trim().is_empty() {
4249 format!("{kind} entity from cached summaries")
4250 } else {
4251 format!("{kind}: {description}")
4252 };
4253 SubstrateGraphNode::new(handle.clone(), "semantic_entity", name.to_string())
4254 .with_property("handle", handle)
4255 .with_property("ref_id", name.to_string())
4256 .with_property("detail", detail)
4257 .with_property("entity_kind", kind.to_string())
4258 .with_property("description", description.to_string())
4259 .with_property("source_file", summary.file_path.clone())
4260 .with_property("source_symbol", summary.symbol_name.clone())
4261 .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
4262 .with_property(
4263 "embedding",
4264 semantic_embedding_property(&format!("{name} {kind} {description}")),
4265 )
4266 .with_property(
4267 "expand",
4268 semantic_related_command(root, name, SemanticRelatedKind::Entity),
4269 )
4270 .with_provenance(provenance.clone())
4271}
4272
4273fn semantic_concept_node(
4274 root: &Path,
4275 summary: &summarize::Summary,
4276 label: &str,
4277 provenance: &GraphProvenance,
4278) -> SubstrateGraphNode {
4279 let handle = semantic_concept_handle(label);
4280 SubstrateGraphNode::new(handle.clone(), "semantic_concept", label.to_string())
4281 .with_property("handle", handle)
4282 .with_property("ref_id", label.to_string())
4283 .with_property("detail", "concept label from cached summaries".to_string())
4284 .with_property("source_file", summary.file_path.clone())
4285 .with_property("source_symbol", summary.symbol_name.clone())
4286 .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
4287 .with_property("embedding", semantic_embedding_property(label))
4288 .with_property(
4289 "expand",
4290 semantic_related_command(root, label, SemanticRelatedKind::Concept),
4291 )
4292 .with_provenance(provenance.clone())
4293}
4294
4295fn insert_semantic_edge(
4296 edge_map: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
4297 edge: SubstrateGraphEdge,
4298) {
4299 edge_map
4300 .entry((edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone()))
4301 .or_insert(edge);
4302}
4303
4304fn append_summary_semantic_projection_rows(
4305 root: &Path,
4306 graph: &TraversalGraphBuild,
4307 provenance: &GraphProvenance,
4308 nodes: &mut Vec<SubstrateGraphNode>,
4309 edges: &mut Vec<SubstrateGraphEdge>,
4310) -> Result<()> {
4311 let summaries_db = root.join(".tsift/summaries.db");
4312 if !summaries_db.exists() {
4313 return Ok(());
4314 }
4315
4316 let summary_db = summarize::SummaryDb::open_read_only_resilient(&summaries_db)?;
4317 let summaries = summary_db.all()?;
4318 if summaries.is_empty() {
4319 return Ok(());
4320 }
4321
4322 let file_node_by_path = graph
4323 .nodes
4324 .values()
4325 .filter(|node| node.kind == "file")
4326 .filter_map(|node| {
4327 node.path
4328 .as_ref()
4329 .map(|path| (path.clone(), node.handle.clone()))
4330 })
4331 .collect::<BTreeMap<_, _>>();
4332 let symbol_node_by_file_label = graph
4333 .nodes
4334 .values()
4335 .filter(|node| node.kind == "symbol")
4336 .filter_map(|node| {
4337 Some((
4338 (node.path.clone()?, node.label.clone()),
4339 node.handle.clone(),
4340 ))
4341 })
4342 .collect::<BTreeMap<_, _>>();
4343
4344 let mut semantic_nodes = BTreeMap::<String, SubstrateGraphNode>::new();
4345 let mut semantic_edges = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
4346
4347 for summary in &summaries {
4348 let source_handles =
4349 summary_source_handles(summary, &file_node_by_path, &symbol_node_by_file_label);
4350 let mut entity_ids_by_name = BTreeMap::<String, String>::new();
4351
4352 if let Some(entities) = &summary.entities {
4353 for entity in entities {
4354 let node = semantic_entity_node(
4355 root,
4356 summary,
4357 &entity.name,
4358 &entity.kind,
4359 &entity.description,
4360 provenance,
4361 );
4362 let entity_id = node.id.clone();
4363 entity_ids_by_name.insert(entity.name.to_ascii_lowercase(), entity_id.clone());
4364 semantic_nodes.entry(entity_id.clone()).or_insert(node);
4365
4366 for source_handle in &source_handles {
4367 insert_semantic_edge(
4368 &mut semantic_edges,
4369 SubstrateGraphEdge::new(
4370 source_handle.clone(),
4371 entity_id.clone(),
4372 "mentions_entity",
4373 )
4374 .with_property("label", format!("summary entity: {}", entity.name))
4375 .with_property("source_file", summary.file_path.clone())
4376 .with_provenance(provenance.clone()),
4377 );
4378 }
4379 }
4380 }
4381
4382 let mut concept_ids = Vec::new();
4383 if let Some(labels) = &summary.concept_labels {
4384 for label in labels
4385 .iter()
4386 .map(|label| label.trim())
4387 .filter(|label| !label.is_empty())
4388 {
4389 let node = semantic_concept_node(root, summary, label, provenance);
4390 let concept_id = node.id.clone();
4391 semantic_nodes.entry(concept_id.clone()).or_insert(node);
4392 concept_ids.push(concept_id.clone());
4393
4394 for source_handle in &source_handles {
4395 insert_semantic_edge(
4396 &mut semantic_edges,
4397 SubstrateGraphEdge::new(
4398 source_handle.clone(),
4399 concept_id.clone(),
4400 "mentions_concept",
4401 )
4402 .with_property("label", format!("summary concept: {label}"))
4403 .with_property("source_file", summary.file_path.clone())
4404 .with_provenance(provenance.clone()),
4405 );
4406 }
4407 }
4408 }
4409
4410 for entity_id in entity_ids_by_name.values() {
4411 for concept_id in &concept_ids {
4412 insert_semantic_edge(
4413 &mut semantic_edges,
4414 SubstrateGraphEdge::new(
4415 entity_id.clone(),
4416 concept_id.clone(),
4417 "tagged_concept",
4418 )
4419 .with_property("label", "entity concept label".to_string())
4420 .with_property("source_file", summary.file_path.clone())
4421 .with_provenance(provenance.clone()),
4422 );
4423 }
4424 }
4425
4426 for idx in 0..concept_ids.len() {
4427 for next_idx in (idx + 1)..concept_ids.len() {
4428 insert_semantic_edge(
4429 &mut semantic_edges,
4430 SubstrateGraphEdge::new(
4431 concept_ids[idx].clone(),
4432 concept_ids[next_idx].clone(),
4433 "related_concept",
4434 )
4435 .with_property("label", format!("co-occurs in {}", summary.symbol_name))
4436 .with_property("source_file", summary.file_path.clone())
4437 .with_provenance(provenance.clone()),
4438 );
4439 }
4440 }
4441
4442 if let Some(relationships) = &summary.relationships {
4443 for relationship in relationships {
4444 let from_id = entity_ids_by_name
4445 .get(&relationship.from.to_ascii_lowercase())
4446 .cloned()
4447 .unwrap_or_else(|| {
4448 let node = semantic_entity_node(
4449 root,
4450 summary,
4451 &relationship.from,
4452 "unknown",
4453 "",
4454 provenance,
4455 );
4456 let id = node.id.clone();
4457 semantic_nodes.entry(id.clone()).or_insert(node);
4458 id
4459 });
4460 let to_id = entity_ids_by_name
4461 .get(&relationship.to.to_ascii_lowercase())
4462 .cloned()
4463 .unwrap_or_else(|| {
4464 let node = semantic_entity_node(
4465 root,
4466 summary,
4467 &relationship.to,
4468 "unknown",
4469 "",
4470 provenance,
4471 );
4472 let id = node.id.clone();
4473 semantic_nodes.entry(id.clone()).or_insert(node);
4474 id
4475 });
4476 insert_semantic_edge(
4477 &mut semantic_edges,
4478 SubstrateGraphEdge::new(from_id, to_id, "semantic_relation")
4479 .with_property("relationship_kind", relationship.kind.clone())
4480 .with_property("label", relationship.kind.clone())
4481 .with_property("source_file", summary.file_path.clone())
4482 .with_property("source_symbol", summary.symbol_name.clone())
4483 .with_provenance(provenance.clone()),
4484 );
4485 }
4486 }
4487 }
4488
4489 for node in semantic_nodes.into_values() {
4490 nodes.push(node_with_content_freshness(node)?);
4491 }
4492 for edge in semantic_edges.into_values() {
4493 edges.push(edge_with_content_freshness(edge)?);
4494 }
4495
4496 Ok(())
4497}
4498
4499fn projection_content_hash(
4500 nodes: &[SubstrateGraphNode],
4501 edges: &[SubstrateGraphEdge],
4502) -> Result<String> {
4503 #[derive(Serialize)]
4504 struct Payload<'a> {
4505 version: &'static str,
4506 nodes: &'a [SubstrateGraphNode],
4507 edges: &'a [SubstrateGraphEdge],
4508 }
4509
4510 content_hash(&Payload {
4511 version: GRAPH_PROJECTION_VERSION,
4512 nodes,
4513 edges,
4514 })
4515}
4516
4517pub(crate) fn graph_projection_content_hash(projection: &GraphProjection) -> Option<String> {
4518 projection
4519 .nodes
4520 .iter()
4521 .find(|node| node.kind == GRAPH_PROJECTION_META_KIND)
4522 .and_then(|node| node.properties.get("content_hash").cloned())
4523}
4524
4525fn traversal_projection_from_graph(
4526 root: &Path,
4527 scope: Option<&str>,
4528 graph: &TraversalGraphBuild,
4529) -> Result<GraphProjection> {
4530 let provenance = GraphProvenance::new(
4531 "tsift.traverse",
4532 format!("{}:{}", root.display(), scope.unwrap_or("root")),
4533 );
4534 let mut nodes = Vec::with_capacity(graph.nodes.len() + 1);
4535 for node in graph.nodes.values() {
4536 let mut projected =
4537 SubstrateGraphNode::new(node.handle.clone(), node.kind.clone(), node.label.clone())
4538 .with_property("handle", node.handle.clone())
4539 .with_property("expand", node.expand.clone())
4540 .with_provenance(provenance.clone());
4541 if let Some(ref_id) = &node.ref_id {
4542 projected = projected.with_property("ref_id", ref_id.clone());
4543 }
4544 if let Some(path) = &node.path {
4545 projected = projected.with_property("path", path.clone());
4546 }
4547 if let Some(line) = node.line {
4548 projected = projected.with_property("line", line.to_string());
4549 }
4550 if let Some(detail) = &node.detail {
4551 projected = projected.with_property("detail", detail.clone());
4552 }
4553 for (key, value) in &node.properties {
4554 projected = projected.with_property(key.clone(), value.clone());
4555 }
4556 nodes.push(node_with_content_freshness(projected)?);
4557 }
4558
4559 let mut edges = Vec::with_capacity(graph.edges.len());
4560 for edge in &graph.edges {
4561 let mut projected =
4562 SubstrateGraphEdge::new(edge.from.clone(), edge.to.clone(), edge.relation.clone())
4563 .with_property("weight", edge.weight.to_string())
4564 .with_provenance(provenance.clone());
4565 if let Some(label) = &edge.label {
4566 projected = projected.with_property("label", label.clone());
4567 }
4568 edges.push(edge_with_content_freshness(projected)?);
4569 }
4570
4571 append_traversal_context_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
4572 append_summary_semantic_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
4573
4574 let projection_hash = projection_content_hash(&nodes, &edges)?;
4575 let meta = SubstrateGraphNode::new(
4576 graph_projection_meta_id(scope),
4577 GRAPH_PROJECTION_META_KIND,
4578 "tsift traversal projection",
4579 )
4580 .with_property("projection_version", GRAPH_PROJECTION_VERSION)
4581 .with_property("content_hash", projection_hash.clone())
4582 .with_property("root", root.to_string_lossy().to_string())
4583 .with_property("scope", scope.unwrap_or("root"))
4584 .with_property("node_count", graph.nodes.len().to_string())
4585 .with_property("edge_count", graph.edges.len().to_string())
4586 .with_provenance(provenance)
4587 .with_freshness(GraphFreshness::content_hash(projection_hash));
4588 nodes.push(meta);
4589
4590 Ok(GraphProjection { nodes, edges })
4591}
4592
4593#[allow(clippy::too_many_arguments)]
4594fn ensure_traversal_source_handle(
4595 root: &Path,
4596 provenance: &GraphProvenance,
4597 file_node_by_path: &BTreeMap<String, String>,
4598 node: &TraversalNode,
4599 budget: &ExplorationBudget,
4600 source_handle_by_node: &mut BTreeMap<String, String>,
4601 seen_windows: &mut BTreeMap<(String, usize, usize), String>,
4602 nodes: &mut Vec<SubstrateGraphNode>,
4603 edges: &mut Vec<SubstrateGraphEdge>,
4604) -> Result<Option<String>> {
4605 if let Some(handle) = source_handle_by_node.get(&node.handle) {
4606 return Ok(Some(handle.clone()));
4607 }
4608 let Some(window) = exploration_source_window_for_node(root, node, budget) else {
4609 return Ok(None);
4610 };
4611 let window_key = (window.file.clone(), window.start, window.end);
4612 let handle = if let Some(handle) = seen_windows.get(&window_key) {
4613 handle.clone()
4614 } else {
4615 let label = format!("{}:{}-{}", window.file, window.start, window.end);
4616 let projected = SubstrateGraphNode::new(window.handle.clone(), "source_handle", label)
4617 .with_property("handle", window.handle.clone())
4618 .with_property("file", window.file.clone())
4619 .with_property("start", window.start.to_string())
4620 .with_property("end", window.end.to_string())
4621 .with_property("reason", window.reason.clone())
4622 .with_property("expand", window.expand.clone())
4623 .with_provenance(provenance.clone());
4624 nodes.push(node_with_content_freshness(projected)?);
4625
4626 if let Some(file_handle) = file_node_by_path.get(&window.file) {
4627 let edge = SubstrateGraphEdge::new(
4628 window.handle.clone(),
4629 file_handle.clone(),
4630 "expands_source",
4631 )
4632 .with_property("label", window.reason.clone())
4633 .with_provenance(provenance.clone());
4634 edges.push(edge_with_content_freshness(edge)?);
4635 }
4636 if node.kind != "file" {
4637 let edge = SubstrateGraphEdge::new(
4638 window.handle.clone(),
4639 node.handle.clone(),
4640 "anchors_source",
4641 )
4642 .with_property("label", window.reason.clone())
4643 .with_provenance(provenance.clone());
4644 edges.push(edge_with_content_freshness(edge)?);
4645 }
4646 seen_windows.insert(window_key, window.handle.clone());
4647 window.handle
4648 };
4649 source_handle_by_node.insert(node.handle.clone(), handle.clone());
4650 Ok(Some(handle))
4651}
4652
4653fn push_traversal_backlog_target_handles<'a>(
4654 backlog: &TraversalNode,
4655 edges_by_from: &BTreeMap<&'a str, Vec<&'a TraversalEdge>>,
4656 node_by_handle: &BTreeMap<&'a str, &'a TraversalNode>,
4657 max_handles: usize,
4658 seen_target_nodes: &mut BTreeSet<String>,
4659 target_node_handles: &mut Vec<String>,
4660) {
4661 for edge in edges_by_from
4662 .get(backlog.handle.as_str())
4663 .into_iter()
4664 .flatten()
4665 .filter(|edge| edge.relation == "mentions")
4666 {
4667 let Some(target_node) = node_by_handle.get(edge.to.as_str()) else {
4668 continue;
4669 };
4670 if !matches!(target_node.kind.as_str(), "file" | "symbol" | "route") {
4671 continue;
4672 }
4673 if target_node
4674 .path
4675 .as_deref()
4676 .zip(backlog.path.as_deref())
4677 .is_some_and(|(target_path, backlog_path)| {
4678 target_path == backlog_path && target_path.ends_with(".md")
4679 })
4680 {
4681 continue;
4682 }
4683 if seen_target_nodes.insert(target_node.handle.clone()) {
4684 target_node_handles.push(target_node.handle.clone());
4685 }
4686 if target_node_handles.len() >= max_handles {
4687 break;
4688 }
4689 }
4690}
4691
4692fn append_traversal_context_projection_rows(
4693 root: &Path,
4694 graph: &TraversalGraphBuild,
4695 provenance: &GraphProvenance,
4696 nodes: &mut Vec<SubstrateGraphNode>,
4697 edges: &mut Vec<SubstrateGraphEdge>,
4698) -> Result<()> {
4699 let budget = exploration_budget_for_counts(graph.nodes.len(), graph.edges.len());
4700 let file_node_by_path = graph
4701 .nodes
4702 .values()
4703 .filter(|node| node.kind == "file")
4704 .filter_map(|node| {
4705 node.path
4706 .as_ref()
4707 .map(|path| (path.clone(), node.handle.clone()))
4708 })
4709 .collect::<BTreeMap<_, _>>();
4710
4711 let node_by_handle = graph
4712 .nodes
4713 .values()
4714 .map(|node| (node.handle.as_str(), node))
4715 .collect::<BTreeMap<_, _>>();
4716 let mut edges_by_from = BTreeMap::<&str, Vec<&TraversalEdge>>::new();
4717 for edge in &graph.edges {
4718 edges_by_from
4719 .entry(edge.from.as_str())
4720 .or_default()
4721 .push(edge);
4722 }
4723 for rows in edges_by_from.values_mut() {
4724 rows.sort_by(|left, right| {
4725 right
4726 .weight
4727 .cmp(&left.weight)
4728 .then(left.relation.cmp(&right.relation))
4729 .then(left.to.cmp(&right.to))
4730 });
4731 }
4732
4733 let mut seen_windows = BTreeMap::<(String, usize, usize), String>::new();
4734 let mut source_handle_by_node = BTreeMap::<String, String>::new();
4735
4736 let mut code_context_count = 0usize;
4737 let code_context_limit = budget.relationship_limit.min(8);
4738 for node in graph.nodes.values() {
4739 if !matches!(
4740 node.kind.as_str(),
4741 "backlog" | "job_packet" | "worker_result"
4742 ) {
4743 continue;
4744 }
4745 let mut target_node_handles = Vec::new();
4746 let mut fallback_target_handles = Vec::new();
4747 let mut seen_target_nodes = BTreeSet::new();
4748 if node.kind == "backlog" || node.kind == "worker_result" {
4749 push_traversal_backlog_target_handles(
4750 node,
4751 &edges_by_from,
4752 &node_by_handle,
4753 budget.max_source_windows,
4754 &mut seen_target_nodes,
4755 &mut target_node_handles,
4756 );
4757 fallback_target_handles.push(node.handle.clone());
4758 } else {
4759 for edge in edges_by_from
4760 .get(node.handle.as_str())
4761 .into_iter()
4762 .flatten()
4763 .filter(|edge| edge.relation == "targets")
4764 {
4765 let Some(backlog) = node_by_handle.get(edge.to.as_str()) else {
4766 continue;
4767 };
4768 fallback_target_handles.push(backlog.handle.clone());
4769 push_traversal_backlog_target_handles(
4770 backlog,
4771 &edges_by_from,
4772 &node_by_handle,
4773 budget.max_source_windows,
4774 &mut seen_target_nodes,
4775 &mut target_node_handles,
4776 );
4777 if target_node_handles.len() >= budget.max_source_windows {
4778 break;
4779 }
4780 }
4781 if fallback_target_handles.is_empty() {
4782 continue;
4783 }
4784 }
4785 let code_context = !target_node_handles.is_empty();
4786 if target_node_handles.is_empty() {
4787 target_node_handles = dedupe_preserve_order(fallback_target_handles);
4788 } else if code_context_count >= code_context_limit {
4789 continue;
4790 }
4791
4792 let mut worker_source_handles = Vec::new();
4793 let mut seen_worker_handles = BTreeSet::new();
4794 for target_handle in target_node_handles {
4795 if worker_source_handles.len() >= budget.max_source_windows {
4796 break;
4797 }
4798 let Some(target_node) = node_by_handle.get(target_handle.as_str()) else {
4799 continue;
4800 };
4801 let Some(handle) = ensure_traversal_source_handle(
4802 root,
4803 provenance,
4804 &file_node_by_path,
4805 target_node,
4806 &budget,
4807 &mut source_handle_by_node,
4808 &mut seen_windows,
4809 nodes,
4810 edges,
4811 )?
4812 else {
4813 continue;
4814 };
4815 if seen_worker_handles.insert(handle.clone()) {
4816 worker_source_handles.push(handle);
4817 }
4818 }
4819 if worker_source_handles.is_empty() {
4820 continue;
4821 }
4822 let target = node
4823 .path
4824 .clone()
4825 .unwrap_or_else(|| root.to_string_lossy().to_string());
4826 let summary = node.detail.clone().unwrap_or_else(|| node.label.clone());
4827 let handle = stable_handle("xwrk", &format!("{}:{}:{}", target, node.handle, summary));
4828 let projected = SubstrateGraphNode::new(handle.clone(), "worker_context", summary.clone())
4829 .with_property("handle", handle.clone())
4830 .with_property("target", target.clone())
4831 .with_property("summary", summary)
4832 .with_property(
4833 "source_handle_count",
4834 worker_source_handles.len().to_string(),
4835 )
4836 .with_property(
4837 "expand",
4838 format!(
4839 "tsift --envelope context-pack {} --budget normal",
4840 shell_quote(&target)
4841 ),
4842 )
4843 .with_provenance(provenance.clone());
4844 nodes.push(node_with_content_freshness(projected)?);
4845
4846 let request_edge =
4847 SubstrateGraphEdge::new(node.handle.clone(), handle.clone(), "requests_context")
4848 .with_property("label", "bounded worker context".to_string())
4849 .with_provenance(provenance.clone());
4850 edges.push(edge_with_content_freshness(request_edge)?);
4851
4852 for source_handle in &worker_source_handles {
4853 let scope_edge =
4854 SubstrateGraphEdge::new(handle.clone(), source_handle.clone(), "scopes_source")
4855 .with_property("label", "bounded worker source window".to_string())
4856 .with_provenance(provenance.clone());
4857 edges.push(edge_with_content_freshness(scope_edge)?);
4858 }
4859 if code_context {
4860 code_context_count += 1;
4861 }
4862 }
4863
4864 Ok(())
4865}
4866
4867fn traversal_node_from_graph_node(root: &Path, node: SubstrateGraphNode) -> TraversalNode {
4868 let handle = node
4869 .properties
4870 .get("handle")
4871 .cloned()
4872 .unwrap_or_else(|| node.id.clone());
4873 TraversalNode {
4874 expand: node
4875 .properties
4876 .get("expand")
4877 .cloned()
4878 .unwrap_or_else(|| traversal_expand_command(root, &handle)),
4879 handle,
4880 kind: node.kind,
4881 label: node.label,
4882 ref_id: node.properties.get("ref_id").cloned(),
4883 path: node.properties.get("path").cloned(),
4884 line: node
4885 .properties
4886 .get("line")
4887 .and_then(|value| value.parse::<i64>().ok()),
4888 detail: node.properties.get("detail").cloned(),
4889 properties: node.properties,
4890 }
4891}
4892
4893fn traversal_graph_from_store(root: &Path, store: &impl GraphStore) -> Result<TraversalGraphBuild> {
4894 let mut graph = TraversalGraphBuild::default();
4895 for node in store.all_nodes()? {
4896 if node.kind == GRAPH_PROJECTION_META_KIND {
4897 continue;
4898 }
4899 graph.add_node(traversal_node_from_graph_node(root, node));
4900 }
4901 for edge in store.all_edges()? {
4902 graph.add_edge(
4903 &edge.from_id,
4904 &edge.to_id,
4905 &edge.kind,
4906 edge.properties.get("label").cloned(),
4907 edge.properties
4908 .get("weight")
4909 .and_then(|value| value.parse::<usize>().ok())
4910 .unwrap_or(1),
4911 );
4912 }
4913 Ok(graph)
4914}
4915
4916pub(crate) fn convex_rows_from_graph_store(
4917 store: &impl GraphStore,
4918) -> Result<ConvexProjectionRows> {
4919 Ok(GraphProjection {
4920 nodes: store.all_nodes()?,
4921 edges: store.all_edges()?,
4922 }
4923 .to_convex_rows())
4924}
4925
4926#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
4927struct ConvexRequiredIndex {
4928 table: String,
4929 name: String,
4930 fields: Vec<String>,
4931}
4932
4933#[derive(Clone, Debug, Serialize, PartialEq)]
4934struct ConvexSyncChunk {
4935 operation: String,
4936 chunk: usize,
4937 count: usize,
4938 keys: Vec<String>,
4939 max_attempts: usize,
4940 retry_policy: String,
4941}
4942
4943#[derive(Clone, Debug, Serialize, PartialEq)]
4944struct ConvexTransportSummary {
4945 endpoint_env: String,
4946 endpoint_configured: bool,
4947 auth_token_env: String,
4948 auth_configured: bool,
4949 remote_snapshot: bool,
4950 applied_chunks: usize,
4951}
4952
4953#[derive(Clone, Debug, Serialize, PartialEq)]
4954struct ConvexTransportReceipt {
4955 operation: String,
4956 chunk: usize,
4957 attempt: usize,
4958 status: String,
4959 message: Option<String>,
4960}
4961
4962#[derive(Serialize)]
4963#[serde(rename_all = "camelCase")]
4964struct ConvexTransportRequest<'a> {
4965 operation: &'a str,
4966 chunk: usize,
4967 projection_version: &'a str,
4968 projection_hash: Option<&'a str>,
4969 #[serde(skip_serializing_if = "Option::is_none")]
4970 projection_meta_id: Option<&'a str>,
4971 node_rows: Vec<ConvexNodeRow>,
4972 edge_rows: Vec<ConvexEdgeRow>,
4973 keys: Vec<String>,
4974 #[serde(skip_serializing_if = "Option::is_none")]
4975 cursor: Option<String>,
4976 #[serde(skip_serializing_if = "Option::is_none")]
4977 limit: Option<usize>,
4978}
4979
4980#[derive(Deserialize)]
4981#[serde(rename_all = "camelCase")]
4982struct ConvexTransportResponse {
4983 status: Option<String>,
4984 message: Option<String>,
4985 rows: Option<ConvexProjectionRows>,
4986 #[serde(default)]
4987 meta: Option<ConvexSnapshotMeta>,
4988 #[serde(default)]
4989 page: Option<ConvexSnapshotPage>,
4990}
4991
4992#[derive(Deserialize, Debug, Clone)]
4993#[serde(rename_all = "camelCase")]
4994struct ConvexSnapshotMeta {
4995 #[serde(default)]
4999 #[allow(dead_code)]
5000 indexes: Vec<ConvexRequiredIndex>,
5001 #[serde(default)]
5002 #[allow(dead_code)]
5003 node_count: Option<usize>,
5004 #[serde(default)]
5005 #[allow(dead_code)]
5006 edge_count: Option<usize>,
5007 #[serde(default)]
5008 projection_hash: Option<String>,
5009 #[serde(default)]
5010 #[allow(dead_code)]
5011 page_size: Option<usize>,
5012}
5013
5014#[derive(Deserialize, Debug, Clone)]
5019#[serde(rename_all = "camelCase")]
5020struct ConvexSnapshotPage {
5021 rows: Vec<serde_json::Value>,
5022 #[serde(default)]
5023 next_cursor: Option<String>,
5024}
5025
5026#[derive(Clone, Debug, Serialize, PartialEq)]
5027struct ConvexProjectionFreshness {
5028 status: String,
5029 fail_closed: bool,
5030 local_hash: Option<String>,
5031 snapshot_hash: Option<String>,
5032 missing_nodes: Vec<String>,
5033 stale_nodes: Vec<String>,
5034 missing_edges: Vec<String>,
5035 stale_edges: Vec<String>,
5036 diagnostics: Vec<String>,
5037}
5038
/// Environment variable consulted for the Convex graph endpoint when no
/// explicit endpoint is supplied.
const DEFAULT_CONVEX_GRAPH_URL_ENV: &str = "TSIFT_CONVEX_GRAPH_URL";
5040
5041impl ConvexProjectionFreshness {
5042 fn current(local_hash: Option<String>, snapshot_hash: Option<String>) -> Self {
5043 Self {
5044 status: "current".to_string(),
5045 fail_closed: false,
5046 local_hash,
5047 snapshot_hash,
5048 missing_nodes: Vec::new(),
5049 stale_nodes: Vec::new(),
5050 missing_edges: Vec::new(),
5051 stale_edges: Vec::new(),
5052 diagnostics: Vec::new(),
5053 }
5054 }
5055}
5056
5057#[derive(Clone, Debug, Serialize, PartialEq)]
5058struct ConvexSyncReport {
5059 root: String,
5060 #[serde(skip_serializing_if = "Option::is_none")]
5061 scope: Option<String>,
5062 graph_db: String,
5063 dry_run: bool,
5064 projection_version: String,
5065 projection_hash: Option<String>,
5066 required_indexes: Vec<ConvexRequiredIndex>,
5067 node_upserts: Vec<ConvexNodeRow>,
5068 edge_upserts: Vec<ConvexEdgeRow>,
5069 node_tombstones: Vec<String>,
5070 edge_tombstones: Vec<String>,
5071 chunks: Vec<ConvexSyncChunk>,
5072 freshness: ConvexProjectionFreshness,
5073 transport: Option<ConvexTransportSummary>,
5074 receipts: Vec<ConvexTransportReceipt>,
5075 diagnostics: Vec<String>,
5076 warnings: Vec<String>,
5077}
5078
5079fn convex_required_indexes() -> Vec<ConvexRequiredIndex> {
5080 vec![
5081 ConvexRequiredIndex {
5082 table: "nodes".to_string(),
5083 name: "by_external_id".to_string(),
5084 fields: vec!["externalId".to_string()],
5085 },
5086 ConvexRequiredIndex {
5087 table: "nodes".to_string(),
5088 name: "by_kind".to_string(),
5089 fields: vec!["kind".to_string()],
5090 },
5091 ConvexRequiredIndex {
5092 table: "edges".to_string(),
5093 name: "by_edge_key".to_string(),
5094 fields: vec!["edgeKey".to_string()],
5095 },
5096 ConvexRequiredIndex {
5097 table: "edges".to_string(),
5098 name: "by_from_kind".to_string(),
5099 fields: vec!["fromExternalId".to_string(), "kind".to_string()],
5100 },
5101 ConvexRequiredIndex {
5102 table: "edges".to_string(),
5103 name: "by_to_kind".to_string(),
5104 fields: vec!["toExternalId".to_string(), "kind".to_string()],
5105 },
5106 ]
5107}
5108
5109pub(crate) fn load_convex_projection_rows(path: &Path) -> Result<ConvexProjectionRows> {
5110 let content = fs::read_to_string(path)
5111 .with_context(|| format!("reading Convex projection snapshot {}", path.display()))?;
5112 serde_json::from_str(&content)
5113 .with_context(|| format!("parsing Convex projection snapshot {}", path.display()))
5114}
5115
5116fn convex_projection_row_diagnostics(rows: &ConvexProjectionRows) -> Vec<String> {
5117 let mut diagnostics = Vec::new();
5118 let mut node_counts = BTreeMap::<&str, usize>::new();
5119 for row in &rows.nodes {
5120 *node_counts.entry(row.external_id.as_str()).or_default() += 1;
5121 }
5122 for (external_id, count) in node_counts.iter().filter(|(_, count)| **count > 1) {
5123 diagnostics.push(format!(
5124 "Convex snapshot contains duplicate node externalId {external_id} ({count} rows)"
5125 ));
5126 }
5127
5128 let node_ids = node_counts.keys().copied().collect::<BTreeSet<_>>();
5129 let mut edge_counts = BTreeMap::<&str, usize>::new();
5130 for edge in &rows.edges {
5131 *edge_counts.entry(edge.edge_key.as_str()).or_default() += 1;
5132 if !node_ids.contains(edge.from_external_id.as_str()) {
5133 diagnostics.push(format!(
5134 "Convex snapshot edge {} references missing from node {}",
5135 edge.edge_key, edge.from_external_id
5136 ));
5137 }
5138 if !node_ids.contains(edge.to_external_id.as_str()) {
5139 diagnostics.push(format!(
5140 "Convex snapshot edge {} references missing to node {}",
5141 edge.edge_key, edge.to_external_id
5142 ));
5143 }
5144 let expected_key =
5145 ConvexEdgeRow::stable_key(&edge.from_external_id, &edge.to_external_id, &edge.kind);
5146 if edge.edge_key != expected_key {
5147 diagnostics.push(format!(
5148 "Convex snapshot edge {} has non-canonical key; expected {} for ({}, {}, {})",
5149 edge.edge_key, expected_key, edge.from_external_id, edge.kind, edge.to_external_id
5150 ));
5151 }
5152 }
5153 for (edge_key, count) in edge_counts.iter().filter(|(_, count)| **count > 1) {
5154 diagnostics.push(format!(
5155 "Convex snapshot contains duplicate edgeKey {edge_key} ({count} rows)"
5156 ));
5157 }
5158 diagnostics
5159}
5160
5161pub(crate) fn validate_convex_projection_rows(rows: &ConvexProjectionRows) -> Result<()> {
5162 let diagnostics = convex_projection_row_diagnostics(rows);
5163 if diagnostics.is_empty() {
5164 Ok(())
5165 } else {
5166 bail!("{}", diagnostics.join("; "))
5167 }
5168}
5169
/// HTTP client configuration for the Convex graph transport endpoint.
pub(crate) struct ConvexHttpTransport {
    /// URL every request is posted to.
    endpoint: String,
    /// Name of the environment variable the auth token was read from.
    auth_token_env: String,
    /// Bearer token, when the environment variable held a non-blank value.
    auth_token: Option<String>,
}
5175
5176impl ConvexHttpTransport {
5177 fn from_options(endpoint: Option<&str>, auth_token_env: &str) -> Result<Self> {
5178 let endpoint = endpoint
5179 .map(str::to_string)
5180 .or_else(|| env::var(DEFAULT_CONVEX_GRAPH_URL_ENV).ok())
5181 .context("Convex transport requires --endpoint or TSIFT_CONVEX_GRAPH_URL")?;
5182 let auth_token = env::var(auth_token_env)
5183 .ok()
5184 .filter(|value| !value.trim().is_empty());
5185 Ok(Self {
5186 endpoint,
5187 auth_token_env: auth_token_env.to_string(),
5188 auth_token,
5189 })
5190 }
5191
5192 fn summary(&self, remote_snapshot: bool, applied_chunks: usize) -> ConvexTransportSummary {
5193 ConvexTransportSummary {
5194 endpoint_env: DEFAULT_CONVEX_GRAPH_URL_ENV.to_string(),
5195 endpoint_configured: true,
5196 auth_token_env: self.auth_token_env.clone(),
5197 auth_configured: self.auth_token.is_some(),
5198 remote_snapshot,
5199 applied_chunks,
5200 }
5201 }
5202
5203 fn post(&self, request: &ConvexTransportRequest<'_>) -> Result<ConvexTransportResponse> {
5204 let mut builder = ureq::post(&self.endpoint);
5205 if let Some(token) = &self.auth_token {
5206 builder = builder.header("Authorization", &format!("Bearer {token}"));
5207 }
5208 builder
5209 .send_json(request)
5210 .with_context(|| format!("calling Convex graph transport {}", self.endpoint))?
5211 .body_mut()
5212 .read_json::<ConvexTransportResponse>()
5213 .with_context(|| format!("parsing Convex graph transport response {}", self.endpoint))
5214 }
5215
5216 fn fetch_snapshot(
5227 &self,
5228 projection_version: &str,
5229 scope: Option<&str>,
5230 local_hash: Option<&str>,
5231 local_rows: Option<&ConvexProjectionRows>,
5232 ) -> Result<(ConvexProjectionRows, Vec<String>)> {
5233 match self.fetch_snapshot_paginated(projection_version, scope, local_hash, local_rows) {
5234 Ok(rows) => Ok(rows),
5235 Err(err) => {
5236 let msg = format!("{err:#}");
5241 let is_unknown_op = msg.contains("unknown operation")
5242 || msg.contains("snapshot_meta")
5243 || msg.contains("404");
5244 if !is_unknown_op {
5245 return Err(err);
5246 }
5247 self.fetch_snapshot_legacy(projection_version)
5248 .map(|rows| (rows, Vec::new()))
5249 }
5250 }
5251 }
5252
5253 fn fetch_snapshot_legacy(&self, projection_version: &str) -> Result<ConvexProjectionRows> {
5254 let response = self.post(&ConvexTransportRequest {
5255 operation: "snapshot",
5256 chunk: 0,
5257 projection_version,
5258 projection_hash: None,
5259 projection_meta_id: None,
5260 node_rows: Vec::new(),
5261 edge_rows: Vec::new(),
5262 keys: Vec::new(),
5263 cursor: None,
5264 limit: None,
5265 })?;
5266 response
5267 .rows
5268 .context("Convex snapshot response did not include rows")
5269 }
5270
5271 fn fetch_snapshot_paginated(
5272 &self,
5273 projection_version: &str,
5274 scope: Option<&str>,
5275 local_hash: Option<&str>,
5276 local_rows: Option<&ConvexProjectionRows>,
5277 ) -> Result<(ConvexProjectionRows, Vec<String>)> {
5278 let projection_meta_id = graph_projection_meta_id(scope);
5279 let meta_response = self.post(&ConvexTransportRequest {
5280 operation: "snapshot_meta",
5281 chunk: 0,
5282 projection_version,
5283 projection_hash: None,
5284 projection_meta_id: Some(&projection_meta_id),
5285 node_rows: Vec::new(),
5286 edge_rows: Vec::new(),
5287 keys: Vec::new(),
5288 cursor: None,
5289 limit: None,
5290 })?;
5291 if matches!(meta_response.status.as_deref(), Some("error")) {
5292 anyhow::bail!(
5293 "Convex snapshot_meta returned error: {}",
5294 meta_response.message.unwrap_or_default()
5295 );
5296 }
5297 let meta = meta_response
5298 .meta
5299 .context("Convex snapshot_meta response did not include meta")?;
5300 if let (Some(remote_hash), Some(local_hash), Some(local_rows)) =
5301 (meta.projection_hash.as_deref(), local_hash, local_rows)
5302 && remote_hash == local_hash
5303 {
5304 return Ok((
5305 local_rows.clone(),
5306 vec![
5307 "remote projection hash matched local graph; skipped full row-page snapshot diff"
5308 .to_string(),
5309 ],
5310 ));
5311 }
5312
5313 let mut nodes: Vec<ConvexNodeRow> = Vec::with_capacity(meta.node_count.unwrap_or_default());
5314 let mut node_cursor: Option<String> = None;
5315 loop {
5316 let response = self.post(&ConvexTransportRequest {
5317 operation: "snapshot_nodes_page",
5318 chunk: 0,
5319 projection_version,
5320 projection_hash: None,
5321 projection_meta_id: None,
5322 node_rows: Vec::new(),
5323 edge_rows: Vec::new(),
5324 keys: Vec::new(),
5325 cursor: node_cursor.clone(),
5326 limit: None,
5327 })?;
5328 let page = response
5329 .page
5330 .context("Convex snapshot_nodes_page response did not include page")?;
5331 for raw in page.rows {
5332 let row: ConvexNodeRow =
5333 serde_json::from_value(raw).context("decoding Convex snapshot node row")?;
5334 nodes.push(row);
5335 }
5336 match page.next_cursor {
5337 Some(next) => node_cursor = Some(next),
5338 None => break,
5339 }
5340 }
5341
5342 let mut edges: Vec<ConvexEdgeRow> = Vec::with_capacity(meta.edge_count.unwrap_or_default());
5343 let mut edge_cursor: Option<String> = None;
5344 loop {
5345 let response = self.post(&ConvexTransportRequest {
5346 operation: "snapshot_edges_page",
5347 chunk: 0,
5348 projection_version,
5349 projection_hash: None,
5350 projection_meta_id: None,
5351 node_rows: Vec::new(),
5352 edge_rows: Vec::new(),
5353 keys: Vec::new(),
5354 cursor: edge_cursor.clone(),
5355 limit: None,
5356 })?;
5357 let page = response
5358 .page
5359 .context("Convex snapshot_edges_page response did not include page")?;
5360 for raw in page.rows {
5361 let row: ConvexEdgeRow =
5362 serde_json::from_value(raw).context("decoding Convex snapshot edge row")?;
5363 edges.push(row);
5364 }
5365 match page.next_cursor {
5366 Some(next) => edge_cursor = Some(next),
5367 None => break,
5368 }
5369 }
5370
5371 Ok((ConvexProjectionRows { nodes, edges }, Vec::new()))
5372 }
5373
5374 fn apply_chunk(
5375 &self,
5376 report: &ConvexSyncReport,
5377 chunk: &ConvexSyncChunk,
5378 ) -> Result<ConvexTransportReceipt> {
5379 let node_rows = if chunk.operation == "upsert_nodes" {
5380 report
5381 .node_upserts
5382 .iter()
5383 .filter(|row| chunk.keys.contains(&row.external_id))
5384 .cloned()
5385 .collect()
5386 } else {
5387 Vec::new()
5388 };
5389 let edge_rows = if chunk.operation == "upsert_edges" {
5390 report
5391 .edge_upserts
5392 .iter()
5393 .filter(|row| chunk.keys.contains(&row.edge_key))
5394 .cloned()
5395 .collect()
5396 } else {
5397 Vec::new()
5398 };
5399 let request = ConvexTransportRequest {
5400 operation: &chunk.operation,
5401 chunk: chunk.chunk,
5402 projection_version: &report.projection_version,
5403 projection_hash: report.projection_hash.as_deref(),
5404 projection_meta_id: None,
5405 node_rows,
5406 edge_rows,
5407 keys: chunk.keys.clone(),
5408 cursor: None,
5409 limit: None,
5410 };
5411 let mut last_error = None;
5412 for attempt in 1..=chunk.max_attempts {
5413 match self.post(&request) {
5414 Ok(response) => {
5415 return Ok(ConvexTransportReceipt {
5416 operation: chunk.operation.clone(),
5417 chunk: chunk.chunk,
5418 attempt,
5419 status: response.status.unwrap_or_else(|| "ok".to_string()),
5420 message: response.message,
5421 });
5422 }
5423 Err(err) => {
5424 last_error = Some(err);
5425 if attempt < chunk.max_attempts {
5426 std::thread::sleep(Duration::from_millis(100 * attempt as u64));
5427 }
5428 }
5429 }
5430 }
5431 Err(last_error.unwrap_or_else(|| anyhow::anyhow!("Convex transport chunk failed")))
5432 .with_context(|| format!("applying Convex {} chunk {}", chunk.operation, chunk.chunk))
5433 }
5434}
5435
5436fn convex_projection_hash(rows: &ConvexProjectionRows, scope: Option<&str>) -> Option<String> {
5437 let meta_id = graph_projection_meta_id(scope);
5438 rows.nodes
5439 .iter()
5440 .find(|row| row.external_id == meta_id && row.kind == GRAPH_PROJECTION_META_KIND)
5441 .and_then(|row| row.properties.get("content_hash").cloned())
5442}
5443
5444fn convex_projection_freshness(
5445 local: &ConvexProjectionRows,
5446 snapshot: Option<&ConvexProjectionRows>,
5447 scope: Option<&str>,
5448) -> ConvexProjectionFreshness {
5449 let local_hash = convex_projection_hash(local, scope);
5450 let Some(snapshot) = snapshot else {
5451 return ConvexProjectionFreshness {
5452 status: "unchecked".to_string(),
5453 fail_closed: false,
5454 local_hash,
5455 snapshot_hash: None,
5456 missing_nodes: Vec::new(),
5457 stale_nodes: Vec::new(),
5458 missing_edges: Vec::new(),
5459 stale_edges: Vec::new(),
5460 diagnostics: vec![
5461 "no Convex snapshot supplied; sync output is a local dry-run plan".to_string(),
5462 ],
5463 };
5464 };
5465
5466 let snapshot_hash = convex_projection_hash(snapshot, scope);
5467 let snapshot_nodes = snapshot
5468 .nodes
5469 .iter()
5470 .map(|row| (row.external_id.as_str(), row))
5471 .collect::<BTreeMap<_, _>>();
5472 let snapshot_edges = snapshot
5473 .edges
5474 .iter()
5475 .map(|row| (row.edge_key.as_str(), row))
5476 .collect::<BTreeMap<_, _>>();
5477
5478 let mut missing_nodes = Vec::new();
5479 let mut stale_nodes = Vec::new();
5480 for row in &local.nodes {
5481 match snapshot_nodes.get(row.external_id.as_str()) {
5482 Some(snapshot_row) if *snapshot_row == row => {}
5483 Some(_) => stale_nodes.push(row.external_id.clone()),
5484 None => missing_nodes.push(row.external_id.clone()),
5485 }
5486 }
5487
5488 let mut missing_edges = Vec::new();
5489 let mut stale_edges = Vec::new();
5490 for row in &local.edges {
5491 match snapshot_edges.get(row.edge_key.as_str()) {
5492 Some(snapshot_row) if *snapshot_row == row => {}
5493 Some(_) => stale_edges.push(row.edge_key.clone()),
5494 None => missing_edges.push(row.edge_key.clone()),
5495 }
5496 }
5497
5498 let hash_current = local_hash.is_some() && local_hash == snapshot_hash;
5499 let rows_current = missing_nodes.is_empty()
5500 && stale_nodes.is_empty()
5501 && missing_edges.is_empty()
5502 && stale_edges.is_empty();
5503 if hash_current && rows_current {
5504 return ConvexProjectionFreshness::current(local_hash, snapshot_hash);
5505 }
5506
5507 let mut diagnostics = Vec::new();
5508 if local_hash != snapshot_hash {
5509 diagnostics.push(format!(
5510 "projection hash mismatch: local={} snapshot={}",
5511 local_hash.as_deref().unwrap_or("missing"),
5512 snapshot_hash.as_deref().unwrap_or("missing")
5513 ));
5514 }
5515 if !missing_nodes.is_empty() || !missing_edges.is_empty() {
5516 diagnostics.push(format!(
5517 "Convex snapshot is missing {} node(s) and {} edge(s)",
5518 missing_nodes.len(),
5519 missing_edges.len()
5520 ));
5521 }
5522 if !stale_nodes.is_empty() || !stale_edges.is_empty() {
5523 diagnostics.push(format!(
5524 "Convex snapshot has {} stale node row(s) and {} stale edge row(s)",
5525 stale_nodes.len(),
5526 stale_edges.len()
5527 ));
5528 }
5529
5530 ConvexProjectionFreshness {
5531 status: "stale".to_string(),
5532 fail_closed: true,
5533 local_hash,
5534 snapshot_hash,
5535 missing_nodes,
5536 stale_nodes,
5537 missing_edges,
5538 stale_edges,
5539 diagnostics,
5540 }
5541}
5542
5543pub(crate) fn verify_convex_projection_snapshot(
5544 root: &Path,
5545 scope: Option<&str>,
5546 snapshot_path: &Path,
5547) -> Result<()> {
5548 let graph_db = graph_substrate_db_path(root, scope);
5549 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
5550 let local = convex_rows_from_graph_store(&store)?;
5551 let snapshot = load_convex_projection_rows(snapshot_path)?;
5552 validate_convex_projection_rows(&snapshot)?;
5553 let freshness = convex_projection_freshness(&local, Some(&snapshot), scope);
5554 if freshness.fail_closed {
5555 bail!(
5556 "Convex graph projection is not current for {}: {}",
5557 root.display(),
5558 freshness.diagnostics.join("; ")
5559 );
5560 }
5561 Ok(())
5562}
5563
5564fn convex_rows_diff(
5565 local: &ConvexProjectionRows,
5566 snapshot: Option<&ConvexProjectionRows>,
5567) -> (
5568 Vec<ConvexNodeRow>,
5569 Vec<ConvexEdgeRow>,
5570 Vec<String>,
5571 Vec<String>,
5572) {
5573 let Some(snapshot) = snapshot else {
5574 return (
5575 local.nodes.clone(),
5576 local.edges.clone(),
5577 Vec::new(),
5578 Vec::new(),
5579 );
5580 };
5581 let local_nodes = local
5582 .nodes
5583 .iter()
5584 .map(|row| (row.external_id.as_str(), row))
5585 .collect::<BTreeMap<_, _>>();
5586 let local_edges = local
5587 .edges
5588 .iter()
5589 .map(|row| (row.edge_key.as_str(), row))
5590 .collect::<BTreeMap<_, _>>();
5591 let snapshot_nodes = snapshot
5592 .nodes
5593 .iter()
5594 .map(|row| (row.external_id.as_str(), row))
5595 .collect::<BTreeMap<_, _>>();
5596 let snapshot_edges = snapshot
5597 .edges
5598 .iter()
5599 .map(|row| (row.edge_key.as_str(), row))
5600 .collect::<BTreeMap<_, _>>();
5601
5602 let node_upserts = local
5603 .nodes
5604 .iter()
5605 .filter(|row| {
5606 snapshot_nodes
5607 .get(row.external_id.as_str())
5608 .is_none_or(|snapshot_row| *snapshot_row != *row)
5609 })
5610 .cloned()
5611 .collect::<Vec<_>>();
5612 let edge_upserts = local
5613 .edges
5614 .iter()
5615 .filter(|row| {
5616 snapshot_edges
5617 .get(row.edge_key.as_str())
5618 .is_none_or(|snapshot_row| *snapshot_row != *row)
5619 })
5620 .cloned()
5621 .collect::<Vec<_>>();
5622 let node_tombstones = snapshot
5623 .nodes
5624 .iter()
5625 .filter(|row| !local_nodes.contains_key(row.external_id.as_str()))
5626 .map(|row| row.external_id.clone())
5627 .collect::<Vec<_>>();
5628 let edge_tombstones = snapshot
5629 .edges
5630 .iter()
5631 .filter(|row| !local_edges.contains_key(row.edge_key.as_str()))
5632 .map(|row| row.edge_key.clone())
5633 .collect::<Vec<_>>();
5634
5635 (node_upserts, edge_upserts, node_tombstones, edge_tombstones)
5636}
5637
5638fn push_sync_chunks(
5639 chunks: &mut Vec<ConvexSyncChunk>,
5640 operation: &str,
5641 keys: Vec<String>,
5642 size: usize,
5643) {
5644 if keys.is_empty() {
5645 return;
5646 }
5647 for (idx, chunk) in keys.chunks(size).enumerate() {
5648 chunks.push(ConvexSyncChunk {
5649 operation: operation.to_string(),
5650 chunk: idx + 1,
5651 count: chunk.len(),
5652 keys: chunk.to_vec(),
5653 max_attempts: 3,
5654 retry_policy:
5655 "retry the whole chunk; rows are idempotent by externalId/edgeKey, stop on a repeated partial failure"
5656 .to_string(),
5657 });
5658 }
5659}
5660
5661pub(crate) fn build_convex_sync_report_with_snapshot(
5662 path: &Path,
5663 scope: Option<&str>,
5664 snapshot: Option<ConvexProjectionRows>,
5665 chunk_size: usize,
5666 dry_run: bool,
5667) -> Result<ConvexSyncReport> {
5668 if chunk_size == 0 {
5669 bail!("--chunk-size must be greater than zero");
5670 }
5671 let root = lint::resolve_project_root_or_canonical_path(path)?;
5672 let (graph, _refresh) = write_traversal_graph_store(&root, path, scope)?;
5673 let graph_db = graph_substrate_db_path(&root, scope);
5674 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
5675 let local = convex_rows_from_graph_store(&store)?;
5676 let freshness = convex_projection_freshness(&local, snapshot.as_ref(), scope);
5677 let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
5678 convex_rows_diff(&local, snapshot.as_ref());
5679
5680 let mut chunks = Vec::new();
5681 push_sync_chunks(
5682 &mut chunks,
5683 "delete_edges",
5684 edge_tombstones.clone(),
5685 chunk_size,
5686 );
5687 push_sync_chunks(
5688 &mut chunks,
5689 "upsert_nodes",
5690 node_upserts
5691 .iter()
5692 .map(|row| row.external_id.clone())
5693 .collect(),
5694 chunk_size,
5695 );
5696 push_sync_chunks(
5697 &mut chunks,
5698 "upsert_edges",
5699 edge_upserts
5700 .iter()
5701 .map(|row| row.edge_key.clone())
5702 .collect(),
5703 chunk_size,
5704 );
5705 push_sync_chunks(
5706 &mut chunks,
5707 "delete_nodes",
5708 node_tombstones.clone(),
5709 chunk_size,
5710 );
5711
5712 let mut diagnostics = vec![
5713 "apply node upserts before edge upserts; apply edge tombstones before node tombstones"
5714 .to_string(),
5715 ];
5716 if dry_run {
5717 diagnostics.push("dry-run only: no Convex network mutation was attempted".to_string());
5718 }
5719 if freshness.fail_closed {
5720 diagnostics.push(
5721 "Convex-backed traverse/context-pack reads must fail closed until this plan is applied"
5722 .to_string(),
5723 );
5724 }
5725
5726 Ok(ConvexSyncReport {
5727 root: root.to_string_lossy().to_string(),
5728 scope: scope.map(str::to_string),
5729 graph_db: graph_db.to_string_lossy().to_string(),
5730 dry_run,
5731 projection_version: GRAPH_PROJECTION_VERSION.to_string(),
5732 projection_hash: convex_projection_hash(&local, scope),
5733 required_indexes: convex_required_indexes(),
5734 node_upserts,
5735 edge_upserts,
5736 node_tombstones,
5737 edge_tombstones,
5738 chunks,
5739 freshness,
5740 transport: None,
5741 receipts: Vec::new(),
5742 diagnostics,
5743 warnings: graph.warnings,
5744 })
5745}
5746
/// Test-only convenience wrapper: loads the optional snapshot file and builds a dry-run report.
#[cfg(test)]
fn build_convex_sync_report(
    path: &Path,
    scope: Option<&str>,
    snapshot_path: Option<&Path>,
    chunk_size: usize,
) -> Result<ConvexSyncReport> {
    let snapshot = match snapshot_path {
        Some(file) => Some(load_convex_projection_rows(file)?),
        None => None,
    };
    let dry_run = true;
    build_convex_sync_report_with_snapshot(path, scope, snapshot, chunk_size, dry_run)
}
5757
5758pub(crate) fn print_convex_sync_human(report: &ConvexSyncReport, compact: bool) {
5759 if compact {
5760 println!(
5761 "convex-sync nodes:+{} -{} edges:+{} -{} chunks:{} freshness:{}",
5762 report.node_upserts.len(),
5763 report.node_tombstones.len(),
5764 report.edge_upserts.len(),
5765 report.edge_tombstones.len(),
5766 report.chunks.len(),
5767 report.freshness.status
5768 );
5769 return;
5770 }
5771
5772 println!(
5773 "Convex graph sync {}",
5774 if report.dry_run { "dry-run" } else { "apply" }
5775 );
5776 println!("root: {}", report.root);
5777 println!("graph_db: {}", report.graph_db);
5778 println!(
5779 "upserts: {} node(s), {} edge(s)",
5780 report.node_upserts.len(),
5781 report.edge_upserts.len()
5782 );
5783 println!(
5784 "tombstones: {} node(s), {} edge(s)",
5785 report.node_tombstones.len(),
5786 report.edge_tombstones.len()
5787 );
5788 println!("chunks: {}", report.chunks.len());
5789 println!("freshness: {}", report.freshness.status);
5790 if let Some(transport) = &report.transport {
5791 println!(
5792 "transport: endpoint_env={} auth_env={} applied_chunks={}",
5793 transport.endpoint_env, transport.auth_token_env, transport.applied_chunks
5794 );
5795 }
5796 for receipt in &report.receipts {
5797 println!(
5798 "receipt: {} chunk {} attempt {} {}",
5799 receipt.operation, receipt.chunk, receipt.attempt, receipt.status
5800 );
5801 }
5802 for diagnostic in report
5803 .diagnostics
5804 .iter()
5805 .chain(report.freshness.diagnostics.iter())
5806 {
5807 println!("- {}", diagnostic);
5808 }
5809}
5810
/// Borrowed bundle of inputs for a Convex sync invocation.
///
/// All references live for `'a`. The fields are private, so the struct can only be built and
/// read inside this module.
// NOTE(review): `auth_token_env` presumably names an environment variable holding the auth
// token rather than the token itself — confirm against the consumer.
5811pub(crate) struct ConvexSyncOptions<'a> {
5812 path: &'a Path,
5813 scope: Option<&'a str>,
5814 snapshot: Option<&'a Path>,
5815 chunk_size: usize,
5816 remote_snapshot: bool,
5817 apply: bool,
5818 endpoint: Option<&'a str>,
5819 auth_token_env: &'a str,
5820}
5821
/// Serializable description of one schema field: its name, value type and a description.
///
/// All three values are `'static` string slices.
5822#[derive(Serialize)]
5823struct GraphDbSchemaField {
5824 name: &'static str,
5825 value_type: &'static str,
5826 description: &'static str,
5827}
5828
/// Serializable description of one schema operation: a command string plus its description.
5829#[derive(Serialize)]
5830struct GraphDbSchemaOperation {
5831 command: &'static str,
5832 description: &'static str,
5833}
5834
/// Serializable name/version/description triple for one contract listed in the schema.
5835#[derive(Serialize)]
5836struct GraphDbSchemaContract {
5837 name: &'static str,
5838 version: &'static str,
5839 description: &'static str,
5840}
5841
/// Serializable schema document: contract versions, node fields, edge fields and operations.
5842#[derive(Serialize)]
5843struct GraphDbSchema {
5844 contract_versions: Vec<GraphDbSchemaContract>,
5845 node_fields: Vec<GraphDbSchemaField>,
5846 edge_fields: Vec<GraphDbSchemaField>,
5847 operations: Vec<GraphDbSchemaOperation>,
5848}
5849
/// Freshness summary that can be serialized and deserialized.
///
/// Carries a status string, a fail-closed flag, optional projection version, content hash and
/// source watermark, and a list of diagnostics.
5850#[derive(Clone, Serialize, Deserialize)]
5851struct GraphDbFreshnessReport {
5852 status: String,
5853 fail_closed: bool,
5854 projection_version: Option<String>,
5855 content_hash: Option<String>,
5856 source_watermark: Option<String>,
5857 diagnostics: Vec<String>,
5858}
5859
/// Readiness verdict: status, fail-closed flag, a reason, diagnostics and follow-up commands.
5860#[derive(Clone, Debug, Serialize)]
5861struct GraphEffectivenessReadiness {
5862 status: String,
5863 fail_closed: bool,
5864 reason: String,
5865 diagnostics: Vec<String>,
5866 next_commands: Vec<String>,
5867}
5868
/// A single key/value pair used as a property filter; both sides are owned strings.
5869#[derive(Clone, Debug, Serialize, PartialEq)]
5870struct GraphDbPropertyFilter {
5871 key: String,
5872 value: String,
5873}
5874
/// Optional query modifiers: a cursor, a row limit and a list of property filters.
///
/// `Default` yields no cursor, no limit and no filters.
5875#[derive(Clone, Debug, Default)]
5876struct GraphDbQueryOptions {
5877 cursor: Option<String>,
5878 limit: Option<usize>,
5879 property_filters: Vec<GraphDbPropertyFilter>,
5880}
5881
/// Paging metadata attached to a query result.
///
/// `cursor`, `limit` and `next_cursor` are omitted from the serialized output when `None`;
/// `diagnostics` is omitted when empty. The returned node/edge counts, the `truncated` flag and
/// the property filters are always serialized.
5882#[derive(Clone, Debug, Serialize, PartialEq)]
5883struct GraphDbPageReport {
5884 #[serde(skip_serializing_if = "Option::is_none")]
5885 cursor: Option<String>,
5886 #[serde(skip_serializing_if = "Option::is_none")]
5887 limit: Option<usize>,
5888 #[serde(skip_serializing_if = "Option::is_none")]
5889 next_cursor: Option<String>,
5890 returned_nodes: usize,
5891 returned_edges: usize,
5892 truncated: bool,
5893 property_filters: Vec<GraphDbPropertyFilter>,
5894 #[serde(skip_serializing_if = "Vec::is_empty", default)]
5895 diagnostics: Vec<String>,
5896}
5897
/// Local alias for `resolution::RankedNeighbor`.
5898type GraphDbRankedNeighbor = resolution::RankedNeighbor;
5899
/// Serializable summary of a knowledge-retrieval query.
///
/// Records the mode and query text, seed parameters (`seed_kind`, `seed_limit`, `seed_count`),
/// the depth and limit used, resulting node/edge counts, whether the result was truncated, and
/// free-form strings describing traversal, freshness boundary and privacy boundary, plus
/// diagnostics.
5900#[derive(Clone, Debug, Serialize, PartialEq)]
5901struct GraphDbKnowledgeRetrieval {
5902 mode: String,
5903 query: String,
5904 seed_kind: String,
5905 seed_limit: usize,
5906 seed_count: usize,
5907 depth: usize,
5908 limit: usize,
5909 node_count: usize,
5910 edge_count: usize,
5911 truncated: bool,
5912 traversal: String,
5913 freshness_boundary: String,
5914 privacy_boundary: String,
5915 diagnostics: Vec<String>,
5916}
5917
/// In-memory subgraph result: nodes, edges, a truncation flag and diagnostics.
///
/// Derives nothing, so it is neither serializable nor cloneable.
5918struct GraphDbSemanticSeededSubgraph {
5919 nodes: Vec<SubstrateGraphNode>,
5920 edges: Vec<SubstrateGraphEdge>,
5921 truncated: bool,
5922 diagnostics: Vec<String>,
5923}
5924
/// Local alias for `resolution::NeighborhoodRankingGate`.
5925type GraphDbNeighborhoodRankingGate = resolution::NeighborhoodRankingGate;
5926
/// Serializable envelope for a graph-db query result.
///
/// `root`, `backend`, `query` and `freshness` are always serialized. Every other field is
/// optional payload: `Option` fields are skipped when `None` and `Vec` fields are skipped when
/// empty, so the serialized output only contains the sections that were populated.
5927#[derive(Serialize)]
5928struct GraphDbReport {
5929 root: String,
5930 #[serde(skip_serializing_if = "Option::is_none")]
5931 scope: Option<String>,
5932 backend: String,
5933 query: String,
5934 freshness: GraphDbFreshnessReport,
5935 #[serde(skip_serializing_if = "Option::is_none")]
5936 schema: Option<GraphDbSchema>,
5937 #[serde(skip_serializing_if = "Option::is_none")]
5938 node: Option<SubstrateGraphNode>,
5939 #[serde(skip_serializing_if = "Option::is_none")]
5940 edge: Option<SubstrateGraphEdge>,
5941 #[serde(skip_serializing_if = "Vec::is_empty", default)]
5942 nodes: Vec<SubstrateGraphNode>,
5943 #[serde(skip_serializing_if = "Vec::is_empty", default)]
5944 edges: Vec<SubstrateGraphEdge>,
5945 #[serde(skip_serializing_if = "Vec::is_empty", default)]
5946 ranked_neighbors: Vec<GraphDbRankedNeighbor>,
5947 #[serde(skip_serializing_if = "Vec::is_empty", default)]
5948 semantic_related: Vec<SemanticRelatedItem>,
5949 #[serde(skip_serializing_if = "Option::is_none")]
5950 neighborhood_ranking_gate: Option<GraphDbNeighborhoodRankingGate>,
5951 #[serde(skip_serializing_if = "Option::is_none")]
5952 knowledge_retrieval: Option<GraphDbKnowledgeRetrieval>,
5953 #[serde(skip_serializing_if = "Option::is_none")]
5954 path: Option<substrate::GraphPath>,
5955 #[serde(skip_serializing_if = "Option::is_none")]
5956 page: Option<GraphDbPageReport>,
5957 #[serde(skip_serializing_if = "Vec::is_empty", default)]
5958 warnings: Vec<String>,
5959}
5960
/// In-memory, read-only graph store built from projection rows for one experimental backend.
///
/// `nodes` is keyed by node id and `edges` by the value of `graph_db_edge_key`. The two
/// remaining maps are lookup indexes: node ids grouped by node kind, and edge keys grouped by
/// the id of the edge's source node.
5961struct ExperimentalReadOnlyGraphStore {
5962 backend: GraphDbExperimentalBackend,
5963 nodes: BTreeMap<String, SubstrateGraphNode>,
5964 edges: BTreeMap<String, SubstrateGraphEdge>,
5965 node_ids_by_kind: BTreeMap<String, Vec<String>>,
5966 outgoing_edge_keys_by_from: BTreeMap<String, Vec<String>>,
5967}
5968
5969impl ExperimentalReadOnlyGraphStore {
5970 fn from_rows(backend: GraphDbExperimentalBackend, rows: &ConvexProjectionRows) -> Result<Self> {
5971 validate_convex_projection_rows(rows)?;
5972 let nodes = rows
5973 .nodes
5974 .iter()
5975 .map(|row| {
5976 let node = SubstrateGraphNode {
5977 id: row.external_id.clone(),
5978 kind: row.kind.clone(),
5979 label: row.label.clone(),
5980 properties: row.properties.clone(),
5981 provenance: row.provenance.clone(),
5982 freshness: row.freshness.clone(),
5983 };
5984 (node.id.clone(), node)
5985 })
5986 .collect::<BTreeMap<_, _>>();
5987 let edges = rows
5988 .edges
5989 .iter()
5990 .map(|row| {
5991 let edge = SubstrateGraphEdge {
5992 id: row.edge_key.clone(),
5993 from_id: row.from_external_id.clone(),
5994 to_id: row.to_external_id.clone(),
5995 kind: row.kind.clone(),
5996 properties: row.properties.clone(),
5997 provenance: row.provenance.clone(),
5998 freshness: row.freshness.clone(),
5999 };
6000 (graph_db_edge_key(&edge), edge)
6001 })
6002 .collect::<BTreeMap<_, _>>();
6003 let mut node_ids_by_kind = BTreeMap::<String, Vec<String>>::new();
6004 for node in nodes.values() {
6005 node_ids_by_kind
6006 .entry(node.kind.clone())
6007 .or_default()
6008 .push(node.id.clone());
6009 }
6010 for ids in node_ids_by_kind.values_mut() {
6011 ids.sort();
6012 }
6013 let mut outgoing_edge_keys_by_from = BTreeMap::<String, Vec<String>>::new();
6014 for edge in edges.values() {
6015 outgoing_edge_keys_by_from
6016 .entry(edge.from_id.clone())
6017 .or_default()
6018 .push(graph_db_edge_key(edge));
6019 }
6020 for edge_keys in outgoing_edge_keys_by_from.values_mut() {
6021 edge_keys.sort_by(|left_key, right_key| {
6022 let left = &edges[left_key];
6023 let right = &edges[right_key];
6024 left.to_id
6025 .cmp(&right.to_id)
6026 .then(left.kind.cmp(&right.kind))
6027 .then(left_key.cmp(right_key))
6028 });
6029 }
6030 Ok(Self {
6031 backend,
6032 nodes,
6033 edges,
6034 node_ids_by_kind,
6035 outgoing_edge_keys_by_from,
6036 })
6037 }
6038}
6039
6040impl GraphStore for ExperimentalReadOnlyGraphStore {
6041 fn upsert_node(&self, _node: &SubstrateGraphNode) -> Result<()> {
6042 bail!("{} backend-eval adapter is read-only", self.backend.name())
6043 }
6044
6045 fn upsert_edge(&self, _edge: &SubstrateGraphEdge) -> Result<()> {
6046 bail!("{} backend-eval adapter is read-only", self.backend.name())
6047 }
6048
6049 fn delete_node(&self, _id: &str) -> Result<usize> {
6050 bail!("{} backend-eval adapter is read-only", self.backend.name())
6051 }
6052
6053 fn delete_edge(&self, _from_id: &str, _to_id: &str, _kind: &str) -> Result<usize> {
6054 bail!("{} backend-eval adapter is read-only", self.backend.name())
6055 }
6056
6057 fn node(&self, id: &str) -> Result<Option<SubstrateGraphNode>> {
6058 Ok(self.nodes.get(id).cloned())
6059 }
6060
6061 fn all_nodes(&self) -> Result<Vec<SubstrateGraphNode>> {
6062 Ok(self.nodes.values().cloned().collect())
6063 }
6064
6065 fn all_edges(&self) -> Result<Vec<SubstrateGraphEdge>> {
6066 let mut edges = self.edges.values().cloned().collect::<Vec<_>>();
6067 edges.sort_by(|left, right| {
6068 left.from_id
6069 .cmp(&right.from_id)
6070 .then(left.kind.cmp(&right.kind))
6071 .then(left.to_id.cmp(&right.to_id))
6072 });
6073 Ok(edges)
6074 }
6075
6076 fn graph_counts(&self) -> Result<(usize, usize)> {
6077 Ok((self.nodes.len(), self.edges.len()))
6078 }
6079
6080 fn sample_edge(&self, kind: Option<&str>) -> Result<Option<SubstrateGraphEdge>> {
6081 let mut edges = self
6082 .edges
6083 .values()
6084 .filter(|edge| edge.from_id != edge.to_id)
6085 .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
6086 .cloned()
6087 .collect::<Vec<_>>();
6088 edges.sort_by(|left, right| {
6089 left.from_id
6090 .cmp(&right.from_id)
6091 .then(left.kind.cmp(&right.kind))
6092 .then(left.to_id.cmp(&right.to_id))
6093 });
6094 Ok(edges.into_iter().next())
6095 }
6096
6097 fn sample_edge_with_property(
6098 &self,
6099 ) -> Result<Option<(SubstrateGraphEdge, GraphPropertyFilter)>> {
6100 Ok(self
6101 .edges
6102 .values()
6103 .filter(|edge| edge.from_id != edge.to_id)
6104 .filter_map(|edge| {
6105 edge.properties.iter().next().map(|(key, value)| {
6106 (
6107 edge,
6108 GraphPropertyFilter {
6109 key: key.clone(),
6110 value: value.clone(),
6111 },
6112 )
6113 })
6114 })
6115 .min_by(|(left_edge, left_filter), (right_edge, right_filter)| {
6116 left_filter
6117 .key
6118 .cmp(&right_filter.key)
6119 .then(left_filter.value.cmp(&right_filter.value))
6120 .then_with(|| graph_db_edge_key(left_edge).cmp(&graph_db_edge_key(right_edge)))
6121 })
6122 .map(|(edge, filter)| (edge.clone(), filter)))
6123 }
6124
6125 fn nodes_by_kind(&self, kind: &str) -> Result<Vec<SubstrateGraphNode>> {
6126 Ok(self
6127 .node_ids_by_kind
6128 .get(kind)
6129 .into_iter()
6130 .flatten()
6131 .filter_map(|id| self.nodes.get(id).cloned())
6132 .collect())
6133 }
6134
6135 fn outgoing_edges(&self, from_id: &str, kind: Option<&str>) -> Result<Vec<SubstrateGraphEdge>> {
6136 Ok(self
6137 .outgoing_edge_keys_by_from
6138 .get(from_id)
6139 .into_iter()
6140 .flatten()
6141 .filter_map(|key| self.edges.get(key))
6142 .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
6143 .cloned()
6144 .collect())
6145 }
6146
6147 fn edges_between_nodes(&self, node_ids: &BTreeSet<String>) -> Result<Vec<SubstrateGraphEdge>> {
6148 Ok(self
6149 .edges
6150 .values()
6151 .filter(|edge| node_ids.contains(&edge.from_id) && node_ids.contains(&edge.to_id))
6152 .cloned()
6153 .collect())
6154 }
6155
6156 fn shortest_path(
6157 &self,
6158 from_id: &str,
6159 to_id: &str,
6160 kind: Option<&str>,
6161 ) -> Result<Option<substrate::GraphPath>> {
6162 if from_id == to_id {
6163 return Ok(Some(substrate::GraphPath {
6164 nodes: vec![from_id.to_string()],
6165 hops: 0,
6166 }));
6167 }
6168
6169 let mut queue = VecDeque::new();
6170 let mut parent = BTreeMap::<String, String>::new();
6171 parent.insert(from_id.to_string(), String::new());
6172 queue.push_back(from_id.to_string());
6173
6174 while let Some(current) = queue.pop_front() {
6175 for edge in self.outgoing_edges(¤t, kind)? {
6176 if parent.contains_key(&edge.to_id) {
6177 continue;
6178 }
6179 parent.insert(edge.to_id.clone(), current.clone());
6180 if edge.to_id == to_id {
6181 let mut nodes = vec![to_id.to_string()];
6182 let mut cursor = to_id;
6183 while let Some(previous) = parent.get(cursor) {
6184 if previous.is_empty() {
6185 break;
6186 }
6187 nodes.push(previous.clone());
6188 cursor = previous;
6189 }
6190 nodes.reverse();
6191 return Ok(Some(substrate::GraphPath {
6192 hops: nodes.len().saturating_sub(1),
6193 nodes,
6194 }));
6195 }
6196 queue.push_back(edge.to_id);
6197 }
6198 }
6199
6200 Ok(None)
6201 }
6202
6203 fn reachable_nodes_by_kinds(
6204 &self,
6205 from_id: &str,
6206 kinds: &[&str],
6207 depth: usize,
6208 limit: usize,
6209 ) -> Result<BTreeMap<String, Vec<(SubstrateGraphNode, substrate::GraphPath)>>> {
6210 let requested = kinds.iter().copied().collect::<BTreeSet<_>>();
6211 let mut rows = requested
6212 .iter()
6213 .map(|kind| {
6214 (
6215 (*kind).to_string(),
6216 BTreeMap::<String, (SubstrateGraphNode, substrate::GraphPath)>::new(),
6217 )
6218 })
6219 .collect::<BTreeMap<_, _>>();
6220 if requested.is_empty() {
6221 return Ok(BTreeMap::new());
6222 }
6223
6224 let mut seen = BTreeSet::from([from_id.to_string()]);
6225 let mut queue = VecDeque::from([(from_id.to_string(), vec![from_id.to_string()])]);
6226 while let Some((current, path)) = queue.pop_front() {
6227 let current_depth = path.len().saturating_sub(1);
6228 if current_depth >= depth {
6229 continue;
6230 }
6231 for edge in self.outgoing_edges(¤t, None)? {
6232 if !seen.insert(edge.to_id.clone()) {
6233 continue;
6234 }
6235 let Some(node) = self.nodes.get(&edge.to_id).cloned() else {
6236 continue;
6237 };
6238 let mut next_path = path.clone();
6239 next_path.push(edge.to_id.clone());
6240 let graph_path = substrate::GraphPath {
6241 hops: next_path.len().saturating_sub(1),
6242 nodes: next_path.clone(),
6243 };
6244 if requested.contains(node.kind.as_str()) {
6245 rows.entry(node.kind.clone())
6246 .or_default()
6247 .entry(node.id.clone())
6248 .or_insert((node.clone(), graph_path));
6249 }
6250 queue.push_back((edge.to_id, next_path));
6251 }
6252 }
6253
6254 Ok(rows
6255 .into_iter()
6256 .map(|(kind, values)| {
6257 let mut values = values.into_values().collect::<Vec<_>>();
6258 values.sort_by(|(left_node, left_path), (right_node, right_path)| {
6259 left_path
6260 .hops
6261 .cmp(&right_path.hops)
6262 .then(left_node.label.cmp(&right_node.label))
6263 .then(left_node.id.cmp(&right_node.id))
6264 });
6265 if limit > 0 && values.len() > limit {
6266 values.truncate(limit);
6267 }
6268 (kind, values)
6269 })
6270 .collect())
6271 }
6272}
6273
// Backend-eval tuning constants and cache version tags.
// NOTE(review): the meanings below are read off the constant names; the consumers are outside
// this chunk — confirm before relying on them.
// Hop budgets: a default maximum (64), three extended tiers (128/256/512) and the direct-path
// budget (1).
6274pub(crate) const GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS: usize = 64;
6275pub(crate) const GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS: [usize; 3] = [128, 256, 512];
6276pub(crate) const GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS: usize = 1;
// Performance-gate parameters: allowed regression in percent, the row unit used for
// normalization, and the minimum number of sample runs.
6277const GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT: f64 = 10.0;
6278pub(crate) const GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT: f64 = 1000.0;
6279const GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS: usize = 3;
// Version tags for on-disk caches; presumably bumped to invalidate older cache files.
6280const CONFLICT_MATRIX_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-prep-v1";
6281const CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-graph-prep-v1";
6282const GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION: &str = "backend-eval-full-projection-v5";
6283
/// One named phase timing: the phase name, its duration in microseconds, and a detail string.
6284#[derive(Clone, Serialize, Deserialize)]
6285pub(crate) struct GraphDbBackendEvalPhaseTiming {
6286 name: String,
6287 duration_micros: u128,
6288 detail: String,
6289}
6290
/// Serializable cache record for a full projection.
///
/// Stores a version string, a cache key, the source watermark, the cached `GraphProjection`
/// and the warnings that accompany it.
6291#[derive(Serialize, Deserialize)]
6292struct GraphDbBackendEvalFullProjectionCache {
6293 version: String,
6294 key: String,
6295 source_watermark: String,
6296 projection: GraphProjection,
6297 warnings: Vec<String>,
6298}
6299
/// Counters describing one use of the full-projection cache: hit flag, on-disk and JSON byte
/// sizes, and the number and total size of pruned files.
///
/// `Default` yields a miss with all counters at zero.
6300#[derive(Clone, Default)]
6301struct GraphDbBackendEvalFullProjectionCacheStats {
6302 hit: bool,
6303 disk_bytes: u64,
6304 json_bytes: u64,
6305 pruned_files: usize,
6306 pruned_bytes: u64,
6307}
6308
/// Serializable per-file watermark row: path, size in bytes and a content hash string.
6309#[derive(Serialize)]
6310struct GraphDbBackendEvalRawSourceWatermarkRow {
6311 path: String,
6312 bytes: u64,
6313 content_hash: String,
6314}
6315
/// Source watermark for a full projection: the watermark value plus a detail string.
6316#[derive(Clone)]
6317struct GraphDbBackendEvalFullProjectionSourceWatermark {
6318 value: String,
6319 detail: String,
6320}
6321
/// Serializable record of the settings a backend evaluation ran with.
///
/// Groups dataset sizing (`high_degree_*`, `deep_chain_*`), query bounds (`depth`, `limit`,
/// `impact_limit`), path hop budgets and policy strings, the full-projection switch and
/// profile, and the normalization row unit.
6322#[derive(Serialize)]
6323pub(crate) struct GraphDbBackendEvalConfig {
6324 high_degree_nodes: usize,
6325 high_degree_fanout: usize,
6326 deep_chain_nodes: usize,
6327 deep_chain_fanout: usize,
6328 depth: usize,
6329 limit: usize,
6330 impact_limit: usize,
6331 path_max_hops: usize,
6332 path_direct_hop_budget: usize,
6333 path_deep_chain_hop_budget: usize,
6334 path_extended_hop_budgets: Vec<usize>,
6335 path_hop_policy: String,
6336 path_probe_strategy: String,
6337 path_query_plan_checks: Vec<String>,
6338 full_projection_enabled: bool,
6339 full_projection_profile: String,
6340 normalization_row_unit: usize,
6341}
6342
/// Pairs an operation name with a JSON value.
// NOTE(review): presumably the value is the operation's result signature used for parity
// comparison between backends — confirm against the code that builds it.
6343#[derive(Clone)]
6344struct GraphDbBackendEvalSignature {
6345 operation: String,
6346 value: serde_json::Value,
6347}
6348
/// Serializable result of one evaluated operation: name, whether it is supported, a status
/// string and the duration in microseconds.
///
/// `rows` and `error` are omitted from the serialized output when `None`.
6349#[derive(Serialize)]
6350struct GraphDbBackendEvalOperation {
6351 name: String,
6352 supported: bool,
6353 status: String,
6354 duration_micros: u128,
6355 #[serde(skip_serializing_if = "Option::is_none")]
6356 rows: Option<usize>,
6357 #[serde(skip_serializing_if = "Option::is_none")]
6358 error: Option<String>,
6359}
6360
/// Parity verdict against SQLite: a match flag plus diagnostics.
6361#[derive(Serialize)]
6362struct GraphDbBackendEvalParity {
6363 matches_sqlite: bool,
6364 diagnostics: Vec<String>,
6365}
6366
/// Serializable evaluation result for one backend.
///
/// Holds the backend and adapter names, the read-only flag, a projection-load description, the
/// per-operation results with their total time in microseconds, the parity verdict, and
/// free-form notes on lock behavior and install portability.
6367#[derive(Serialize)]
6368struct GraphDbBackendEvalBackendReport {
6369 backend: String,
6370 adapter: String,
6371 read_only: bool,
6372 projection_load: String,
6373 operations: Vec<GraphDbBackendEvalOperation>,
6374 total_micros: u128,
6375 parity: GraphDbBackendEvalParity,
6376 lock_behavior: String,
6377 install_portability: String,
6378}
6379
/// Serializable evaluation result for one dataset: its name, target count, node and edge
/// counts, and the per-backend reports.
6380#[derive(Serialize)]
6381struct GraphDbBackendEvalDataset {
6382 name: String,
6383 target_count: usize,
6384 nodes: usize,
6385 edges: usize,
6386 backends: Vec<GraphDbBackendEvalBackendReport>,
6387}
6388
/// Serializable promotion decision for one backend: the decision string, the reasons behind
/// it, and the gate (`GraphDbBackendPromotionGate`, defined outside this section).
6389#[derive(Serialize)]
6390struct GraphDbBackendPromotionDecision {
6391 backend: String,
6392 decision: String,
6393 reasons: Vec<String>,
6394 gate: GraphDbBackendPromotionGate,
6395}
6396
/// Serializable description of the performance gate attached to an evaluation report.
///
/// Contains profile and fixture names, the allowed regression percentage, the minimum number
/// of sample runs, the normalized metric unit, the required metric names, two command strings,
/// and the nested hop-cap promotion and backend-adapter spike gates.
6397#[derive(Serialize)]
6398struct GraphDbBackendEvalPerformanceGate {
6399 baseline_fixture: String,
6400 ci_profile: String,
6401 opt_in_real_profile: String,
6402 full_projection_cache_hit_gate: String,
6403 allowed_regression_percent: f64,
6404 minimum_sample_runs: usize,
6405 normalized_metric_unit: String,
6406 required_metrics: Vec<String>,
6407 digest_command: String,
6408 repeated_sample_command: String,
6409 hop_cap_promotion: GraphDbHopCapPromotionGate,
6410 backend_adapter_spike: GraphDbBackendAdapterSpikeGate,
6411}
6412
/// Serializable gate describing when the default hop cap may be raised.
///
/// Records the gate status, the current default hop count, the candidate hop tiers, the
/// required backend, workloads and metrics, the allowed regression percentage, the minimum
/// number of sample runs, and a decision-rule string.
6413#[derive(Serialize)]
6414struct GraphDbHopCapPromotionGate {
6415 status: String,
6416 current_default_hops: usize,
6417 candidate_hop_tiers: Vec<usize>,
6418 required_backend: String,
6419 required_workloads: Vec<String>,
6420 required_metrics: Vec<String>,
6421 allowed_regression_percent: f64,
6422 minimum_sample_runs: usize,
6423 decision_rule: String,
6424}
6425
/// Serializable gate for backend-adapter spikes: status, candidate backends, required
/// workloads and checks, a decision rule and an evidence plan.
6426#[derive(Serialize)]
6427struct GraphDbBackendAdapterSpikeGate {
6428 status: String,
6429 candidate_backends: Vec<GraphDbBackendAdapterSpikeCandidate>,
6430 required_workloads: Vec<String>,
6431 required_checks: Vec<String>,
6432 decision_rule: String,
6433 evidence_plan: String,
6434}
6435
/// Serializable description of one candidate backend in an adapter spike: backend name,
/// adapter label, and notes on projection load, lock behavior and install portability.
6436#[derive(Serialize)]
6437struct GraphDbBackendAdapterSpikeCandidate {
6438 backend: String,
6439 adapter_label: String,
6440 projection_load: String,
6441 lock_behavior: String,
6442 install_portability: String,
6443}
6444
/// Top-level serializable report of a backend evaluation run.
///
/// Combines the root and optional scope (omitted when `None`), a label, the baseline backend
/// and candidate names, targets, the configuration, phase timings, per-dataset results,
/// promotion decisions, the performance gate, a metric map keyed by name, the metric digest
/// command and warnings.
6445#[derive(Serialize)]
6446pub(crate) struct GraphDbBackendEvalReport {
6447 root: String,
6448 #[serde(skip_serializing_if = "Option::is_none")]
6449 scope: Option<String>,
6450 label: String,
6451 baseline_backend: String,
6452 candidates: Vec<String>,
6453 targets: Vec<String>,
6454 config: GraphDbBackendEvalConfig,
6455 phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
6456 datasets: Vec<GraphDbBackendEvalDataset>,
6457 promotion: Vec<GraphDbBackendPromotionDecision>,
6458 performance_gate: GraphDbBackendEvalPerformanceGate,
6459 metrics: BTreeMap<String, f64>,
6460 metric_digest_command: String,
6461 warnings: Vec<String>,
6462}
6463
/// Result of one doctor check: its name, status, fail-closed flag, diagnostics and the repair
/// commands suggested for it.
6464#[derive(Clone, Debug, Serialize)]
6465struct GraphDbDoctorCheck {
6466 name: String,
6467 status: String,
6468 fail_closed: bool,
6469 diagnostics: Vec<String>,
6470 repair_commands: Vec<String>,
6471}
6472
/// Serializable doctor report: overall status and fail-closed flag, the individual checks and
/// the repair commands.
///
/// `scope` and `convex_snapshot` are omitted when `None`; `required_indexes` is omitted when
/// empty.
6473#[derive(Serialize)]
6474pub(crate) struct GraphDbDoctorReport {
6475 root: String,
6476 #[serde(skip_serializing_if = "Option::is_none")]
6477 scope: Option<String>,
6478 backend: String,
6479 graph_db: String,
6480 #[serde(skip_serializing_if = "Option::is_none")]
6481 convex_snapshot: Option<String>,
6482 status: String,
6483 fail_closed: bool,
6484 checks: Vec<GraphDbDoctorCheck>,
6485 repair_commands: Vec<String>,
6486 #[serde(skip_serializing_if = "Vec::is_empty", default)]
6487 required_indexes: Vec<ConvexRequiredIndex>,
6488}
6489
/// Serializable count-only summary of a drift report: upserts, tombstones, stale rows, stale
/// projection metadata, duplicate and orphan failures, and missing required indexes.
6490#[derive(Serialize)]
6491struct GraphDbDriftSummary {
6492 node_upserts: usize,
6493 edge_upserts: usize,
6494 node_tombstones: usize,
6495 edge_tombstones: usize,
6496 stale_nodes: usize,
6497 stale_edges: usize,
6498 stale_projection_metadata: usize,
6499 duplicate_failures: usize,
6500 orphan_failures: usize,
6501 missing_required_indexes: usize,
6502}
6503
/// Serializable drift report between the local graph database and a Convex snapshot.
///
/// Carries the status and whether graph reads are allowed, the projection version and both
/// hashes, a count summary, the key lists behind those counts (upserts, tombstones, stale
/// rows), diagnostics, follow-up commands and the required indexes. `scope` is omitted when
/// `None` and `warnings` when empty.
6504#[derive(Serialize)]
6505struct GraphDbDriftReport {
6506 root: String,
6507 #[serde(skip_serializing_if = "Option::is_none")]
6508 scope: Option<String>,
6509 graph_db: String,
6510 convex_snapshot: String,
6511 status: String,
6512 graph_reads_allowed: bool,
6513 projection_version: String,
6514 local_hash: Option<String>,
6515 snapshot_hash: Option<String>,
6516 summary: GraphDbDriftSummary,
6517 node_upserts: Vec<String>,
6518 edge_upserts: Vec<String>,
6519 node_tombstones: Vec<String>,
6520 edge_tombstones: Vec<String>,
6521 stale_nodes: Vec<String>,
6522 stale_edges: Vec<String>,
6523 diagnostics: Vec<String>,
6524 next_commands: Vec<String>,
6525 required_indexes: Vec<ConvexRequiredIndex>,
6526 #[serde(skip_serializing_if = "Vec::is_empty", default)]
6527 warnings: Vec<String>,
6528}
6529
/// Tombstone counts for nodes and edges, plus a `total` field.
// NOTE(review): `total` is presumably `nodes + edges`; the struct itself does not enforce it.
6530#[derive(Clone, Serialize)]
6531struct GraphDbTombstoneCounts {
6532 nodes: usize,
6533 edges: usize,
6534 total: usize,
6535}
6536
/// Row and size counts reported to operators: node and edge counts and tombstone counts.
///
/// `file_size_bytes` and `freelist_bytes` are omitted from the serialized output when `None`.
6537#[derive(Clone, Serialize)]
6538struct GraphDbOperatorCounts {
6539 nodes: usize,
6540 edges: usize,
6541 tombstones: GraphDbTombstoneCounts,
6542 #[serde(skip_serializing_if = "Option::is_none")]
6543 file_size_bytes: Option<u64>,
6544 #[serde(skip_serializing_if = "Option::is_none")]
6545 freelist_bytes: Option<u64>,
6546}
6547
/// Serializable compaction assessment.
///
/// Holds a status, the tombstone-scan and live row counts, optional file and freelist sizes
/// (serialized even when `None`, since no skip attribute is set), two boolean verdicts
/// (`safe_to_prune_tombstones`, `requires_convex_reconciliation`), recommendations and proof
/// strings.
6548#[derive(Clone, Serialize)]
6549struct GraphDbCompactionPolicy {
6550 status: String,
6551 tombstone_scan_rows: usize,
6552 live_rows: usize,
6553 file_size_bytes: Option<u64>,
6554 freelist_bytes: Option<u64>,
6555 safe_to_prune_tombstones: bool,
6556 requires_convex_reconciliation: bool,
6557 recommendations: Vec<String>,
6558 proof: Vec<String>,
6559}
6560
/// Serializable summary of one graph-db refresh.
///
/// Records the scope, projection version and mode, followed by counters for tombstoned,
/// upserted, unchanged and deleted nodes, edges and properties, and the number of pruned
/// tombstones. `source_watermark` and the before/after file sizes are omitted when `None`;
/// `phase_timings` is omitted when empty.
6561#[derive(Serialize)]
6562pub(crate) struct GraphDbRefreshSummary {
6563 scope: String,
6564 projection_version: String,
6565 mode: String,
6566 #[serde(skip_serializing_if = "Option::is_none")]
6567 source_watermark: Option<String>,
6568 tombstoned_nodes: usize,
6569 tombstoned_edges: usize,
6570 upserted_nodes: usize,
6571 upserted_edges: usize,
6572 unchanged_nodes: usize,
6573 unchanged_edges: usize,
6574 upserted_properties: usize,
6575 unchanged_properties: usize,
6576 deleted_properties: usize,
6577 deleted_nodes: usize,
6578 deleted_edges: usize,
6579 pruned_tombstones: usize,
6580 #[serde(skip_serializing_if = "Option::is_none")]
6581 file_size_bytes_before: Option<u64>,
6582 #[serde(skip_serializing_if = "Option::is_none")]
6583 file_size_bytes_after: Option<u64>,
6584 #[serde(skip_serializing_if = "Vec::is_empty", default)]
6585 phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
6586}
6587
/// Serializable operator-facing report for one graph-db operation.
///
/// Combines the operation name and status, whether the database is materialized, freshness,
/// readiness, counts, the compaction assessment and follow-up commands. `scope`, `refresh` and
/// `recovery` are omitted when `None`; `warnings` is omitted when empty.
6588#[derive(Serialize)]
6589struct GraphDbOperatorReport {
6590 root: String,
6591 #[serde(skip_serializing_if = "Option::is_none")]
6592 scope: Option<String>,
6593 graph_db: String,
6594 operation: String,
6595 status: String,
6596 materialized: bool,
6597 freshness: GraphDbFreshnessReport,
6598 readiness: GraphEffectivenessReadiness,
6599 counts: GraphDbOperatorCounts,
6600 #[serde(skip_serializing_if = "Option::is_none")]
6601 refresh: Option<GraphDbRefreshSummary>,
6602 compaction: GraphDbCompactionPolicy,
6603 #[serde(skip_serializing_if = "Option::is_none")]
6604 recovery: Option<index::ReadOnlyRecovery>,
6605 next_commands: Vec<String>,
6606 #[serde(skip_serializing_if = "Vec::is_empty", default)]
6607 warnings: Vec<String>,
6608}
6609
/// Serializable before/after report for a graph-db compaction.
///
/// `scope` and `warnings` are left out of the serialized output when they are
/// `None` / empty.
#[derive(Serialize)]
pub(crate) struct GraphDbCompactionReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    graph_db: String,
    applied: bool,
    pruned_tombstones: usize,
    counts_before: GraphDbOperatorCounts,
    counts_after: GraphDbOperatorCounts,
    compaction_before: GraphDbCompactionPolicy,
    compaction_after: GraphDbCompactionPolicy,
    // Signed so that a size increase can be represented.
    // NOTE(review): presumably before-size minus after-size; confirm at the construction site.
    reclaimed_bytes: i64,
    next_commands: Vec<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6627
/// One path entry of a graph-db evidence report (round-trips through serde).
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidencePath {
    to: String,
    kind: String,
    label: String,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<substrate::GraphPath>,
    /// Omitted from the serialized output when `None`.
    // NOTE(review): looks like a follow-up command string; confirm at the producer.
    #[serde(skip_serializing_if = "Option::is_none")]
    expand: Option<String>,
}
6638
/// Fixture-coverage record embedded in a graph-db evidence report.
// NOTE(review): presumably names the test and fixture that cover the report
// contract, plus the assertions they make; confirm at the producer.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFixtureCoverage {
    test: String,
    fixture: String,
    assertions: Vec<String>,
}
6645
/// Evidence packet for a single graph target (round-trips through serde).
///
/// `scope`, `projection_hash` and `warnings` are left out of the serialized
/// output when they are `None` / empty; `warnings` defaults to empty when
/// absent on input.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidenceReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    backend: String,
    contract_version: String,
    target: String,
    packet_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    freshness: GraphDbFreshnessReport,
    target_node: SubstrateGraphNode,
    worker_context: Vec<SubstrateGraphNode>,
    source_handles: Vec<SubstrateGraphNode>,
    worker_results: Vec<SubstrateGraphNode>,
    semantic_related: Vec<SubstrateGraphNode>,
    shortest_paths: Vec<GraphDbEvidencePath>,
    next_commands: Vec<String>,
    replay_commands: Vec<String>,
    repair_commands: Vec<String>,
    fixture_coverage: GraphDbFixtureCoverage,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6671
/// Borrowed inputs bundled together for building a graph-db evidence report.
///
/// Generic over the graph store `S`; everything except `freshness` and
/// `warnings` is borrowed for the lifetime `'a`.
pub(crate) struct GraphDbEvidenceInput<'a, S: GraphStore> {
    root: &'a Path,
    scope: Option<&'a str>,
    backend: &'a str,
    target: &'a str,
    // NOTE(review): presumably the traversal depth bound; confirm at the consumer.
    depth: usize,
    // NOTE(review): presumably a cap on returned rows; confirm at the consumer.
    limit: usize,
    store: &'a S,
    freshness: GraphDbFreshnessReport,
    warnings: Vec<String>,
}
6683
6684impl GraphDbDoctorReport {
6685 fn new(
6686 root: &Path,
6687 scope: Option<&str>,
6688 backend: &str,
6689 graph_db: &Path,
6690 convex_snapshot: Option<&Path>,
6691 ) -> Self {
6692 Self {
6693 root: root.to_string_lossy().to_string(),
6694 scope: scope.map(str::to_string),
6695 backend: backend.to_string(),
6696 graph_db: graph_db.to_string_lossy().to_string(),
6697 convex_snapshot: convex_snapshot.map(|path| path.to_string_lossy().to_string()),
6698 status: "ok".to_string(),
6699 fail_closed: false,
6700 checks: Vec::new(),
6701 repair_commands: Vec::new(),
6702 required_indexes: Vec::new(),
6703 }
6704 }
6705
6706 fn push_check(&mut self, check: GraphDbDoctorCheck) {
6707 self.checks.push(check);
6708 }
6709
6710 fn finalize(&mut self) {
6711 self.fail_closed = self.checks.iter().any(|check| check.fail_closed);
6712 self.status = if self.fail_closed {
6713 "fail_closed"
6714 } else {
6715 "ok"
6716 }
6717 .to_string();
6718 let mut commands = BTreeSet::new();
6719 for check in &self.checks {
6720 commands.extend(check.repair_commands.iter().cloned());
6721 }
6722 self.repair_commands = commands.into_iter().collect();
6723 }
6724
6725 fn summary(&self) -> String {
6726 self.checks
6727 .iter()
6728 .filter(|check| check.fail_closed)
6729 .flat_map(|check| check.diagnostics.iter())
6730 .take(3)
6731 .cloned()
6732 .collect::<Vec<_>>()
6733 .join("; ")
6734 }
6735}
6736
6737fn graph_db_doctor_check(
6738 name: impl Into<String>,
6739 diagnostics: Vec<String>,
6740 repair_commands: Vec<String>,
6741) -> GraphDbDoctorCheck {
6742 let fail_closed = !diagnostics.is_empty();
6743 GraphDbDoctorCheck {
6744 name: name.into(),
6745 status: if fail_closed { "fail_closed" } else { "ok" }.to_string(),
6746 fail_closed,
6747 diagnostics,
6748 repair_commands: if fail_closed {
6749 repair_commands
6750 } else {
6751 Vec::new()
6752 },
6753 }
6754}
6755
6756pub(crate) fn graph_db_scope_arg(scope: Option<&str>) -> String {
6757 scope
6758 .map(|scope| format!(" --scope {}", shell_quote(scope)))
6759 .unwrap_or_default()
6760}
6761
6762fn graph_db_refresh_command(root: &Path, scope: Option<&str>) -> String {
6763 format!(
6764 "tsift graph-db --path {}{} refresh --json",
6765 shell_quote(root.to_string_lossy().as_ref()),
6766 graph_db_scope_arg(scope)
6767 )
6768}
6769
/// Returns the shell command that rebuilds graph.db.
///
/// Currently this is exactly the refresh command; the separate name lets
/// callers express rebuild intent.
fn graph_db_rebuild_command(root: &Path, scope: Option<&str>) -> String {
    graph_db_refresh_command(root, scope)
}
6773
6774fn graph_db_backup_rebuild_command(root: &Path, scope: Option<&str>, graph_db: &Path) -> String {
6775 let backup = format!("{}.bak", graph_db.to_string_lossy());
6776 format!(
6777 "mv {} {} && {}",
6778 shell_quote(graph_db.to_string_lossy().as_ref()),
6779 shell_quote(&backup),
6780 graph_db_rebuild_command(root, scope)
6781 )
6782}
6783
6784fn convex_refresh_command(root: &Path, scope: Option<&str>) -> String {
6785 format!(
6786 "tsift convex-sync {}{} --remote-snapshot --apply --json",
6787 shell_quote(root.to_string_lossy().as_ref()),
6788 graph_db_scope_arg(scope)
6789 )
6790}
6791
/// Opens `graph_db` for reading by delegating to
/// `substrate::open_graph_read_only_connection_resilient`.
// NOTE(review): the "resilient" behaviour (e.g. snapshot fallback) lives in
// `substrate`; confirm there before relying on specific recovery semantics.
fn open_sqlite_graph_db_readonly(graph_db: &Path) -> Result<substrate::SqliteReadOnlyConnection> {
    substrate::open_graph_read_only_connection_resilient(graph_db)
}
6795
6796fn sqlite_table_exists(conn: &Connection, table: &str) -> Result<bool> {
6797 conn.query_row(
6798 "SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1)",
6799 [table],
6800 |row| row.get::<_, bool>(0),
6801 )
6802 .map_err(Into::into)
6803}
6804
6805fn sqlite_known_table_count(conn: &Connection, table: &str) -> Result<usize> {
6806 let sql = match table {
6807 "graph_nodes" => "SELECT COUNT(*) FROM graph_nodes",
6808 "graph_edges" => "SELECT COUNT(*) FROM graph_edges",
6809 "graph_tombstones" => "SELECT COUNT(*) FROM graph_tombstones",
6810 other => bail!("unsupported graph count table {other}"),
6811 };
6812 conn.query_row(sql, [], |row| row.get::<_, usize>(0))
6813 .map_err(Into::into)
6814}
6815
6816fn sqlite_tombstone_counts(conn: &Connection) -> Result<GraphDbTombstoneCounts> {
6817 if !sqlite_table_exists(conn, "graph_tombstones")? {
6818 return Ok(GraphDbTombstoneCounts {
6819 nodes: 0,
6820 edges: 0,
6821 total: 0,
6822 });
6823 }
6824 let mut stmt =
6825 conn.prepare("SELECT row_kind, COUNT(*) FROM graph_tombstones GROUP BY row_kind")?;
6826 let mut rows = stmt.query([])?;
6827 let mut nodes = 0usize;
6828 let mut edges = 0usize;
6829 while let Some(row) = rows.next()? {
6830 let row_kind: String = row.get(0)?;
6831 let count: usize = row.get(1)?;
6832 match row_kind.as_str() {
6833 "node" => nodes = count,
6834 "edge" => edges = count,
6835 _ => {}
6836 }
6837 }
6838 Ok(GraphDbTombstoneCounts {
6839 nodes,
6840 edges,
6841 total: nodes + edges,
6842 })
6843}
6844
6845fn sqlite_graph_counts_from_cache(
6846 conn: &Connection,
6847 scope: &str,
6848) -> Result<Option<GraphDbOperatorCounts>> {
6849 if !sqlite_table_exists(conn, "graph_operator_stats")? {
6850 return Ok(None);
6851 }
6852 let row = conn
6853 .query_row(
6854 r#"
6855 SELECT nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes
6856 FROM graph_operator_stats
6857 WHERE scope = ?1
6858 "#,
6859 [scope],
6860 |row| {
6861 Ok((
6862 row.get::<_, usize>(0)?,
6863 row.get::<_, usize>(1)?,
6864 row.get::<_, usize>(2)?,
6865 row.get::<_, usize>(3)?,
6866 row.get::<_, Option<i64>>(4)?,
6867 row.get::<_, Option<i64>>(5)?,
6868 ))
6869 },
6870 )
6871 .optional()?;
6872 Ok(row.map(
6873 |(nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes)| {
6874 GraphDbOperatorCounts {
6875 nodes,
6876 edges,
6877 tombstones: GraphDbTombstoneCounts {
6878 nodes: tombstone_nodes,
6879 edges: tombstone_edges,
6880 total: tombstone_nodes + tombstone_edges,
6881 },
6882 file_size_bytes: file_size_bytes
6883 .and_then(|value| u64::try_from(value).ok())
6884 .or_else(|| sqlite_database_size_bytes(conn).ok()),
6885 freelist_bytes: freelist_bytes
6886 .and_then(|value| u64::try_from(value).ok())
6887 .or_else(|| sqlite_database_freelist_bytes(conn).ok()),
6888 }
6889 },
6890 ))
6891}
6892
6893fn sqlite_graph_counts(conn: &Connection, scope: &str) -> Result<GraphDbOperatorCounts> {
6894 if let Some(counts) = sqlite_graph_counts_from_cache(conn, scope)? {
6895 return Ok(counts);
6896 }
6897 let nodes = if sqlite_table_exists(conn, "graph_nodes")? {
6898 sqlite_known_table_count(conn, "graph_nodes")?
6899 } else {
6900 0
6901 };
6902 let edges = if sqlite_table_exists(conn, "graph_edges")? {
6903 sqlite_known_table_count(conn, "graph_edges")?
6904 } else {
6905 0
6906 };
6907 Ok(GraphDbOperatorCounts {
6908 nodes,
6909 edges,
6910 tombstones: sqlite_tombstone_counts(conn)?,
6911 file_size_bytes: sqlite_database_size_bytes(conn).ok(),
6912 freelist_bytes: sqlite_database_freelist_bytes(conn).ok(),
6913 })
6914}
6915
6916pub(crate) fn graph_db_compaction_policy(
6917 root: &Path,
6918 scope: Option<&str>,
6919 counts: &GraphDbOperatorCounts,
6920 prune_confirmed: bool,
6921) -> GraphDbCompactionPolicy {
6922 let live_rows = counts.nodes + counts.edges;
6923 let tombstone_scan_rows = counts.tombstones.total;
6924 let tombstone_heavy = tombstone_scan_rows > live_rows.max(1);
6925 let freelist_heavy = counts
6926 .file_size_bytes
6927 .zip(counts.freelist_bytes)
6928 .is_some_and(|(file_size, freelist)| freelist > 0 && freelist >= file_size / 20);
6929 let status = if tombstone_heavy || freelist_heavy {
6930 "recommended"
6931 } else {
6932 "not_needed"
6933 }
6934 .to_string();
6935 let mut recommendations = vec![
6936 convex_refresh_command(root, scope),
6937 graph_db_refresh_command(root, scope),
6938 format!(
6939 "tsift graph-db --path {}{} compact --apply --json",
6940 shell_quote(root.to_string_lossy().as_ref()),
6941 graph_db_scope_arg(scope)
6942 ),
6943 ];
6944 if prune_confirmed {
6945 recommendations.push(format!(
6946 "tsift graph-db --path {}{} compact --apply --prune-tombstones --confirmed-convex-reconciled --json",
6947 shell_quote(root.to_string_lossy().as_ref()),
6948 graph_db_scope_arg(scope)
6949 ));
6950 }
6951 let proof = vec![
6952 format!("{live_rows} live graph row(s)"),
6953 format!("{tombstone_scan_rows} retained tombstone row(s) scanned by status/doctor"),
6954 format!(
6955 "graph.db file_size={} byte(s), freelist={} byte(s)",
6956 counts.file_size_bytes.unwrap_or(0),
6957 counts.freelist_bytes.unwrap_or(0)
6958 ),
6959 ];
6960 GraphDbCompactionPolicy {
6961 status,
6962 tombstone_scan_rows,
6963 live_rows,
6964 file_size_bytes: counts.file_size_bytes,
6965 freelist_bytes: counts.freelist_bytes,
6966 safe_to_prune_tombstones: prune_confirmed,
6967 requires_convex_reconciliation: tombstone_scan_rows > 0 && !prune_confirmed,
6968 recommendations,
6969 proof,
6970 }
6971}
6972
6973fn sqlite_database_size_bytes(conn: &Connection) -> Result<u64> {
6974 let page_count: u64 = conn.query_row("PRAGMA page_count", [], |row| row.get(0))?;
6975 let page_size: u64 = conn.query_row("PRAGMA page_size", [], |row| row.get(0))?;
6976 Ok(page_count.saturating_mul(page_size))
6977}
6978
6979fn sqlite_database_freelist_bytes(conn: &Connection) -> Result<u64> {
6980 let freelist_count: u64 = conn.query_row("PRAGMA freelist_count", [], |row| row.get(0))?;
6981 let page_size: u64 = conn.query_row("PRAGMA page_size", [], |row| row.get(0))?;
6982 Ok(freelist_count.saturating_mul(page_size))
6983}
6984
6985fn sqlite_graph_tombstone_retention_diagnostics(
6986 conn: &Connection,
6987 scope: &str,
6988) -> Result<Vec<String>> {
6989 if !sqlite_table_exists(conn, "graph_tombstones")? {
6990 return Ok(Vec::new());
6991 }
6992 let cached = sqlite_graph_counts_from_cache(conn, scope)?;
6993 let counts = match cached.clone() {
6994 Some(counts) => counts,
6995 None => sqlite_graph_counts(conn, scope)?,
6996 };
6997 let live_rows = counts.nodes + counts.edges;
6998 let file_size = counts.file_size_bytes.unwrap_or(0);
6999 let freelist = counts.freelist_bytes.unwrap_or(0);
7000 let stale_live_tombstones = if cached.is_some() {
7001 0
7002 } else {
7003 let mut live_keys = BTreeSet::new();
7004 if sqlite_table_exists(conn, "graph_nodes")? {
7005 let mut stmt = conn.prepare("SELECT id FROM graph_nodes")?;
7006 for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
7007 live_keys.insert(format!("node:{}", row?));
7008 }
7009 }
7010 if sqlite_table_exists(conn, "graph_edges")? {
7011 let mut stmt = conn.prepare("SELECT edge_key FROM graph_edges")?;
7012 for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
7013 live_keys.insert(format!("edge:{}", row?));
7014 }
7015 }
7016 let mut stale_live_tombstones = 0usize;
7017 let mut stmt = conn.prepare("SELECT row_key FROM graph_tombstones ORDER BY row_key")?;
7018 for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
7019 if live_keys.contains(&row?) {
7020 stale_live_tombstones += 1;
7021 }
7022 }
7023 stale_live_tombstones
7024 };
7025
7026 let mut diagnostics = Vec::new();
7027 if stale_live_tombstones > 0 {
7028 diagnostics.push(format!(
7029 "{stale_live_tombstones} tombstone(s) reference rows that are live again; the next graph-db refresh prunes those stale tombstones before inserting new deletion markers"
7030 ));
7031 }
7032 if counts.tombstones.total > live_rows.max(1) {
7033 let source = if cached.is_some() {
7034 "cached refresh stats"
7035 } else {
7036 "live row scan"
7037 };
7038 diagnostics.push(format!(
7039 "tombstone retention exceeds live graph rows: {} tombstone(s) vs {} live row(s) from {}; graph.db file_size={} byte(s), freelist={} byte(s), status/doctor tombstone scans inspect {} extra row(s). Run convex-sync against the remote snapshot before rebuild/compaction if a remote consumer may still need deletion reconciliation.",
7040 counts.tombstones.total,
7041 live_rows,
7042 source,
7043 file_size,
7044 freelist,
7045 counts.tombstones.total
7046 ));
7047 }
7048 Ok(diagnostics)
7049}
7050
7051fn sqlite_graph_freshness_from_conn(
7052 conn: &Connection,
7053 scope: &str,
7054) -> Result<GraphDbFreshnessReport> {
7055 if !sqlite_table_exists(conn, "graph_projection_versions")? {
7056 return Ok(GraphDbFreshnessReport {
7057 status: "missing".to_string(),
7058 fail_closed: true,
7059 projection_version: None,
7060 content_hash: None,
7061 source_watermark: None,
7062 diagnostics: vec![
7063 "graph projection metadata table is missing; refresh graph.db before trusting reads"
7064 .to_string(),
7065 ],
7066 });
7067 }
7068 let version = conn
7069 .query_row(
7070 r#"
7071 SELECT projection_version, content_hash, source_watermark
7072 FROM graph_projection_versions
7073 WHERE scope = ?1
7074 "#,
7075 [scope],
7076 |row| {
7077 Ok((
7078 row.get::<_, String>(0)?,
7079 row.get::<_, Option<String>>(1)?,
7080 row.get::<_, Option<String>>(2)?,
7081 ))
7082 },
7083 )
7084 .optional()?;
7085 let Some((projection_version, content_hash, source_watermark)) = version else {
7086 return Ok(GraphDbFreshnessReport {
7087 status: "missing".to_string(),
7088 fail_closed: true,
7089 projection_version: None,
7090 content_hash: None,
7091 source_watermark: None,
7092 diagnostics: vec![
7093 "graph projection metadata is missing; refresh graph.db before trusting reads"
7094 .to_string(),
7095 ],
7096 });
7097 };
7098
7099 let mut diagnostics = Vec::new();
7100 if projection_version != GRAPH_PROJECTION_VERSION {
7101 diagnostics.push(format!(
7102 "projection version mismatch: expected {} got {}",
7103 GRAPH_PROJECTION_VERSION, projection_version
7104 ));
7105 }
7106 if content_hash.is_none() {
7107 diagnostics.push("projection content hash is missing".to_string());
7108 }
7109 let fail_closed = !diagnostics.is_empty();
7110 Ok(GraphDbFreshnessReport {
7111 status: if fail_closed { "stale" } else { "current" }.to_string(),
7112 fail_closed,
7113 projection_version: Some(projection_version),
7114 content_hash,
7115 source_watermark,
7116 diagnostics,
7117 })
7118}
7119
7120fn graph_db_operator_next_commands(
7121 root: &Path,
7122 scope: Option<&str>,
7123 include_refresh: bool,
7124) -> Vec<String> {
7125 let mut commands = Vec::new();
7126 if include_refresh {
7127 commands.push(graph_db_refresh_command(root, scope));
7128 }
7129 commands.push(format!(
7130 "tsift graph-db --path {}{} doctor --json",
7131 shell_quote(root.to_string_lossy().as_ref()),
7132 graph_db_scope_arg(scope)
7133 ));
7134 commands.push(format!(
7135 "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot <rows.json> drift --json",
7136 shell_quote(root.to_string_lossy().as_ref()),
7137 graph_db_scope_arg(scope)
7138 ));
7139 commands.push(format!(
7140 "tsift convex-sync {}{} --remote-snapshot --apply --json",
7141 shell_quote(root.to_string_lossy().as_ref()),
7142 graph_db_scope_arg(scope)
7143 ));
7144 commands
7145}
7146
7147pub(crate) fn graph_db_read_recovery_diagnostic(recovery: index::ReadOnlyRecovery) -> String {
7148 match recovery {
7149 index::ReadOnlyRecovery::SnapshotFallback => {
7150 "graph.db read recovered through snapshot fallback after a rollback-journal lock on the live database".to_string()
7151 }
7152 index::ReadOnlyRecovery::SnapshotFallbackWal => {
7153 "graph.db read recovered through WAL-aware snapshot fallback after copying live -wal/-shm sidecars".to_string()
7154 }
7155 }
7156}
7157
7158fn sqlite_string_set(conn: &Connection, sql: &str) -> Result<BTreeSet<String>> {
7159 let mut stmt = conn.prepare(sql)?;
7160 let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
7161 let mut values = BTreeSet::new();
7162 for row in rows {
7163 values.insert(row?);
7164 }
7165 Ok(values)
7166}
7167
7168fn sqlite_column_names(conn: &Connection, table: &str) -> Result<BTreeSet<String>> {
7169 let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
7170 let rows = stmt.query_map([], |row| row.get::<_, String>(1))?;
7171 let mut columns = BTreeSet::new();
7172 for row in rows {
7173 columns.insert(row?);
7174 }
7175 Ok(columns)
7176}
7177
7178fn sqlite_graph_schema_diagnostics(conn: &Connection) -> Result<Vec<String>> {
7179 let mut diagnostics = Vec::new();
7180 let user_version: i64 =
7181 conn.pragma_query_value(None, "user_version", |row| row.get::<_, i64>(0))?;
7182 if user_version > SQLITE_GRAPH_SCHEMA_VERSION {
7183 diagnostics.push(format!(
7184 "graph.db schema version {user_version} is newer than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
7185 ));
7186 } else if user_version < SQLITE_GRAPH_SCHEMA_VERSION {
7187 diagnostics.push(format!(
7188 "graph.db schema version {user_version} is older than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
7189 ));
7190 }
7191
7192 let tables = sqlite_string_set(
7193 conn,
7194 "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name",
7195 )?;
7196 let required_tables = [
7197 (
7198 "graph_nodes",
7199 vec![
7200 "id",
7201 "kind",
7202 "label",
7203 "properties_json",
7204 "provenance_json",
7205 "freshness_json",
7206 "row_hash",
7207 "source_watermark",
7208 ],
7209 ),
7210 (
7211 "graph_edges",
7212 vec![
7213 "edge_key",
7214 "from_id",
7215 "to_id",
7216 "kind",
7217 "properties_json",
7218 "provenance_json",
7219 "freshness_json",
7220 "row_hash",
7221 "source_watermark",
7222 ],
7223 ),
7224 (
7225 "graph_projection_versions",
7226 vec![
7227 "scope",
7228 "projection_version",
7229 "content_hash",
7230 "source_watermark",
7231 "observed_at_unix",
7232 ],
7233 ),
7234 (
7235 "graph_tombstones",
7236 vec!["row_key", "row_kind", "deleted_at_unix"],
7237 ),
7238 ("graph_node_properties", vec!["node_id", "key", "value"]),
7239 ("graph_edge_properties", vec!["edge_key", "key", "value"]),
7240 ];
7241 for (table, required_columns) in required_tables {
7242 if !tables.contains(table) {
7243 diagnostics.push(format!("graph.db schema drift: missing table {table}"));
7244 continue;
7245 }
7246 let columns = sqlite_column_names(conn, table)?;
7247 for column in required_columns {
7248 if !columns.contains(column) {
7249 diagnostics.push(format!(
7250 "graph.db schema drift: missing column {table}.{column}"
7251 ));
7252 }
7253 }
7254 }
7255
7256 let indexes = sqlite_string_set(
7257 conn,
7258 "SELECT name FROM sqlite_master WHERE type = 'index' ORDER BY name",
7259 )?;
7260 for index in [
7261 "idx_graph_nodes_kind",
7262 "idx_graph_edges_from_kind",
7263 "idx_graph_edges_to_kind",
7264 "idx_graph_edges_edge_key",
7265 "idx_graph_node_properties_key_value_node",
7266 "idx_graph_edge_properties_key_value_edge",
7267 ] {
7268 if !indexes.contains(index) {
7269 diagnostics.push(format!("graph.db schema drift: missing index {index}"));
7270 }
7271 }
7272
7273 if tables.contains("graph_edges") {
7274 let mut stmt = conn.prepare("PRAGMA foreign_key_list(graph_edges)")?;
7275 let rows = stmt.query_map([], |row| {
7276 Ok((row.get::<_, String>(3)?, row.get::<_, String>(4)?))
7277 })?;
7278 let mut fks = BTreeSet::new();
7279 for row in rows {
7280 fks.insert(row?);
7281 }
7282 for expected in [
7283 ("from_id".to_string(), "id".to_string()),
7284 ("to_id".to_string(), "id".to_string()),
7285 ] {
7286 if !fks.contains(&expected) {
7287 diagnostics.push(format!(
7288 "graph.db schema drift: missing graph_edges foreign key {} -> graph_nodes.{}",
7289 expected.0, expected.1
7290 ));
7291 }
7292 }
7293 }
7294
7295 Ok(diagnostics)
7296}
7297
7298fn sqlite_query_diagnostics(conn: &Connection, sql: &str) -> Result<Vec<String>> {
7299 let mut stmt = conn.prepare(sql)?;
7300 let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
7301 let mut diagnostics = Vec::new();
7302 for row in rows {
7303 diagnostics.push(row?);
7304 }
7305 Ok(diagnostics)
7306}
7307
7308fn sqlite_graph_duplicate_diagnostics(conn: &Connection) -> Result<Vec<String>> {
7309 let mut diagnostics = sqlite_query_diagnostics(
7310 conn,
7311 r#"
7312 SELECT 'duplicate graph_nodes.id ' || id || ' (' || COUNT(*) || ' rows)'
7313 FROM graph_nodes
7314 GROUP BY id
7315 HAVING COUNT(*) > 1
7316 ORDER BY id
7317 "#,
7318 )?;
7319 diagnostics.extend(sqlite_query_diagnostics(
7320 conn,
7321 r#"
7322 SELECT 'duplicate graph_edges key ' || from_id || ' -' || kind || '-> ' || to_id || ' (' || COUNT(*) || ' rows)'
7323 FROM graph_edges
7324 GROUP BY from_id, to_id, kind
7325 HAVING COUNT(*) > 1
7326 ORDER BY from_id, kind, to_id
7327 "#,
7328 )?);
7329 diagnostics.extend(sqlite_query_diagnostics(
7330 conn,
7331 r#"
7332 SELECT 'duplicate graph_edges.edge_key ' || edge_key || ' (' || COUNT(*) || ' rows)'
7333 FROM graph_edges
7334 GROUP BY edge_key
7335 HAVING COUNT(*) > 1
7336 ORDER BY edge_key
7337 "#,
7338 )?);
7339 Ok(diagnostics)
7340}
7341
7342fn sqlite_graph_orphan_diagnostics(conn: &Connection) -> Result<Vec<String>> {
7343 sqlite_query_diagnostics(
7344 conn,
7345 r#"
7346 SELECT 'orphan edge missing from node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
7347 FROM graph_edges e
7348 LEFT JOIN graph_nodes n ON n.id = e.from_id
7349 WHERE n.id IS NULL
7350 UNION ALL
7351 SELECT 'orphan edge missing to node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
7352 FROM graph_edges e
7353 LEFT JOIN graph_nodes n ON n.id = e.to_id
7354 WHERE n.id IS NULL
7355 ORDER BY 1
7356 "#,
7357 )
7358}
7359
7360fn sqlite_graph_json_diagnostics(conn: &Connection) -> Result<Vec<String>> {
7361 let mut diagnostics = Vec::new();
7362 let mut node_stmt = conn.prepare(
7363 "SELECT id, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
7364 )?;
7365 let node_rows = node_stmt.query_map([], |row| {
7366 Ok((
7367 row.get::<_, String>(0)?,
7368 row.get::<_, String>(1)?,
7369 row.get::<_, String>(2)?,
7370 row.get::<_, Option<String>>(3)?,
7371 ))
7372 })?;
7373 for row in node_rows {
7374 let (id, properties_json, provenance_json, freshness_json) = row?;
7375 if let Err(err) = serde_json::from_str::<BTreeMap<String, String>>(&properties_json) {
7376 diagnostics.push(format!(
7377 "graph_nodes {id} properties_json is invalid: {err}"
7378 ));
7379 }
7380 if let Err(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json) {
7381 diagnostics.push(format!(
7382 "graph_nodes {id} provenance_json is invalid: {err}"
7383 ));
7384 }
7385 if let Some(freshness_json) = freshness_json
7386 && let Err(err) = serde_json::from_str::<GraphFreshness>(&freshness_json)
7387 {
7388 diagnostics.push(format!("graph_nodes {id} freshness_json is invalid: {err}"));
7389 }
7390 }
7391
7392 let mut edge_stmt = conn.prepare(
7393 "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
7394 )?;
7395 let edge_rows = edge_stmt.query_map([], |row| {
7396 Ok((
7397 row.get::<_, String>(0)?,
7398 row.get::<_, String>(1)?,
7399 row.get::<_, String>(2)?,
7400 row.get::<_, String>(3)?,
7401 row.get::<_, String>(4)?,
7402 row.get::<_, String>(5)?,
7403 row.get::<_, Option<String>>(6)?,
7404 ))
7405 })?;
7406 for row in edge_rows {
7407 let (edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json) =
7408 row?;
7409 let edge = format!("{edge_key} {from_id} -{kind}-> {to_id}");
7410 if let Err(err) = serde_json::from_str::<BTreeMap<String, String>>(&properties_json) {
7411 diagnostics.push(format!(
7412 "graph_edges {edge} properties_json is invalid: {err}"
7413 ));
7414 }
7415 if let Err(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json) {
7416 diagnostics.push(format!(
7417 "graph_edges {edge} provenance_json is invalid: {err}"
7418 ));
7419 }
7420 if let Some(freshness_json) = freshness_json
7421 && let Err(err) = serde_json::from_str::<GraphFreshness>(&freshness_json)
7422 {
7423 diagnostics.push(format!(
7424 "graph_edges {edge} freshness_json is invalid: {err}"
7425 ));
7426 }
7427 }
7428 Ok(diagnostics)
7429}
7430
7431fn sqlite_graph_projection_metadata_diagnostics(
7432 conn: &Connection,
7433 scope: Option<&str>,
7434) -> Result<Vec<String>> {
7435 let mut diagnostics = Vec::new();
7436 let scope_key = scope.unwrap_or("root");
7437 let version = conn
7438 .query_row(
7439 r#"
7440 SELECT projection_version, content_hash, source_watermark
7441 FROM graph_projection_versions
7442 WHERE scope = ?1
7443 "#,
7444 [scope_key],
7445 |row| {
7446 Ok((
7447 row.get::<_, String>(0)?,
7448 row.get::<_, Option<String>>(1)?,
7449 row.get::<_, Option<String>>(2)?,
7450 ))
7451 },
7452 )
7453 .optional()?;
7454 let Some((projection_version, content_hash, _source_watermark)) = version else {
7455 diagnostics.push(format!(
7456 "graph projection metadata is missing for scope {scope_key}"
7457 ));
7458 return Ok(diagnostics);
7459 };
7460 if projection_version != GRAPH_PROJECTION_VERSION {
7461 diagnostics.push(format!(
7462 "projection version mismatch: expected {GRAPH_PROJECTION_VERSION} got {projection_version}"
7463 ));
7464 }
7465 if content_hash.is_none() {
7466 diagnostics.push("projection content hash is missing".to_string());
7467 }
7468
7469 let meta_id = graph_projection_meta_id(scope);
7470 let meta_properties = conn
7471 .query_row(
7472 "SELECT properties_json FROM graph_nodes WHERE id = ?1 AND kind = ?2",
7473 (&meta_id, GRAPH_PROJECTION_META_KIND),
7474 |row| row.get::<_, String>(0),
7475 )
7476 .optional()?;
7477 let Some(meta_properties) = meta_properties else {
7478 diagnostics.push(format!("projection_meta node {meta_id} is missing"));
7479 return Ok(diagnostics);
7480 };
7481 let properties = serde_json::from_str::<BTreeMap<String, String>>(&meta_properties)
7482 .with_context(|| format!("parsing projection_meta properties for {meta_id}"))?;
7483 if properties.get("projection_version").map(String::as_str) != Some(GRAPH_PROJECTION_VERSION) {
7484 diagnostics.push(format!(
7485 "projection_meta node {meta_id} has stale projection_version"
7486 ));
7487 }
7488 if properties.get("content_hash") != content_hash.as_ref() {
7489 diagnostics.push(format!(
7490 "projection_meta node {meta_id} content_hash does not match graph_projection_versions"
7491 ));
7492 }
7493 Ok(diagnostics)
7494}
7495
7496pub(crate) fn sqlite_convex_rows_from_conn(conn: &Connection) -> Result<ConvexProjectionRows> {
7497 let mut node_stmt = conn.prepare(
7498 "SELECT id, kind, label, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
7499 )?;
7500 let node_rows = node_stmt.query_map([], |row| {
7501 let properties_json: String = row.get(3)?;
7502 let provenance_json: String = row.get(4)?;
7503 let freshness_json: Option<String> = row.get(5)?;
7504 Ok((
7505 row.get::<_, String>(0)?,
7506 row.get::<_, String>(1)?,
7507 row.get::<_, String>(2)?,
7508 properties_json,
7509 provenance_json,
7510 freshness_json,
7511 ))
7512 })?;
7513 let mut nodes = Vec::new();
7514 for row in node_rows {
7515 let (external_id, kind, label, properties_json, provenance_json, freshness_json) = row?;
7516 nodes.push(ConvexNodeRow {
7517 external_id,
7518 kind,
7519 label,
7520 properties: serde_json::from_str(&properties_json)?,
7521 provenance: serde_json::from_str(&provenance_json)?,
7522 freshness: freshness_json
7523 .map(|value| serde_json::from_str(&value))
7524 .transpose()?,
7525 });
7526 }
7527
7528 let mut edge_stmt = conn.prepare(
7529 "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
7530 )?;
7531 let edge_rows = edge_stmt.query_map([], |row| {
7532 let properties_json: String = row.get(4)?;
7533 let provenance_json: String = row.get(5)?;
7534 let freshness_json: Option<String> = row.get(6)?;
7535 Ok((
7536 row.get::<_, String>(0)?,
7537 row.get::<_, String>(1)?,
7538 row.get::<_, String>(2)?,
7539 row.get::<_, String>(3)?,
7540 properties_json,
7541 provenance_json,
7542 freshness_json,
7543 ))
7544 })?;
7545 let mut edges = Vec::new();
7546 for row in edge_rows {
7547 let (
7548 edge_key,
7549 from_external_id,
7550 to_external_id,
7551 kind,
7552 properties_json,
7553 provenance_json,
7554 freshness_json,
7555 ) = row?;
7556 edges.push(ConvexEdgeRow {
7557 edge_key,
7558 from_external_id,
7559 to_external_id,
7560 kind,
7561 properties: serde_json::from_str(&properties_json)?,
7562 provenance: serde_json::from_str(&provenance_json)?,
7563 freshness: freshness_json
7564 .map(|value| serde_json::from_str(&value))
7565 .transpose()?,
7566 });
7567 }
7568 Ok(ConvexProjectionRows { nodes, edges })
7569}
7570
7571fn convex_required_index_label(index: &ConvexRequiredIndex) -> String {
7572 format!("{}.{}({})", index.table, index.name, index.fields.join(","))
7573}
7574
7575fn convex_snapshot_index_value(value: &serde_json::Value) -> Option<&serde_json::Value> {
7576 value
7577 .get("indexes")
7578 .or_else(|| value.get("requiredIndexes"))
7579 .or_else(|| {
7580 value
7581 .get("metadata")
7582 .and_then(|metadata| metadata.get("indexes"))
7583 })
7584}
7585
7586fn convex_snapshot_declared_indexes(
7587 value: &serde_json::Value,
7588) -> Result<Option<Vec<ConvexRequiredIndex>>> {
7589 convex_snapshot_index_value(value)
7590 .map(|indexes| {
7591 serde_json::from_value::<Vec<ConvexRequiredIndex>>(indexes.clone())
7592 .context("parsing Convex snapshot index metadata")
7593 })
7594 .transpose()
7595}
7596
7597fn convex_snapshot_index_diagnostics(value: &serde_json::Value) -> Result<Vec<String>> {
7598 let required = convex_required_indexes();
7599 let Some(declared) = convex_snapshot_declared_indexes(value)? else {
7600 return Ok(vec![format!(
7601 "Convex snapshot index metadata is missing; required indexes not confirmed: {}",
7602 required
7603 .iter()
7604 .map(convex_required_index_label)
7605 .collect::<Vec<_>>()
7606 .join(", ")
7607 )]);
7608 };
7609 let declared = declared.into_iter().collect::<BTreeSet<_>>();
7610 let missing = required
7611 .iter()
7612 .filter(|index| !declared.contains(*index))
7613 .map(convex_required_index_label)
7614 .collect::<Vec<_>>();
7615 if missing.is_empty() {
7616 Ok(Vec::new())
7617 } else {
7618 Ok(vec![format!(
7619 "Convex snapshot is missing required index metadata: {}",
7620 missing.join(", ")
7621 )])
7622 }
7623}
7624
7625pub(crate) fn load_convex_projection_snapshot_value(
7626 snapshot_path: &Path,
7627) -> Result<(ConvexProjectionRows, serde_json::Value)> {
7628 let content = fs::read_to_string(snapshot_path).with_context(|| {
7629 format!(
7630 "reading Convex projection snapshot {}",
7631 snapshot_path.display()
7632 )
7633 })?;
7634 let value = serde_json::from_str::<serde_json::Value>(&content).with_context(|| {
7635 format!(
7636 "parsing Convex projection snapshot {}",
7637 snapshot_path.display()
7638 )
7639 })?;
7640 let rows = serde_json::from_value::<ConvexProjectionRows>(value.clone())
7641 .with_context(|| format!("parsing Convex projection rows {}", snapshot_path.display()))?;
7642 Ok((rows, value))
7643}
7644
7645pub(crate) fn append_sqlite_graph_doctor_checks(
7646 report: &mut GraphDbDoctorReport,
7647 root: &Path,
7648 scope: Option<&str>,
7649 graph_db: &Path,
7650) -> Option<substrate::SqliteReadOnlyConnection> {
7651 let rebuild = graph_db_rebuild_command(root, scope);
7652 let backup_rebuild = graph_db_backup_rebuild_command(root, scope, graph_db);
7653 if !graph_db.exists() {
7654 report.push_check(graph_db_doctor_check(
7655 "sqlite_graph_db_exists",
7656 vec![format!("graph.db is missing at {}", graph_db.display())],
7657 vec![rebuild],
7658 ));
7659 return None;
7660 }
7661 report.push_check(graph_db_doctor_check(
7662 "sqlite_graph_db_exists",
7663 Vec::new(),
7664 vec![rebuild.clone()],
7665 ));
7666
7667 let conn = match open_sqlite_graph_db_readonly(graph_db) {
7668 Ok(conn) => conn,
7669 Err(err) => {
7670 report.push_check(graph_db_doctor_check(
7671 "sqlite_graph_db_open",
7672 vec![err.to_string()],
7673 vec![backup_rebuild],
7674 ));
7675 return None;
7676 }
7677 };
7678 report.push_check(graph_db_doctor_check(
7679 "sqlite_graph_db_open",
7680 Vec::new(),
7681 vec![rebuild.clone()],
7682 ));
7683 if let Some(recovery) = conn.recovery() {
7684 report.push_check(GraphDbDoctorCheck {
7685 name: "sqlite_graph_db_read_recovery".to_string(),
7686 status: "recovered".to_string(),
7687 fail_closed: false,
7688 diagnostics: vec![graph_db_read_recovery_diagnostic(recovery)],
7689 repair_commands: Vec::new(),
7690 });
7691 }
7692
7693 let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
7694 .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
7695 report.push_check(graph_db_doctor_check(
7696 "sqlite_schema",
7697 schema_diagnostics,
7698 vec![backup_rebuild.clone()],
7699 ));
7700
7701 let metadata_diagnostics = sqlite_graph_projection_metadata_diagnostics(conn.conn(), scope)
7702 .unwrap_or_else(|err| {
7703 vec![format!(
7704 "graph projection metadata inspection failed: {err}"
7705 )]
7706 });
7707 report.push_check(graph_db_doctor_check(
7708 "sqlite_projection_metadata",
7709 metadata_diagnostics,
7710 vec![rebuild.clone()],
7711 ));
7712
7713 let duplicate_diagnostics = sqlite_graph_duplicate_diagnostics(conn.conn())
7714 .unwrap_or_else(|err| vec![format!("duplicate id inspection failed: {err}")]);
7715 report.push_check(graph_db_doctor_check(
7716 "sqlite_duplicate_ids",
7717 duplicate_diagnostics,
7718 vec![backup_rebuild.clone()],
7719 ));
7720
7721 let orphan_diagnostics = sqlite_graph_orphan_diagnostics(conn.conn())
7722 .unwrap_or_else(|err| vec![format!("orphan edge inspection failed: {err}")]);
7723 report.push_check(graph_db_doctor_check(
7724 "sqlite_orphan_edges",
7725 orphan_diagnostics,
7726 vec![rebuild.clone()],
7727 ));
7728
7729 let json_diagnostics = sqlite_graph_json_diagnostics(conn.conn())
7730 .unwrap_or_else(|err| vec![format!("graph row JSON inspection failed: {err}")]);
7731 report.push_check(graph_db_doctor_check(
7732 "sqlite_row_json",
7733 json_diagnostics,
7734 vec![backup_rebuild],
7735 ));
7736
7737 let tombstone_diagnostics =
7738 sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
7739 .unwrap_or_else(|err| {
7740 vec![format!(
7741 "graph tombstone retention inspection failed: {err}"
7742 )]
7743 });
7744 report.push_check(GraphDbDoctorCheck {
7745 name: "sqlite_tombstone_retention".to_string(),
7746 status: if tombstone_diagnostics.is_empty() {
7747 "ok".to_string()
7748 } else {
7749 "warning".to_string()
7750 },
7751 fail_closed: false,
7752 diagnostics: tombstone_diagnostics,
7753 repair_commands: Vec::new(),
7754 });
7755 let compaction_check = match sqlite_graph_counts(conn.conn(), scope.unwrap_or("root")) {
7756 Ok(counts) => {
7757 let policy = graph_db_compaction_policy(root, scope, &counts, false);
7758 GraphDbDoctorCheck {
7759 name: "sqlite_compaction_policy".to_string(),
7760 status: policy.status.clone(),
7761 fail_closed: false,
7762 diagnostics: policy.proof,
7763 repair_commands: if policy.status == "recommended" {
7764 policy.recommendations
7765 } else {
7766 Vec::new()
7767 },
7768 }
7769 }
7770 Err(err) => GraphDbDoctorCheck {
7771 name: "sqlite_compaction_policy".to_string(),
7772 status: "warning".to_string(),
7773 fail_closed: false,
7774 diagnostics: vec![format!("graph compaction policy inspection failed: {err}")],
7775 repair_commands: Vec::new(),
7776 },
7777 };
7778 report.push_check(compaction_check);
7779
7780 Some(conn)
7781}
7782
7783pub(crate) fn append_convex_snapshot_doctor_checks(
7784 report: &mut GraphDbDoctorReport,
7785 root: &Path,
7786 scope: Option<&str>,
7787 local_rows: Option<&ConvexProjectionRows>,
7788 snapshot_path: Option<&Path>,
7789) {
7790 let repair = convex_refresh_command(root, scope);
7791 let Some(snapshot_path) = snapshot_path else {
7792 report.push_check(graph_db_doctor_check(
7793 "convex_snapshot_present",
7794 vec!["--backend convex-snapshot requires --convex-snapshot <rows.json>".to_string()],
7795 vec![format!(
7796 "tsift convex-sync {}{} --json > convex-rows.json",
7797 shell_quote(root.to_string_lossy().as_ref()),
7798 graph_db_scope_arg(scope)
7799 )],
7800 ));
7801 return;
7802 };
7803 report.push_check(graph_db_doctor_check(
7804 "convex_snapshot_present",
7805 Vec::new(),
7806 vec![repair.clone()],
7807 ));
7808
7809 let (snapshot, snapshot_value) = match load_convex_projection_snapshot_value(snapshot_path) {
7810 Ok(snapshot) => snapshot,
7811 Err(err) => {
7812 report.push_check(graph_db_doctor_check(
7813 "convex_snapshot_parse",
7814 vec![err.to_string()],
7815 vec![repair],
7816 ));
7817 return;
7818 }
7819 };
7820 report.push_check(graph_db_doctor_check(
7821 "convex_snapshot_parse",
7822 Vec::new(),
7823 vec![repair.clone()],
7824 ));
7825
7826 let row_diagnostics = convex_projection_row_diagnostics(&snapshot);
7827 report.push_check(graph_db_doctor_check(
7828 "convex_snapshot_rows",
7829 row_diagnostics,
7830 vec![repair.clone()],
7831 ));
7832
7833 let index_diagnostics = convex_snapshot_index_diagnostics(&snapshot_value)
7834 .unwrap_or_else(|err| vec![err.to_string()]);
7835 report.required_indexes = convex_required_indexes();
7836 report.push_check(graph_db_doctor_check(
7837 "convex_required_indexes",
7838 index_diagnostics,
7839 vec![
7840 "Add the indexes from examples/convex-graph/schema.ts, then redeploy the Convex app"
7841 .to_string(),
7842 ],
7843 ));
7844
7845 if let Some(local_rows) = local_rows {
7846 let freshness = convex_projection_freshness(local_rows, Some(&snapshot), scope);
7847 report.push_check(graph_db_doctor_check(
7848 "convex_projection_freshness",
7849 freshness.diagnostics,
7850 vec![repair],
7851 ));
7852 } else {
7853 report.push_check(graph_db_doctor_check(
7854 "convex_projection_freshness",
7855 vec![
7856 "local SQLite graph.db could not be read, so Convex freshness cannot be verified"
7857 .to_string(),
7858 ],
7859 vec![graph_db_rebuild_command(root, scope)],
7860 ));
7861 }
7862}
7863
7864fn graph_db_convex_snapshot_doctor_command(
7865 root: &Path,
7866 scope: Option<&str>,
7867 snapshot_path: &Path,
7868) -> String {
7869 format!(
7870 "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} doctor --json",
7871 shell_quote(root.to_string_lossy().as_ref()),
7872 graph_db_scope_arg(scope),
7873 shell_quote(snapshot_path.to_string_lossy().as_ref())
7874 )
7875}
7876
7877fn graph_db_convex_snapshot_read_command(
7878 root: &Path,
7879 scope: Option<&str>,
7880 snapshot_path: &Path,
7881) -> String {
7882 format!(
7883 "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} schema --json",
7884 shell_quote(root.to_string_lossy().as_ref()),
7885 graph_db_scope_arg(scope),
7886 shell_quote(snapshot_path.to_string_lossy().as_ref())
7887 )
7888}
7889
7890fn convex_sync_snapshot_diff_command(
7891 root: &Path,
7892 scope: Option<&str>,
7893 snapshot_path: &Path,
7894) -> String {
7895 format!(
7896 "tsift convex-sync {}{} --snapshot {} --json",
7897 shell_quote(root.to_string_lossy().as_ref()),
7898 graph_db_scope_arg(scope),
7899 shell_quote(snapshot_path.to_string_lossy().as_ref())
7900 )
7901}
7902
7903pub(crate) struct GraphDbDriftInput<'a> {
7904 root: &'a Path,
7905 scope: Option<&'a str>,
7906 graph_db: &'a Path,
7907 snapshot_path: &'a Path,
7908 local: &'a ConvexProjectionRows,
7909 snapshot: &'a ConvexProjectionRows,
7910 snapshot_value: &'a serde_json::Value,
7911 warnings: Vec<String>,
7912}
7913
7914pub(crate) fn graph_db_drift_report(input: GraphDbDriftInput<'_>) -> GraphDbDriftReport {
7915 let GraphDbDriftInput {
7916 root,
7917 scope,
7918 graph_db,
7919 snapshot_path,
7920 local,
7921 snapshot,
7922 snapshot_value,
7923 warnings,
7924 } = input;
7925 let freshness = convex_projection_freshness(local, Some(snapshot), scope);
7926 let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
7927 convex_rows_diff(local, Some(snapshot));
7928 let row_diagnostics = convex_projection_row_diagnostics(snapshot);
7929 let index_diagnostics = convex_snapshot_index_diagnostics(snapshot_value)
7930 .unwrap_or_else(|err| vec![format!("Convex snapshot index metadata failed: {err}")]);
7931 let local_hash = freshness.local_hash.clone();
7932 let snapshot_hash = freshness.snapshot_hash.clone();
7933 let stale_nodes = freshness.stale_nodes.clone();
7934 let stale_edges = freshness.stale_edges.clone();
7935
7936 let duplicate_failures = row_diagnostics
7937 .iter()
7938 .filter(|diagnostic| diagnostic.contains("duplicate"))
7939 .count();
7940 let orphan_failures = row_diagnostics
7941 .iter()
7942 .filter(|diagnostic| diagnostic.contains("references missing"))
7943 .count();
7944 let missing_required_indexes = index_diagnostics.len();
7945 let stale_projection_metadata =
7946 usize::from(local_hash != snapshot_hash || snapshot_hash.is_none());
7947 let hard_failures = duplicate_failures + orphan_failures + missing_required_indexes;
7948 let has_drift = freshness.fail_closed
7949 || !node_upserts.is_empty()
7950 || !edge_upserts.is_empty()
7951 || !node_tombstones.is_empty()
7952 || !edge_tombstones.is_empty();
7953 let status = if hard_failures > 0 {
7954 "fail_closed"
7955 } else if has_drift {
7956 "drift"
7957 } else {
7958 "current"
7959 }
7960 .to_string();
7961
7962 let mut diagnostics = Vec::new();
7963 diagnostics.extend(row_diagnostics);
7964 diagnostics.extend(index_diagnostics);
7965 diagnostics.extend(freshness.diagnostics.clone());
7966 if has_drift {
7967 diagnostics.push(format!(
7968 "projection diff: {} node upsert(s), {} edge upsert(s), {} node tombstone(s), {} edge tombstone(s)",
7969 node_upserts.len(),
7970 edge_upserts.len(),
7971 node_tombstones.len(),
7972 edge_tombstones.len()
7973 ));
7974 }
7975
7976 let mut next_commands = vec![graph_db_convex_snapshot_doctor_command(
7977 root,
7978 scope,
7979 snapshot_path,
7980 )];
7981 if status == "current" {
7982 next_commands.push(graph_db_convex_snapshot_read_command(
7983 root,
7984 scope,
7985 snapshot_path,
7986 ));
7987 } else {
7988 next_commands.push(convex_sync_snapshot_diff_command(
7989 root,
7990 scope,
7991 snapshot_path,
7992 ));
7993 next_commands.push(convex_refresh_command(root, scope));
7994 }
7995
7996 GraphDbDriftReport {
7997 root: root.to_string_lossy().to_string(),
7998 scope: scope.map(str::to_string),
7999 graph_db: graph_db.to_string_lossy().to_string(),
8000 convex_snapshot: snapshot_path.to_string_lossy().to_string(),
8001 status: status.clone(),
8002 graph_reads_allowed: status == "current",
8003 projection_version: GRAPH_PROJECTION_VERSION.to_string(),
8004 local_hash,
8005 snapshot_hash,
8006 summary: GraphDbDriftSummary {
8007 node_upserts: node_upserts.len(),
8008 edge_upserts: edge_upserts.len(),
8009 node_tombstones: node_tombstones.len(),
8010 edge_tombstones: edge_tombstones.len(),
8011 stale_nodes: stale_nodes.len(),
8012 stale_edges: stale_edges.len(),
8013 stale_projection_metadata,
8014 duplicate_failures,
8015 orphan_failures,
8016 missing_required_indexes,
8017 },
8018 node_upserts: node_upserts
8019 .into_iter()
8020 .map(|row| row.external_id)
8021 .collect(),
8022 edge_upserts: edge_upserts.into_iter().map(|row| row.edge_key).collect(),
8023 node_tombstones,
8024 edge_tombstones,
8025 stale_nodes,
8026 stale_edges,
8027 diagnostics,
8028 next_commands,
8029 required_indexes: convex_required_indexes(),
8030 warnings,
8031 }
8032}
8033
8034pub(crate) fn print_graph_db_drift_human(report: &GraphDbDriftReport) {
8035 println!(
8036 "graph-db drift status: {} reads_allowed: {}",
8037 report.status, report.graph_reads_allowed
8038 );
8039 println!("graph_db: {}", report.graph_db);
8040 println!("convex_snapshot: {}", report.convex_snapshot);
8041 println!(
8042 "upserts: {} node(s), {} edge(s)",
8043 report.summary.node_upserts, report.summary.edge_upserts
8044 );
8045 println!(
8046 "tombstones: {} node(s), {} edge(s)",
8047 report.summary.node_tombstones, report.summary.edge_tombstones
8048 );
8049 for diagnostic in &report.diagnostics {
8050 println!("diagnostic: {diagnostic}");
8051 }
8052 for command in &report.next_commands {
8053 println!("next: {command}");
8054 }
8055}
8056
8057pub(crate) fn print_graph_db_doctor_human(report: &GraphDbDoctorReport) {
8058 println!(
8059 "graph-db doctor backend: {} status: {}",
8060 report.backend, report.status
8061 );
8062 println!("graph_db: {}", report.graph_db);
8063 if let Some(snapshot) = &report.convex_snapshot {
8064 println!("convex_snapshot: {snapshot}");
8065 }
8066 for check in &report.checks {
8067 println!("check: {} {}", check.name, check.status);
8068 for diagnostic in &check.diagnostics {
8069 println!(" diagnostic: {diagnostic}");
8070 }
8071 }
8072 for command in &report.repair_commands {
8073 println!("repair: {command}");
8074 }
8075}
8076
8077pub(crate) fn graph_db_operator_report_from_disk(
8078 root: &Path,
8079 scope: Option<&str>,
8080 graph_db: &Path,
8081 operation: &str,
8082 refresh: Option<GraphDbRefreshSummary>,
8083 warnings: Vec<String>,
8084) -> Result<GraphDbOperatorReport> {
8085 if !graph_db.exists() {
8086 let next_commands = graph_db_operator_next_commands(root, scope, true);
8087 let counts = GraphDbOperatorCounts {
8088 nodes: 0,
8089 edges: 0,
8090 tombstones: GraphDbTombstoneCounts {
8091 nodes: 0,
8092 edges: 0,
8093 total: 0,
8094 },
8095 file_size_bytes: None,
8096 freelist_bytes: None,
8097 };
8098 return Ok(GraphDbOperatorReport {
8099 root: root.to_string_lossy().to_string(),
8100 scope: scope.map(str::to_string),
8101 graph_db: graph_db.to_string_lossy().to_string(),
8102 operation: operation.to_string(),
8103 status: "missing".to_string(),
8104 materialized: false,
8105 freshness: GraphDbFreshnessReport {
8106 status: "missing".to_string(),
8107 fail_closed: true,
8108 projection_version: None,
8109 content_hash: None,
8110 source_watermark: None,
8111 diagnostics: vec![
8112 "graph.db is missing; run graph-db refresh before trusting graph reads"
8113 .to_string(),
8114 ],
8115 },
8116 readiness: graph_effectiveness_blocked(
8117 "graph_db_missing",
8118 vec![
8119 "graph.db is missing; materialize the projection before relying on graph effectiveness".to_string(),
8120 ],
8121 next_commands.clone(),
8122 ),
8123 counts: counts.clone(),
8124 refresh,
8125 compaction: graph_db_compaction_policy(root, scope, &counts, false),
8126 recovery: None,
8127 next_commands,
8128 warnings,
8129 });
8130 }
8131
8132 let conn = open_sqlite_graph_db_readonly(graph_db)?;
8133 let recovery = conn.recovery();
8134 let mut warnings = warnings;
8135 if let Some(recovery) = recovery {
8136 warnings.push(graph_db_read_recovery_diagnostic(recovery));
8137 }
8138 let mut freshness = sqlite_graph_freshness_from_conn(conn.conn(), scope.unwrap_or("root"))?;
8139 let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
8140 .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
8141 if !schema_diagnostics.is_empty() {
8142 freshness.diagnostics.extend(schema_diagnostics);
8143 freshness.fail_closed = true;
8144 freshness.status = "stale".to_string();
8145 }
8146 let counts = sqlite_graph_counts(conn.conn(), scope.unwrap_or("root"))?;
8147 warnings.extend(
8148 sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
8149 .unwrap_or_else(|err| {
8150 vec![format!(
8151 "graph tombstone retention inspection failed: {err}"
8152 )]
8153 }),
8154 );
8155 let status = if freshness.fail_closed {
8156 "stale"
8157 } else {
8158 "current"
8159 }
8160 .to_string();
8161
8162 Ok(GraphDbOperatorReport {
8163 root: root.to_string_lossy().to_string(),
8164 scope: scope.map(str::to_string),
8165 graph_db: graph_db.to_string_lossy().to_string(),
8166 operation: operation.to_string(),
8167 status,
8168 materialized: true,
8169 freshness,
8170 readiness: graph_db_semantic_readiness(root, scope),
8171 compaction: graph_db_compaction_policy(root, scope, &counts, false),
8172 counts,
8173 refresh,
8174 recovery,
8175 next_commands: graph_db_operator_next_commands(root, scope, false),
8176 warnings,
8177 })
8178}
8179
8180fn print_graph_db_operator_human(report: &GraphDbOperatorReport) {
8181 println!(
8182 "graph-db {} status: {} materialized: {}",
8183 report.operation, report.status, report.materialized
8184 );
8185 println!("graph_db: {}", report.graph_db);
8186 println!(
8187 "projection: version={} hash={} watermark={}",
8188 report
8189 .freshness
8190 .projection_version
8191 .as_deref()
8192 .unwrap_or("<missing>"),
8193 report
8194 .freshness
8195 .content_hash
8196 .as_deref()
8197 .unwrap_or("<missing>"),
8198 report
8199 .freshness
8200 .source_watermark
8201 .as_deref()
8202 .unwrap_or("<missing>")
8203 );
8204 println!(
8205 "rows: {} node(s), {} edge(s), {} tombstone(s)",
8206 report.counts.nodes, report.counts.edges, report.counts.tombstones.total
8207 );
8208 println!(
8209 "readiness: {} reason: {} fail_closed: {}",
8210 report.readiness.status, report.readiness.reason, report.readiness.fail_closed
8211 );
8212 if let Some(file_size) = report.counts.file_size_bytes {
8213 println!(
8214 "storage: {} byte(s), {} free byte(s)",
8215 file_size,
8216 report.counts.freelist_bytes.unwrap_or(0)
8217 );
8218 }
8219 if let Some(refresh) = &report.refresh {
8220 println!(
8221 "refresh: {} tombstoned node(s), {} tombstoned edge(s)",
8222 refresh.tombstoned_nodes, refresh.tombstoned_edges
8223 );
8224 println!(
8225 "delta: {} node upsert(s), {} edge upsert(s), {} property row upsert(s), {} unchanged node(s), {} unchanged edge(s), {} unchanged property row(s), {} deleted property row(s), {} pruned tombstone(s)",
8226 refresh.upserted_nodes,
8227 refresh.upserted_edges,
8228 refresh.upserted_properties,
8229 refresh.unchanged_nodes,
8230 refresh.unchanged_edges,
8231 refresh.unchanged_properties,
8232 refresh.deleted_properties,
8233 refresh.pruned_tombstones
8234 );
8235 }
8236 println!(
8237 "compaction: {} tombstone_scan_rows={} live_rows={}",
8238 report.compaction.status,
8239 report.compaction.tombstone_scan_rows,
8240 report.compaction.live_rows
8241 );
8242 for proof in &report.compaction.proof {
8243 println!("compaction proof: {proof}");
8244 }
8245 if let Some(recovery) = report.recovery {
8246 println!("recovery: {}", graph_db_read_recovery_diagnostic(recovery));
8247 }
8248 for diagnostic in &report.freshness.diagnostics {
8249 println!("diagnostic: {diagnostic}");
8250 }
8251 for diagnostic in &report.readiness.diagnostics {
8252 println!("readiness diagnostic: {diagnostic}");
8253 }
8254 for warning in &report.warnings {
8255 println!("warning: {warning}");
8256 }
8257 for command in &report.readiness.next_commands {
8258 println!("readiness next: {command}");
8259 }
8260 for command in &report.next_commands {
8261 println!("next: {command}");
8262 }
8263}
8264
8265pub(crate) fn print_graph_db_operator_report(
8266 report: &GraphDbOperatorReport,
8267 format: OutputFormat,
8268) -> Result<()> {
8269 if format.json_output {
8270 print_json_or_envelope(
8271 report,
8272 &format,
8273 "graph-db",
8274 &report.operation,
8275 ToolEnvelopeSummary {
8276 text: format!(
8277 "Graph DB {} status {} with {} node(s), {} edge(s), {} tombstone(s)",
8278 report.operation,
8279 report.status,
8280 report.counts.nodes,
8281 report.counts.edges,
8282 report.counts.tombstones.total
8283 ),
8284 metrics: vec![
8285 envelope_metric("operation", &report.operation),
8286 envelope_metric("status", &report.status),
8287 envelope_metric("nodes", report.counts.nodes),
8288 envelope_metric("edges", report.counts.edges),
8289 envelope_metric("tombstones", report.counts.tombstones.total),
8290 envelope_metric("compaction", &report.compaction.status),
8291 envelope_metric("readiness", &report.readiness.status),
8292 ],
8293 },
8294 false,
8295 report.next_commands.clone(),
8296 )
8297 } else {
8298 print_graph_db_operator_human(report);
8299 Ok(())
8300 }
8301}
8302
/// Returns the part of `run` before the first `" ("`, or all of `run` when that
/// marker does not occur.
fn status_run_command_without_notes(run: &str) -> &str {
    match run.find(" (") {
        Some(notes_start) => &run[..notes_start],
        None => run,
    }
}
8308
8309fn graph_db_status_summarize_command(report: &status::StatusReport) -> String {
8310 report
8311 .recommendations
8312 .run
8313 .as_deref()
8314 .filter(|command| command.contains("summarize --extract"))
8315 .map(status_run_command_without_notes)
8316 .unwrap_or("tsift summarize --extract .")
8317 .to_string()
8318}
8319
8320fn graph_db_semantic_readiness(root: &Path, scope: Option<&str>) -> GraphEffectivenessReadiness {
8321 let report = match status::check_status(root) {
8322 Ok(report) => report,
8323 Err(err) => {
8324 return graph_effectiveness_blocked(
8325 "status_check_unavailable",
8326 vec![format!(
8327 "semantic readiness could not inspect summary cache after graph-db refresh: {err:#}"
8328 )],
8329 vec![graph_db_refresh_command(root, scope)],
8330 );
8331 }
8332 };
8333
8334 match &report.summaries {
8335 status::SummaryStatus::Available {
8336 cached_files,
8337 total_indexed_files,
8338 coverage_pct,
8339 ..
8340 } => {
8341 let mut readiness = graph_effectiveness_ready("semantic_rows_available");
8342 readiness.diagnostics.push(format!(
8343 "summary cache has {cached_files}/{total_indexed_files} indexed file(s) cached ({coverage_pct}% coverage); graph semantic rows are available"
8344 ));
8345 readiness
8346 }
8347 status::SummaryStatus::None { .. } => {
8348 let summarize = graph_db_status_summarize_command(&report);
8349 graph_effectiveness_blocked(
8350 "summary_cache_empty",
8351 vec![format!(
8352 "summary cache empty: graph-db materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
8353 summarize,
8354 root.display(),
8355 graph_db_refresh_command(root, scope)
8356 )],
8357 vec![summarize, graph_db_refresh_command(root, scope)],
8358 )
8359 }
8360 status::SummaryStatus::Unavailable => graph_effectiveness_blocked(
8361 "summary_cache_unavailable",
8362 vec![
8363 "summary cache unavailable because the source index is missing; build the index before relying on semantic graph evidence".to_string(),
8364 ],
8365 report
8366 .recommendations
8367 .run
8368 .clone()
8369 .into_iter()
8370 .chain(std::iter::once(graph_db_refresh_command(root, scope)))
8371 .collect(),
8372 ),
8373 }
8374}
8375
8376pub(crate) fn graph_db_operator_status_warnings(root: &Path, scope: Option<&str>) -> Vec<String> {
8377 let report = match status::check_status(root) {
8378 Ok(report) => report,
8379 Err(err) => {
8380 return vec![format!(
8381 "status check unavailable after graph-db refresh: {err:#}"
8382 )];
8383 }
8384 };
8385
8386 let summarize_run = if matches!(report.summaries, status::SummaryStatus::None { .. }) {
8387 Some(graph_db_status_summarize_command(&report))
8388 } else {
8389 None
8390 };
8391 let mut warnings = report.reminders;
8392 if matches!(report.summaries, status::SummaryStatus::None { .. }) {
8393 let run = summarize_run.unwrap_or_else(|| "tsift summarize --extract .".to_string());
8394 warnings.push(format!(
8395 "summary cache empty: graph-db refresh materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
8396 run,
8397 root.display(),
8398 graph_db_refresh_command(root, scope)
8399 ));
8400 }
8401 dedupe_preserve_order(warnings)
8402}
8403
8404pub(crate) fn print_graph_db_compaction_human(report: &GraphDbCompactionReport) {
8405 println!(
8406 "graph-db compact applied:{} pruned_tombstones:{} reclaimed:{} byte(s)",
8407 report.applied, report.pruned_tombstones, report.reclaimed_bytes
8408 );
8409 println!("graph_db: {}", report.graph_db);
8410 println!(
8411 "before: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
8412 report.counts_before.nodes,
8413 report.counts_before.edges,
8414 report.counts_before.tombstones.total,
8415 report.counts_before.file_size_bytes.unwrap_or(0),
8416 report.counts_before.freelist_bytes.unwrap_or(0)
8417 );
8418 println!(
8419 "after: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
8420 report.counts_after.nodes,
8421 report.counts_after.edges,
8422 report.counts_after.tombstones.total,
8423 report.counts_after.file_size_bytes.unwrap_or(0),
8424 report.counts_after.freelist_bytes.unwrap_or(0)
8425 );
8426 for proof in &report.compaction_after.proof {
8427 println!("proof: {proof}");
8428 }
8429 for warning in &report.warnings {
8430 println!("warning: {warning}");
8431 }
8432 for command in &report.next_commands {
8433 println!("next: {command}");
8434 }
8435}
8436
8437fn parse_graph_db_property_filters(raw: &[String]) -> Result<Vec<GraphDbPropertyFilter>> {
8438 raw.iter()
8439 .map(|value| {
8440 let (key, filter_value) = value
8441 .split_once('=')
8442 .with_context(|| format!("graph-db --property expects KEY=VALUE, got {value:?}"))?;
8443 let key = key.trim();
8444 let filter_value = filter_value.trim();
8445 if key.is_empty() || filter_value.is_empty() {
8446 bail!("graph-db --property expects non-empty KEY=VALUE, got {value:?}");
8447 }
8448 Ok(GraphDbPropertyFilter {
8449 key: key.to_string(),
8450 value: filter_value.to_string(),
8451 })
8452 })
8453 .collect()
8454}
8455
8456fn graph_db_query_options(
8457 cursor: Option<String>,
8458 limit: Option<usize>,
8459 property_filters: &[String],
8460) -> Result<GraphDbQueryOptions> {
8461 Ok(GraphDbQueryOptions {
8462 cursor,
8463 limit: limit.filter(|limit| *limit > 0),
8464 property_filters: parse_graph_db_property_filters(property_filters)?,
8465 })
8466}
8467
8468fn graph_db_query_options_for_store(options: &GraphDbQueryOptions) -> GraphQueryOptions {
8469 GraphQueryOptions {
8470 cursor: options.cursor.clone(),
8471 limit: options.limit,
8472 property_filters: options
8473 .property_filters
8474 .iter()
8475 .map(|filter| GraphPropertyFilter {
8476 key: filter.key.clone(),
8477 value: filter.value.clone(),
8478 })
8479 .collect(),
8480 }
8481}
8482
8483fn graph_db_page_report_from_store(
8484 page: GraphQueryPage,
8485 property_filters: Vec<GraphDbPropertyFilter>,
8486) -> GraphDbPageReport {
8487 GraphDbPageReport {
8488 cursor: page.cursor,
8489 limit: page.limit,
8490 next_cursor: page.next_cursor,
8491 returned_nodes: page.returned_nodes,
8492 returned_edges: page.returned_edges,
8493 truncated: page.truncated,
8494 property_filters,
8495 diagnostics: page.diagnostics,
8496 }
8497}
8498
8499fn graph_db_neighborhood_ranking_gate(
8500 ranked_neighbor_cap: usize,
8501) -> GraphDbNeighborhoodRankingGate {
8502 GraphDbNeighborhoodRankingGate {
8503 status: "held_default_order_unchanged".to_string(),
8504 ranked_output_default: false,
8505 default_order: "stable_node_id".to_string(),
8506 default_change_gate: "community_search_quality_metrics".to_string(),
8507 required_workloads: metric_digest::COMMUNITY_SEARCH_WORKLOADS
8508 .iter()
8509 .map(|workload| (*workload).to_string())
8510 .collect(),
8511 required_metrics: metric_digest::COMMUNITY_SEARCH_REQUIRED_METRICS
8512 .iter()
8513 .map(|metric| (*metric).to_string())
8514 .collect(),
8515 max_duration_regression_percent: metric_digest::COMMUNITY_MAX_DURATION_REGRESSION_PERCENT,
8516 min_handle_coverage_pct: metric_digest::COMMUNITY_MIN_HANDLE_COVERAGE_PCT,
8517 min_duplicate_name_precision: metric_digest::COMMUNITY_MIN_DUPLICATE_NAME_PRECISION,
8518 min_top_community_stability: metric_digest::COMMUNITY_MIN_TOP_COMMUNITY_STABILITY,
8519 diagnostics: vec![
8520 "ranked_neighbors is additive; neighborhood nodes remain ordered by stable node id for cursor pagination".to_string(),
8521 format!(
8522 "ranked_neighbors is score-capped at {ranked_neighbor_cap} entries so previews stay bounded while cursor pagination remains exhaustive"
8523 ),
8524 "changing the default neighborhood order requires the community-search gate to pass for every required workload".to_string(),
8525 ],
8526 }
8527}
8528
8529fn graph_db_ranked_neighbor_cap(limit: Option<usize>) -> usize {
8530 match limit {
8531 Some(0) | None => GRAPH_DB_RANKED_NEIGHBOR_CAP,
8532 Some(limit) => limit.clamp(1, GRAPH_DB_RANKED_NEIGHBOR_CAP),
8533 }
8534}
8535
8536fn graph_db_ranked_neighbors(
8537 center_id: &str,
8538 nodes: &[SubstrateGraphNode],
8539 edges: &[SubstrateGraphEdge],
8540 cap: usize,
8541) -> Vec<GraphDbRankedNeighbor> {
8542 resolution::ranked_neighbors_capped(center_id, nodes, edges, cap)
8543}
8544
8545fn graph_db_edge_key(edge: &SubstrateGraphEdge) -> String {
8546 if edge.id.is_empty() {
8547 substrate::ConvexEdgeRow::stable_key(&edge.from_id, &edge.to_id, &edge.kind)
8548 } else {
8549 edge.id.clone()
8550 }
8551}
8552
8553fn graph_db_schema() -> GraphDbSchema {
8554 GraphDbSchema {
8555 contract_versions: vec![
8556 GraphDbSchemaContract {
8557 name: "graph_db_evidence",
8558 version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
8559 description: "graph-db evidence JSON packet including packet_id, projection hash, worker context, source handles, worker results, semantic rows, replay commands, and repair commands",
8560 },
8561 GraphDbSchemaContract {
8562 name: "worker_prompt_packet",
8563 version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
8564 description: "conflict-matrix worker prompt packet with owned scope, scheduler fields, stable graph handles, expected tests, expansion commands, token budget, semantic ranking reasons, worker feedback closure controls, and fail-closed prompt text",
8565 },
8566 GraphDbSchemaContract {
8567 name: "conflict_matrix",
8568 version: CONFLICT_MATRIX_CONTRACT_VERSION,
8569 description: "parallel-dispatch decision report keyed by graph evidence packets, scheduler block fields, hard file/symbol/test/config gates, and soft worker-feedback closure ranking",
8570 },
8571 GraphDbSchemaContract {
8572 name: "context_pack_graph_orchestration",
8573 version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
8574 description: "context-pack graph orchestration summary with projection freshness, evidence packet ids, ownership blocks, and follow-up graph commands",
8575 },
8576 GraphDbSchemaContract {
8577 name: "session_review_follow_up",
8578 version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
8579 description: "session-review next-context follow-up command contract for resumable digest/context-pack commands",
8580 },
8581 GraphDbSchemaContract {
8582 name: "dispatch_trace",
8583 version: DISPATCH_TRACE_CONTRACT_VERSION,
8584 description: "operator review trace linking backlog, job packets, worker results, source handles, semantic rows, scheduler fields, evidence packet ids, worker feedback closure controls, and worker prompt packets",
8585 },
8586 GraphDbSchemaContract {
8587 name: "dependency_dag",
8588 version: DEPENDENCY_DAG_CONTRACT_VERSION,
8589 description: "topological planning DAG for agent-doc backlog targets with replayable dependency edges, topo batches, and cycle diagnostics",
8590 },
8591 ],
8592 node_fields: vec![
8593 GraphDbSchemaField {
8594 name: "id",
8595 value_type: "string",
8596 description: "Stable provider-neutral node id",
8597 },
8598 GraphDbSchemaField {
8599 name: "kind",
8600 value_type: "string",
8601 description: "Application-defined node family such as file, symbol, or backlog",
8602 },
8603 GraphDbSchemaField {
8604 name: "label",
8605 value_type: "string",
8606 description: "Human-readable label",
8607 },
8608 GraphDbSchemaField {
8609 name: "properties",
8610 value_type: "object<string,string>",
8611 description: "Adapter-specific string properties",
8612 },
8613 GraphDbSchemaField {
8614 name: "provenance",
8615 value_type: "array",
8616 description: "Source system and source reference metadata",
8617 },
8618 GraphDbSchemaField {
8619 name: "freshness",
8620 value_type: "object|null",
8621 description: "Optional content hash and observed timestamp",
8622 },
8623 ],
8624 edge_fields: vec![
8625 GraphDbSchemaField {
8626 name: "id",
8627 value_type: "string",
8628 description: "Stable provider-neutral edge id derived from from_id, kind, and to_id",
8629 },
8630 GraphDbSchemaField {
8631 name: "from_id",
8632 value_type: "string",
8633 description: "Source node id",
8634 },
8635 GraphDbSchemaField {
8636 name: "to_id",
8637 value_type: "string",
8638 description: "Target node id",
8639 },
8640 GraphDbSchemaField {
8641 name: "kind",
8642 value_type: "string",
8643 description: "Application-defined edge relation",
8644 },
8645 GraphDbSchemaField {
8646 name: "properties",
8647 value_type: "object<string,string>",
8648 description: "Adapter-specific string properties",
8649 },
8650 GraphDbSchemaField {
8651 name: "provenance",
8652 value_type: "array",
8653 description: "Source system and source reference metadata",
8654 },
8655 GraphDbSchemaField {
8656 name: "freshness",
8657 value_type: "object|null",
8658 description: "Optional content hash and observed timestamp",
8659 },
8660 ],
8661 operations: vec![
8662 GraphDbSchemaOperation {
8663 command: "refresh",
8664 description: "Materialize .tsift/graph.db explicitly with delta upserts/deletes, row hash watermarks, tombstone pruning, projection metadata, row counts, and operator next commands",
8665 },
8666 GraphDbSchemaOperation {
8667 command: "status",
8668 description: "Inspect .tsift/graph.db freshness, projection metadata, row counts, tombstone counts, file-size impact, and operator next commands without refreshing",
8669 },
8670 GraphDbSchemaOperation {
8671 command: "doctor",
8672 description: "Validate graph.db or Convex snapshot health and return fail-closed repair diagnostics plus non-fatal SQLite tombstone-retention warnings",
8673 },
8674 GraphDbSchemaOperation {
8675 command: "drift",
8676 description: "Compare local SQLite projection rows with a Convex snapshot and return upsert, tombstone, metadata, duplicate, orphan, and next-command diagnostics",
8677 },
8678 GraphDbSchemaOperation {
8679 command: "compact [--apply] [--prune-tombstones --confirmed-convex-reconciled]",
8680 description: "Return or apply the post-reconciliation SQLite graph compaction policy, including WAL checkpoint/VACUUM proof and guarded tombstone pruning",
8681 },
8682 GraphDbSchemaOperation {
8683 command: "backend-eval [--candidate duckdb-duckpgq|falkordb|ladybug|kuzu] [--target ID] [--full-projection]",
8684 description: "Benchmark experimental read-only GraphStore backend prototypes against SQLite on bounded real, optional full-project, and synthetic projections across refresh/status/path tiers/evidence/conflict-matrix/dispatch-trace and emit promotion hold/eligibility gates",
8685 },
8686 GraphDbSchemaOperation {
8687 command: "evidence <target> [--depth N] [--limit N]",
8688 description: "Return a bounded versioned graph-db handoff packet for a backlog id or job packet handle, including packet_id, projection hash, worker_context rows, source_handle rows, worker_result rows, semantic_concept/entity rows, shortest paths, replay commands, repair commands, and next commands",
8689 },
8690 GraphDbSchemaOperation {
8691 command: "related <phrase> [--kind concept|entity|all] [--depth N] [--seed-limit N] [--limit N]",
8692 description: "Resolve a natural-language phrase to cached semantic concept/entity seed nodes, then return an incident/outgoing GraphStore neighborhood around those seeds for general knowledge retrieval without changing stable neighborhood pagination defaults",
8693 },
8694 GraphDbSchemaOperation {
8695 command: "dispatch-trace [target...] --path <session> [--format json|html]",
8696 description: "Export a compact graph-backed dispatch trace with evidence packet ids, worker-result feedback closure summaries, graph links, and conflict-matrix worker prompt packets",
8697 },
8698 GraphDbSchemaOperation {
8699 command: "dependency-dag [target...] --path <session>",
8700 description: "Extract a versioned agent-doc dependency DAG from backlog ids, explicit depends-on text, shared file/symbol/test/config evidence, semantic overlap, and worker-result follow-up ids",
8701 },
8702 GraphDbSchemaOperation {
8703 command: "schema",
8704 description: "Return record and operation schemas",
8705 },
8706 GraphDbSchemaOperation {
8707 command: "node <id>",
8708 description: "Return one node by stable id",
8709 },
8710 GraphDbSchemaOperation {
8711 command: "edge <id>",
8712 description: "Return one edge by stable edge id",
8713 },
8714 GraphDbSchemaOperation {
8715 command: "edges [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
8716 description: "Return edge records ordered by stable edge id with SQLite-pushed edge-property filtering and cursor pagination",
8717 },
8718 GraphDbSchemaOperation {
8719 command: "incident <id> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
8720 description: "Return incoming and outgoing edges incident to one node, ordered by stable edge id with optional kind and edge-property filters",
8721 },
8722 GraphDbSchemaOperation {
8723 command: "kind <kind> [--property KEY=VALUE] [--cursor ID] [--limit N]",
8724 description: "Return nodes of one kind ordered by id with SQLite-pushed property filtering/cursor pagination and query-plan diagnostics",
8725 },
8726 GraphDbSchemaOperation {
8727 command: "neighborhood <id> --depth <n> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor ID] [--limit N]",
8728 description: "Return a directed outgoing subgraph around a node using batched SQLite recursive traversal plus pushed filters/paging when available; JSON also includes additive ranked_neighbors while default nodes remain stable-id ordered",
8729 },
8730 GraphDbSchemaOperation {
8731 command: "path <from> <to> [--edge-kind <kind>] [--max-hops N]",
8732 description: "Return the shortest directed path by node id, optionally bounded by hop count",
8733 },
8734 ],
8735 }
8736}
8737
8738pub(crate) fn sqlite_graph_freshness(
8739 store: &SqliteGraphStore,
8740 scope: &str,
8741) -> Result<GraphDbFreshnessReport> {
8742 let version = store.projection_version(scope)?;
8743 let Some(version) = version else {
8744 return Ok(GraphDbFreshnessReport {
8745 status: "missing".to_string(),
8746 fail_closed: true,
8747 projection_version: None,
8748 content_hash: None,
8749 source_watermark: None,
8750 diagnostics: vec![
8751 "graph projection metadata is missing; rebuild the graph before trusting reads"
8752 .to_string(),
8753 ],
8754 });
8755 };
8756 let mut diagnostics = Vec::new();
8757 let fail_closed =
8758 version.projection_version != GRAPH_PROJECTION_VERSION || version.content_hash.is_none();
8759 if version.projection_version != GRAPH_PROJECTION_VERSION {
8760 diagnostics.push(format!(
8761 "projection version mismatch: expected {} got {}",
8762 GRAPH_PROJECTION_VERSION, version.projection_version
8763 ));
8764 }
8765 if version.content_hash.is_none() {
8766 diagnostics.push("projection content hash is missing".to_string());
8767 }
8768 Ok(GraphDbFreshnessReport {
8769 status: if fail_closed { "stale" } else { "current" }.to_string(),
8770 fail_closed,
8771 projection_version: Some(version.projection_version),
8772 content_hash: version.content_hash,
8773 source_watermark: version.source_watermark,
8774 diagnostics,
8775 })
8776}
8777
8778pub(crate) fn convex_graph_freshness(
8779 local: &ConvexProjectionRows,
8780 snapshot: &ConvexProjectionRows,
8781 scope: Option<&str>,
8782) -> GraphDbFreshnessReport {
8783 let freshness = convex_projection_freshness(local, Some(snapshot), scope);
8784 GraphDbFreshnessReport {
8785 status: freshness.status,
8786 fail_closed: freshness.fail_closed,
8787 projection_version: Some(GRAPH_PROJECTION_VERSION.to_string()),
8788 content_hash: freshness.snapshot_hash,
8789 source_watermark: None,
8790 diagnostics: freshness.diagnostics,
8791 }
8792}
8793
8794pub(crate) fn tokensave_graph_freshness(store: &TokensaveDb) -> Result<GraphDbFreshnessReport> {
8795 let (nodes, edges) = store.graph_counts()?;
8796 let files = store.file_count()?;
8797 Ok(GraphDbFreshnessReport {
8798 status: "current".to_string(),
8799 fail_closed: false,
8800 projection_version: Some("tokensave-readonly".to_string()),
8801 content_hash: None,
8802 source_watermark: Some(store.db_path().to_string_lossy().to_string()),
8803 diagnostics: vec![format!(
8804 "tokensave read-only adapter opened {} node(s), {} edge(s), {} file(s)",
8805 nodes, edges, files
8806 )],
8807 })
8808}
8809
8810pub(crate) fn append_tokensave_graph_doctor_checks(report: &mut GraphDbDoctorReport, root: &Path) {
8811 match TokensaveDb::discover(root) {
8812 Ok(Some(store)) => {
8813 report.push_check(GraphDbDoctorCheck {
8814 name: "tokensave_db_open".to_string(),
8815 status: "ok".to_string(),
8816 fail_closed: false,
8817 diagnostics: vec![format!(
8818 "opened tokensave database at {}",
8819 store.db_path().display()
8820 )],
8821 repair_commands: Vec::new(),
8822 });
8823 match (store.node_count(), store.edge_count(), store.file_count()) {
8824 (Ok(nodes), Ok(edges), Ok(files)) => {
8825 report.push_check(GraphDbDoctorCheck {
8826 name: "tokensave_counts".to_string(),
8827 status: "ok".to_string(),
8828 fail_closed: false,
8829 diagnostics: vec![format!(
8830 "tokensave contains {} node(s), {} edge(s), {} file(s)",
8831 nodes, edges, files
8832 )],
8833 repair_commands: Vec::new(),
8834 });
8835 }
8836 (nodes, edges, files) => {
8837 report.push_check(graph_db_doctor_check(
8838 "tokensave_counts",
8839 vec![format!(
8840 "tokensave count inspection failed: nodes={:?} edges={:?} files={:?}",
8841 nodes.err(),
8842 edges.err(),
8843 files.err()
8844 )],
8845 Vec::new(),
8846 ));
8847 }
8848 }
8849 }
8850 Ok(None) => report.push_check(graph_db_doctor_check(
8851 "tokensave_db_exists",
8852 vec![format!(
8853 "tokensave database is missing at {}",
8854 root.join(".tokensave").join("tokensave.db").display()
8855 )],
8856 Vec::new(),
8857 )),
8858 Err(err) => report.push_check(graph_db_doctor_check(
8859 "tokensave_db_open",
8860 vec![err.to_string()],
8861 Vec::new(),
8862 )),
8863 }
8864}
8865
8866pub(crate) fn graph_db_resolve_evidence_target(
8867 store: &impl GraphStore,
8868 target: &str,
8869) -> Result<Option<SubstrateGraphNode>> {
8870 store.resolve_evidence_target(
8871 target,
8872 &[
8873 "backlog",
8874 "job_packet",
8875 "worker_result",
8876 "worker_context",
8877 "source_handle",
8878 ],
8879 )
8880}
8881
/// Looks up nodes of `kind` reachable from `from_id`, each paired with a
/// `substrate::GraphPath`.
///
/// This is a pass-through to `GraphStore::reachable_nodes_by_kind`; `depth`
/// and `limit` are forwarded unchanged and their exact semantics are defined
/// by the store implementation.
fn graph_db_reachable_nodes_by_kind(
    store: &impl GraphStore,
    from_id: &str,
    kind: &str,
    depth: usize,
    limit: usize,
) -> Result<Vec<(SubstrateGraphNode, substrate::GraphPath)>> {
    store.reachable_nodes_by_kind(from_id, kind, depth, limit)
}
8891
8892fn graph_db_evidence_completed_queue_drift_warnings(
8893 store: &impl GraphStore,
8894 target: &SubstrateGraphNode,
8895 worker_results: &[SubstrateGraphNode],
8896) -> Result<Vec<String>> {
8897 let ref_id = target.properties.get("ref_id").map(String::as_str);
8898 let has_completed_result = worker_results.iter().any(|node| {
8899 node.properties.get("status").map(String::as_str) == Some("completed")
8900 && node.properties.get("ref_id").map(String::as_str) == ref_id
8901 });
8902 if !has_completed_result {
8903 return Ok(Vec::new());
8904 }
8905 let active_jobs = store
8906 .nodes_by_kind("job_packet")?
8907 .into_iter()
8908 .filter(|node| {
8909 node.properties.get("ref_id").map(String::as_str) == ref_id
8910 && node.label.starts_with("do #")
8911 })
8912 .collect::<Vec<_>>();
8913 if active_jobs.is_empty() {
8914 return Ok(Vec::new());
8915 }
8916 let repair = match (target.properties.get("path"), ref_id) {
8917 (Some(path), Some(id)) => format!(
8918 "repair with `agent-doc write --commit {} --done {}` or the next `agent-doc finalize --done {}` closeout",
8919 shell_quote(path),
8920 shell_quote(id),
8921 shell_quote(id)
8922 ),
8923 _ => {
8924 "repair by marking the queue item done/reaping it in the agent-doc session".to_string()
8925 }
8926 };
8927 Ok(vec![format!(
8928 "queue-head drift: target {} has {} active queued do packet(s) but already has a completed worker_result; {repair}; do not redispatch or reactivate the completed item",
8929 target.label,
8930 active_jobs.len()
8931 )])
8932}
8933
8934fn graph_db_evidence_next_commands(
8935 root: &Path,
8936 scope: Option<&str>,
8937 target: &SubstrateGraphNode,
8938 worker_context: &[SubstrateGraphNode],
8939 source_handles: &[SubstrateGraphNode],
8940 worker_results: &[SubstrateGraphNode],
8941 semantic_related: &[SubstrateGraphNode],
8942) -> Vec<String> {
8943 let mut commands = BTreeSet::new();
8944 if let Some(expand) = target.properties.get("expand") {
8945 commands.insert(expand.clone());
8946 }
8947 for worker in worker_context {
8948 if let Some(expand) = worker.properties.get("expand") {
8949 commands.insert(expand.clone());
8950 }
8951 }
8952 for source in source_handles {
8953 if let Some(expand) = source.properties.get("expand") {
8954 commands.insert(expand.clone());
8955 }
8956 }
8957 for result in worker_results {
8958 if let Some(expand) = result.properties.get("expand") {
8959 commands.insert(expand.clone());
8960 }
8961 }
8962 for semantic in semantic_related {
8963 if let Some(expand) = semantic.properties.get("expand") {
8964 commands.insert(expand.clone());
8965 }
8966 }
8967 commands.insert(format!(
8968 "tsift graph-db --path {}{} status --json",
8969 shell_quote(root.to_string_lossy().as_ref()),
8970 graph_db_scope_arg(scope)
8971 ));
8972 commands.insert(format!(
8973 "tsift graph-db --path {}{} doctor --json",
8974 shell_quote(root.to_string_lossy().as_ref()),
8975 graph_db_scope_arg(scope)
8976 ));
8977 commands.into_iter().collect()
8978}
8979
8980fn graph_db_repair_commands(root: &Path, scope: Option<&str>) -> Vec<String> {
8981 vec![
8982 format!(
8983 "tsift graph-db --path {}{} refresh --json",
8984 shell_quote(root.to_string_lossy().as_ref()),
8985 graph_db_scope_arg(scope)
8986 ),
8987 format!(
8988 "tsift graph-db --path {}{} doctor --json",
8989 shell_quote(root.to_string_lossy().as_ref()),
8990 graph_db_scope_arg(scope)
8991 ),
8992 ]
8993}
8994
8995fn graph_db_evidence_replay_commands(
8996 root: &Path,
8997 scope: Option<&str>,
8998 target: &str,
8999 depth: usize,
9000 limit: usize,
9001) -> Vec<String> {
9002 vec![
9003 format!(
9004 "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
9005 shell_quote(root.to_string_lossy().as_ref()),
9006 graph_db_scope_arg(scope),
9007 shell_quote(target),
9008 depth,
9009 limit
9010 ),
9011 format!(
9012 "tsift conflict-matrix --path {} {} --json",
9013 shell_quote(root.to_string_lossy().as_ref()),
9014 shell_quote(target)
9015 ),
9016 ]
9017}
9018
9019fn graph_db_evidence_packet_id(
9020 target: &str,
9021 target_node: &SubstrateGraphNode,
9022 freshness: &GraphDbFreshnessReport,
9023) -> String {
9024 stable_handle(
9025 "gevd",
9026 &format!(
9027 "{}:{}:{}:{}",
9028 GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
9029 target,
9030 target_node.id,
9031 freshness.content_hash.as_deref().unwrap_or("no-hash")
9032 ),
9033 )
9034}
9035
9036pub(crate) fn graph_db_evidence_report_from_store<S: GraphStore>(
9037 input: GraphDbEvidenceInput<'_, S>,
9038) -> Result<GraphDbEvidenceReport> {
9039 let GraphDbEvidenceInput {
9040 root,
9041 scope,
9042 backend,
9043 target,
9044 depth,
9045 limit,
9046 store,
9047 freshness,
9048 mut warnings,
9049 } = input;
9050 let repair_commands = graph_db_repair_commands(root, scope);
9051 if freshness.fail_closed {
9052 bail!(
9053 "graph database evidence failed closed for {} backend: {}; repair: {}",
9054 backend,
9055 freshness.diagnostics.join("; "),
9056 repair_commands.join("; ")
9057 );
9058 }
9059 let target_node = graph_db_resolve_evidence_target(store, target)?
9060 .with_context(|| format!("graph-db evidence target not found: {target}"))?;
9061 let max_rows = if limit == 0 { usize::MAX } else { limit };
9062 let mut reachable = store.reachable_nodes_by_kinds(
9063 &target_node.id,
9064 &[
9065 "worker_context",
9066 "source_handle",
9067 "worker_result",
9068 "semantic_concept",
9069 "semantic_entity",
9070 ],
9071 depth,
9072 max_rows,
9073 )?;
9074 let worker_paths = reachable.remove("worker_context").unwrap_or_default();
9075 let source_paths = reachable.remove("source_handle").unwrap_or_default();
9076 let worker_result_paths = reachable.remove("worker_result").unwrap_or_default();
9077 let mut semantic_paths = reachable.remove("semantic_concept").unwrap_or_default();
9078 semantic_paths.extend(reachable.remove("semantic_entity").unwrap_or_default());
9079 semantic_paths.sort_by(|(left_node, left_path), (right_node, right_path)| {
9080 left_path
9081 .hops
9082 .cmp(&right_path.hops)
9083 .then(left_node.kind.cmp(&right_node.kind))
9084 .then(left_node.label.cmp(&right_node.label))
9085 .then(left_node.id.cmp(&right_node.id))
9086 });
9087 if max_rows != usize::MAX && semantic_paths.len() > max_rows {
9088 semantic_paths.truncate(max_rows);
9089 }
9090
9091 let worker_context = worker_paths
9092 .iter()
9093 .map(|(node, _)| node.clone())
9094 .collect::<Vec<_>>();
9095 let source_handles = source_paths
9096 .iter()
9097 .map(|(node, _)| node.clone())
9098 .collect::<Vec<_>>();
9099 let worker_results = worker_result_paths
9100 .iter()
9101 .map(|(node, _)| node.clone())
9102 .collect::<Vec<_>>();
9103 let semantic_related = semantic_paths
9104 .iter()
9105 .map(|(node, _)| node.clone())
9106 .collect::<Vec<_>>();
9107 warnings.extend(graph_db_evidence_completed_queue_drift_warnings(
9108 store,
9109 &target_node,
9110 &worker_results,
9111 )?);
9112 if worker_context.is_empty()
9113 && source_handles.is_empty()
9114 && worker_results.is_empty()
9115 && semantic_related.is_empty()
9116 {
9117 warnings.push(format!(
9118 "graph-db evidence target {} resolved to a {} node but has no projection-linked context rows; add source/file tokens to the backlog text or rerun graph-db refresh after the session document is indexed",
9119 target, target_node.kind
9120 ));
9121 }
9122 let shortest_paths = worker_paths
9123 .iter()
9124 .chain(source_paths.iter())
9125 .chain(worker_result_paths.iter())
9126 .chain(semantic_paths.iter())
9127 .map(|(node, path)| GraphDbEvidencePath {
9128 to: node.id.clone(),
9129 kind: node.kind.clone(),
9130 label: node.label.clone(),
9131 path: Some(path.clone()),
9132 expand: node.properties.get("expand").cloned(),
9133 })
9134 .collect::<Vec<_>>();
9135 let next_commands = graph_db_evidence_next_commands(
9136 root,
9137 scope,
9138 &target_node,
9139 &worker_context,
9140 &source_handles,
9141 &worker_results,
9142 &semantic_related,
9143 );
9144 let replay_commands = graph_db_evidence_replay_commands(root, scope, target, depth, limit);
9145 let packet_id = graph_db_evidence_packet_id(target, &target_node, &freshness);
9146 let projection_hash = freshness.content_hash.clone();
9147
9148 Ok(GraphDbEvidenceReport {
9149 root: root.to_string_lossy().to_string(),
9150 scope: scope.map(str::to_string),
9151 backend: backend.to_string(),
9152 contract_version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION.to_string(),
9153 target: target.to_string(),
9154 packet_id,
9155 projection_hash,
9156 freshness,
9157 target_node,
9158 worker_context,
9159 source_handles,
9160 worker_results,
9161 semantic_related,
9162 shortest_paths,
9163 next_commands,
9164 replay_commands,
9165 repair_commands,
9166 fixture_coverage: GraphDbFixtureCoverage {
9167 test: "graph_db_evidence_packet_covers_backlog_job_worker_context_and_source_handles"
9168 .to_string(),
9169 fixture: "tests/graph_db_conformance.rs::graph_db_project".to_string(),
9170 assertions: vec![
9171 "backlog id and job packet handle resolve to graph nodes".to_string(),
9172 "worker_context rows are reachable from queued work".to_string(),
9173 "source_handle rows are reachable through bounded shortest paths".to_string(),
9174 "worker_result rows are reachable from completed or blocked work".to_string(),
9175 ],
9176 },
9177 warnings,
9178 })
9179}
9180
9181fn print_graph_db_evidence_human(report: &GraphDbEvidenceReport) {
9182 println!(
9183 "graph-db evidence backend: {} target: {} [{}] packet:{}",
9184 report.backend, report.target_node.id, report.target_node.kind, report.packet_id
9185 );
9186 println!(
9187 "evidence: {} worker_context row(s), {} source_handle row(s), {} worker_result row(s), {} semantic row(s), {} path(s)",
9188 report.worker_context.len(),
9189 report.source_handles.len(),
9190 report.worker_results.len(),
9191 report.semantic_related.len(),
9192 report.shortest_paths.len()
9193 );
9194 for path in &report.shortest_paths {
9195 if let Some(graph_path) = &path.path {
9196 println!(
9197 "path: {} hop(s) {}",
9198 graph_path.hops,
9199 graph_path.nodes.join(" -> ")
9200 );
9201 }
9202 }
9203 for command in &report.next_commands {
9204 println!("next: {command}");
9205 }
9206 for warning in &report.warnings {
9207 println!("warning: {warning}");
9208 }
9209}
9210
9211pub(crate) fn print_graph_db_evidence_report(
9212 report: &GraphDbEvidenceReport,
9213 format: OutputFormat,
9214) -> Result<()> {
9215 if format.json_output {
9216 print_json_or_envelope(
9217 report,
9218 &format,
9219 "graph-db",
9220 "evidence",
9221 ToolEnvelopeSummary {
9222 text: format!(
9223 "Graph DB evidence for {} returned {} worker context row(s), {} source handle(s), {} worker result row(s), {} semantic row(s), and {} shortest path(s)",
9224 report.target,
9225 report.worker_context.len(),
9226 report.source_handles.len(),
9227 report.worker_results.len(),
9228 report.semantic_related.len(),
9229 report.shortest_paths.len()
9230 ),
9231 metrics: vec![
9232 envelope_metric("backend", &report.backend),
9233 envelope_metric("worker_context", report.worker_context.len()),
9234 envelope_metric("source_handles", report.source_handles.len()),
9235 envelope_metric("worker_results", report.worker_results.len()),
9236 envelope_metric("semantic_related", report.semantic_related.len()),
9237 envelope_metric("paths", report.shortest_paths.len()),
9238 ],
9239 },
9240 false,
9241 report.next_commands.clone(),
9242 )
9243 } else {
9244 print_graph_db_evidence_human(report);
9245 Ok(())
9246 }
9247}
9248
/// Executes a read-style `graph-db` query against `store` and returns the
/// populated report.
///
/// The function refuses to read (returns an error) when `freshness.fail_closed`
/// is set. Otherwise it starts from an empty report carrying `root`, `scope`,
/// `backend`, the `Debug` rendering of `query`, `freshness`, and the incoming
/// `warnings`, then fills in the sections relevant to the query variant.
///
/// # Errors
///
/// * The projection is fail-closed.
/// * `query` is one of the variants served by a dedicated command path
///   (`Refresh`, `Status`, `Doctor`, `Drift`, `Compact`, `BackendEval`,
///   `Evidence`).
/// * Query-option parsing or any store call fails.
pub(crate) fn graph_db_report_from_store(
    root: &Path,
    scope: Option<&str>,
    backend: &str,
    query: GraphDbQuery,
    store: &impl GraphStore,
    freshness: GraphDbFreshnessReport,
    warnings: Vec<String>,
) -> Result<GraphDbReport> {
    // Never serve reads from a projection that failed its freshness check.
    if freshness.fail_closed {
        bail!(
            "graph database read failed closed for {} backend: {}",
            backend,
            freshness.diagnostics.join("; ")
        );
    }
    // `query` is rendered via `Debug` here, before the match below consumes it.
    let mut report = GraphDbReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        backend: backend.to_string(),
        query: format!("{query:?}"),
        freshness,
        schema: None,
        node: None,
        edge: None,
        nodes: Vec::new(),
        edges: Vec::new(),
        ranked_neighbors: Vec::new(),
        semantic_related: Vec::new(),
        neighborhood_ranking_gate: None,
        knowledge_retrieval: None,
        path: None,
        page: None,
        warnings,
    };

    match query {
        // These variants have their own command paths; reaching them here is
        // reported as an error.
        GraphDbQuery::Refresh => {
            bail!("graph-db refresh must be handled by the refresh command path");
        }
        GraphDbQuery::Status => {
            bail!("graph-db status must be handled by the status command path");
        }
        GraphDbQuery::Doctor => {
            bail!("graph-db doctor must be handled by the doctor command path");
        }
        GraphDbQuery::Drift => {
            bail!("graph-db drift must be handled by the drift command path");
        }
        GraphDbQuery::Compact { .. } => {
            bail!("graph-db compact must be handled by the compact command path");
        }
        GraphDbQuery::BackendEval { .. } => {
            bail!("graph-db backend-eval must be handled by the benchmark command path");
        }
        GraphDbQuery::Evidence { .. } => {
            bail!("graph-db evidence must be handled by the evidence command path");
        }
        GraphDbQuery::Related {
            query,
            kind,
            depth,
            seed_limit,
            limit,
        } => {
            // Resolve the phrase to semantic seed items; only the items and
            // warnings of the semantic report are used.
            let semantic =
                semantic_related_report_from_store(root, scope, &query, seed_limit, kind, store)?;
            let SemanticRelatedReport {
                items,
                warnings: semantic_warnings,
                ..
            } = semantic;
            report.warnings.extend(semantic_warnings);
            // Seed the neighborhood traversal with the handle of every item.
            let seed_ids = items
                .iter()
                .map(|item| item.handle.clone())
                .collect::<Vec<_>>();
            let subgraph = graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit)?;
            let seed_count = seed_ids.len();

            report.semantic_related = items;
            report.nodes = subgraph.nodes;
            report.edges = subgraph.edges;
            // Ranked neighbors are computed around the first seed only; with no
            // seeds both the ranking and its gate stay unset.
            if let Some(seed_id) = seed_ids.first() {
                let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(Some(limit));
                report.ranked_neighbors = graph_db_ranked_neighbors(
                    seed_id,
                    &report.nodes,
                    &report.edges,
                    ranked_neighbor_cap,
                );
                report.neighborhood_ranking_gate =
                    Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
            }
            report.knowledge_retrieval = Some(GraphDbKnowledgeRetrieval {
                mode: "semantic_seeded_neighborhood".to_string(),
                query,
                seed_kind: semantic_related_kind_name(kind).to_string(),
                seed_limit,
                seed_count,
                depth,
                limit,
                node_count: report.nodes.len(),
                edge_count: report.edges.len(),
                truncated: subgraph.truncated,
                traversal: "incident_plus_outgoing_edges".to_string(),
                freshness_boundary:
                    "semantic rows must come from refreshed summary graph records".to_string(),
                privacy_boundary:
                    "GraphStore stores substrate records only; user consent, deletion policy, persona policy, and LiveKit session state stay in the avatar/agent adapter"
                        .to_string(),
                diagnostics: subgraph.diagnostics,
            });
        }
        GraphDbQuery::Schema => {
            report.schema = Some(graph_db_schema());
        }
        GraphDbQuery::Node { id } => {
            report.node = store.node(&id)?;
        }
        GraphDbQuery::Edge { id } => {
            report.edge = store.edge(&id)?;
        }
        GraphDbQuery::Edges {
            edge_kind,
            cursor,
            limit,
            property_filters,
        } => {
            // `options` is borrowed for the store call first and only then has
            // its `property_filters` moved into the page report.
            let options = graph_db_query_options(cursor, limit, &property_filters)?;
            let paged = store.paged_edges(
                edge_kind.as_deref(),
                graph_db_query_options_for_store(&options),
            )?;
            report.edges = paged.edges;
            report.page = Some(graph_db_page_report_from_store(
                paged.page,
                options.property_filters,
            ));
        }
        GraphDbQuery::Incident {
            id,
            edge_kind,
            cursor,
            limit,
            property_filters,
        } => {
            let options = graph_db_query_options(cursor, limit, &property_filters)?;
            let paged = store.paged_incident_edges(
                &id,
                edge_kind.as_deref(),
                graph_db_query_options_for_store(&options),
            )?;
            report.edges = paged.edges;
            report.page = Some(graph_db_page_report_from_store(
                paged.page,
                options.property_filters,
            ));
        }
        GraphDbQuery::Kind {
            kind,
            cursor,
            limit,
            property_filters,
        } => {
            let options = graph_db_query_options(cursor, limit, &property_filters)?;
            let paged =
                store.paged_nodes_by_kind(&kind, graph_db_query_options_for_store(&options))?;
            report.nodes = paged.nodes;
            report.edges = paged.edges;
            report.page = Some(graph_db_page_report_from_store(
                paged.page,
                options.property_filters,
            ));
        }
        GraphDbQuery::Neighborhood {
            id,
            depth,
            edge_kind,
            cursor,
            limit,
            property_filters,
        } => {
            let options = graph_db_query_options(cursor, limit, &property_filters)?;
            // NOTE(review): when the store returns `None`, the report keeps
            // empty nodes/edges, no ranking, and no page, and no warning is
            // added — confirm that a silent empty result is intended.
            if let Some(paged) = store.paged_neighborhood(
                &id,
                depth,
                edge_kind.as_deref(),
                graph_db_query_options_for_store(&options),
            )? {
                report.nodes = paged.nodes;
                report.edges = paged.edges;
                let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(options.limit);
                report.ranked_neighbors = graph_db_ranked_neighbors(
                    &id,
                    &report.nodes,
                    &report.edges,
                    ranked_neighbor_cap,
                );
                report.neighborhood_ranking_gate =
                    Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
                report.page = Some(graph_db_page_report_from_store(
                    paged.page,
                    options.property_filters,
                ));
            }
        }
        GraphDbQuery::Path {
            from,
            to,
            edge_kind,
            max_hops,
        } => {
            report.path =
                store.shortest_path_with_max_hops(&from, &to, edge_kind.as_deref(), max_hops)?;
            // A warning is added only when a hop bound was supplied and no path
            // was found; an unbounded miss leaves `path` as `None` silently.
            if let Some(max_hops) = max_hops
                && report.path.is_none()
            {
                report.warnings.push(format!(
                    "no directed path found within --max-hops {}",
                    max_hops
                ));
            }
        }
    }
    Ok(report)
}
9476
9477pub(crate) fn print_graph_db_human(report: &GraphDbReport, compact: bool) {
9478 if compact {
9479 println!(
9480 "graph-db backend:{} query:{} nodes:{} edges:{} freshness:{}",
9481 report.backend,
9482 report.query,
9483 report.nodes.len() + usize::from(report.node.is_some()),
9484 report.edges.len() + usize::from(report.edge.is_some()),
9485 report.freshness.status
9486 );
9487 return;
9488 }
9489 println!("graph-db backend: {}", report.backend);
9490 println!("freshness: {}", report.freshness.status);
9491 if let Some(schema) = &report.schema {
9492 println!(
9493 "schema: {} node fields, {} edge fields, {} operations",
9494 schema.node_fields.len(),
9495 schema.edge_fields.len(),
9496 schema.operations.len()
9497 );
9498 }
9499 if let Some(node) = &report.node {
9500 println!("node: {} [{}] {}", node.id, node.kind, node.label);
9501 }
9502 if let Some(edge) = &report.edge {
9503 println!(
9504 "edge: {} {} -{}-> {}",
9505 graph_db_edge_key(edge),
9506 edge.from_id,
9507 edge.kind,
9508 edge.to_id
9509 );
9510 }
9511 if let Some(knowledge) = &report.knowledge_retrieval {
9512 println!(
9513 "knowledge_retrieval: {} seeds:{} depth:{} traversal:{}",
9514 knowledge.mode, knowledge.seed_count, knowledge.depth, knowledge.traversal
9515 );
9516 }
9517 for item in &report.semantic_related {
9518 println!(
9519 "semantic_seed: {:.3} [{}] {} ({})",
9520 item.score, item.kind, item.label, item.handle
9521 );
9522 }
9523 for node in &report.nodes {
9524 println!("node: {} [{}] {}", node.id, node.kind, node.label);
9525 }
9526 for edge in &report.edges {
9527 println!(
9528 "edge: {} {} -{}-> {}",
9529 graph_db_edge_key(edge),
9530 edge.from_id,
9531 edge.kind,
9532 edge.to_id
9533 );
9534 }
9535 for neighbor in &report.ranked_neighbors {
9536 println!(
9537 "ranked_neighbor: #{} score:{} depth:{} {} [{}] {}",
9538 neighbor.rank,
9539 neighbor.score,
9540 neighbor
9541 .depth
9542 .map(|depth| depth.to_string())
9543 .unwrap_or_else(|| "unknown".to_string()),
9544 neighbor.node_id,
9545 neighbor.kind,
9546 neighbor.label
9547 );
9548 }
9549 if let Some(gate) = &report.neighborhood_ranking_gate {
9550 println!(
9551 "neighborhood_ranking_gate: {} default_order:{} ranked_output_default:{}",
9552 gate.status, gate.default_order, gate.ranked_output_default
9553 );
9554 }
9555 if let Some(path) = &report.path {
9556 println!("path: {} hop(s) {}", path.hops, path.nodes.join(" -> "));
9557 }
9558 if let Some(page) = &report.page {
9559 if let Some(next_cursor) = &page.next_cursor {
9560 println!("next_cursor: {next_cursor}");
9561 }
9562 for diagnostic in &page.diagnostics {
9563 println!("page: {diagnostic}");
9564 }
9565 }
9566 for warning in &report.warnings {
9567 println!("warning: {warning}");
9568 }
9569}
9570
9571pub(crate) fn graph_db_backend_eval_phase_timing(
9572 name: &str,
9573 duration_micros: u128,
9574 detail: &str,
9575) -> GraphDbBackendEvalPhaseTiming {
9576 GraphDbBackendEvalPhaseTiming {
9577 name: name.to_string(),
9578 duration_micros,
9579 detail: detail.to_string(),
9580 }
9581}
9582
9583pub(crate) fn graph_db_backend_eval_timed_phase<T>(
9584 phases: &mut Vec<GraphDbBackendEvalPhaseTiming>,
9585 name: &str,
9586 detail: &str,
9587 run: impl FnOnce() -> Result<T>,
9588) -> Result<T> {
9589 let started = Instant::now();
9590 let result = run();
9591 phases.push(graph_db_backend_eval_phase_timing(
9592 name,
9593 started.elapsed().as_micros(),
9594 detail,
9595 ));
9596 result
9597}
9598
9599pub(crate) fn graph_db_backend_eval_refresh_total_micros(
9600 phases: &[GraphDbBackendEvalPhaseTiming],
9601) -> u128 {
9602 phases
9603 .iter()
9604 .filter(|phase| phase.name != "conflict_matrix_preparation")
9605 .map(|phase| phase.duration_micros)
9606 .sum()
9607}
9608
9609pub(crate) fn graph_db_backend_eval_cached_refresh(
9610 root: &Path,
9611 scope: Option<&str>,
9612 source_watermark: Option<&str>,
9613) -> Result<
9614 Option<(
9615 TraversalGraphBuild,
9616 SqliteProjectionRefresh,
9617 Vec<GraphDbBackendEvalPhaseTiming>,
9618 )>,
9619> {
9620 let Some(source_watermark) = source_watermark else {
9621 return Ok(None);
9622 };
9623 let graph_db = graph_substrate_db_path(root, scope);
9624 if !graph_db.exists() {
9625 return Ok(None);
9626 }
9627
9628 let started = Instant::now();
9629 let store = match SqliteGraphStore::open_read_only_resilient(&graph_db) {
9630 Ok(store) => store,
9631 Err(_) => return Ok(None),
9632 };
9633 if store.has_user_triggers().unwrap_or(true) {
9634 return Ok(None);
9635 }
9636 let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
9637 if freshness.fail_closed || freshness.source_watermark.as_deref() != Some(source_watermark) {
9638 return Ok(None);
9639 }
9640
9641 let phases = vec![
9642 graph_db_backend_eval_phase_timing(
9643 "source_graph_build",
9644 started.elapsed().as_micros(),
9645 "reused current graph.db projection because the source watermark matched; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
9646 ),
9647 graph_db_backend_eval_phase_timing(
9648 "projection_rows",
9649 0,
9650 "reused cached provider-neutral projection rows from graph.db",
9651 ),
9652 graph_db_backend_eval_phase_timing(
9653 "sqlite_open",
9654 0,
9655 "reused existing graph.db projection without opening a write transaction",
9656 ),
9657 ];
9658 let refresh = SqliteProjectionRefresh {
9659 scope: scope.unwrap_or("root").to_string(),
9660 projection_version: freshness
9661 .projection_version
9662 .unwrap_or_else(|| GRAPH_PROJECTION_VERSION.to_string()),
9663 source_watermark: Some(source_watermark.to_string()),
9664 tombstoned_nodes: Vec::new(),
9665 tombstoned_edges: Vec::new(),
9666 upserted_nodes: 0,
9667 upserted_edges: 0,
9668 unchanged_nodes: 0,
9669 unchanged_edges: 0,
9670 upserted_properties: 0,
9671 unchanged_properties: 0,
9672 deleted_properties: 0,
9673 deleted_nodes: 0,
9674 deleted_edges: 0,
9675 pruned_tombstones: 0,
9676 file_size_bytes_before: None,
9677 file_size_bytes_after: None,
9678 phase_timings: Vec::new(),
9679 };
9680 Ok(Some((TraversalGraphBuild::default(), refresh, phases)))
9681}
9682
9683pub(crate) fn graph_db_backend_eval_reused_cached_projection(
9684 phases: &[GraphDbBackendEvalPhaseTiming],
9685) -> bool {
9686 phases.iter().any(|phase| {
9687 phase.name == "source_graph_build"
9688 && phase.detail.contains("reused current graph.db projection")
9689 })
9690}
9691
9692pub(crate) fn graph_db_backend_eval_update_source_watermark(
9693 root: &Path,
9694 path_hint: &Path,
9695 scope: Option<&str>,
9696) -> Result<()> {
9697 let Some(source_watermark) = traversal_source_watermark(root, path_hint, scope, false)? else {
9698 return Ok(());
9699 };
9700 let graph_db = graph_substrate_db_path(root, scope);
9701 let mut store = SqliteGraphStore::open(&graph_db)?;
9702 store.update_projection_source_watermark(scope.unwrap_or("root"), Some(source_watermark))?;
9703 Ok(())
9704}
9705
9706pub(crate) fn graph_db_backend_eval_refresh_with_profile(
9707 root: &Path,
9708 path_hint: &Path,
9709 scope: Option<&str>,
9710) -> Result<(
9711 TraversalGraphBuild,
9712 SqliteProjectionRefresh,
9713 Vec<GraphDbBackendEvalPhaseTiming>,
9714)> {
9715 let source_watermark = traversal_source_watermark(root, path_hint, scope, false)?;
9716 if let Some(cached) =
9717 graph_db_backend_eval_cached_refresh(root, scope, source_watermark.as_deref())?
9718 {
9719 return Ok(cached);
9720 }
9721
9722 let mut phases = Vec::new();
9723 let source_graph_detail = if hinted_markdown_file(root, path_hint).is_some() {
9724 "bounded session projection: index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads; skips global call-edge materialization because full-projection is the complete-call-graph regression guard"
9725 } else {
9726 "index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads when summaries are cached"
9727 };
9728 let source_graph = graph_db_backend_eval_timed_phase(
9729 &mut phases,
9730 "source_graph_build",
9731 source_graph_detail,
9732 || build_traversal_graph_source_with_options(root, path_hint, scope, false),
9733 )?;
9734 let projection = graph_db_backend_eval_timed_phase(
9735 &mut phases,
9736 "projection_rows",
9737 "provider-neutral GraphStore node/edge row construction before SQLite persistence",
9738 || traversal_projection_from_graph(root, scope, &source_graph),
9739 )?;
9740 let graph_db = graph_substrate_db_path(root, scope);
9741 let mut store = graph_db_backend_eval_timed_phase(
9742 &mut phases,
9743 "sqlite_open",
9744 "open the local SQLite graph.db with WAL and busy-timeout settings",
9745 || SqliteGraphStore::open(&graph_db),
9746 )?;
9747 let refreshed_source_watermark = traversal_source_watermark(root, path_hint, scope, false)
9748 .ok()
9749 .flatten();
9750 let refresh = store.replace_projection_with_version(
9751 scope.unwrap_or("root"),
9752 &projection,
9753 Some(GRAPH_PROJECTION_VERSION),
9754 refreshed_source_watermark
9755 .or(source_watermark)
9756 .or_else(|| graph_projection_content_hash(&projection)),
9757 )?;
9758 phases.extend(
9759 refresh
9760 .phase_timings
9761 .iter()
9762 .map(|phase| GraphDbBackendEvalPhaseTiming {
9763 name: phase.name.clone(),
9764 duration_micros: phase.duration_micros,
9765 detail: phase.detail.clone(),
9766 }),
9767 );
9768 Ok((source_graph, refresh, phases))
9769}
9770
/// Returns the backend-eval disk cache directory, `.tsift/backend-eval-cache`
/// under `root`.
fn graph_db_backend_eval_disk_cache_dir(root: &Path) -> PathBuf {
    let mut cache_dir = root.to_path_buf();
    cache_dir.push(".tsift/backend-eval-cache");
    cache_dir
}
9774
9775fn graph_db_backend_eval_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
9776 graph_db_backend_eval_disk_cache_dir(root)
9777 .join(kind)
9778 .join(format!("{key}.json.gz"))
9779}
9780
9781fn graph_db_backend_eval_legacy_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
9782 graph_db_backend_eval_disk_cache_dir(root)
9783 .join(kind)
9784 .join(format!("{key}.json"))
9785}
9786
/// Timing breakdown for one disk-cache read attempt.
///
/// All durations are in microseconds. `Debug` is derived so the profile can
/// be logged or shown in assertion failures.
#[derive(Debug, Default, Clone)]
struct GraphDbBackendEvalDiskCacheReadProfile {
    /// Time spent reading cache bytes from disk.
    file_read_micros: u128,
    /// Time spent gunzipping the compressed cache bytes.
    gzip_decode_micros: u128,
    /// Time spent deserializing the JSON payload.
    serde_decode_micros: u128,
    /// True when the value came from the legacy uncompressed `.json` file.
    legacy: bool,
}
9794
9795fn graph_db_backend_eval_read_disk_cache<T: for<'de> Deserialize<'de>>(
9796 root: &Path,
9797 kind: &str,
9798 key: &str,
9799) -> Option<(T, u64, u64, GraphDbBackendEvalDiskCacheReadProfile)> {
9800 let mut profile = GraphDbBackendEvalDiskCacheReadProfile::default();
9801 let path = graph_db_backend_eval_disk_cache_path(root, kind, key);
9802 let read_started = Instant::now();
9803 let read_result = fs::read(&path);
9804 profile.file_read_micros = read_started.elapsed().as_micros();
9805 if let Ok(bytes) = read_result {
9806 let decode_started = Instant::now();
9807 let mut decoder = GzDecoder::new(bytes.as_slice());
9808 let mut decoded = Vec::new();
9809 let decode_ok = decoder.read_to_end(&mut decoded).is_ok();
9810 profile.gzip_decode_micros = decode_started.elapsed().as_micros();
9811 if decode_ok {
9812 let serde_started = Instant::now();
9813 let parsed: Option<T> = serde_json::from_slice(&decoded).ok();
9814 profile.serde_decode_micros = serde_started.elapsed().as_micros();
9815 if let Some(value) = parsed {
9816 return Some((value, bytes.len() as u64, decoded.len() as u64, profile));
9817 }
9818 }
9819 }
9820
9821 let legacy_path = graph_db_backend_eval_legacy_disk_cache_path(root, kind, key);
9822 let legacy_started = Instant::now();
9823 let bytes = fs::read(legacy_path).ok()?;
9824 profile.file_read_micros = profile
9825 .file_read_micros
9826 .saturating_add(legacy_started.elapsed().as_micros());
9827 let serde_started = Instant::now();
9828 let value = serde_json::from_slice(&bytes).ok()?;
9829 profile.serde_decode_micros = profile
9830 .serde_decode_micros
9831 .saturating_add(serde_started.elapsed().as_micros());
9832 profile.legacy = true;
9833 Some((value, bytes.len() as u64, bytes.len() as u64, profile))
9834}
9835
/// Timing breakdown for one disk-cache write.
///
/// All durations are in microseconds. `Debug` is derived so the profile can
/// be logged or shown in assertion failures.
#[derive(Debug, Default, Clone)]
struct GraphDbBackendEvalDiskCacheWriteProfile {
    /// Time spent serializing the value to JSON.
    serde_encode_micros: u128,
    /// Time spent gzip-compressing the JSON bytes.
    gzip_encode_micros: u128,
    /// Time spent writing the compressed bytes to disk.
    file_write_micros: u128,
}
9842
9843fn graph_db_backend_eval_write_disk_cache<T: Serialize>(
9844 root: &Path,
9845 kind: &str,
9846 key: &str,
9847 value: &T,
9848) -> Option<(u64, u64, GraphDbBackendEvalDiskCacheWriteProfile)> {
9849 let mut profile = GraphDbBackendEvalDiskCacheWriteProfile::default();
9850 let path = graph_db_backend_eval_disk_cache_path(root, kind, key);
9851 let parent = path.parent()?;
9852 if fs::create_dir_all(parent).is_err() {
9853 return None;
9854 }
9855 let serde_started = Instant::now();
9856 let bytes = serde_json::to_vec(value).ok()?;
9857 profile.serde_encode_micros = serde_started.elapsed().as_micros();
9858 let gzip_started = Instant::now();
9859 let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
9860 if encoder.write_all(&bytes).is_err() {
9861 return None;
9862 }
9863 let encoded = encoder.finish().ok()?;
9864 profile.gzip_encode_micros = gzip_started.elapsed().as_micros();
9865 let write_started = Instant::now();
9866 if fs::write(&path, &encoded).is_err() {
9867 return None;
9868 }
9869 profile.file_write_micros = write_started.elapsed().as_micros();
9870 Some((encoded.len() as u64, bytes.len() as u64, profile))
9871}
9872
9873fn graph_db_backend_eval_prune_disk_cache(root: &Path, kind: &str, keep_key: &str) -> (usize, u64) {
9874 let dir = graph_db_backend_eval_disk_cache_dir(root).join(kind);
9875 let Ok(entries) = fs::read_dir(dir) else {
9876 return (0, 0);
9877 };
9878 let keep_name = format!("{keep_key}.json.gz");
9879 let mut pruned_files = 0usize;
9880 let mut pruned_bytes = 0u64;
9881 for entry in entries.flatten() {
9882 let path = entry.path();
9883 if !path.is_file() {
9884 continue;
9885 }
9886 let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
9887 continue;
9888 };
9889 if name == keep_name {
9890 continue;
9891 }
9892 let is_backend_eval_cache = name.ends_with(".json") || name.ends_with(".json.gz");
9893 if !is_backend_eval_cache {
9894 continue;
9895 }
9896 let bytes = entry.metadata().map(|metadata| metadata.len()).unwrap_or(0);
9897 if fs::remove_file(&path).is_ok() {
9898 pruned_files += 1;
9899 pruned_bytes += bytes;
9900 }
9901 }
9902 (pruned_files, pruned_bytes)
9903}
9904
9905fn graph_db_backend_eval_full_projection_raw_watermark_rows(
9906 root: &Path,
9907 source_root: &Path,
9908) -> Result<Vec<GraphDbBackendEvalRawSourceWatermarkRow>> {
9909 let mut rows = Vec::new();
9910 let mut entries = walk::walk_files(source_root)?;
9911 entries.sort_by(|left, right| left.path.cmp(&right.path));
9912 for entry in entries {
9913 if traversal_path_is_generated_artifact(root, source_root, &entry.path) {
9914 continue;
9915 }
9916 let bytes = fs::read(&entry.path)
9917 .with_context(|| format!("reading source input {}", entry.path.display()))?;
9918 rows.push(GraphDbBackendEvalRawSourceWatermarkRow {
9919 path: traversal_watermark_path(root, &entry.path),
9920 bytes: bytes.len() as u64,
9921 content_hash: content_hash(&bytes)?,
9922 });
9923 }
9924 Ok(rows)
9925}
9926
9927fn graph_db_backend_eval_full_projection_source_watermark(
9928 root: &Path,
9929 scope: Option<&str>,
9930) -> Result<GraphDbBackendEvalFullProjectionSourceWatermark> {
9931 let path_hint = root;
9932 let mut detail_parts = Vec::new();
9933 let mut parts = vec![
9934 format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
9935 format!("cache_version:{GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION}"),
9936 "watermark_kind:stable_full_projection_inputs".to_string(),
9937 format!("scope:{}", scope.unwrap_or("root")),
9938 format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
9939 ];
9940
9941 let gate = prepare_agent_doc_index_gate(root, path_hint, scope, "full-projection cache key");
9942 match gate.db_path.as_ref().filter(|db_path| db_path.exists()) {
9943 Some(db_path) => {
9944 let db = index::IndexDb::open_read_only_resilient(db_path)?;
9945 parts.push("index_mode:indexed".to_string());
9946 detail_parts.push("mode=indexed".to_string());
9947 parts.push(format!(
9948 "index_source_root:{}",
9949 traversal_watermark_path(root, &gate.source_root)
9950 ));
9951
9952 let symbols = db
9953 .all_symbols()?
9954 .into_iter()
9955 .filter(|symbol| {
9956 !traversal_path_is_generated_artifact(
9957 root,
9958 &gate.source_root,
9959 Path::new(&symbol.file),
9960 )
9961 })
9962 .collect::<Vec<_>>();
9963 let symbols_hash = content_hash(&symbols)?;
9964 detail_parts.push(format!("symbols={symbols_hash}"));
9965 parts.push(format!("index_symbols:{symbols_hash}"));
9966
9967 let edges = db
9968 .all_stored_edges()?
9969 .into_iter()
9970 .filter(|edge| {
9971 !traversal_path_is_generated_artifact(
9972 root,
9973 &gate.source_root,
9974 Path::new(&edge.caller_file),
9975 )
9976 })
9977 .collect::<Vec<_>>();
9978 let edges_hash = content_hash(&edges)?;
9979 detail_parts.push(format!("call_edges={edges_hash}"));
9980 parts.push(format!("index_call_edges:{edges_hash}"));
9981
9982 let routes = db
9983 .all_routes()?
9984 .into_iter()
9985 .filter(|route| {
9986 !traversal_path_is_generated_artifact(
9987 root,
9988 &gate.source_root,
9989 Path::new(&route.file),
9990 )
9991 })
9992 .collect::<Vec<_>>();
9993 let routes_hash = content_hash(&routes)?;
9994 detail_parts.push(format!("routes={routes_hash}"));
9995 parts.push(format!("index_routes:{routes_hash}"));
9996 }
9997 None => {
9998 parts.push("index_mode:raw_fallback".to_string());
9999 detail_parts.push("mode=raw_fallback".to_string());
10000 parts.push(format!(
10001 "raw_source_root:{}",
10002 traversal_watermark_path(root, &gate.source_root)
10003 ));
10004 let raw_rows =
10005 graph_db_backend_eval_full_projection_raw_watermark_rows(root, &gate.source_root)?;
10006 let raw_hash = content_hash(&raw_rows)?;
10007 detail_parts.push(format!("raw_source_files={raw_hash}"));
10008 parts.push(format!("raw_source_files:{raw_hash}"));
10009 }
10010 }
10011
10012 parts.push("agent_doc_session_markdown:bounded_real_dataset_only".to_string());
10013 detail_parts.push("session_markdown=bounded_real_dataset_only".to_string());
10014 let summaries_start = parts.len();
10015 push_traversal_summaries_watermark_part(root, &mut parts)?;
10016 let summaries_hash = content_hash(&parts[summaries_start..].to_vec())?;
10017 detail_parts.push(format!("summaries={summaries_hash}"));
10018 let value = content_hash(&parts)?;
10019 detail_parts.push(format!("watermark={value}"));
10020 Ok(GraphDbBackendEvalFullProjectionSourceWatermark {
10021 value,
10022 detail: detail_parts.join(" "),
10023 })
10024}
10025
10026fn graph_db_backend_eval_full_projection_cache_key(
10027 root: &Path,
10028 scope: Option<&str>,
10029) -> Result<(String, String, String)> {
10030 let source_watermark = graph_db_backend_eval_full_projection_source_watermark(root, scope)?;
10031 let key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
10032 root,
10033 scope,
10034 &source_watermark.value,
10035 )?;
10036 Ok((source_watermark.value, key, source_watermark.detail))
10037}
10038
10039fn graph_db_backend_eval_full_projection_cache_key_for_watermark(
10040 root: &Path,
10041 scope: Option<&str>,
10042 source_watermark: &str,
10043) -> Result<String> {
10044 content_hash(&serde_json::json!({
10045 "version": GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION,
10046 "root": root.display().to_string(),
10047 "scope": scope.unwrap_or("root"),
10048 "source_watermark": source_watermark,
10049 }))
10050}
10051
10052pub(crate) fn graph_db_backend_eval_full_projection_with_profile(
10053 root: &Path,
10054 scope: Option<&str>,
10055) -> Result<(
10056 GraphProjection,
10057 Vec<String>,
10058 Vec<GraphDbBackendEvalPhaseTiming>,
10059 GraphDbBackendEvalFullProjectionCacheStats,
10060)> {
10061 let (source_watermark, key, source_watermark_detail) =
10062 graph_db_backend_eval_full_projection_cache_key(root, scope)?;
10063 let lookup_started = Instant::now();
10064 if let Some((cached, disk_bytes, json_bytes, read_profile)) =
10065 graph_db_backend_eval_read_disk_cache::<GraphDbBackendEvalFullProjectionCache>(
10066 root,
10067 "full_projection",
10068 &key,
10069 )
10070 && cached.version == GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION
10071 && cached.key == key
10072 && cached.source_watermark == source_watermark
10073 {
10074 let lookup_overhead_micros = lookup_started
10075 .elapsed()
10076 .as_micros()
10077 .saturating_sub(read_profile.file_read_micros)
10078 .saturating_sub(read_profile.gzip_decode_micros)
10079 .saturating_sub(read_profile.serde_decode_micros);
10080 let prune_started = Instant::now();
10081 let (pruned_files, pruned_bytes) =
10082 graph_db_backend_eval_prune_disk_cache(root, "full_projection", &key);
10083 let prune_micros = prune_started.elapsed().as_micros();
10084 let cache_stats = GraphDbBackendEvalFullProjectionCacheStats {
10085 hit: true,
10086 disk_bytes,
10087 json_bytes,
10088 pruned_files,
10089 pruned_bytes,
10090 };
10091 let read_detail_suffix = if read_profile.legacy {
10092 " (legacy uncompressed cache path)"
10093 } else {
10094 ""
10095 };
10096 return Ok((
10097 cached.projection,
10098 cached.warnings,
10099 vec![
10100 graph_db_backend_eval_phase_timing(
10101 "full_projection.cache_lookup",
10102 lookup_overhead_micros,
10103 &format!(
10104 "watermark/version check overhead around the cache load phases; {source_watermark_detail}"
10105 ),
10106 ),
10107 graph_db_backend_eval_phase_timing(
10108 "full_projection.cache.file_read",
10109 read_profile.file_read_micros,
10110 &format!(
10111 "read compressed cache bytes from .tsift/backend-eval-cache{read_detail_suffix}"
10112 ),
10113 ),
10114 graph_db_backend_eval_phase_timing(
10115 "full_projection.cache.gzip_decode",
10116 read_profile.gzip_decode_micros,
10117 "gunzip the compressed projection cache bytes",
10118 ),
10119 graph_db_backend_eval_phase_timing(
10120 "full_projection.cache.serde_decode",
10121 read_profile.serde_decode_micros,
10122 "serde_json deserialize the decoded projection cache payload",
10123 ),
10124 graph_db_backend_eval_phase_timing(
10125 "full_projection.cache.prune",
10126 prune_micros,
10127 "prune sibling cache files older than the current key",
10128 ),
10129 graph_db_backend_eval_phase_timing(
10130 "full_projection.source_graph_build",
10131 0,
10132 "reused cached full-project source graph; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
10133 ),
10134 graph_db_backend_eval_phase_timing(
10135 "full_projection.projection_rows",
10136 0,
10137 "reused cached provider-neutral full-project projection rows",
10138 ),
10139 ],
10140 cache_stats,
10141 ));
10142 }
10143
10144 let mut cache_stats = GraphDbBackendEvalFullProjectionCacheStats::default();
10145 let mut phases = vec![graph_db_backend_eval_phase_timing(
10146 "full_projection.cache_lookup",
10147 lookup_started.elapsed().as_micros(),
10148 &format!(
10149 "no full-project projection cache entry matched the source watermark; {source_watermark_detail}"
10150 ),
10151 )];
10152 let full_source = graph_db_backend_eval_timed_phase(
10153 &mut phases,
10154 "full_projection.source_graph_build",
10155 "opt-in full-project source graph build; uses the project root as the path hint so bounded session projections cannot hide full-graph regressions",
10156 || build_traversal_graph_source_with_options(root, root, scope, false),
10157 )?;
10158 let projection = graph_db_backend_eval_timed_phase(
10159 &mut phases,
10160 "full_projection.projection_rows",
10161 "provider-neutral row construction for the opt-in full-project projection dataset",
10162 || traversal_projection_from_graph(root, scope, &full_source),
10163 )?;
10164 let warnings = full_source.warnings;
10165 let refreshed_source_watermark =
10166 graph_db_backend_eval_full_projection_source_watermark(root, scope)
10167 .map(|watermark| watermark.value)
10168 .unwrap_or_else(|_| source_watermark.clone());
10169 let write_key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
10170 root,
10171 scope,
10172 &refreshed_source_watermark,
10173 )?;
10174 let cache = GraphDbBackendEvalFullProjectionCache {
10175 version: GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION.to_string(),
10176 key: write_key.clone(),
10177 source_watermark: refreshed_source_watermark,
10178 projection: projection.clone(),
10179 warnings: warnings.clone(),
10180 };
10181 if let Some((disk_bytes, json_bytes, write_profile)) =
10182 graph_db_backend_eval_write_disk_cache(root, "full_projection", &write_key, &cache)
10183 {
10184 cache_stats.disk_bytes = disk_bytes;
10185 cache_stats.json_bytes = json_bytes;
10186 phases.push(graph_db_backend_eval_phase_timing(
10187 "full_projection.cache.serde_encode",
10188 write_profile.serde_encode_micros,
10189 "serde_json serialize the projection cache payload before compression",
10190 ));
10191 phases.push(graph_db_backend_eval_phase_timing(
10192 "full_projection.cache.gzip_encode",
10193 write_profile.gzip_encode_micros,
10194 "gzip-compress the serialized projection cache payload",
10195 ));
10196 phases.push(graph_db_backend_eval_phase_timing(
10197 "full_projection.cache.file_write",
10198 write_profile.file_write_micros,
10199 "write the compressed projection cache bytes to .tsift/backend-eval-cache",
10200 ));
10201 }
10202 let prune_started = Instant::now();
10203 let (pruned_files, pruned_bytes) =
10204 graph_db_backend_eval_prune_disk_cache(root, "full_projection", &write_key);
10205 phases.push(graph_db_backend_eval_phase_timing(
10206 "full_projection.cache.prune",
10207 prune_started.elapsed().as_micros(),
10208 "prune sibling cache files older than the current key",
10209 ));
10210 cache_stats.pruned_files = pruned_files;
10211 cache_stats.pruned_bytes = pruned_bytes;
10212 Ok((projection, warnings, phases, cache_stats))
10213}
10214
10215fn graph_db_backend_eval_timed(
10216 name: &str,
10217 run: impl FnOnce() -> Result<(Option<usize>, serde_json::Value)>,
10218) -> (
10219 GraphDbBackendEvalOperation,
10220 Option<GraphDbBackendEvalSignature>,
10221) {
10222 let started = Instant::now();
10223 match run() {
10224 Ok((rows, value)) => (
10225 GraphDbBackendEvalOperation {
10226 name: name.to_string(),
10227 supported: true,
10228 status: "ok".to_string(),
10229 duration_micros: started.elapsed().as_micros(),
10230 rows,
10231 error: None,
10232 },
10233 Some(GraphDbBackendEvalSignature {
10234 operation: name.to_string(),
10235 value,
10236 }),
10237 ),
10238 Err(err) => (
10239 GraphDbBackendEvalOperation {
10240 name: name.to_string(),
10241 supported: false,
10242 status: "error".to_string(),
10243 duration_micros: started.elapsed().as_micros(),
10244 rows: None,
10245 error: Some(format!("{err:#}")),
10246 },
10247 None,
10248 ),
10249 }
10250}
10251
10252fn graph_db_backend_eval_parity(
10253 sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
10254 candidate_signatures: &[GraphDbBackendEvalSignature],
10255) -> GraphDbBackendEvalParity {
10256 let Some(sqlite_signatures) = sqlite_signatures else {
10257 return GraphDbBackendEvalParity {
10258 matches_sqlite: true,
10259 diagnostics: Vec::new(),
10260 };
10261 };
10262 let sqlite = sqlite_signatures
10263 .iter()
10264 .map(|signature| (signature.operation.as_str(), &signature.value))
10265 .collect::<BTreeMap<_, _>>();
10266 let candidate = candidate_signatures
10267 .iter()
10268 .map(|signature| (signature.operation.as_str(), &signature.value))
10269 .collect::<BTreeMap<_, _>>();
10270 let mut diagnostics = Vec::new();
10271 for (operation, sqlite_value) in sqlite {
10272 match candidate.get(operation) {
10273 Some(candidate_value) if *candidate_value == sqlite_value => {}
10274 Some(_) => diagnostics.push(format!("{operation} output differed from SQLite")),
10275 None => diagnostics.push(format!(
10276 "{operation} did not complete for candidate backend"
10277 )),
10278 }
10279 }
10280 GraphDbBackendEvalParity {
10281 matches_sqlite: diagnostics.is_empty(),
10282 diagnostics,
10283 }
10284}
10285
10286pub(crate) fn graph_db_backend_eval_targets(
10287 store: &impl GraphStore,
10288 requested: &[String],
10289) -> Result<Vec<String>> {
10290 let requested = requested
10291 .iter()
10292 .filter_map(|target| normalize_conflict_target(target))
10293 .collect::<Vec<_>>();
10294 if !requested.is_empty() {
10295 return Ok(requested);
10296 }
10297
10298 for kind in ["backlog", "job_packet"] {
10299 let nodes = store.nodes_by_kind(kind)?;
10300 if let Some(node) = nodes.first() {
10301 if let Some(ref_id) = node.properties.get("ref_id") {
10302 return Ok(vec![ref_id.clone()]);
10303 }
10304 return Ok(vec![node.id.clone()]);
10305 }
10306 }
10307 Ok(Vec::new())
10308}
10309
10310fn graph_db_backend_eval_path_targets(
10311 store: &impl GraphStore,
10312 max_hops: usize,
10313) -> Result<Option<(String, String, usize)>> {
10314 let synthetic_from = "gsym-synthetic-0000";
10315 let synthetic_to = format!("gsym-synthetic-{max_hops:04}");
10316 if store.node(synthetic_from)?.is_some() && store.node(&synthetic_to)?.is_some() {
10317 let outgoing = store.outgoing_edges(synthetic_from, None)?;
10318 if outgoing.len() > 1
10319 && let Some(edge) = outgoing.first()
10320 {
10321 return Ok(Some((
10322 edge.from_id.clone(),
10323 edge.to_id.clone(),
10324 GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
10325 )));
10326 }
10327 return Ok(Some((synthetic_from.to_string(), synthetic_to, max_hops)));
10328 }
10329
10330 Ok(store.sample_edge(None)?.map(|edge| {
10331 (
10332 edge.from_id,
10333 edge.to_id,
10334 GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
10335 )
10336 }))
10337}
10338
10339fn graph_db_backend_eval_path_operation<S: GraphStore>(
10340 store: &S,
10341 configured_max_hops: usize,
10342) -> (
10343 GraphDbBackendEvalOperation,
10344 Option<GraphDbBackendEvalSignature>,
10345) {
10346 let operation_name = if configured_max_hops == GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
10347 "path_max_hops".to_string()
10348 } else {
10349 format!("path_max_hops_{configured_max_hops}")
10350 };
10351 graph_db_backend_eval_timed(&operation_name, || {
10352 let (from, to, effective_max_hops) =
10353 graph_db_backend_eval_path_targets(store, configured_max_hops)?
10354 .context("backend-eval path probe requires at least one traversable edge")?;
10355 let path = store.shortest_path_with_max_hops(&from, &to, None, Some(effective_max_hops))?;
10356 let warning = if configured_max_hops > GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
10357 Some(format!(
10358 "{configured_max_hops}-hop tier is measured only; keep user-facing defaults at {} until repeated samples and SQLite query-plan checks pass",
10359 GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS
10360 ))
10361 } else if path.is_none() && effective_max_hops == configured_max_hops {
10362 Some(format!(
10363 "path probe truncated at {configured_max_hops} hops before a route was found"
10364 ))
10365 } else {
10366 None
10367 };
10368 Ok((
10369 path.as_ref().map(|path| path.nodes.len()),
10370 serde_json::json!({
10371 "from": from,
10372 "to": to,
10373 "configured_max_hops": configured_max_hops,
10374 "effective_max_hops": effective_max_hops,
10375 "hops": path.as_ref().map(|path| path.hops),
10376 "nodes": path.as_ref().map(|path| &path.nodes),
10377 "found": path.is_some(),
10378 "warning": warning,
10379 }),
10380 ))
10381 })
10382}
10383
10384fn graph_db_backend_eval_neighborhood_operation<S: GraphStore>(
10385 store: &S,
10386 depth: usize,
10387 limit: usize,
10388) -> (
10389 GraphDbBackendEvalOperation,
10390 Option<GraphDbBackendEvalSignature>,
10391) {
10392 graph_db_backend_eval_timed("neighborhood", || {
10393 let edge = match store.sample_edge(Some("calls"))? {
10394 Some(edge) => edge,
10395 None => store.sample_edge(None)?.context(
10396 "backend-eval neighborhood probe requires at least one traversable edge",
10397 )?,
10398 };
10399 let page = store
10400 .paged_neighborhood(
10401 &edge.from_id,
10402 depth,
10403 Some(&edge.kind),
10404 GraphQueryOptions {
10405 limit: Some(limit.max(1)),
10406 ..GraphQueryOptions::default()
10407 },
10408 )?
10409 .with_context(|| {
10410 format!(
10411 "backend-eval neighborhood target not found: {}",
10412 edge.from_id
10413 )
10414 })?;
10415 Ok((
10416 Some(page.nodes.len() + page.edges.len()),
10417 serde_json::json!({
10418 "center": edge.from_id,
10419 "kind": edge.kind,
10420 "depth": depth,
10421 "limit": limit.max(1),
10422 "node_ids": page.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10423 "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10424 "truncated": page.page.truncated,
10425 }),
10426 ))
10427 })
10428}
10429
10430fn graph_db_backend_eval_related_operation<S: GraphStore>(
10431 root: &Path,
10432 scope: Option<&str>,
10433 store: &S,
10434 depth: usize,
10435 limit: usize,
10436) -> (
10437 GraphDbBackendEvalOperation,
10438 Option<GraphDbBackendEvalSignature>,
10439) {
10440 graph_db_backend_eval_timed("related", || {
10441 let query = "backend evaluation";
10442 let semantic = semantic_related_report_from_store(
10443 root,
10444 scope,
10445 query,
10446 3,
10447 SemanticRelatedKind::All,
10448 store,
10449 )?;
10450 let seed_ids = semantic
10451 .items
10452 .iter()
10453 .map(|item| item.handle.clone())
10454 .collect::<Vec<_>>();
10455 let subgraph =
10456 graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit.max(1))?;
10457 Ok((
10458 Some(subgraph.nodes.len() + subgraph.edges.len()),
10459 serde_json::json!({
10460 "query": query,
10461 "seed_ids": seed_ids,
10462 "node_ids": subgraph.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10463 "edge_ids": subgraph.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10464 "truncated": subgraph.truncated,
10465 "warnings": semantic.warnings,
10466 "diagnostics": subgraph.diagnostics,
10467 }),
10468 ))
10469 })
10470}
10471
10472fn graph_db_backend_eval_evidence_signature(report: &GraphDbEvidenceReport) -> serde_json::Value {
10473 serde_json::json!({
10474 "target": report.target,
10475 "target_node_id": report.target_node.id,
10476 "target_kind": report.target_node.kind,
10477 "worker_context": report.worker_context.iter().map(|node| &node.id).collect::<Vec<_>>(),
10478 "source_handles": report.source_handles.iter().map(|node| &node.id).collect::<Vec<_>>(),
10479 "worker_results": report.worker_results.iter().map(|node| &node.id).collect::<Vec<_>>(),
10480 "semantic_related": report.semantic_related.iter().map(|node| &node.id).collect::<Vec<_>>(),
10481 "path_count": report.shortest_paths.len(),
10482 })
10483}
10484
10485fn graph_db_backend_eval_target_resolution_signature(
10486 resolved: &[(String, SubstrateGraphNode)],
10487) -> serde_json::Value {
10488 serde_json::json!({
10489 "targets": resolved.iter().map(|(target, node)| {
10490 serde_json::json!({
10491 "target": target,
10492 "target_node_id": node.id,
10493 "target_kind": node.kind,
10494 "target_label": node.label,
10495 })
10496 }).collect::<Vec<_>>(),
10497 })
10498}
10499
10500fn graph_db_backend_eval_conflict_signature(report: &ConflictMatrixReport) -> serde_json::Value {
10501 serde_json::json!({
10502 "targets": report.targets,
10503 "can_parallel": report.can_parallel,
10504 "fail_closed": report.fail_closed,
10505 "cross_target_parallel_safe": report.cross_target_parallel_safe,
10506 "per_target_fail_closed": report.per_target_fail_closed.iter().map(|target| &target.target).collect::<Vec<_>>(),
10507 "candidates": report.candidates.iter().map(|candidate| {
10508 serde_json::json!({
10509 "target": candidate.target,
10510 "risk": conflict_risk_label(candidate.risk),
10511 "owned_files": candidate.owned_files,
10512 "owned_symbols": candidate.owned_symbols,
10513 "source_handles": candidate.source_handles.iter().map(|handle| &handle.handle).collect::<Vec<_>>(),
10514 "previously_completed": candidate.previously_completed,
10515 "parallel_safe": candidate.parallel_safe,
10516 })
10517 }).collect::<Vec<_>>(),
10518 "conflicts": report.conflicts.iter().map(|pair| {
10519 serde_json::json!({
10520 "left": pair.left,
10521 "right": pair.right,
10522 "risk": conflict_risk_label(pair.risk),
10523 })
10524 }).collect::<Vec<_>>(),
10525 })
10526}
10527
10528fn graph_db_backend_eval_dispatch_signature(report: &DispatchTraceReport) -> serde_json::Value {
10529 serde_json::json!({
10530 "targets": report.targets,
10531 "node_ids": report.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10532 "edge_keys": report.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10533 "evidence_packet_ids": report.evidence_packet_ids,
10534 "worker_prompt_targets": report.worker_prompt_packets.iter().map(|packet| &packet.target).collect::<Vec<_>>(),
10535 "truncated": report.truncated,
10536 })
10537}
10538
10539fn graph_db_backend_eval_edge_scan_probe(
10540 store: &impl GraphStore,
10541) -> Result<(SubstrateGraphEdge, Vec<GraphPropertyFilter>)> {
10542 if let Some((edge, filter)) = store.sample_edge_with_property()? {
10543 return Ok((edge, vec![filter]));
10544 }
10545 let edge = store
10546 .sample_edge(None)?
10547 .context("backend-eval edge scan requires at least one edge")?;
10548 Ok((edge, Vec::new()))
10549}
10550
/// Runs the backend-eval probe suite against one `store` and assembles its report.
///
/// The caller-supplied `refresh_operation` is listed first. The remaining probes run in this
/// order: `status`, `edge_lookup`, `edge_property_scan`, `incident_edges`, `neighborhood`,
/// `related`, one path probe per configured hop cap, `evidence_target_resolution`, `evidence`,
/// `conflict_matrix` and `dispatch_trace`. Each probe appends one operation entry and, when it
/// yields one, a signature.
///
/// Later probes reuse state produced by earlier ones through captured `Option`s:
/// `conflict_matrix` consumes the evidence packets stashed by `evidence`, and `dispatch_trace`
/// consumes the conflict report and graph snapshot stashed by `conflict_matrix`.
///
/// Returns the backend report (with `total_micros` summed over all operations and parity
/// computed against `sqlite_signatures`) together with the signatures collected for this store.
#[allow(clippy::too_many_arguments)]
fn graph_db_backend_eval_report_for_store<S: GraphStore>(
    backend: &str,
    adapter: &str,
    read_only: bool,
    root: &Path,
    path: &Path,
    scope: Option<&str>,
    targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
    store: &S,
    freshness: GraphDbFreshnessReport,
    refresh_operation: GraphDbBackendEvalOperation,
    refresh_signature: Option<GraphDbBackendEvalSignature>,
    sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
    extra_warnings: Vec<String>,
    prepared: &ConflictMatrixPreparedInputs,
    projection_load: &str,
    lock_behavior: &str,
    install_portability: &str,
) -> (
    GraphDbBackendEvalBackendReport,
    Vec<GraphDbBackendEvalSignature>,
) {
    // Seed both lists with the refresh step that the caller already measured.
    let mut operations = vec![refresh_operation];
    let mut signatures = refresh_signature.into_iter().collect::<Vec<_>>();

    // status: node/edge counts plus the freshness status.
    let (operation, signature) = graph_db_backend_eval_timed("status", || {
        let (nodes, edges) = store.graph_counts()?;
        Ok((
            Some(nodes + edges),
            serde_json::json!({
                "freshness": freshness.status,
                "nodes": nodes,
                "edges": edges,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // edge_lookup: sample any edge, then fetch it again by its key.
    let (operation, signature) = graph_db_backend_eval_timed("edge_lookup", || {
        let edge = store
            .sample_edge(None)?
            .context("backend-eval edge lookup requires at least one edge")?;
        let edge_id = graph_db_edge_key(&edge);
        let found = store
            .edge(&edge_id)?
            .with_context(|| format!("backend-eval edge lookup missed {edge_id}"))?;
        Ok((
            Some(1),
            serde_json::json!({
                "edge_id": edge_id,
                "from_id": found.from_id,
                "to_id": found.to_id,
                "kind": found.kind,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // edge_property_scan: page edges of the sampled kind, filtered by a sampled property when
    // the store can provide one.
    let (operation, signature) = graph_db_backend_eval_timed("edge_property_scan", || {
        let (edge, filters) = graph_db_backend_eval_edge_scan_probe(store)?;
        let page = store.paged_edges(
            Some(&edge.kind),
            GraphQueryOptions {
                limit: Some(limit.max(1)),
                // Cloned because `filters` is still needed for the signature below.
                property_filters: filters.clone(),
                ..GraphQueryOptions::default()
            },
        )?;
        Ok((
            Some(page.edges.len()),
            serde_json::json!({
                "kind": edge.kind,
                "filters": filters.iter().map(|filter| format!("{}={}", filter.key, filter.value)).collect::<Vec<_>>(),
                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
                "truncated": page.page.truncated,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // incident_edges: page the edges of the sampled kind incident to the sampled edge's source.
    let (operation, signature) = graph_db_backend_eval_timed("incident_edges", || {
        let edge = store
            .sample_edge(None)?
            .context("backend-eval incident edge scan requires at least one edge")?;
        let page = store.paged_incident_edges(
            &edge.from_id,
            Some(&edge.kind),
            GraphQueryOptions {
                limit: Some(limit.max(1)),
                ..GraphQueryOptions::default()
            },
        )?;
        Ok((
            Some(page.edges.len()),
            serde_json::json!({
                "node_id": edge.from_id,
                "kind": edge.kind,
                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
                "truncated": page.page.truncated,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    let (operation, signature) = graph_db_backend_eval_neighborhood_operation(store, depth, limit);
    operations.push(operation);
    signatures.extend(signature);

    let (operation, signature) =
        graph_db_backend_eval_related_operation(root, scope, store, depth, limit);
    operations.push(operation);
    signatures.extend(signature);

    // Path probes: the base hop cap first, then every extended hop cap.
    for configured_max_hops in std::iter::once(GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS)
        .chain(GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS)
    {
        let (operation, signature) =
            graph_db_backend_eval_path_operation(store, configured_max_hops);
        operations.push(operation);
        signatures.extend(signature);
    }

    // evidence_target_resolution: every requested target must resolve to a node.
    let (operation, signature) = graph_db_backend_eval_timed("evidence_target_resolution", || {
        let resolved = targets
            .iter()
            .map(|target| {
                let node = graph_db_resolve_evidence_target(store, target)?
                    .with_context(|| format!("backend-eval target not found: {target}"))?;
                Ok((target.clone(), node))
            })
            .collect::<Result<Vec<_>>>()?;
        let signature = graph_db_backend_eval_target_resolution_signature(&resolved);
        Ok((Some(resolved.len()), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    // Hand-off slots: filled by one probe's closure and drained by a later one.
    let mut evidence_for_report = None;
    let mut graph_snapshot_for_trace = None;
    let (operation, signature) = graph_db_backend_eval_timed("evidence", || {
        let resolved_targets =
            resolve_conflict_matrix_targets(store, targets, &prepared.context_pack)?;
        let evidence = collect_conflict_matrix_evidence_packets(
            root,
            scope,
            backend,
            &resolved_targets,
            depth,
            limit,
            store,
            freshness.clone(),
        )?;
        // Only the first packet's report feeds the signature; rows are summed over all packets.
        let report = &evidence
            .first()
            .context("backend-eval evidence requires at least one target")?
            .report;
        let rows = evidence
            .iter()
            .map(|entry| {
                entry.report.worker_context.len()
                    + entry.report.source_handles.len()
                    + entry.report.worker_results.len()
                    + entry.report.semantic_related.len()
            })
            .sum();
        let signature = graph_db_backend_eval_evidence_signature(report);
        // Stash the resolved targets and packets so `conflict_matrix` can reuse them.
        evidence_for_report = Some((resolved_targets, evidence));
        Ok((Some(rows), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    let mut conflict_for_trace = None;
    let (operation, signature) = graph_db_backend_eval_timed("conflict_matrix", || {
        // Reuse the evidence gathered above when it is available; otherwise prepare the graph
        // inputs from scratch.
        let graph_prepared = if let Some((targets, evidence)) = evidence_for_report.take() {
            let graph =
                conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
            let shared_preparation =
                conflict_matrix_shared_preparation_summary(&graph, &evidence, "memory_reuse");
            ConflictMatrixGraphPreparedInputs {
                targets,
                graph,
                evidence,
                shared_preparation,
            }
        } else {
            prepare_conflict_matrix_graph_orchestration(
                root,
                scope,
                backend,
                targets,
                prepared,
                depth,
                limit,
                store,
                freshness.clone(),
            )?
        };
        let report = build_conflict_matrix_report_from_prepared_graph(
            root,
            path,
            scope,
            depth,
            limit,
            impact_limit,
            freshness.clone(),
            extra_warnings.clone(),
            prepared,
            &graph_prepared,
        )?;
        let signature = graph_db_backend_eval_conflict_signature(&report);
        let rows = report.candidates.len() + report.conflicts.len();
        // Stash the report and graph snapshot for the `dispatch_trace` probe.
        conflict_for_trace = Some(report);
        graph_snapshot_for_trace = Some(graph_prepared.graph);
        Ok((Some(rows), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    // dispatch_trace: fails with a contextual error when `conflict_matrix` left nothing behind.
    let (operation, signature) = graph_db_backend_eval_timed("dispatch_trace", || {
        let conflict = conflict_for_trace
            .take()
            .context("backend-eval dispatch-trace requires a completed conflict-matrix report")?;
        let graph = graph_snapshot_for_trace
            .take()
            .context("backend-eval dispatch-trace requires conflict-matrix graph preparation")?;
        let report = build_dispatch_trace_report_from_conflict_snapshot(
            root,
            scope,
            conflict,
            graph.nodes,
            graph.edges,
            depth,
            limit,
            Vec::new(),
        )?;
        Ok((
            Some(report.nodes.len() + report.edges.len()),
            graph_db_backend_eval_dispatch_signature(&report),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // The total covers every listed operation, including the caller-supplied refresh.
    let total_micros = operations
        .iter()
        .map(|operation| operation.duration_micros)
        .sum();
    let parity = graph_db_backend_eval_parity(sqlite_signatures, &signatures);
    (
        GraphDbBackendEvalBackendReport {
            backend: backend.to_string(),
            adapter: adapter.to_string(),
            read_only,
            projection_load: projection_load.to_string(),
            operations,
            total_micros,
            parity,
            lock_behavior: lock_behavior.to_string(),
            install_portability: install_portability.to_string(),
        },
        signatures,
    )
}
10823
10824pub(crate) fn graph_db_backend_eval_refresh_operation(
10825 duration_micros: u128,
10826 rows: usize,
10827 value: serde_json::Value,
10828) -> (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature) {
10829 (
10830 GraphDbBackendEvalOperation {
10831 name: "refresh".to_string(),
10832 supported: true,
10833 status: "ok".to_string(),
10834 duration_micros,
10835 rows: Some(rows),
10836 error: None,
10837 },
10838 GraphDbBackendEvalSignature {
10839 operation: "refresh".to_string(),
10840 value,
10841 },
10842 )
10843}
10844
10845pub(crate) fn graph_db_backend_eval_synthetic_projection(
10846 nodes: usize,
10847 fanout: usize,
10848) -> GraphProjection {
10849 let nodes = nodes.max(12);
10850 let symbol_count = nodes.saturating_sub(9).max(1);
10851 let source = GraphProvenance::new("backend-eval", "synthetic");
10852 let mut projection_nodes = vec![
10853 SubstrateGraphNode::new(
10854 "projection:tsift-traversal:synthetic",
10855 GRAPH_PROJECTION_META_KIND,
10856 "synthetic projection",
10857 )
10858 .with_property("projection_version", GRAPH_PROJECTION_VERSION)
10859 .with_property(
10860 "content_hash",
10861 format!("synthetic-{nodes}-{fanout}-{symbol_count}"),
10862 )
10863 .with_provenance(source.clone()),
10864 SubstrateGraphNode::new("gses-synthetic", "session", "synthetic session")
10865 .with_property("ref_id", "synthetic-session"),
10866 SubstrateGraphNode::new("gbak-synthetic", "backlog", "#synthetic")
10867 .with_property("ref_id", "synthetic")
10868 .with_property("path", "tasks/software/synthetic.md")
10869 .with_property("line", "1")
10870 .with_property(
10871 "expand",
10872 "tsift source-read tasks/software/synthetic.md --start 1 --lines 40",
10873 ),
10874 SubstrateGraphNode::new("gjob-synthetic", "job_packet", "do #synthetic")
10875 .with_property("ref_id", "synthetic"),
10876 SubstrateGraphNode::new("gwctx-synthetic", "worker_context", "synthetic context")
10877 .with_property("target", "synthetic")
10878 .with_property("summary", "Synthetic worker owns synthetic.rs")
10879 .with_property(
10880 "expand",
10881 "tsift source-read synthetic.rs --start 1 --lines 80",
10882 ),
10883 SubstrateGraphNode::new("gsrc-synthetic", "source_handle", "synthetic.rs:1-80")
10884 .with_property("file", "synthetic.rs")
10885 .with_property("start", "1")
10886 .with_property("end", "80")
10887 .with_property(
10888 "expand",
10889 "tsift source-read synthetic.rs --start 1 --lines 80",
10890 ),
10891 SubstrateGraphNode::new("gfil-synthetic", "file", "synthetic.rs")
10892 .with_property("path", "synthetic.rs"),
10893 SubstrateGraphNode::new("gsem-synthetic", "semantic_concept", "backend evaluation")
10894 .with_property("handle", "gsem-synthetic")
10895 .with_property("label", "backend evaluation")
10896 .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
10897 .with_property(
10898 "embedding",
10899 semantic_embedding_property("backend evaluation"),
10900 ),
10901 SubstrateGraphNode::new("gwres-synthetic", "worker_result", "completed #synthetic")
10902 .with_property("ref_id", "synthetic")
10903 .with_property("status", "completed")
10904 .with_property("touched_files", "synthetic.rs")
10905 .with_property("expected_tests", "cargo test --test graph_db_conformance"),
10906 ];
10907 for idx in 0..symbol_count {
10908 projection_nodes.push(
10909 SubstrateGraphNode::new(
10910 format!("gsym-synthetic-{idx:04}"),
10911 "symbol",
10912 format!("synthetic_symbol_{idx:04}"),
10913 )
10914 .with_property("ref_id", format!("synthetic_symbol_{idx:04}"))
10915 .with_property("path", "synthetic.rs")
10916 .with_property("line", (idx + 1).to_string()),
10917 );
10918 }
10919
10920 let mut projection_edges = vec![
10921 SubstrateGraphEdge::new("gses-synthetic", "gbak-synthetic", "contains"),
10922 SubstrateGraphEdge::new("gses-synthetic", "gjob-synthetic", "queues"),
10923 SubstrateGraphEdge::new("gbak-synthetic", "gwctx-synthetic", "has_context"),
10924 SubstrateGraphEdge::new("gjob-synthetic", "gwctx-synthetic", "has_context"),
10925 SubstrateGraphEdge::new("gwctx-synthetic", "gsrc-synthetic", "uses_source"),
10926 SubstrateGraphEdge::new("gbak-synthetic", "gwres-synthetic", "has_worker_result"),
10927 SubstrateGraphEdge::new("gbak-synthetic", "gsem-synthetic", "mentions_concept"),
10928 SubstrateGraphEdge::new("gsrc-synthetic", "gfil-synthetic", "reads_file"),
10929 SubstrateGraphEdge::new("gfil-synthetic", "gsym-synthetic-0000", "defines"),
10930 ];
10931 for idx in 0..symbol_count {
10932 let from = format!("gsym-synthetic-{idx:04}");
10933 for offset in 1..=fanout.max(1).min(symbol_count) {
10934 let to_idx = (idx + offset) % symbol_count;
10935 if to_idx != idx {
10936 projection_edges.push(SubstrateGraphEdge::new(
10937 from.clone(),
10938 format!("gsym-synthetic-{to_idx:04}"),
10939 "calls",
10940 ));
10941 }
10942 }
10943 }
10944
10945 GraphProjection {
10946 nodes: projection_nodes,
10947 edges: projection_edges
10948 .into_iter()
10949 .map(|edge| {
10950 edge.with_property("dataset", "synthetic")
10951 .with_provenance(source.clone())
10952 })
10953 .collect(),
10954 }
10955}
10956
10957pub(crate) fn graph_db_backend_eval_promotion(
10958 datasets: &[GraphDbBackendEvalDataset],
10959 candidates: &[GraphDbExperimentalBackend],
10960) -> Vec<GraphDbBackendPromotionDecision> {
10961 let mut decisions = Vec::new();
10962 for candidate in candidates {
10963 let mut reasons = Vec::new();
10964 let mut faster_everywhere = true;
10965 let mut parity_everywhere = true;
10966 for dataset in datasets {
10967 let Some(sqlite_report) = dataset
10968 .backends
10969 .iter()
10970 .find(|backend| backend.backend == "sqlite")
10971 else {
10972 parity_everywhere = false;
10973 faster_everywhere = false;
10974 reasons.push(format!(
10975 "{} dataset is missing SQLite baseline",
10976 dataset.name
10977 ));
10978 continue;
10979 };
10980 let sqlite_total = sqlite_report.total_micros;
10981 let Some(candidate_report) = dataset
10982 .backends
10983 .iter()
10984 .find(|backend| backend.backend == candidate.name())
10985 else {
10986 parity_everywhere = false;
10987 reasons.push(format!("{} dataset did not run", dataset.name));
10988 continue;
10989 };
10990 if !candidate_report.parity.matches_sqlite {
10991 parity_everywhere = false;
10992 reasons.push(format!("{} parity differed from SQLite", dataset.name));
10993 }
10994 if candidate_report.total_micros >= sqlite_total {
10995 faster_everywhere = false;
10996 reasons.push(format!(
10997 "{} total {}us did not beat SQLite {}us",
10998 dataset.name, candidate_report.total_micros, sqlite_total
10999 ));
11000 }
11001 let sqlite_operations = sqlite_report
11002 .operations
11003 .iter()
11004 .map(|operation| (operation.name.as_str(), operation.duration_micros))
11005 .collect::<BTreeMap<_, _>>();
11006 for operation in &candidate_report.operations {
11007 if let Some(sqlite_duration) = sqlite_operations.get(operation.name.as_str())
11008 && operation.duration_micros >= *sqlite_duration
11009 {
11010 faster_everywhere = false;
11011 reasons.push(format!(
11012 "{} {} operation {}us did not beat SQLite {}us",
11013 dataset.name, operation.name, operation.duration_micros, sqlite_duration
11014 ));
11015 }
11016 }
11017 if candidate_report
11018 .operations
11019 .iter()
11020 .any(|operation| operation.status != "ok")
11021 {
11022 parity_everywhere = false;
11023 reasons.push(format!("{} has failed benchmark operations", dataset.name));
11024 }
11025 }
11026 let decision = if let Some(reason) = candidate.prototype_hold_reason() {
11027 reasons.push(reason.to_string());
11028 reasons.push(
11029 "current bounded prototype timings are benchmark evidence, not a backend switch approval"
11030 .to_string(),
11031 );
11032 "hold"
11033 } else if parity_everywhere && faster_everywhere {
11034 reasons.push(
11035 "prototype gate passed; production promotion still requires the real engine adapter to preserve SQLite's bundled install and multi-process lock behavior"
11036 .to_string(),
11037 );
11038 "eligible"
11039 } else {
11040 reasons.push(
11041 "production promotion requires SQLite parity plus lower total time for every measured operation on every dataset without worse lock behavior or install portability"
11042 .to_string(),
11043 );
11044 "hold"
11045 };
11046 decisions.push(GraphDbBackendPromotionDecision {
11047 backend: candidate.name().to_string(),
11048 decision: decision.to_string(),
11049 reasons: dedupe_preserve_order(reasons),
11050 gate: candidate.promotion_gate(),
11051 });
11052 }
11053 decisions
11054}
11055
11056pub(crate) fn graph_db_backend_eval_metrics(
11057 datasets: &[GraphDbBackendEvalDataset],
11058) -> BTreeMap<String, f64> {
11059 let mut metrics = BTreeMap::new();
11060 for dataset in datasets {
11061 let graph_rows = graph_db_backend_eval_graph_rows(dataset);
11062 metrics.insert(format!("{}.nodes", dataset.name), dataset.nodes as f64);
11063 metrics.insert(format!("{}.edges", dataset.name), dataset.edges as f64);
11064 metrics.insert(format!("{}.graph_rows", dataset.name), graph_rows as f64);
11065 for backend in &dataset.backends {
11066 let prefix = format!("{}.{}", dataset.name, backend.backend.replace('-', "_"));
11067 metrics.insert(
11068 format!("{prefix}.total_duration_micros"),
11069 backend.total_micros as f64,
11070 );
11071 append_graph_db_backend_eval_normalized_duration_metric(
11072 &mut metrics,
11073 &format!("{prefix}.total_duration_micros_per_1k_graph_rows"),
11074 backend.total_micros,
11075 graph_rows,
11076 );
11077 for operation in &backend.operations {
11078 metrics.insert(
11079 format!("{prefix}.{}.duration_micros", operation.name),
11080 operation.duration_micros as f64,
11081 );
11082 append_graph_db_backend_eval_normalized_duration_metric(
11083 &mut metrics,
11084 &format!(
11085 "{prefix}.{}.duration_micros_per_1k_graph_rows",
11086 operation.name
11087 ),
11088 operation.duration_micros,
11089 graph_rows,
11090 );
11091 if let Some(rows) = operation.rows {
11092 metrics.insert(format!("{prefix}.{}.rows", operation.name), rows as f64);
11093 }
11094 }
11095 }
11096 }
11097 metrics
11098}
11099
11100pub(crate) fn graph_db_backend_eval_graph_rows(dataset: &GraphDbBackendEvalDataset) -> usize {
11101 dataset.nodes + dataset.edges
11102}
11103
11104pub(crate) fn append_graph_db_backend_eval_normalized_duration_metric(
11105 metrics: &mut BTreeMap<String, f64>,
11106 key: &str,
11107 duration_micros: u128,
11108 graph_rows: usize,
11109) {
11110 if graph_rows == 0 {
11111 return;
11112 }
11113 metrics.insert(
11114 key.to_string(),
11115 duration_micros as f64 / graph_rows as f64 * GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT,
11116 );
11117}
11118
11119pub(crate) fn append_graph_db_backend_eval_phase_metrics(
11120 metrics: &mut BTreeMap<String, f64>,
11121 dataset: &str,
11122 graph_rows: usize,
11123 phases: &[GraphDbBackendEvalPhaseTiming],
11124) {
11125 for phase in phases {
11126 metrics.insert(
11127 format!("{dataset}.refresh_phase.{}.duration_micros", phase.name),
11128 phase.duration_micros as f64,
11129 );
11130 append_graph_db_backend_eval_normalized_duration_metric(
11131 metrics,
11132 &format!(
11133 "{dataset}.refresh_phase.{}.duration_micros_per_1k_graph_rows",
11134 phase.name
11135 ),
11136 phase.duration_micros,
11137 graph_rows,
11138 );
11139 }
11140}
11141
11142fn graph_db_backend_eval_base_command(
11143 root: &Path,
11144 scope: Option<&str>,
11145 full_projection: bool,
11146) -> String {
11147 let full_projection_arg = if full_projection {
11148 " --full-projection"
11149 } else {
11150 ""
11151 };
11152 format!(
11153 "tsift graph-db --path {}{} --json backend-eval{}",
11154 shell_quote(root.to_string_lossy().as_ref()),
11155 graph_db_scope_arg(scope),
11156 full_projection_arg
11157 )
11158}
11159
11160pub(crate) fn graph_db_backend_eval_metric_digest_command(
11161 root: &Path,
11162 scope: Option<&str>,
11163 full_projection: bool,
11164) -> String {
11165 format!(
11166 "{} | tsift metric-digest --baseline fixtures/graph-db-performance-history.json",
11167 graph_db_backend_eval_base_command(root, scope, full_projection)
11168 )
11169}
11170
11171fn graph_db_backend_eval_repeated_sample_command(
11172 root: &Path,
11173 scope: Option<&str>,
11174 full_projection: bool,
11175) -> String {
11176 format!(
11177 "for sample in 1 2 3; do {}; done | tsift metric-digest --baseline fixtures/graph-db-performance-history.json",
11178 graph_db_backend_eval_base_command(root, scope, full_projection)
11179 )
11180}
11181
11182fn graph_db_backend_eval_hop_cap_promotion_gate() -> GraphDbHopCapPromotionGate {
11183 let mut required_metrics = Vec::new();
11184 for workload in perf_gate::HOP_CAP_REQUIRED_WORKLOADS {
11185 required_metrics.push(format!("{workload}.sqlite.path_max_hops.duration_micros"));
11186 required_metrics.push(format!("{workload}.sqlite.path_max_hops.rows"));
11187 for hops in perf_gate::HOP_CAP_CANDIDATE_TIERS {
11188 required_metrics.push(format!(
11189 "{workload}.sqlite.path_max_hops_{hops}.duration_micros"
11190 ));
11191 required_metrics.push(format!("{workload}.sqlite.path_max_hops_{hops}.rows"));
11192 }
11193 }
11194 GraphDbHopCapPromotionGate {
11195 status: "hold_64_default_until_gate_passes".to_string(),
11196 current_default_hops: perf_gate::HOP_CAP_CURRENT_DEFAULT,
11197 candidate_hop_tiers: perf_gate::HOP_CAP_CANDIDATE_TIERS.to_vec(),
11198 required_backend: perf_gate::BASELINE_BACKEND.to_string(),
11199 required_workloads: perf_gate::HOP_CAP_REQUIRED_WORKLOADS
11200 .iter()
11201 .map(|workload| (*workload).to_string())
11202 .collect(),
11203 required_metrics,
11204 allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
11205 minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
11206 decision_rule:
11207 "keep 64 as the user-facing default until each candidate tier has repeated real, full_projection, and synthetic_deep_chain SQLite samples within the latency-regression budget and returning useful path rows; full_projection samples are binding only after a cold populate leg proves a cache-hit leg"
11208 .to_string(),
11209 }
11210}
11211
11212fn graph_db_backend_eval_backend_adapter_spike_gate() -> GraphDbBackendAdapterSpikeGate {
11213 let candidate_backends = [
11214 GraphDbExperimentalBackend::Falkordb,
11215 GraphDbExperimentalBackend::Kuzu,
11216 ]
11217 .into_iter()
11218 .map(|backend| GraphDbBackendAdapterSpikeCandidate {
11219 backend: backend.name().to_string(),
11220 adapter_label: backend.adapter_label().to_string(),
11221 projection_load: backend.projection_load().to_string(),
11222 lock_behavior: backend.lock_behavior().to_string(),
11223 install_portability: backend.install_portability().to_string(),
11224 })
11225 .collect();
11226
11227 GraphDbBackendAdapterSpikeGate {
11228 status: "hold_real_optional_adapter_required".to_string(),
11229 candidate_backends,
11230 required_workloads: perf_gate::GATE_WORKLOAD_PREFIXES
11231 .iter()
11232 .map(|workload| (*workload).to_string())
11233 .collect(),
11234 required_checks: vec![
11235 "real_optional_adapter_behind_graphstore_without_default_build_dependency".to_string(),
11236 "projection_load_writes_provider_neutral_rows_without_sqlite_row_replay".to_string(),
11237 "freshness_and_full_parity_match_sqlite_on_every_graphstore_operation".to_string(),
11238 "lock_semantics_match_or_beat_sqlite_for_writer_and_read_only_workflows".to_string(),
11239 "install_portability_preserves_cargo_build_install_without_external_service_or_native_toolchain"
11240 .to_string(),
11241 "full_projection_cache_hit_sample_before_backend_or_hop_cap_changes".to_string(),
11242 "beats_sqlite_on_every_required_workload_and_metric_in_backend_eval".to_string(),
11243 ],
11244 decision_rule:
11245 "do not promote a read-only prototype; FalkorDB or Kuzu can only advance after a real optional adapter proves projection writes/load, lock semantics, install portability, full parity, and faster-than-SQLite results across every required workload"
11246 .to_string(),
11247 evidence_plan: "plans/gback-evidence.md".to_string(),
11248 }
11249}
11250
11251pub(crate) fn graph_db_backend_eval_performance_gate(
11252 root: &Path,
11253 scope: Option<&str>,
11254 full_projection: bool,
11255) -> GraphDbBackendEvalPerformanceGate {
11256 let mut required_metrics = vec![
11257 "real.sqlite.refresh.duration_micros".to_string(),
11258 "real.sqlite.refresh.duration_micros_per_1k_graph_rows".to_string(),
11259 "real.sqlite.edge_lookup.duration_micros_per_1k_graph_rows".to_string(),
11260 "real.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows".to_string(),
11261 "real.sqlite.incident_edges.duration_micros_per_1k_graph_rows".to_string(),
11262 "real.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11263 "real.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows".to_string(),
11264 "real.sqlite.evidence.duration_micros_per_1k_graph_rows".to_string(),
11265 "real.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11266 "real.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows".to_string(),
11267 "real.refresh_phase.sqlite_delta_write.duration_micros".to_string(),
11268 "real.refresh_phase.sqlite_property_row_staging.duration_micros".to_string(),
11269 "real.refresh_phase.sqlite_edge_property_row_staging.duration_micros".to_string(),
11270 "real.sqlite.conflict_matrix.duration_micros".to_string(),
11271 "real.sqlite.dispatch_trace.duration_micros".to_string(),
11272 "real.sqlite.path_max_hops.duration_micros".to_string(),
11273 "real.sqlite.path_max_hops_128.duration_micros".to_string(),
11274 "real.sqlite.path_max_hops_256.duration_micros".to_string(),
11275 "real.sqlite.path_max_hops_512.duration_micros".to_string(),
11276 "real.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows".to_string(),
11277 "real.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows".to_string(),
11278 "real.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows".to_string(),
11279 "synthetic_high_degree.sqlite.total_duration_micros".to_string(),
11280 "synthetic_high_degree.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11281 "synthetic_high_degree.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11282 "synthetic_high_degree.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows"
11283 .to_string(),
11284 "synthetic_high_degree.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows"
11285 .to_string(),
11286 "synthetic_deep_chain.sqlite.incident_edges.duration_micros_per_1k_graph_rows".to_string(),
11287 "synthetic_deep_chain.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11288 "synthetic_deep_chain.sqlite.path_max_hops.duration_micros".to_string(),
11289 "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros".to_string(),
11290 "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros".to_string(),
11291 "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros".to_string(),
11292 "synthetic_deep_chain.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows"
11293 .to_string(),
11294 "synthetic_deep_chain.sqlite.path_max_hops.duration_micros_per_1k_graph_rows".to_string(),
11295 "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows"
11296 .to_string(),
11297 "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows"
11298 .to_string(),
11299 "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows"
11300 .to_string(),
11301 ];
11302 if full_projection {
11303 required_metrics.extend([
11304 "full_projection.cache.hit".to_string(),
11305 "full_projection.cache.disk_bytes".to_string(),
11306 "full_projection.cache.compression_ratio".to_string(),
11307 "full_projection.refresh_phase.cache_lookup.duration_micros".to_string(),
11308 "full_projection.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11309 "full_projection.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows"
11310 .to_string(),
11311 "full_projection.refresh_phase.projection_rows.duration_micros_per_1k_graph_rows"
11312 .to_string(),
11313 "full_projection.sqlite.sqlite_delta_write.duration_micros".to_string(),
11314 "full_projection.sqlite.sqlite_edge_staging.duration_micros".to_string(),
11315 "full_projection.sqlite.post_write_reads.duration_micros".to_string(),
11316 "full_projection.sqlite.neighborhood.duration_micros".to_string(),
11317 "full_projection.sqlite.evidence_target_resolution.duration_micros".to_string(),
11318 "full_projection.sqlite.evidence.duration_micros".to_string(),
11319 "full_projection.sqlite.path_max_hops.duration_micros".to_string(),
11320 "full_projection.sqlite.path_max_hops_128.duration_micros".to_string(),
11321 "full_projection.sqlite.path_max_hops_256.duration_micros".to_string(),
11322 "full_projection.sqlite.path_max_hops_512.duration_micros".to_string(),
11323 "full_projection.sqlite.conflict_matrix.duration_micros".to_string(),
11324 "full_projection.sqlite.dispatch_trace.duration_micros".to_string(),
11325 ]);
11326 }
11327 GraphDbBackendEvalPerformanceGate {
11328 baseline_fixture: "fixtures/graph-db-performance-history.json".to_string(),
11329 ci_profile: "synthetic_high_degree + synthetic_deep_chain metrics are CI-safe and bounded"
11330 .to_string(),
11331 opt_in_real_profile:
11332 "pass --full-projection to add the full-project dataset when checking for large projection regressions"
11333 .to_string(),
11334 full_projection_cache_hit_gate: if full_projection {
11335 "binding full_projection performance evidence requires a cold populate leg followed by cache-leg samples with full_projection.cache.hit=1; cache-miss samples are diagnostics, not backend or hop-cap promotion proof"
11336 .to_string()
11337 } else {
11338 "not evaluated until --full-projection is enabled".to_string()
11339 },
11340 allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
11341 minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
11342 normalized_metric_unit: "duration_micros_per_1k_graph_rows".to_string(),
11343 required_metrics,
11344 digest_command: graph_db_backend_eval_metric_digest_command(root, scope, full_projection),
11345 repeated_sample_command: graph_db_backend_eval_repeated_sample_command(
11346 root,
11347 scope,
11348 full_projection,
11349 ),
11350 hop_cap_promotion: graph_db_backend_eval_hop_cap_promotion_gate(),
11351 backend_adapter_spike: graph_db_backend_eval_backend_adapter_spike_gate(),
11352 }
11353}
11354
/// Borrowed inputs for a graph-db backend evaluation run.
///
/// NOTE(review): the consumer of this struct is outside this part of the file; the field notes
/// below are inferred from the field names and types and should be confirmed against it.
pub(crate) struct GraphDbBackendEvalOptions<'a> {
    /// Filesystem path the evaluation is run against (presumably the project path; TODO confirm).
    path: &'a Path,
    /// Optional scope name; `None` presumably means the root scope (TODO confirm).
    scope: Option<&'a str>,
    /// Candidate backends as strings (presumably unparsed backend names; TODO confirm).
    candidates: &'a [String],
    /// Evaluation targets as strings.
    targets: &'a [String],
    /// Whether the full-projection dataset is enabled.
    full_projection: bool,
}
11362
11363#[allow(clippy::too_many_arguments)]
11364pub(crate) fn graph_db_backend_eval_dataset(
11365 name: &str,
11366 root: &Path,
11367 path: &Path,
11368 scope: Option<&str>,
11369 targets: &[String],
11370 depth: usize,
11371 limit: usize,
11372 impact_limit: usize,
11373 candidates: &[GraphDbExperimentalBackend],
11374 sqlite_store: &SqliteGraphStore,
11375 sqlite_freshness: GraphDbFreshnessReport,
11376 sqlite_refresh: (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature),
11377 sqlite_rows: ConvexProjectionRows,
11378 extra_warnings: Vec<String>,
11379 prepared: &ConflictMatrixPreparedInputs,
11380) -> Result<GraphDbBackendEvalDataset> {
11381 let (nodes, edges) = sqlite_store.graph_counts()?;
11382 let (sqlite_operation, sqlite_signature) = sqlite_refresh;
11383 let (sqlite_report, sqlite_signatures) = graph_db_backend_eval_report_for_store(
11384 "sqlite",
11385 "SQLite GraphStore correctness baseline",
11386 false,
11387 root,
11388 path,
11389 scope,
11390 targets,
11391 depth,
11392 limit,
11393 impact_limit,
11394 sqlite_store,
11395 sqlite_freshness,
11396 sqlite_operation,
11397 Some(sqlite_signature),
11398 None,
11399 extra_warnings.clone(),
11400 prepared,
11401 "SQLite refresh writes provider-neutral projection rows into graph.db transactionally",
11402 "SQLite WAL correctness store; refresh uses one transactional writer and read-only queries use snapshot recovery",
11403 "bundled rusqlite baseline; no external service or runtime required",
11404 );
11405
11406 let mut backends = vec![sqlite_report];
11407 for candidate in candidates {
11408 let started = Instant::now();
11409 let store = ExperimentalReadOnlyGraphStore::from_rows(*candidate, &sqlite_rows)?;
11410 let (candidate_nodes, candidate_edges) = store.graph_counts()?;
11411 let rows = candidate_nodes + candidate_edges;
11412 let refresh = graph_db_backend_eval_refresh_operation(
11413 started.elapsed().as_micros(),
11414 rows,
11415 serde_json::json!({
11416 "nodes": candidate_nodes,
11417 "edges": candidate_edges,
11418 }),
11419 );
11420 let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
11421 let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
11422 candidate.name(),
11423 candidate.adapter_label(),
11424 true,
11425 root,
11426 path,
11427 scope,
11428 targets,
11429 depth,
11430 limit,
11431 impact_limit,
11432 &store,
11433 freshness,
11434 refresh.0,
11435 Some(refresh.1),
11436 Some(&sqlite_signatures),
11437 extra_warnings.clone(),
11438 prepared,
11439 candidate.projection_load(),
11440 candidate.lock_behavior(),
11441 candidate.install_portability(),
11442 );
11443 backends.push(candidate_report);
11444 }
11445
11446 Ok(GraphDbBackendEvalDataset {
11447 name: name.to_string(),
11448 target_count: targets.len(),
11449 nodes,
11450 edges,
11451 backends,
11452 })
11453}
11454
11455pub(crate) fn print_graph_db_backend_eval_human(report: &GraphDbBackendEvalReport) {
11456 println!(
11457 "graph-db backend-eval baseline:{} candidates:{}",
11458 report.baseline_backend,
11459 report.candidates.join(", ")
11460 );
11461 for phase in &report.phase_timings {
11462 println!(
11463 "phase:{} {}us {}",
11464 phase.name, phase.duration_micros, phase.detail
11465 );
11466 }
11467 for dataset in &report.datasets {
11468 println!(
11469 "dataset:{} targets:{} rows:{}",
11470 dataset.name,
11471 dataset.target_count,
11472 dataset.nodes + dataset.edges
11473 );
11474 for backend in &dataset.backends {
11475 println!(
11476 " backend:{} total:{}us parity:{}",
11477 backend.backend, backend.total_micros, backend.parity.matches_sqlite
11478 );
11479 println!(" projection-load: {}", backend.projection_load);
11480 println!(" lock-behavior: {}", backend.lock_behavior);
11481 println!(" install-portability: {}", backend.install_portability);
11482 for operation in &backend.operations {
11483 println!(
11484 " {} {} {}us",
11485 operation.name, operation.status, operation.duration_micros
11486 );
11487 }
11488 for diagnostic in &backend.parity.diagnostics {
11489 println!(" parity: {diagnostic}");
11490 }
11491 }
11492 }
11493 for decision in &report.promotion {
11494 println!("promotion {}: {}", decision.backend, decision.decision);
11495 println!(" gate: {}", decision.gate.status);
11496 for reason in &decision.reasons {
11497 println!(" reason: {reason}");
11498 }
11499 for check in &decision.gate.required_checks {
11500 println!(" check: {check}");
11501 }
11502 }
11503 println!("metric-digest: {}", report.metric_digest_command);
11504 println!(
11505 "repeat-samples: {}",
11506 report.performance_gate.repeated_sample_command
11507 );
11508}
11509
11510fn traversal_expand_command(root: &Path, handle: &str) -> String {
11511 format!(
11512 "tsift traverse {} --path {} --depth 1 --limit 50",
11513 shell_quote(handle),
11514 shell_quote(root.to_string_lossy().as_ref())
11515 )
11516}
11517
11518fn traversal_file_node(root: &Path, file: &str) -> TraversalNode {
11519 let display = relativize(file, root);
11520 let handle = stable_handle("gfil", &format!("file:{display}"));
11521 TraversalNode {
11522 handle: handle.clone(),
11523 kind: "file".to_string(),
11524 label: display.clone(),
11525 ref_id: Some(display.clone()),
11526 path: Some(display),
11527 line: None,
11528 detail: None,
11529 properties: BTreeMap::new(),
11530 expand: traversal_expand_command(root, &handle),
11531 }
11532}
11533
11534fn traversal_raw_source_file_node(root: &Path, file: &str) -> TraversalNode {
11535 let mut node = traversal_file_node(root, file);
11536 if let Some(path) = node.path.clone() {
11537 node.detail = Some("raw source fallback; graph evidence unavailable".to_string());
11538 node.expand = source_read_command(root, &path, 1, 80);
11539 }
11540 node
11541}
11542
11543fn traversal_symbol_node(root: &Path, symbol: &index::StoredSymbol) -> TraversalNode {
11544 let file = relativize(&symbol.file, root);
11545 let key = format!("symbol:{file}:{}:{}", symbol.line, symbol.name);
11546 let handle = stable_handle("gsym", &key);
11547 TraversalNode {
11548 handle: handle.clone(),
11549 kind: "symbol".to_string(),
11550 label: symbol.name.clone(),
11551 ref_id: Some(symbol.name.clone()),
11552 path: Some(file),
11553 line: Some(symbol.line),
11554 detail: Some(format!("{} {}", symbol.language, symbol.kind)),
11555 properties: BTreeMap::new(),
11556 expand: traversal_expand_command(root, &handle),
11557 }
11558}
11559
11560fn traversal_unresolved_symbol_node(root: &Path, name: &str) -> TraversalNode {
11561 let handle = stable_handle("gsym", &format!("symbol:{name}"));
11562 TraversalNode {
11563 handle: handle.clone(),
11564 kind: "symbol".to_string(),
11565 label: name.to_string(),
11566 ref_id: Some(name.to_string()),
11567 path: None,
11568 line: None,
11569 detail: Some("unresolved call target".to_string()),
11570 properties: BTreeMap::new(),
11571 expand: traversal_expand_command(root, &handle),
11572 }
11573}
11574
11575fn traversal_route_node(root: &Path, route: &index::StoredRoute) -> TraversalNode {
11576 let file = relativize(&route.file, root);
11577 let method = route.method.as_deref().unwrap_or("any");
11578 let key = format!(
11579 "route:{file}:{}:{}:{}",
11580 route.line, method, route.route_path
11581 );
11582 let handle = stable_handle("grte", &key);
11583 TraversalNode {
11584 handle: handle.clone(),
11585 kind: "route".to_string(),
11586 label: format!("{} {}", method.to_uppercase(), route.route_path),
11587 ref_id: Some(route.route_path.clone()),
11588 path: Some(file),
11589 line: Some(route.line),
11590 detail: Some(format!(
11591 "{} route handled by {}",
11592 route.framework, route.handler_name
11593 )),
11594 properties: BTreeMap::new(),
11595 expand: traversal_expand_command(root, &handle),
11596 }
11597}
11598
11599fn traversal_session_node(
11600 root: &Path,
11601 markdown_path: &Path,
11602 session_id: Option<&str>,
11603) -> TraversalNode {
11604 let display = relativize_pathbuf(markdown_path, root)
11605 .to_string_lossy()
11606 .replace('\\', "/");
11607 let handle = stable_handle("gses", &format!("session:{display}"));
11608 TraversalNode {
11609 handle: handle.clone(),
11610 kind: "session".to_string(),
11611 label: session_id.unwrap_or(&display).to_string(),
11612 ref_id: session_id.map(str::to_string),
11613 path: Some(display),
11614 line: None,
11615 detail: Some("agent-doc session artifact".to_string()),
11616 properties: BTreeMap::new(),
11617 expand: traversal_expand_command(root, &handle),
11618 }
11619}
11620
11621fn traversal_backlog_node(
11622 root: &Path,
11623 markdown_path: &Path,
11624 id: &str,
11625 text: &str,
11626 line: i64,
11627) -> TraversalNode {
11628 let display = relativize_pathbuf(markdown_path, root)
11629 .to_string_lossy()
11630 .replace('\\', "/");
11631 let handle = stable_handle("gbak", &format!("backlog:{display}:#{id}"));
11632 TraversalNode {
11633 handle: handle.clone(),
11634 kind: "backlog".to_string(),
11635 label: format!("#{id}"),
11636 ref_id: Some(id.to_string()),
11637 path: Some(display),
11638 line: Some(line),
11639 detail: Some(text.to_string()),
11640 properties: BTreeMap::new(),
11641 expand: traversal_expand_command(root, &handle),
11642 }
11643}
11644
11645fn traversal_job_packet_node(
11646 root: &Path,
11647 markdown_path: &Path,
11648 label: &str,
11649 ref_id: Option<&str>,
11650 detail: &str,
11651 line: i64,
11652) -> TraversalNode {
11653 let display = relativize_pathbuf(markdown_path, root)
11654 .to_string_lossy()
11655 .replace('\\', "/");
11656 let handle = stable_handle("gjob", &format!("job:{display}:{line}:{label}"));
11657 TraversalNode {
11658 handle: handle.clone(),
11659 kind: "job_packet".to_string(),
11660 label: label.to_string(),
11661 ref_id: ref_id.map(str::to_string),
11662 path: Some(display),
11663 line: Some(line),
11664 detail: Some(detail.to_string()),
11665 properties: BTreeMap::new(),
11666 expand: traversal_expand_command(root, &handle),
11667 }
11668}
11669
/// One worker result extracted from a markdown line by `parse_worker_result_line`; a line that
/// references several ids yields one value per id.
#[derive(Clone, Debug)]
struct ParsedWorkerResult {
    /// Target reference this result is about.
    id: String,
    /// Result status; the parser in this file produces `"completed"` or `"blocked"`.
    status: String,
    /// Indexed file paths mentioned on the line.
    touched_files: Vec<String>,
    /// Backtick code spans from the line whose text contains "test" (case-insensitive).
    tests: Vec<String>,
    /// References found on the line that are not themselves result ids.
    follow_up_ids: Vec<String>,
}
11678
11679fn traversal_worker_result_node(
11680 root: &Path,
11681 markdown_path: &Path,
11682 parsed: &ParsedWorkerResult,
11683 line_text: &str,
11684 line: i64,
11685) -> TraversalNode {
11686 let display = relativize_pathbuf(markdown_path, root)
11687 .to_string_lossy()
11688 .replace('\\', "/");
11689 let handle = stable_handle(
11690 "wres",
11691 &format!(
11692 "worker-result:{display}:{}:{}:{}",
11693 parsed.id, parsed.status, line
11694 ),
11695 );
11696 let mut properties = BTreeMap::new();
11697 properties.insert("status".to_string(), parsed.status.clone());
11698 if !parsed.touched_files.is_empty() {
11699 properties.insert("touched_files".to_string(), parsed.touched_files.join(","));
11700 }
11701 if !parsed.tests.is_empty() {
11702 properties.insert("expected_tests".to_string(), parsed.tests.join(" && "));
11703 }
11704 if !parsed.follow_up_ids.is_empty() {
11705 properties.insert("follow_up_ids".to_string(), parsed.follow_up_ids.join(","));
11706 }
11707 TraversalNode {
11708 handle: handle.clone(),
11709 kind: "worker_result".to_string(),
11710 label: format!("{} #{}", parsed.status, parsed.id),
11711 ref_id: Some(parsed.id.clone()),
11712 path: Some(display),
11713 line: Some(line),
11714 detail: Some(line_text.trim().to_string()),
11715 properties,
11716 expand: traversal_expand_command(root, &handle),
11717 }
11718}
11719
/// Splits `input` into the set of lower-cased ASCII-alphanumeric tokens of length three or more.
///
/// Every character that is not an ASCII letter or digit acts as a separator, `_` and `-`
/// included, so `parse_backlog-line` yields `parse`, `backlog`, and `line`.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for word in input.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        // Short fragments (and the empty pieces between adjacent separators) are noise.
        if word.len() >= 3 {
            tokens.insert(word.to_ascii_lowercase());
        }
    }
    tokens
}
11729
11730fn traversal_node_tokens(node: &TraversalNode) -> BTreeSet<String> {
11731 let mut tokens = traversal_tokens(&node.label);
11732 if let Some(ref_id) = &node.ref_id {
11733 tokens.extend(traversal_tokens(ref_id));
11734 }
11735 if let Some(path) = &node.path {
11736 tokens.extend(traversal_tokens(path));
11737 }
11738 if let Some(detail) = &node.detail {
11739 tokens.extend(traversal_tokens(detail));
11740 }
11741 tokens
11742}
11743
/// Returns the value of the first `agent_doc_session:` line that carries a non-empty value.
///
/// Lines are trimmed before the prefix check, and the value is trimmed as well. A matching line
/// with an empty value is skipped, so a later line can still supply the id.
fn parse_agent_doc_session_id(content: &str) -> Option<String> {
    for line in content.lines() {
        let Some(rest) = line.trim().strip_prefix("agent_doc_session:") else {
            continue;
        };
        let value = rest.trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}
11754
/// Parses a markdown checklist line carrying a `[#id]` marker into `(id, text)`.
///
/// The trimmed line must start with `- [`. The id is the trimmed text between the first `[#`
/// and the next `]`; the text is everything after that `]`, trimmed. Returns `None` when the
/// marker is missing, unterminated, or its id is blank.
fn parse_backlog_line(line: &str) -> Option<(String, String)> {
    let item = line.trim();
    if !item.starts_with("- [") {
        return None;
    }
    let (_, after_marker) = item.split_once("[#")?;
    let (raw_id, raw_text) = after_marker.split_once(']')?;
    let id = raw_id.trim();
    if id.is_empty() {
        return None;
    }
    Some((id.to_string(), raw_text.trim().to_string()))
}
11771
/// Extracts the argument of a queue line that starts with `dispatch ` or `preset `.
///
/// The line is trimmed first; the remainder after the prefix is trimmed too and must be
/// non-empty.
fn parse_queue_dispatch_line(line: &str) -> Option<String> {
    const PREFIXES: [&str; 2] = ["dispatch ", "preset "];
    let trimmed = line.trim();
    for prefix in PREFIXES.iter() {
        if let Some(rest) = trimmed.strip_prefix(*prefix) {
            let value = rest.trim();
            if !value.is_empty() {
                return Some(value.to_string());
            }
        }
    }
    None
}
11782
/// Extracts the id from a queue line of the form `- do [#id] ...`.
///
/// Returns `None` when the trimmed line lacks the `- do [#` prefix, the closing `]` is missing,
/// or the id is blank after trimming.
fn parse_queue_do_line(line: &str) -> Option<String> {
    let rest = line.trim().strip_prefix("- do [#")?;
    let (raw_id, _) = rest.split_once(']')?;
    let id = raw_id.trim();
    if id.is_empty() {
        None
    } else {
        Some(id.to_string())
    }
}
11790
/// Returns the trimmed, non-empty contents of the backtick-delimited spans in `input`.
///
/// The input is split on every backtick and each odd-numbered piece counts as a span, so the
/// text after a final unmatched backtick is reported as a span too.
fn markdown_code_spans(input: &str) -> Vec<String> {
    input
        .split('`')
        // Pieces 1, 3, 5, ... sit between an opening backtick and the next backtick.
        .skip(1)
        .step_by(2)
        .map(str::trim)
        .filter(|span| !span.is_empty())
        .map(str::to_string)
        .collect()
}
11800
/// Records `entry_index` under every token of `tokens` in the inverted `index`, creating the
/// posting list of a token on first use.
fn push_traversal_token_index(
    index: &mut HashMap<String, Vec<usize>>,
    tokens: &BTreeSet<String>,
    entry_index: usize,
) {
    tokens.iter().for_each(|token| {
        let postings = index.entry(token.to_owned()).or_default();
        postings.push(entry_index);
    });
}
11810
11811impl<'a> TraversalCodeLookup<'a> {
11812 fn new(
11813 symbols: &'a [TraversalSymbolIndexEntry],
11814 files: &'a [TraversalFileIndexEntry],
11815 routes: &'a [TraversalRouteIndexEntry],
11816 ) -> Self {
11817 let mut symbol_index = HashMap::new();
11818 for (idx, entry) in symbols.iter().enumerate() {
11819 push_traversal_token_index(&mut symbol_index, &entry.tokens, idx);
11820 }
11821 let mut file_index = HashMap::new();
11822 let mut file_path_index = HashMap::new();
11823 for (idx, entry) in files.iter().enumerate() {
11824 push_traversal_token_index(&mut file_index, &entry.tokens, idx);
11825 if let Some(path) = entry.node.path.as_ref() {
11826 file_path_index.insert(path.clone(), path.clone());
11827 }
11828 }
11829 let mut route_index = HashMap::new();
11830 for (idx, entry) in routes.iter().enumerate() {
11831 push_traversal_token_index(&mut route_index, &entry.tokens, idx);
11832 }
11833 Self {
11834 symbols,
11835 files,
11836 routes,
11837 symbol_index,
11838 file_index,
11839 route_index,
11840 file_path_index,
11841 }
11842 }
11843
11844 fn touched_files_for_line(&self, line: &str) -> Vec<String> {
11845 let mut touched_files = BTreeSet::new();
11846 for candidate in markdown_code_spans(line)
11847 .into_iter()
11848 .chain(line.split_whitespace().map(str::to_string))
11849 {
11850 for path in traversal_path_candidates(&candidate) {
11851 if let Some(file) = self.file_path_index.get(&path) {
11852 touched_files.insert(file.clone());
11853 }
11854 }
11855 }
11856 touched_files.into_iter().collect()
11857 }
11858}
11859
/// Expands a raw mention into the path strings worth looking up in the file index.
///
/// Surrounding quote, bracket, and sentence punctuation is trimmed first; a mention that is
/// empty afterwards yields no candidates. The trimmed mention is always the first candidate.
/// Trailing all-digit `:suffix` parts are then peeled off one at a time, at most twice, so both
/// `path:line` and `path:line:column` references also yield the bare `path`. As an all-digit
/// check passes for an empty suffix, a trailing `:` is peeled the same way.
fn traversal_path_candidates(candidate: &str) -> Vec<String> {
    // A reference carries at most a line and a column suffix.
    const MAX_NUMERIC_SUFFIXES: usize = 2;

    let trimmed = candidate.trim_matches(|ch: char| {
        matches!(
            ch,
            '`' | '"' | '\'' | ',' | ';' | '.' | '!' | '?' | '(' | ')' | '[' | ']' | '{' | '}'
        )
    });
    if trimmed.is_empty() {
        return Vec::new();
    }

    let mut candidates = vec![trimmed.to_string()];
    let mut remaining = trimmed;
    for _ in 0..MAX_NUMERIC_SUFFIXES {
        let Some((path, suffix)) = remaining.rsplit_once(':') else {
            break;
        };
        if path.is_empty() || !suffix.chars().all(|ch| ch.is_ascii_digit()) {
            break;
        }
        candidates.push(path.to_string());
        remaining = path;
    }
    candidates
}
11879
11880fn parse_worker_result_line(
11881 line: &str,
11882 lookup: &TraversalCodeLookup<'_>,
11883) -> Vec<ParsedWorkerResult> {
11884 if line.trim_start().starts_with("- [") {
11885 return Vec::new();
11886 }
11887 let lower = line.to_ascii_lowercase();
11888 let status =
11889 if lower.contains("completed") || lower.contains("code-complete") || lower.contains("done")
11890 {
11891 "completed"
11892 } else if lower.contains("blocked") || lower.contains("externally blocked") {
11893 "blocked"
11894 } else {
11895 return Vec::new();
11896 };
11897 let result_prefix_end = ["follow-up", "follow up", "next:"]
11898 .iter()
11899 .filter_map(|marker| lower.find(marker))
11900 .min()
11901 .unwrap_or(line.len());
11902 let ids = extract_conflict_target_refs(&line[..result_prefix_end]);
11903 if ids.is_empty() {
11904 return Vec::new();
11905 }
11906 let result_ids = ids.iter().cloned().collect::<BTreeSet<_>>();
11907 let all_ids = extract_conflict_target_refs(line);
11908
11909 let touched_files = lookup.touched_files_for_line(line);
11910 let tests = markdown_code_spans(line)
11911 .into_iter()
11912 .filter(|span| span.to_ascii_lowercase().contains("test"))
11913 .collect::<Vec<_>>();
11914
11915 ids.iter()
11916 .map(|id| ParsedWorkerResult {
11917 id: id.clone(),
11918 status: status.to_string(),
11919 touched_files: touched_files.clone(),
11920 tests: tests.clone(),
11921 follow_up_ids: all_ids
11922 .iter()
11923 .filter(|other| *other != id && !result_ids.contains(*other))
11924 .cloned()
11925 .collect(),
11926 })
11927 .collect()
11928}
11929
/// Resolves `path_hint` (joined onto `root` unless absolute) and returns it when it names an
/// existing regular file with the `md` extension.
fn hinted_markdown_file(root: &Path, path_hint: &Path) -> Option<PathBuf> {
    let hinted_path = if path_hint.is_absolute() {
        path_hint.to_path_buf()
    } else {
        root.join(path_hint)
    };
    let has_markdown_extension = hinted_path.extension().and_then(|ext| ext.to_str()) == Some("md");
    // The filesystem is consulted only once the extension has matched.
    (has_markdown_extension && hinted_path.is_file()).then_some(hinted_path)
}
11941
11942fn traversal_markdown_content_looks_like_session(content: &str) -> bool {
11943 parse_agent_doc_session_id(content).is_some()
11944 || content.contains("<!-- agent:exchange")
11945 || content.contains("<!-- agent:backlog")
11946 || content.contains("## Backlog")
11947}
11948
11949fn markdown_files_for_traversal(root: &Path, path_hint: &Path) -> Result<Vec<PathBuf>> {
11950 if let Some(hinted_path) = hinted_markdown_file(root, path_hint) {
11951 return Ok(vec![hinted_path]);
11952 }
11953 let mut files = Vec::new();
11954 let walker = ignore::WalkBuilder::new(root)
11955 .hidden(true)
11956 .git_ignore(true)
11957 .git_global(true)
11958 .git_exclude(true)
11959 .build();
11960 for result in walker {
11961 let entry =
11962 result.with_context(|| format!("walking markdown files under {}", root.display()))?;
11963 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
11964 continue;
11965 }
11966 if traversal_path_is_generated_artifact(root, root, entry.path()) {
11967 continue;
11968 }
11969 if entry.path().extension().and_then(|ext| ext.to_str()) == Some("md") {
11970 files.push(entry.path().to_path_buf());
11971 }
11972 }
11973 files.sort();
11974 Ok(files)
11975}
11976
/// Renders `path` for use inside a watermark: relative to `root` when it lies beneath it,
/// unchanged otherwise, and always with backslashes replaced by forward slashes.
fn traversal_watermark_path(root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
11983
11984fn push_traversal_metadata_watermark_part(
11985 root: &Path,
11986 path: &Path,
11987 label: &str,
11988 parts: &mut Vec<String>,
11989) {
11990 let display = traversal_watermark_path(root, path);
11991 match fs::metadata(path) {
11992 Ok(metadata) => {
11993 let (secs, nanos) = metadata
11994 .modified()
11995 .ok()
11996 .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
11997 .map(|duration| (duration.as_secs(), duration.subsec_nanos()))
11998 .unwrap_or((0, 0));
11999 parts.push(format!(
12000 "{label}:{display}:len={}:mtime={secs}.{nanos}",
12001 metadata.len()
12002 ));
12003 }
12004 Err(_) => parts.push(format!("{label}:{display}:missing")),
12005 }
12006}
12007
/// Borrowed, serializable view of the summary fields that are hashed into the summaries
/// watermark by `push_traversal_summaries_watermark_part`.
#[derive(Serialize)]
struct TraversalSummaryWatermarkRow<'a> {
    /// Name of the summarized symbol.
    symbol_name: &'a str,
    /// File path recorded on the summary.
    file_path: &'a str,
    /// Entities stored on the summary, if any.
    entities: &'a Option<Vec<summarize::Entity>>,
    /// Relationships stored on the summary, if any.
    relationships: &'a Option<Vec<summarize::Relationship>>,
    /// Concept labels stored on the summary, if any.
    concept_labels: &'a Option<Vec<String>>,
}
12016
12017fn push_traversal_summaries_watermark_part(root: &Path, parts: &mut Vec<String>) -> Result<()> {
12018 let summaries_db = root.join(".tsift/summaries.db");
12019 if !summaries_db.exists() {
12020 parts.push("summaries_db:absent".to_string());
12021 return Ok(());
12022 }
12023
12024 match summarize::SummaryDb::open_read_only_resilient(&summaries_db)
12025 .and_then(|summary_db| summary_db.all())
12026 {
12027 Ok(summaries) => {
12028 let rows = summaries
12029 .iter()
12030 .map(|summary| TraversalSummaryWatermarkRow {
12031 symbol_name: &summary.symbol_name,
12032 file_path: &summary.file_path,
12033 entities: &summary.entities,
12034 relationships: &summary.relationships,
12035 concept_labels: &summary.concept_labels,
12036 })
12037 .collect::<Vec<_>>();
12038 parts.push(format!(
12039 "summaries_db:rows={}:semantic_hash={}",
12040 rows.len(),
12041 content_hash(&rows)?
12042 ));
12043 }
12044 Err(_) => {
12045 push_traversal_metadata_watermark_part(
12046 root,
12047 &summaries_db,
12048 "summaries_db_unreadable",
12049 parts,
12050 );
12051 }
12052 }
12053 Ok(())
12054}
12055
/// Test-only shim that forwards `relative` to
/// `resolution::relative_path_is_generated_artifact` and returns its verdict.
#[cfg(test)]
fn traversal_relative_path_is_generated_artifact(relative: &str) -> bool {
    resolution::relative_path_is_generated_artifact(relative)
}
12060
/// Reports whether `path` is a generated artifact by forwarding `root`, `source_root`, and
/// `path` to `resolution::path_is_generated_artifact`.
fn traversal_path_is_generated_artifact(root: &Path, source_root: &Path, path: &Path) -> bool {
    resolution::path_is_generated_artifact(root, source_root, path)
}
12064
/// Reports whether the index snapshot entry `part` refers to a generated artifact by forwarding
/// to `resolution::index_snapshot_part_is_generated`.
fn traversal_index_snapshot_part_is_generated(root: &Path, source_root: &Path, part: &str) -> bool {
    resolution::index_snapshot_part_is_generated(root, source_root, part)
}
12068
12069pub(crate) fn traversal_source_watermark(
12070 root: &Path,
12071 path_hint: &Path,
12072 scope: Option<&str>,
12073 session_only: bool,
12074) -> Result<Option<String>> {
12075 let mut parts = vec![
12076 format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
12077 format!("scope:{}", scope.unwrap_or("root")),
12078 format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
12079 format!("session_only:{session_only}"),
12080 ];
12081
12082 if !session_only || hinted_markdown_file(root, path_hint).is_none() {
12083 let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
12084 Ok(targets) => targets,
12085 Err(_) => return Ok(None),
12086 };
12087 let Some(target) = targets.into_iter().next() else {
12088 return Ok(None);
12089 };
12090 let db = match index::IndexDb::open_read_only_resilient(&target.db_path) {
12091 Ok(db) => db,
12092 Err(_) => return Ok(None),
12093 };
12094 parts.push(format!("index_label:{}", target.label));
12095 parts.push(format!(
12096 "index_scope:{}",
12097 target.scope_name.as_deref().unwrap_or("root")
12098 ));
12099 parts.push(format!(
12100 "index_source_root:{}",
12101 traversal_watermark_path(root, &target.source_root)
12102 ));
12103 let mut snapshot_rows = 0usize;
12104 for part in db.source_snapshot_parts()? {
12105 if traversal_index_snapshot_part_is_generated(root, &target.source_root, &part) {
12106 continue;
12107 }
12108 snapshot_rows += 1;
12109 parts.push(format!("index_snapshot:{part}"));
12110 }
12111 parts.push(format!("index_snapshot_rows:{snapshot_rows}"));
12112 }
12113
12114 let markdown_files = markdown_files_for_traversal(root, path_hint)?;
12115 parts.push(format!("markdown_count:{}", markdown_files.len()));
12116 for markdown_path in markdown_files {
12117 push_traversal_metadata_watermark_part(root, &markdown_path, "markdown", &mut parts);
12118 }
12119
12120 push_traversal_summaries_watermark_part(root, &mut parts)?;
12121
12122 Ok(Some(content_hash(&parts)?))
12123}
12124
12125fn ranked_symbol_matches<'a>(
12126 query_tokens: &BTreeSet<String>,
12127 entries: &'a [TraversalSymbolIndexEntry],
12128 index: &HashMap<String, Vec<usize>>,
12129) -> Vec<(usize, &'a TraversalSymbolIndexEntry)> {
12130 let mut scores = BTreeMap::<usize, usize>::new();
12131 for token in query_tokens {
12132 if let Some(indices) = index.get(token) {
12133 for idx in indices {
12134 *scores.entry(*idx).or_default() += 1;
12135 }
12136 }
12137 }
12138 let mut matches = scores
12139 .into_iter()
12140 .map(|(idx, score)| (score, &entries[idx]))
12141 .collect::<Vec<_>>();
12142 matches.sort_by(|(left_score, left), (right_score, right)| {
12143 right_score
12144 .cmp(left_score)
12145 .then_with(|| left.node.label.cmp(&right.node.label))
12146 .then_with(|| left.handle.cmp(&right.handle))
12147 });
12148 matches
12149}
12150
12151fn ranked_file_matches<'a>(
12152 query_tokens: &BTreeSet<String>,
12153 entries: &'a [TraversalFileIndexEntry],
12154 index: &HashMap<String, Vec<usize>>,
12155) -> Vec<(usize, &'a TraversalFileIndexEntry)> {
12156 let mut scores = BTreeMap::<usize, usize>::new();
12157 for token in query_tokens {
12158 if let Some(indices) = index.get(token) {
12159 for idx in indices {
12160 *scores.entry(*idx).or_default() += 1;
12161 }
12162 }
12163 }
12164 let mut matches = scores
12165 .into_iter()
12166 .map(|(idx, score)| (score, &entries[idx]))
12167 .collect::<Vec<_>>();
12168 matches.sort_by(|(left_score, left), (right_score, right)| {
12169 right_score
12170 .cmp(left_score)
12171 .then_with(|| left.node.label.cmp(&right.node.label))
12172 .then_with(|| left.handle.cmp(&right.handle))
12173 });
12174 matches
12175}
12176
12177fn ranked_route_matches<'a>(
12178 query_tokens: &BTreeSet<String>,
12179 entries: &'a [TraversalRouteIndexEntry],
12180 index: &HashMap<String, Vec<usize>>,
12181) -> Vec<(usize, &'a TraversalRouteIndexEntry)> {
12182 let mut scores = BTreeMap::<usize, usize>::new();
12183 for token in query_tokens {
12184 if let Some(indices) = index.get(token) {
12185 for idx in indices {
12186 *scores.entry(*idx).or_default() += 1;
12187 }
12188 }
12189 }
12190 let mut matches = scores
12191 .into_iter()
12192 .map(|(idx, score)| (score, &entries[idx]))
12193 .collect::<Vec<_>>();
12194 matches.sort_by(|(left_score, left), (right_score, right)| {
12195 right_score
12196 .cmp(left_score)
12197 .then_with(|| left.node.label.cmp(&right.node.label))
12198 .then_with(|| left.handle.cmp(&right.handle))
12199 });
12200 matches
12201}
12202
12203fn link_backlog_to_code_nodes(
12204 graph: &mut TraversalGraphBuild,
12205 backlog: &TraversalNode,
12206 text: &str,
12207 lookup: &TraversalCodeLookup<'_>,
12208 limit: usize,
12209) {
12210 let mut query_tokens = traversal_tokens(text);
12211 if let Some(ref_id) = &backlog.ref_id {
12212 query_tokens.extend(traversal_tokens(ref_id));
12213 }
12214 if query_tokens.is_empty() {
12215 return;
12216 }
12217
12218 for (score, entry) in ranked_symbol_matches(&query_tokens, lookup.symbols, &lookup.symbol_index)
12219 .into_iter()
12220 .take(limit)
12221 {
12222 graph.add_edge(
12223 &backlog.handle,
12224 &entry.handle,
12225 "mentions",
12226 Some("backlog text matches symbol tokens".to_string()),
12227 score,
12228 );
12229 }
12230
12231 for (score, entry) in ranked_file_matches(&query_tokens, lookup.files, &lookup.file_index)
12232 .into_iter()
12233 .take(limit.min(5))
12234 {
12235 graph.add_edge(
12236 &backlog.handle,
12237 &entry.handle,
12238 "mentions",
12239 Some("backlog text matches file tokens".to_string()),
12240 score,
12241 );
12242 }
12243
12244 for (score, entry) in ranked_route_matches(&query_tokens, lookup.routes, &lookup.route_index)
12245 .into_iter()
12246 .take(limit.min(5))
12247 {
12248 graph.add_edge(
12249 &backlog.handle,
12250 &entry.handle,
12251 "mentions",
12252 Some("backlog text matches route tokens".to_string()),
12253 score,
12254 );
12255 }
12256}
12257
12258fn load_agent_doc_traversal_nodes(
12259 root: &Path,
12260 path_hint: &Path,
12261 graph: &mut TraversalGraphBuild,
12262 lookup: &TraversalCodeLookup<'_>,
12263) -> Result<()> {
12264 for markdown_path in markdown_files_for_traversal(root, path_hint)? {
12265 let content = match fs::read_to_string(&markdown_path) {
12266 Ok(content) => content,
12267 Err(err) => {
12268 graph.warnings.push(format!(
12269 "session artifact unavailable: {}: {err}",
12270 markdown_path.display()
12271 ));
12272 continue;
12273 }
12274 };
12275 if !traversal_markdown_content_looks_like_session(&content) {
12276 continue;
12277 }
12278
12279 let session_id = parse_agent_doc_session_id(&content);
12280 let session = traversal_session_node(root, &markdown_path, session_id.as_deref());
12281 graph.add_node(session.clone());
12282 let lines = content.lines().collect::<Vec<_>>();
12283 let mut backlog_by_id = BTreeMap::<String, TraversalNode>::new();
12284 for (idx, line) in lines.iter().enumerate() {
12285 let Some((id, text)) = parse_backlog_line(line) else {
12286 continue;
12287 };
12288 let backlog = traversal_backlog_node(root, &markdown_path, &id, &text, idx as i64 + 1);
12289 graph.add_node(backlog.clone());
12290 backlog_by_id.insert(id.clone(), backlog.clone());
12291 graph.add_edge(
12292 &session.handle,
12293 &backlog.handle,
12294 "contains",
12295 Some("session backlog item".to_string()),
12296 1,
12297 );
12298 link_backlog_to_code_nodes(graph, &backlog, &text, lookup, 8);
12299 }
12300
12301 let mut in_queue = false;
12302 let mut job_by_id = BTreeMap::<String, TraversalNode>::new();
12303 for (idx, line) in lines.iter().enumerate() {
12304 let trimmed = line.trim();
12305 if trimmed.starts_with("<!-- agent:queue") {
12306 in_queue = true;
12307 continue;
12308 }
12309 if trimmed.starts_with("<!-- /agent:queue") {
12310 in_queue = false;
12311 continue;
12312 }
12313 if !in_queue {
12314 continue;
12315 }
12316 if let Some(dispatch) = parse_queue_dispatch_line(line) {
12317 let dispatch_ref = dispatch.strip_prefix('#').unwrap_or(dispatch.as_str());
12318 let node = traversal_job_packet_node(
12319 root,
12320 &markdown_path,
12321 &format!("dispatch {dispatch}"),
12322 Some(dispatch_ref),
12323 "agent-doc dispatch preset",
12324 idx as i64 + 1,
12325 );
12326 graph.add_node(node.clone());
12327 graph.add_edge(
12328 &session.handle,
12329 &node.handle,
12330 "contains",
12331 Some("session queued dispatch".to_string()),
12332 1,
12333 );
12334 continue;
12335 }
12336 if let Some(id) = parse_queue_do_line(line) {
12337 let detail = backlog_by_id
12338 .get(&id)
12339 .and_then(|node| node.detail.clone())
12340 .unwrap_or_else(|| "queued backlog item".to_string());
12341 let node = traversal_job_packet_node(
12342 root,
12343 &markdown_path,
12344 &format!("do #{id}"),
12345 Some(&id),
12346 &detail,
12347 idx as i64 + 1,
12348 );
12349 graph.add_node(node.clone());
12350 graph.add_edge(
12351 &session.handle,
12352 &node.handle,
12353 "contains",
12354 Some("session queued job packet".to_string()),
12355 1,
12356 );
12357 if let Some(backlog) = backlog_by_id.get(&id) {
12358 graph.add_edge(
12359 &node.handle,
12360 &backlog.handle,
12361 "targets",
12362 Some("queued backlog item".to_string()),
12363 1,
12364 );
12365 }
12366 job_by_id.insert(id, node);
12367 }
12368 }
12369
12370 let mut seen_results = BTreeSet::<(String, String, i64)>::new();
12371 for (idx, line) in lines.iter().enumerate() {
12372 for parsed in parse_worker_result_line(line, lookup) {
12373 let line_no = idx as i64 + 1;
12374 if !seen_results.insert((parsed.id.clone(), parsed.status.clone(), line_no)) {
12375 continue;
12376 }
12377 let result =
12378 traversal_worker_result_node(root, &markdown_path, &parsed, line, line_no);
12379 graph.add_node(result.clone());
12380 graph.add_edge(
12381 &session.handle,
12382 &result.handle,
12383 "contains",
12384 Some("session worker result".to_string()),
12385 1,
12386 );
12387 if let Some(backlog) = backlog_by_id.get(&parsed.id) {
12388 graph.add_edge(
12389 &backlog.handle,
12390 &result.handle,
12391 "has_result",
12392 Some(format!("worker result {}", parsed.status)),
12393 1,
12394 );
12395 }
12396 if let Some(job) = job_by_id.get(&parsed.id) {
12397 graph.add_edge(
12398 &job.handle,
12399 &result.handle,
12400 "has_result",
12401 Some(format!("queued worker result {}", parsed.status)),
12402 1,
12403 );
12404 }
12405 let mut result_text = line.to_string();
12406 if !parsed.touched_files.is_empty() {
12407 result_text.push(' ');
12408 result_text.push_str(&parsed.touched_files.join(" "));
12409 }
12410 link_backlog_to_code_nodes(graph, &result, &result_text, lookup, 8);
12411 }
12412 }
12413 }
12414 Ok(())
12415}
12416
/// Outcome of checking (and possibly refreshing) the code index before a packet
/// is built.
#[derive(Clone, Debug)]
struct AgentDocIndexGate {
    /// Index database to read from; `None` means callers fall back to raw source
    /// file nodes.
    db_path: Option<PathBuf>,
    /// Source root associated with the gate (used for the raw-file fallback walk
    /// and for generated-artifact filtering).
    source_root: PathBuf,
    /// Human-readable notes describing refreshes or fallbacks that happened.
    diagnostics: Vec<String>,
}
12423
/// Lookup key for the in-process index gate cache.
///
/// Two preparations share a cached gate only when all four inputs are equal.
/// `Debug` is derived (like on `AgentDocIndexGate`) so keys can be logged and
/// used in assertions.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
struct AgentDocIndexGateCacheKey {
    /// Repository root the gate was prepared for.
    root: PathBuf,
    /// Path hint passed to the preparation.
    path_hint: PathBuf,
    /// Optional explicit scope name.
    scope: Option<String>,
    /// Label of the packet being prepared; part of the key because it is
    /// embedded in the cached diagnostics.
    packet_label: String,
}
12431
12432fn agent_doc_index_gate_cache() -> &'static std::sync::Mutex<
12433 std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>,
12434> {
12435 static CACHE: std::sync::OnceLock<
12436 std::sync::Mutex<std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>>,
12437 > = std::sync::OnceLock::new();
12438 CACHE.get_or_init(|| std::sync::Mutex::new(std::collections::HashMap::new()))
12439}
12440
12441fn prepare_agent_doc_index_gate_cached(
12442 root: &Path,
12443 path_hint: &Path,
12444 scope: Option<&str>,
12445 packet_label: &str,
12446) -> (AgentDocIndexGate, String) {
12447 let key = AgentDocIndexGateCacheKey {
12448 root: root.to_path_buf(),
12449 path_hint: path_hint.to_path_buf(),
12450 scope: scope.map(str::to_string),
12451 packet_label: packet_label.to_string(),
12452 };
12453 if let Ok(cache) = agent_doc_index_gate_cache().lock()
12454 && let Some(cached) = cache.get(&key)
12455 {
12456 return (
12457 cached.clone(),
12458 "reused from in-process index gate cache by root/path_hint/scope key".to_string(),
12459 );
12460 }
12461 let gate = prepare_agent_doc_index_gate(root, path_hint, scope, packet_label);
12462 if let Ok(mut cache) = agent_doc_index_gate_cache().lock() {
12463 cache.insert(key, gate.clone());
12464 }
12465 (
12466 gate,
12467 "fresh inspection/refresh — cache miss on this preparation key".to_string(),
12468 )
12469}
12470
12471fn index_reason_for_state(state: SearchIndexState) -> Option<RebuildSearchReason> {
12472 match state {
12473 SearchIndexState::Fresh => None,
12474 SearchIndexState::Missing => Some(RebuildSearchReason::Missing),
12475 SearchIndexState::Stale { stale_files } => Some(RebuildSearchReason::Stale { stale_files }),
12476 }
12477}
12478
12479fn index_reason_detail(target: &SearchIndexTarget, reason: RebuildSearchReason) -> String {
12480 rebuild_search_target_detail(&RebuildSearchTarget {
12481 label: target.label.clone(),
12482 reason,
12483 reindex_cmd: target.reindex_cmd.clone(),
12484 })
12485}
12486
12487fn index_refresh_diagnostic(
12488 target: &SearchIndexTarget,
12489 reason: RebuildSearchReason,
12490 summary: &index::IndexSummary,
12491 packet_label: &str,
12492) -> String {
12493 let changed = summary.new + summary.modified + summary.deleted;
12494 format!(
12495 "index refreshed: {}; updated {} changed file{} before {}",
12496 index_reason_detail(target, reason),
12497 changed,
12498 if changed == 1 { "" } else { "s" },
12499 packet_label
12500 )
12501}
12502
12503fn index_refresh_fallback_diagnostic(
12504 target: &SearchIndexTarget,
12505 reason: RebuildSearchReason,
12506 err: &anyhow::Error,
12507 packet_label: &str,
12508) -> String {
12509 format!(
12510 "{}; could not refresh before {}: {err:#}; falling back to raw source file nodes",
12511 index_reason_detail(target, reason),
12512 packet_label
12513 )
12514}
12515
12516fn graph_fallback_source_root(root: &Path, path_hint: &Path, scope: Option<&str>) -> PathBuf {
12517 if let Some(scope_name) = scope
12518 && let Ok(scope) = config::Config::resolve_submodule(root, scope_name)
12519 {
12520 return scope.source_root;
12521 }
12522 if let Ok(Some(scope)) = config::Config::infer_submodule_from_path(root, path_hint) {
12523 return scope.source_root;
12524 }
12525 if let Ok(Some(scope)) = infer_agent_doc_task_submodule(root, path_hint) {
12526 return scope.source_root;
12527 }
12528 root.to_path_buf()
12529}
12530
12531fn prepare_agent_doc_index_gate(
12532 root: &Path,
12533 path_hint: &Path,
12534 scope: Option<&str>,
12535 packet_label: &str,
12536) -> AgentDocIndexGate {
12537 let fallback_source_root = graph_fallback_source_root(root, path_hint, scope);
12538 let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
12539 Ok(targets) => targets,
12540 Err(err) => {
12541 return AgentDocIndexGate {
12542 db_path: None,
12543 source_root: fallback_source_root,
12544 diagnostics: vec![format!(
12545 "code index unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
12546 )],
12547 };
12548 }
12549 };
12550 let Some(target) = targets.into_iter().next() else {
12551 return AgentDocIndexGate {
12552 db_path: None,
12553 source_root: fallback_source_root,
12554 diagnostics: vec![format!(
12555 "code index unavailable before {packet_label}: no index target resolved; falling back to raw source file nodes"
12556 )],
12557 };
12558 };
12559
12560 let state = match inspect_search_index(&target) {
12561 Ok(state) => state,
12562 Err(err) => {
12563 return AgentDocIndexGate {
12564 db_path: None,
12565 source_root: target.source_root,
12566 diagnostics: vec![format!(
12567 "code index freshness unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
12568 )],
12569 };
12570 }
12571 };
12572
12573 let Some(reason) = index_reason_for_state(state) else {
12574 return AgentDocIndexGate {
12575 db_path: Some(target.db_path),
12576 source_root: target.source_root,
12577 diagnostics: Vec::new(),
12578 };
12579 };
12580
12581 match apply_search_index_update(root, &target) {
12582 Ok(summary) => {
12583 index::inspect_scope_invalidate_all();
12588 let diagnostics = vec![index_refresh_diagnostic(
12589 &target,
12590 reason,
12591 &summary,
12592 packet_label,
12593 )];
12594 AgentDocIndexGate {
12595 db_path: Some(target.db_path),
12596 source_root: target.source_root,
12597 diagnostics,
12598 }
12599 }
12600 Err(err) => {
12601 let diagnostics = vec![index_refresh_fallback_diagnostic(
12602 &target,
12603 reason,
12604 &err,
12605 packet_label,
12606 )];
12607 AgentDocIndexGate {
12608 db_path: None,
12609 source_root: target.source_root,
12610 diagnostics,
12611 }
12612 }
12613 }
12614}
12615
12616fn add_raw_source_file_nodes(
12617 root: &Path,
12618 source_root: &Path,
12619 graph: &mut TraversalGraphBuild,
12620 file_entries: &mut Vec<TraversalFileIndexEntry>,
12621) -> Result<()> {
12622 let mut entries = walk::walk_files(source_root)?;
12623 entries.sort_by(|left, right| left.path.cmp(&right.path));
12624 for entry in entries {
12625 let file = entry.path.to_string_lossy();
12626 let node = traversal_raw_source_file_node(root, file.as_ref());
12627 let entry = TraversalFileIndexEntry {
12628 handle: node.handle.clone(),
12629 tokens: traversal_node_tokens(&node),
12630 node: node.clone(),
12631 };
12632 graph.add_node(node);
12633 file_entries.push(entry);
12634 }
12635 Ok(())
12636}
12637
12638fn build_traversal_graph_source_with_options(
12639 root: &Path,
12640 path_hint: &Path,
12641 scope: Option<&str>,
12642 session_only: bool,
12643) -> Result<TraversalGraphBuild> {
12644 let mut graph = TraversalGraphBuild::default();
12645 let mut symbol_entries = Vec::new();
12646 let mut file_entries = Vec::new();
12647 let mut route_entries = Vec::new();
12648 let bounded_session_projection = hinted_markdown_file(root, path_hint).is_some();
12649 if !session_only || hinted_markdown_file(root, path_hint).is_none() {
12650 let (gate, _cache_detail) =
12651 prepare_agent_doc_index_gate_cached(root, path_hint, scope, "graph traversal packet");
12652 graph.warnings.extend(gate.diagnostics);
12653
12654 match gate.db_path {
12655 Some(db_path) if db_path.exists() => {
12656 let db = index::IndexDb::open_read_only_resilient(&db_path)?;
12657 let file_paths = db.file_paths()?;
12658 let mut file_handle_by_path = HashMap::<String, String>::new();
12659 for file in file_paths {
12660 if traversal_path_is_generated_artifact(
12661 root,
12662 &gate.source_root,
12663 Path::new(&file),
12664 ) {
12665 continue;
12666 }
12667 let node = traversal_file_node(root, &file);
12668 let entry = TraversalFileIndexEntry {
12669 handle: node.handle.clone(),
12670 tokens: traversal_node_tokens(&node),
12671 node: node.clone(),
12672 };
12673 if let Some(path) = entry.node.path.as_ref() {
12674 file_handle_by_path.insert(path.clone(), entry.handle.clone());
12675 }
12676 graph.add_node(node);
12677 file_entries.push(entry);
12678 }
12679
12680 let symbols = db.all_symbols()?;
12681 let mut symbol_by_file_name_line = HashMap::new();
12682 let mut first_symbol_by_name = BTreeMap::<String, String>::new();
12683 for symbol in symbols.iter().filter(|symbol| {
12684 !traversal_path_is_generated_artifact(
12685 root,
12686 &gate.source_root,
12687 Path::new(&symbol.file),
12688 )
12689 }) {
12690 let node = traversal_symbol_node(root, symbol);
12691 let file = relativize(&symbol.file, root);
12692 symbol_by_file_name_line.insert(
12693 format!("{file}:{}:{}", symbol.line, symbol.name),
12694 node.handle.clone(),
12695 );
12696 first_symbol_by_name
12697 .entry(symbol.name.clone())
12698 .or_insert_with(|| node.handle.clone());
12699 let entry = TraversalSymbolIndexEntry {
12700 handle: node.handle.clone(),
12701 tokens: traversal_node_tokens(&node),
12702 node: node.clone(),
12703 };
12704 graph.add_node(node.clone());
12705 if let Some(file_handle) = file_handle_by_path.get(&file) {
12706 graph.add_edge(
12707 file_handle,
12708 &node.handle,
12709 "defines",
12710 Some("file defines symbol".to_string()),
12711 1,
12712 );
12713 }
12714 symbol_entries.push(entry);
12715 }
12716
12717 if !bounded_session_projection {
12718 for edge in db.all_stored_edges()? {
12719 if traversal_path_is_generated_artifact(
12720 root,
12721 &gate.source_root,
12722 Path::new(&edge.caller_file),
12723 ) {
12724 continue;
12725 }
12726 let caller_file = relativize(&edge.caller_file, root);
12727 let caller_key =
12728 format!("{caller_file}:{}:{}", edge.caller_line, edge.caller_name);
12729 let Some(caller_handle) =
12730 symbol_by_file_name_line.get(&caller_key).cloned()
12731 else {
12732 continue;
12733 };
12734 let callee_handle = if let Some(handle) =
12735 first_symbol_by_name.get(&edge.callee_name)
12736 {
12737 handle.clone()
12738 } else {
12739 let node = traversal_unresolved_symbol_node(root, &edge.callee_name);
12740 let handle = node.handle.clone();
12741 graph.add_node(node);
12742 handle
12743 };
12744 graph.add_edge(
12745 &caller_handle,
12746 &callee_handle,
12747 "calls",
12748 Some(format!("call site {}:{}", caller_file, edge.call_site_line)),
12749 1,
12750 );
12751 }
12752 }
12753
12754 for route in db.all_routes()? {
12755 if traversal_path_is_generated_artifact(
12756 root,
12757 &gate.source_root,
12758 Path::new(&route.file),
12759 ) {
12760 continue;
12761 }
12762 let node = traversal_route_node(root, &route);
12763 let entry = TraversalRouteIndexEntry {
12764 handle: node.handle.clone(),
12765 tokens: traversal_node_tokens(&node),
12766 node: node.clone(),
12767 };
12768 graph.add_node(node.clone());
12769 if let Some(path) = node.path.as_ref()
12770 && let Some(file_handle) = file_handle_by_path.get(path)
12771 {
12772 graph.add_edge(
12773 file_handle,
12774 &node.handle,
12775 "defines_route",
12776 Some("file declares route".to_string()),
12777 1,
12778 );
12779 }
12780 let handler_handle =
12781 if let Some(handle) = first_symbol_by_name.get(&route.handler_name) {
12782 handle.clone()
12783 } else {
12784 let node = traversal_unresolved_symbol_node(root, &route.handler_name);
12785 let handle = node.handle.clone();
12786 graph.add_node(node);
12787 handle
12788 };
12789 graph.add_edge(
12790 &entry.handle,
12791 &handler_handle,
12792 "handled_by",
12793 Some("route handler reference".to_string()),
12794 1,
12795 );
12796 route_entries.push(entry);
12797 }
12798 }
12799 _ => {
12800 add_raw_source_file_nodes(root, &gate.source_root, &mut graph, &mut file_entries)
12801 .with_context(|| {
12802 format!(
12803 "loading raw source fallback nodes from {}",
12804 gate.source_root.display()
12805 )
12806 })?;
12807 }
12808 }
12809 }
12810
12811 let code_lookup = TraversalCodeLookup::new(&symbol_entries, &file_entries, &route_entries);
12812 load_agent_doc_traversal_nodes(root, path_hint, &mut graph, &code_lookup)?;
12813 Ok(graph)
12814}
12815
/// Test-only shorthand for building the source graph with `session_only` off.
#[cfg(test)]
fn build_traversal_graph_source(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<TraversalGraphBuild> {
    let session_only = false;
    build_traversal_graph_source_with_options(root, path_hint, scope, session_only)
}
12824
12825pub(crate) fn write_traversal_graph_store_with_options(
12826 root: &Path,
12827 path_hint: &Path,
12828 scope: Option<&str>,
12829 session_only: bool,
12830) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
12831 let source_graph =
12832 build_traversal_graph_source_with_options(root, path_hint, scope, session_only)?;
12833 let projection = traversal_projection_from_graph(root, scope, &source_graph)?;
12834 let graph_db = graph_substrate_db_path(root, scope);
12835 let mut store = SqliteGraphStore::open(&graph_db)?;
12836 let source_watermark = traversal_source_watermark(root, path_hint, scope, session_only)
12837 .ok()
12838 .flatten()
12839 .or_else(|| graph_projection_content_hash(&projection));
12840 let refresh = store.replace_projection_with_version(
12841 scope.unwrap_or("root"),
12842 &projection,
12843 Some(GRAPH_PROJECTION_VERSION),
12844 source_watermark,
12845 )?;
12846 Ok((source_graph, refresh))
12847}
12848
12849pub(crate) fn write_traversal_graph_store(
12850 root: &Path,
12851 path_hint: &Path,
12852 scope: Option<&str>,
12853) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
12854 write_traversal_graph_store_with_options(root, path_hint, scope, false)
12855}
12856
12857fn refresh_traversal_graph_store_with_options(
12858 root: &Path,
12859 path_hint: &Path,
12860 scope: Option<&str>,
12861 session_only: bool,
12862) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
12863 let (source_graph, refresh) =
12864 write_traversal_graph_store_with_options(root, path_hint, scope, session_only)?;
12865 let graph_db = graph_substrate_db_path(root, scope);
12866 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
12867 let mut graph = traversal_graph_from_store(root, &store)?;
12868 graph.warnings = source_graph.warnings;
12869 Ok((graph, refresh))
12870}
12871
12872fn refresh_traversal_graph_store(
12873 root: &Path,
12874 path_hint: &Path,
12875 scope: Option<&str>,
12876) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
12877 refresh_traversal_graph_store_with_options(root, path_hint, scope, false)
12878}
12879
12880pub(crate) fn build_traversal_graph(
12881 root: &Path,
12882 path_hint: &Path,
12883 scope: Option<&str>,
12884) -> Result<TraversalGraphBuild> {
12885 let (graph, _refresh) = refresh_traversal_graph_store(root, path_hint, scope)?;
12886 Ok(graph)
12887}
12888
/// Ranks a node kind for query resolution; lower values win.
///
/// Known kinds are ranked by their position in the table below; any other kind
/// ranks after all of them.
fn traversal_query_kind_priority(kind: &str) -> usize {
    const KINDS_BY_PRIORITY: [&str; 9] = [
        "backlog",
        "job_packet",
        "worker_result",
        "symbol",
        "file",
        "route",
        "session",
        "semantic_concept",
        "semantic_entity",
    ];
    KINDS_BY_PRIORITY
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(KINDS_BY_PRIORITY.len())
}
12903
12904fn traversal_node_match_rank(node: &TraversalNode, query: &str) -> Option<(usize, usize, String)> {
12905 let trimmed = query.trim();
12906 if trimmed.is_empty() {
12907 return None;
12908 }
12909 let kind_priority = traversal_query_kind_priority(&node.kind);
12910 if node.handle == trimmed {
12911 return Some((0, kind_priority, node.handle.clone()));
12912 }
12913 if node.path.as_deref() == Some(trimmed) {
12914 let path_priority = if node.kind == "file" {
12915 0
12916 } else {
12917 kind_priority.saturating_add(1)
12918 };
12919 return Some((1, path_priority, node.handle.clone()));
12920 }
12921 let normalized_backlog = trimmed.trim_start_matches('#');
12922 if node.ref_id.as_deref() == Some(trimmed) || node.ref_id.as_deref() == Some(normalized_backlog)
12923 {
12924 return Some((2, kind_priority, node.handle.clone()));
12925 }
12926 if node.label == trimmed || (node.kind == "symbol" && node.label == normalized_backlog) {
12927 return Some((3, kind_priority, node.handle.clone()));
12928 }
12929 None
12930}
12931
12932fn resolve_traversal_node<'a>(
12933 graph: &'a TraversalGraphBuild,
12934 query: &str,
12935) -> Option<&'a TraversalNode> {
12936 graph
12937 .nodes
12938 .values()
12939 .filter_map(|node| traversal_node_match_rank(node, query).map(|rank| (rank, node)))
12940 .min_by(|(left_rank, _), (right_rank, _)| left_rank.cmp(right_rank))
12941 .map(|(_, node)| node)
12942}
12943
12944fn traversal_adjacency(edges: &[TraversalEdge]) -> BTreeMap<String, Vec<String>> {
12945 let mut adj = BTreeMap::<String, BTreeSet<String>>::new();
12946 for edge in edges {
12947 adj.entry(edge.from.clone())
12948 .or_default()
12949 .insert(edge.to.clone());
12950 adj.entry(edge.to.clone())
12951 .or_default()
12952 .insert(edge.from.clone());
12953 }
12954 adj.into_iter()
12955 .map(|(node, neighbors)| (node, neighbors.into_iter().collect()))
12956 .collect()
12957}
12958
12959fn traversal_shortest_handles(
12960 edges: &[TraversalEdge],
12961 from: &str,
12962 to: &str,
12963) -> Option<Vec<String>> {
12964 if from == to {
12965 return Some(vec![from.to_string()]);
12966 }
12967 let adj = traversal_adjacency(edges);
12968 if !adj.contains_key(from) || !adj.contains_key(to) {
12969 return None;
12970 }
12971 let mut visited = BTreeSet::new();
12972 let mut queue = VecDeque::new();
12973 let mut parent = BTreeMap::<String, String>::new();
12974 visited.insert(from.to_string());
12975 queue.push_back(from.to_string());
12976 while let Some(current) = queue.pop_front() {
12977 if let Some(neighbors) = adj.get(¤t) {
12978 for neighbor in neighbors {
12979 if visited.insert(neighbor.clone()) {
12980 parent.insert(neighbor.clone(), current.clone());
12981 if neighbor == to {
12982 let mut path = vec![to.to_string()];
12983 let mut cursor = to.to_string();
12984 while let Some(prev) = parent.get(&cursor) {
12985 path.push(prev.clone());
12986 cursor = prev.clone();
12987 }
12988 path.reverse();
12989 return Some(path);
12990 }
12991 queue.push_back(neighbor.clone());
12992 }
12993 }
12994 }
12995 }
12996 None
12997}
12998
12999fn traversal_scored_neighbors(edges: &[TraversalEdge], current: &str) -> Vec<String> {
13000 let mut best_score_by_neighbor = BTreeMap::<String, usize>::new();
13001 for edge in edges {
13002 let neighbor = if edge.from == current {
13003 edge.to.as_str()
13004 } else if edge.to == current {
13005 edge.from.as_str()
13006 } else {
13007 continue;
13008 };
13009 let score = traversal_relation_score(edge, current);
13010 best_score_by_neighbor
13011 .entry(neighbor.to_string())
13012 .and_modify(|best| *best = (*best).max(score))
13013 .or_insert(score);
13014 }
13015 let mut ranked = best_score_by_neighbor.into_iter().collect::<Vec<_>>();
13016 ranked.sort_by(|(left_handle, left_score), (right_handle, right_score)| {
13017 right_score
13018 .cmp(left_score)
13019 .then_with(|| left_handle.cmp(right_handle))
13020 });
13021 ranked.into_iter().map(|(handle, _)| handle).collect()
13022}
13023
13024fn traversal_neighborhood_handles(
13025 edges: &[TraversalEdge],
13026 origin: &str,
13027 depth: usize,
13028 limit: usize,
13029) -> BTreeSet<String> {
13030 let mut seen = BTreeSet::new();
13031 let mut queue = VecDeque::new();
13032 seen.insert(origin.to_string());
13033 queue.push_back((origin.to_string(), 0usize));
13034 while let Some((current, current_depth)) = queue.pop_front() {
13035 if current_depth >= depth {
13036 continue;
13037 }
13038 for neighbor in traversal_scored_neighbors(edges, ¤t) {
13039 if limit > 0 && seen.len() >= limit {
13040 return seen;
13041 }
13042 if seen.insert(neighbor.clone()) {
13043 queue.push_back((neighbor, current_depth + 1));
13044 }
13045 }
13046 }
13047 seen
13048}
13049
13050fn traversal_edges_between(
13051 handles: &BTreeSet<String>,
13052 edges: &[TraversalEdge],
13053) -> Vec<TraversalEdge> {
13054 edges
13055 .iter()
13056 .filter(|edge| handles.contains(&edge.from) && handles.contains(&edge.to))
13057 .cloned()
13058 .collect()
13059}
13060
13061fn traversal_path_edges(path: &[String], edges: &[TraversalEdge]) -> Vec<TraversalEdge> {
13062 let mut result = Vec::new();
13063 for pair in path.windows(2) {
13064 if let Some(edge) = edges.iter().find(|edge| {
13065 (edge.from == pair[0] && edge.to == pair[1])
13066 || (edge.from == pair[1] && edge.to == pair[0])
13067 }) {
13068 result.push(edge.clone());
13069 }
13070 }
13071 result
13072}
13073
13074fn sorted_traversal_nodes<'a>(
13075 nodes: impl IntoIterator<Item = &'a TraversalNode>,
13076) -> Vec<TraversalNode> {
13077 let mut nodes = nodes.into_iter().cloned().collect::<Vec<_>>();
13078 nodes.sort_by(|left, right| {
13079 left.kind
13080 .cmp(&right.kind)
13081 .then_with(|| left.label.cmp(&right.label))
13082 .then_with(|| left.path.cmp(&right.path))
13083 .then_with(|| left.handle.cmp(&right.handle))
13084 });
13085 nodes
13086}
13087
13088fn traversal_relation_score(edge: &TraversalEdge, origin: &str) -> usize {
13089 let base = match edge.relation.as_str() {
13090 "mentions" => 100,
13091 "contains" => 80,
13092 "calls" => {
13093 if edge.from == origin {
13094 70
13095 } else {
13096 65
13097 }
13098 }
13099 "handled_by" => 68,
13100 "defines_route" => 62,
13101 "mentions_concept" | "mentions_entity" => 66,
13102 "semantic_relation" => 64,
13103 "tagged_concept" | "related_concept" => 58,
13104 "defines" => {
13105 if edge.from == origin {
13106 60
13107 } else {
13108 55
13109 }
13110 }
13111 _ => 10,
13112 };
13113 base + edge.weight
13114}
13115
13116fn traversal_recommendation_reason(edge: &TraversalEdge, origin: &str) -> String {
13117 match edge.relation.as_str() {
13118 "mentions" => "matched from backlog/session text".to_string(),
13119 "contains" => "contained in the selected session artifact".to_string(),
13120 "defines" if edge.from == origin => "symbol defined in selected file".to_string(),
13121 "defines" => "file that defines the selected symbol".to_string(),
13122 "defines_route" if edge.from == origin => "route declared in selected file".to_string(),
13123 "defines_route" => "file that declares the selected route".to_string(),
13124 "handled_by" if edge.from == origin => "handler for the selected route".to_string(),
13125 "handled_by" => "route handled by the selected symbol".to_string(),
13126 "mentions_concept" => "cached summary concept for the selected source".to_string(),
13127 "mentions_entity" => "cached summary entity for the selected source".to_string(),
13128 "semantic_relation" => "LLM-extracted semantic relationship".to_string(),
13129 "tagged_concept" => "concept label attached to the selected entity".to_string(),
13130 "related_concept" => "co-occurring cached summary concept".to_string(),
13131 "calls" if edge.from == origin => "callee from the selected symbol".to_string(),
13132 "calls" => "caller of the selected symbol".to_string(),
13133 other => format!("connected by {other}"),
13134 }
13135}
13136
13137fn traversal_recommendations(
13138 graph: &TraversalGraphBuild,
13139 origin: Option<&str>,
13140 shortest_path: Option<&[String]>,
13141 limit: usize,
13142) -> Vec<TraversalRecommendation> {
13143 let Some(origin) = origin else {
13144 return Vec::new();
13145 };
13146 let mut recommendations = Vec::new();
13147 let mut seen = BTreeSet::new();
13148
13149 if let Some(path) = shortest_path
13150 && path.len() > 1
13151 && path.first().is_some_and(|handle| handle == origin)
13152 && let Some(next) = graph.nodes.get(&path[1])
13153 {
13154 seen.insert(next.handle.clone());
13155 recommendations.push(TraversalRecommendation {
13156 handle: next.handle.clone(),
13157 kind: next.kind.clone(),
13158 label: next.label.clone(),
13159 reason: "next hop on shortest path".to_string(),
13160 score: 1_000,
13161 expand: next.expand.clone(),
13162 });
13163 }
13164
13165 let mut candidates = graph
13166 .edges
13167 .iter()
13168 .filter_map(|edge| {
13169 let neighbor = if edge.from == origin {
13170 edge.to.as_str()
13171 } else if edge.to == origin {
13172 edge.from.as_str()
13173 } else {
13174 return None;
13175 };
13176 let node = graph.nodes.get(neighbor)?;
13177 Some((traversal_relation_score(edge, origin), edge, node))
13178 })
13179 .collect::<Vec<_>>();
13180 candidates.sort_by(|(left_score, _, left), (right_score, _, right)| {
13181 right_score
13182 .cmp(left_score)
13183 .then_with(|| left.kind.cmp(&right.kind))
13184 .then_with(|| left.label.cmp(&right.label))
13185 .then_with(|| left.handle.cmp(&right.handle))
13186 });
13187
13188 let max = if limit == 0 { usize::MAX } else { limit };
13189 for (score, edge, node) in candidates {
13190 if recommendations.len() >= max {
13191 break;
13192 }
13193 if seen.insert(node.handle.clone()) {
13194 recommendations.push(TraversalRecommendation {
13195 handle: node.handle.clone(),
13196 kind: node.kind.clone(),
13197 label: node.label.clone(),
13198 reason: traversal_recommendation_reason(edge, origin),
13199 score,
13200 expand: node.expand.clone(),
13201 });
13202 }
13203 }
13204
13205 recommendations
13206}
13207
13208fn exploration_budget_for_counts(nodes: usize, edges: usize) -> ExplorationBudget {
13209 let scale = nodes.saturating_add(edges);
13210 if scale <= 80 {
13211 ExplorationBudget {
13212 project_size: "small".to_string(),
13213 max_source_windows: 8,
13214 lines_per_window: 96,
13215 relationship_limit: 40,
13216 }
13217 } else if scale <= 800 {
13218 ExplorationBudget {
13219 project_size: "medium".to_string(),
13220 max_source_windows: 6,
13221 lines_per_window: 80,
13222 relationship_limit: 32,
13223 }
13224 } else {
13225 ExplorationBudget {
13226 project_size: "large".to_string(),
13227 max_source_windows: 4,
13228 lines_per_window: 64,
13229 relationship_limit: 24,
13230 }
13231 }
13232}
13233
13234fn exploration_node_label(node: &TraversalNode) -> String {
13235 format!("{}:{}", node.kind, node.label)
13236}
13237
13238fn exploration_source_window_for_node(
13239 root: &Path,
13240 node: &TraversalNode,
13241 budget: &ExplorationBudget,
13242) -> Option<ExplorationSourceWindow> {
13243 let file = node.path.as_ref()?;
13244 let anchor = node
13245 .line
13246 .and_then(|line| usize::try_from(line).ok())
13247 .and_then(|line| line.checked_add(1))
13248 .unwrap_or(1);
13249 let context_before = budget.lines_per_window / 3;
13250 let start = anchor.saturating_sub(context_before).max(1);
13251 let end = start
13252 .saturating_add(budget.lines_per_window)
13253 .saturating_sub(1);
13254 let handle = stable_handle("xwin", &format!("{file}:{start}:{end}:{}", node.handle));
13255 Some(ExplorationSourceWindow {
13256 handle,
13257 file: file.clone(),
13258 start,
13259 end,
13260 reason: format!("cluster around {}", exploration_node_label(node)),
13261 expand: source_read_command(root, file, start, budget.lines_per_window),
13262 })
13263}
13264
13265fn build_exploration_packet(
13266 root: &Path,
13267 totals: &TraversalTotals,
13268 selected_nodes: &[TraversalNode],
13269 selected_edges: &[TraversalEdge],
13270) -> ExplorationPacket {
13271 let budget = exploration_budget_for_counts(totals.nodes, totals.edges);
13272 let node_by_handle = selected_nodes
13273 .iter()
13274 .map(|node| (node.handle.as_str(), node))
13275 .collect::<BTreeMap<_, _>>();
13276 let relationship_map = selected_edges
13277 .iter()
13278 .take(budget.relationship_limit)
13279 .filter_map(|edge| {
13280 let from = node_by_handle.get(edge.from.as_str())?;
13281 let to = node_by_handle.get(edge.to.as_str())?;
13282 Some(ExplorationRelation {
13283 from: exploration_node_label(from),
13284 relation: edge.relation.clone(),
13285 to: exploration_node_label(to),
13286 label: edge.label.clone(),
13287 })
13288 })
13289 .collect::<Vec<_>>();
13290
13291 let mut seen_windows = BTreeSet::new();
13292 let mut source_windows = Vec::new();
13293 for node in selected_nodes {
13294 if source_windows.len() >= budget.max_source_windows {
13295 break;
13296 }
13297 let Some(window) = exploration_source_window_for_node(root, node, &budget) else {
13298 continue;
13299 };
13300 let key = (window.file.clone(), window.start, window.end);
13301 if seen_windows.insert(key) {
13302 source_windows.push(window);
13303 }
13304 }
13305
13306 ExplorationPacket {
13307 budget,
13308 relationship_map,
13309 source_windows,
13310 worker_context: Vec::new(),
13311 no_reread_guidance:
13312 "Use the source_windows expand commands for line-numbered context; avoid whole-file reads unless the needed line is outside every listed window."
13313 .to_string(),
13314 }
13315}
13316
13317pub(crate) fn traversal_report(
13318 root: &Path,
13319 scope: Option<&str>,
13320 graph: TraversalGraphBuild,
13321 query: Option<&str>,
13322 target: Option<&str>,
13323 depth: usize,
13324 limit: usize,
13325) -> Result<TraversalReport> {
13326 let totals = TraversalTotals {
13327 nodes: graph.nodes.len(),
13328 edges: graph.edges.len(),
13329 };
13330 let origin_node = query.and_then(|value| resolve_traversal_node(&graph, value));
13331 let target_node = target.and_then(|value| resolve_traversal_node(&graph, value));
13332 if let Some(query) = query
13333 && origin_node.is_none()
13334 {
13335 bail!("traversal node not found: {}", query);
13336 }
13337 if let Some(target) = target
13338 && target_node.is_none()
13339 {
13340 bail!("traversal target not found: {}", target);
13341 }
13342
13343 let (mode, selected_nodes, selected_edges, shortest_path) =
13344 if let (Some(origin), Some(target)) = (origin_node, target_node) {
13345 if let Some(handles) =
13346 traversal_shortest_handles(&graph.edges, &origin.handle, &target.handle)
13347 {
13348 let handle_set = handles.iter().cloned().collect::<BTreeSet<_>>();
13349 let nodes = handles
13350 .iter()
13351 .filter_map(|handle| graph.nodes.get(handle).cloned())
13352 .collect::<Vec<_>>();
13353 let edges = traversal_path_edges(&handles, &graph.edges);
13354 let path = TraversalPathReport {
13355 from: origin.clone(),
13356 to: target.clone(),
13357 hops: handles.len().saturating_sub(1),
13358 nodes: nodes.clone(),
13359 edges: edges.clone(),
13360 };
13361 (
13362 "path".to_string(),
13363 nodes,
13364 traversal_edges_between(&handle_set, &graph.edges),
13365 Some(path),
13366 )
13367 } else {
13368 (
13369 "path".to_string(),
13370 vec![origin.clone(), target.clone()],
13371 Vec::new(),
13372 None,
13373 )
13374 }
13375 } else if let Some(origin) = origin_node {
13376 let handles =
13377 traversal_neighborhood_handles(&graph.edges, &origin.handle, depth, limit);
13378 let nodes =
13379 sorted_traversal_nodes(handles.iter().filter_map(|handle| graph.nodes.get(handle)));
13380 let edges = traversal_edges_between(&handles, &graph.edges);
13381 ("neighborhood".to_string(), nodes, edges, None)
13382 } else {
13383 let mut nodes = sorted_traversal_nodes(graph.nodes.values());
13384 let truncated_nodes = limit > 0 && nodes.len() > limit;
13385 if truncated_nodes {
13386 nodes.truncate(limit);
13387 }
13388 let handles = nodes
13389 .iter()
13390 .map(|node| node.handle.clone())
13391 .collect::<BTreeSet<_>>();
13392 let mut edges = traversal_edges_between(&handles, &graph.edges);
13393 let truncated_edges = limit > 0 && edges.len() > limit;
13394 if truncated_edges {
13395 edges.truncate(limit);
13396 }
13397 ("export".to_string(), nodes, edges, None)
13398 };
13399
13400 let shortest_handles = shortest_path.as_ref().map(|path| {
13401 path.nodes
13402 .iter()
13403 .map(|node| node.handle.clone())
13404 .collect::<Vec<_>>()
13405 });
13406 let recommendations = traversal_recommendations(
13407 &graph,
13408 origin_node.map(|node| node.handle.as_str()),
13409 shortest_handles.as_deref(),
13410 if limit == 0 { 10 } else { limit.min(10) },
13411 );
13412 let exploration = build_exploration_packet(root, &totals, &selected_nodes, &selected_edges);
13413 let truncated = selected_nodes.len() < totals.nodes || selected_edges.len() < totals.edges;
13414
13415 Ok(TraversalReport {
13416 root: root.to_string_lossy().to_string(),
13417 scope: scope.map(str::to_string),
13418 mode,
13419 totals,
13420 query: query.map(str::to_string),
13421 target: target.map(str::to_string),
13422 nodes: selected_nodes,
13423 edges: selected_edges,
13424 shortest_path,
13425 recommendations,
13426 exploration,
13427 truncated,
13428 warnings: graph.warnings,
13429 })
13430}
13431
/// Escapes `input` for inclusion in HTML text content or a quoted attribute
/// value.
///
/// The five markup-significant characters are replaced with entities:
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `'` → `&#39;`.
/// Every other character is copied through unchanged. The input is walked
/// once, so an ampersand produced by one replacement is never escaped again.
fn html_escape(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
13440
13441pub(crate) fn traversal_report_html(report: &TraversalReport) -> Result<String> {
13442 let json = serde_json::to_string(report)?.replace("</", "<\\/");
13443 let mut html = String::new();
13444 html.push_str(
13445 "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift traversal graph</title>",
13446 );
13447 html.push_str(
13448 r#"<style>
13449:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#ffffff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e;--semantic:#9a3412}
13450@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf;--semantic:#fb923c}}
13451*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.toolbar{display:flex;gap:8px;align-items:center}.toolbar input{min-width:220px;border:1px solid var(--line);border-radius:6px;background:var(--panel);color:var(--text);padding:8px 10px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 320px;gap:14px;min-height:650px}.graph-panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.graph-panel{position:relative}.legend{position:absolute;left:12px;top:12px;display:flex;flex-wrap:wrap;gap:6px;max-width:calc(100% - 24px)}.legend span{font-size:12px;background:color-mix(in srgb,var(--panel) 86%,transparent);border:1px solid var(--line);border-radius:999px;padding:4px 8px}.side{padding:14px;overflow:auto}.side h2{font-size:15px;margin:0 0 8px}.selected{border-top:1px solid var(--line);margin-top:12px;padding-top:12px}.list{display:grid;gap:8px}.row{border:1px solid 
var(--line);border-radius:6px;padding:8px;cursor:pointer}.row:hover{border-color:var(--accent)}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted)}svg{width:100%;height:650px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.edge.semantic{stroke:var(--semantic);stroke-width:1.8}.node{stroke:var(--panel);stroke-width:2;cursor:pointer}.node.semantic{stroke:var(--semantic);stroke-width:2.5}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text);pointer-events:none}.hidden{display:none}@media(max-width:900px){.top{display:block}.toolbar{margin-top:12px}.layout{grid-template-columns:1fr}.side{max-height:360px}svg{height:560px}}
13452</style>"#,
13453 );
13454 html.push_str("</head><body>");
13455 html.push_str("<div class=\"page\">");
13456 html.push_str(&format!(
13457 "<header class=\"top\"><div><h1>tsift traversal graph</h1><div class=\"meta\">mode <code>{}</code> | nodes <code>{}</code>/<code>{}</code> | edges <code>{}</code>/<code>{}</code></div></div><div class=\"toolbar\"><input id=\"filter\" type=\"search\" placeholder=\"Filter nodes\"></div></header>",
13458 html_escape(&report.mode),
13459 report.nodes.len(),
13460 report.totals.nodes,
13461 report.edges.len(),
13462 report.totals.edges
13463 ));
13464 html.push_str(
13465 r#"<main class="layout"><section class="graph-panel"><div id="legend" class="legend"></div><svg id="graph-canvas" role="img" aria-label="Traversal graph"></svg></section><aside class="side"><h2>Nodes</h2><div id="node-list" class="list"></div><div id="selected" class="selected"></div></aside></main>"#,
13466 );
13467 html.push_str("<script id=\"graph-data\" type=\"application/json\">");
13468 html.push_str(&json);
13469 html.push_str(
13470 r##"</script><script>
13471const report = JSON.parse(document.getElementById("graph-data").textContent);
13472const svg = document.getElementById("graph-canvas");
13473const list = document.getElementById("node-list");
13474const selected = document.getElementById("selected");
13475const filter = document.getElementById("filter");
13476const legend = document.getElementById("legend");
13477const nodes = report.nodes.map((node, index) => ({...node, index}));
13478const nodeByHandle = new Map(nodes.map(node => [node.handle, node]));
13479const edges = report.edges.filter(edge => nodeByHandle.has(edge.from) && nodeByHandle.has(edge.to));
13480const colorByKind = new Map([
13481 ["file", "#2563eb"], ["symbol", "#16a34a"], ["route", "#7c3aed"],
13482 ["session", "#0891b2"], ["backlog", "#dc2626"], ["job_packet", "#ea580c"],
13483 ["semantic_concept", "#9a3412"], ["semantic_entity", "#b45309"],
13484 ["source_handle", "#64748b"], ["worker_context", "#475569"], ["worker_result", "#15803d"]
13485]);
13486function color(kind){ return colorByKind.get(kind) || "#6b7280"; }
13487function isSemantic(edge){ return edge.relation.includes("concept") || edge.relation.includes("entity") || edge.relation.includes("semantic"); }
13488function text(value){ return value == null ? "" : String(value); }
13489function matches(node, query){
13490 if (!query) return true;
13491 const haystack = [node.kind,node.label,node.handle,node.ref_id,node.path,node.detail].map(text).join(" ").toLowerCase();
13492 return haystack.includes(query);
13493}
13494function layout(){
13495 const rect = svg.getBoundingClientRect();
13496 const width = rect.width || 900;
13497 const height = rect.height || 650;
13498 const cx = width / 2;
13499 const cy = height / 2;
13500 const kinds = [...new Set(nodes.map(node => node.kind))].sort();
13501 const counts = new Map();
13502 for (const node of nodes) counts.set(node.kind, (counts.get(node.kind) || 0) + 1);
13503 const offsets = new Map();
13504 for (const node of nodes) {
13505 const group = kinds.indexOf(node.kind);
13506 const index = offsets.get(node.kind) || 0;
13507 offsets.set(node.kind, index + 1);
13508 const groupCount = counts.get(node.kind) || 1;
13509 const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
13510 const angle = (Math.PI * 2 * index / Math.max(groupCount, 1)) + (group * 0.47);
13511 node.x = cx + Math.cos(angle) * ring;
13512 node.y = cy + Math.sin(angle) * ring;
13513 }
13514}
13515function draw(){
13516 const query = filter.value.trim().toLowerCase();
13517 const visible = new Set(nodes.filter(node => matches(node, query)).map(node => node.handle));
13518 svg.innerHTML = "";
13519 for (const edge of edges) {
13520 if (!visible.has(edge.from) || !visible.has(edge.to)) continue;
13521 const from = nodeByHandle.get(edge.from);
13522 const to = nodeByHandle.get(edge.to);
13523 const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
13524 line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
13525 line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
13526 line.setAttribute("class", "edge" + (isSemantic(edge) ? " semantic" : ""));
13527 line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.relation + (edge.label ? ": " + edge.label : "");
13528 svg.appendChild(line);
13529 }
13530 for (const node of nodes) {
13531 if (!visible.has(node.handle)) continue;
13532 const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
13533 circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
13534 circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
13535 circle.setAttribute("fill", color(node.kind));
13536 circle.setAttribute("class", "node" + (node.kind.startsWith("semantic_") ? " semantic" : ""));
13537 circle.addEventListener("click", () => selectNode(node));
13538 circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
13539 svg.appendChild(circle);
13540 const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
13541 label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
13542 label.setAttribute("class", "node-label");
13543 label.textContent = node.label.length > 34 ? node.label.slice(0, 31) + "..." : node.label;
13544 svg.appendChild(label);
13545 }
13546 renderList(query);
13547}
13548function renderLegend(){
13549 const kinds = [...new Set(nodes.map(node => node.kind))].sort();
13550 legend.innerHTML = kinds.map(kind => `<span><b style="color:${color(kind)}">●</b> ${kind}</span>`).join("");
13551}
13552function renderList(query){
13553 const rows = nodes.filter(node => matches(node, query)).slice(0, 120);
13554 list.innerHTML = rows.map(node => `<div class="row" data-handle="${node.handle}"><div class="kind">${node.kind}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${node.handle}</div></div>`).join("");
13555 for (const row of list.querySelectorAll(".row")) {
13556 row.addEventListener("click", () => selectNode(nodeByHandle.get(row.dataset.handle)));
13557 }
13558}
13559function selectNode(node){
13560 const adjacent = edges.filter(edge => edge.from === node.handle || edge.to === node.handle).slice(0, 20);
13561 selected.innerHTML = `<h2>${escapeHtml(node.label)}</h2><div class="kind">${node.kind}</div><p class="handle">${node.handle}</p>${node.path ? `<p>${escapeHtml(node.path)}${node.line != null ? ":" + node.line : ""}</p>` : ""}${node.detail ? `<p>${escapeHtml(node.detail)}</p>` : ""}<p><code>${escapeHtml(node.expand)}</code></p><h2>Edges</h2><div class="list">${adjacent.map(edge => `<div class="row"><div class="kind">${edge.relation}</div><div>${escapeHtml(edge.from)} -> ${escapeHtml(edge.to)}</div>${edge.label ? `<div>${escapeHtml(edge.label)}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No visible edges.</div>"}</div>`;
13562}
13563function escapeHtml(value){
13564 return text(value).replace(/[&<>"']/g, ch => ({"&":"&","<":"<",">":">","\"":""","'":"'"}[ch]));
13565}
13566filter.addEventListener("input", draw);
13567window.addEventListener("resize", () => { layout(); draw(); });
13568renderLegend();
13569layout();
13570draw();
13571if (nodes.length) selectNode(nodes[0]);
13572</script></div></body></html>"##,
13573 );
13574 Ok(html)
13575}
13576
13577fn semantic_related_report_from_store(
13578 root: &Path,
13579 scope: Option<&str>,
13580 query: &str,
13581 limit: usize,
13582 kind: SemanticRelatedKind,
13583 store: &impl GraphStore,
13584) -> Result<SemanticRelatedReport> {
13585 if query.trim().is_empty() {
13586 bail!("semantic query cannot be empty");
13587 }
13588
13589 let query_embedding = semantic_embedding(query);
13590 let node_kinds: &[&str] = match kind {
13591 SemanticRelatedKind::Concept => &["semantic_concept"],
13592 SemanticRelatedKind::Entity => &["semantic_entity"],
13593 SemanticRelatedKind::All => &["semantic_concept", "semantic_entity"],
13594 };
13595
13596 let mut items = Vec::new();
13597 for node_kind in node_kinds {
13598 for node in store.nodes_by_kind(node_kind)? {
13599 let Some(embedding) = node
13600 .properties
13601 .get("embedding")
13602 .and_then(|value| parse_semantic_embedding_property(value))
13603 else {
13604 continue;
13605 };
13606 let score = semantic_cosine(&query_embedding, &embedding);
13607 items.push(SemanticRelatedItem {
13608 handle: node
13609 .properties
13610 .get("handle")
13611 .cloned()
13612 .unwrap_or_else(|| node.id.clone()),
13613 kind: node.kind,
13614 label: node.label,
13615 score,
13616 file_path: node
13617 .properties
13618 .get("source_file")
13619 .or_else(|| node.properties.get("path"))
13620 .cloned(),
13621 source_symbol: node.properties.get("source_symbol").cloned(),
13622 detail: node
13623 .properties
13624 .get("description")
13625 .or_else(|| node.properties.get("detail"))
13626 .cloned(),
13627 expand: node
13628 .properties
13629 .get("expand")
13630 .cloned()
13631 .unwrap_or_else(|| traversal_expand_command(root, &node.id)),
13632 });
13633 }
13634 }
13635
13636 items.sort_by(|left, right| {
13637 right
13638 .score
13639 .partial_cmp(&left.score)
13640 .unwrap_or(Ordering::Equal)
13641 .then_with(|| left.kind.cmp(&right.kind))
13642 .then_with(|| left.label.cmp(&right.label))
13643 .then_with(|| left.handle.cmp(&right.handle))
13644 });
13645 if limit > 0 && items.len() > limit {
13646 items.truncate(limit);
13647 }
13648
13649 let mut warnings = Vec::new();
13650 if items.is_empty() {
13651 warnings.push(
13652 "no semantic graph rows found; run `tsift summarize --extract <path>` first"
13653 .to_string(),
13654 );
13655 }
13656
13657 Ok(SemanticRelatedReport {
13658 root: root.to_string_lossy().to_string(),
13659 scope: scope.map(str::to_string),
13660 query: query.to_string(),
13661 embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
13662 count: items.len(),
13663 items,
13664 warnings,
13665 })
13666}
13667
13668fn graph_db_semantic_edge_scan_cap(limit: usize) -> usize {
13669 if limit == 0 {
13670 return 0;
13671 }
13672 limit.saturating_mul(4).clamp(
13673 GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP,
13674 GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP,
13675 )
13676}
13677
/// Returns the maximum number of nodes semantic-seeded expansion may collect.
///
/// A `limit` of zero means unbounded (`usize::MAX`). Otherwise the cap is the
/// largest of three times `limit` (saturating), `limit` itself, and
/// `seed_count`.
fn graph_db_semantic_node_discovery_cap(seed_count: usize, limit: usize) -> usize {
    match limit {
        0 => usize::MAX,
        bounded => {
            let widened = std::cmp::max(bounded.saturating_mul(3), bounded);
            std::cmp::max(widened, seed_count)
        }
    }
}
13684
13685fn graph_db_semantic_edge_other_id<'a>(
13686 edge: &'a SubstrateGraphEdge,
13687 current_id: &str,
13688) -> Option<&'a str> {
13689 if edge.from_id == current_id {
13690 Some(edge.to_id.as_str())
13691 } else if edge.to_id == current_id {
13692 Some(edge.from_id.as_str())
13693 } else {
13694 None
13695 }
13696}
13697
13698fn graph_db_semantic_edge_score(edge: &SubstrateGraphEdge, current_id: &str) -> i64 {
13699 let mut score = resolution::edge_kind_rank_score(&edge.kind).saturating_mul(10);
13700 score += if edge.from_id == current_id { 8 } else { 4 };
13701 score += match edge.kind.as_str() {
13702 "mentions_concept" | "mentions_entity" | "tagged_concept" | "tagged_entity"
13703 | "related_concept" => 30,
13704 "semantic_relation" => 28,
13705 "calls" => 24,
13706 "mentions" => 22,
13707 "requests_context" | "scopes_context" | "scopes_source" | "explains_result" => 18,
13708 "defines" | "contains" | "belongs_to" => 12,
13709 _ => 0,
13710 };
13711 score
13712}
13713
13714fn graph_db_semantic_seeded_neighborhood(
13715 store: &impl GraphStore,
13716 seed_ids: &[String],
13717 depth: usize,
13718 limit: usize,
13719) -> Result<GraphDbSemanticSeededSubgraph> {
13720 let seed_rank = seed_ids
13721 .iter()
13722 .enumerate()
13723 .map(|(idx, seed)| (seed.clone(), idx))
13724 .collect::<BTreeMap<_, _>>();
13725 let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
13726 let mut edges = BTreeMap::<String, SubstrateGraphEdge>::new();
13727 let mut node_score_by_id = BTreeMap::<String, i64>::new();
13728 let mut queue = VecDeque::<(String, usize)>::new();
13729 let mut seen_at_depth = BTreeMap::<String, usize>::new();
13730 let edge_scan_cap = graph_db_semantic_edge_scan_cap(limit);
13731 let node_discovery_cap = graph_db_semantic_node_discovery_cap(seed_ids.len(), limit);
13732 let mut skipped_by_edge_cap = 0usize;
13733 let mut skipped_by_node_cap = 0usize;
13734 let mut diagnostics = vec![
13735 "semantic-seeded retrieval uses phrase similarity to pick graph seeds".to_string(),
13736 "seed expansion traverses both outgoing and incident edges so code, markdown, conversation, and memory adapters can link into semantic rows without reversing their edge direction".to_string(),
13737 format!(
13738 "seed expansion ranks incident/outgoing edges before caps; per-node edge scan cap={} node discovery cap={}",
13739 if edge_scan_cap == 0 {
13740 "unbounded".to_string()
13741 } else {
13742 edge_scan_cap.to_string()
13743 },
13744 if node_discovery_cap == usize::MAX {
13745 "unbounded".to_string()
13746 } else {
13747 node_discovery_cap.to_string()
13748 }
13749 ),
13750 ];
13751
13752 for (idx, seed_id) in seed_ids.iter().enumerate() {
13753 if let Some(node) = store.node(seed_id)? {
13754 nodes.entry(seed_id.clone()).or_insert(node);
13755 node_score_by_id
13756 .entry(seed_id.clone())
13757 .or_insert(1_000_000i64.saturating_sub(idx as i64));
13758 queue.push_back((seed_id.clone(), 0));
13759 seen_at_depth.entry(seed_id.clone()).or_insert(0);
13760 } else {
13761 diagnostics.push(format!(
13762 "semantic seed {seed_id} was not present in the graph store"
13763 ));
13764 }
13765 }
13766
13767 while let Some((current_id, current_depth)) = queue.pop_front() {
13768 if current_depth >= depth {
13769 continue;
13770 }
13771
13772 let mut expansion_edges_by_key = BTreeMap::<String, SubstrateGraphEdge>::new();
13773 for edge in store.outgoing_edges(¤t_id, None)? {
13774 expansion_edges_by_key
13775 .entry(graph_db_edge_key(&edge))
13776 .or_insert(edge);
13777 }
13778 for edge in store.incident_edges(¤t_id, None)? {
13779 expansion_edges_by_key
13780 .entry(graph_db_edge_key(&edge))
13781 .or_insert(edge);
13782 }
13783 let mut expansion_edges = expansion_edges_by_key.into_values().collect::<Vec<_>>();
13784 expansion_edges.sort_by(|left, right| {
13785 graph_db_semantic_edge_score(right, ¤t_id)
13786 .cmp(&graph_db_semantic_edge_score(left, ¤t_id))
13787 .then_with(|| graph_db_edge_key(left).cmp(&graph_db_edge_key(right)))
13788 });
13789 if edge_scan_cap > 0 && expansion_edges.len() > edge_scan_cap {
13790 skipped_by_edge_cap += expansion_edges.len() - edge_scan_cap;
13791 expansion_edges.truncate(edge_scan_cap);
13792 }
13793
13794 for edge in expansion_edges {
13795 let Some(other_id) = graph_db_semantic_edge_other_id(&edge, ¤t_id) else {
13796 continue;
13797 };
13798 let other_known = nodes.contains_key(other_id);
13799 if !other_known && nodes.len() >= node_discovery_cap {
13800 skipped_by_node_cap += 1;
13801 continue;
13802 }
13803 let other_id = other_id.to_string();
13804 let edge_score = graph_db_semantic_edge_score(&edge, ¤t_id)
13805 .saturating_add((depth.saturating_sub(current_depth) as i64).saturating_mul(5));
13806 node_score_by_id
13807 .entry(other_id.clone())
13808 .and_modify(|score| *score = (*score).max(edge_score))
13809 .or_insert(edge_score);
13810 let edge_key = graph_db_edge_key(&edge);
13811 edges.entry(edge_key).or_insert_with(|| edge.clone());
13812 if let std::collections::btree_map::Entry::Vacant(entry) = nodes.entry(other_id.clone())
13813 && let Some(node) = store.node(&other_id)?
13814 {
13815 entry.insert(node);
13816 }
13817 if !nodes.contains_key(&other_id) {
13818 continue;
13819 }
13820 let next_depth = current_depth + 1;
13821 let should_queue = seen_at_depth
13822 .get(&other_id)
13823 .is_none_or(|seen_depth| next_depth < *seen_depth);
13824 if should_queue {
13825 seen_at_depth.insert(other_id.clone(), next_depth);
13826 queue.push_back((other_id, next_depth));
13827 }
13828 }
13829 }
13830
13831 if skipped_by_edge_cap > 0 {
13832 diagnostics.push(format!(
13833 "semantic-seeded expansion skipped {skipped_by_edge_cap} lower-scoring incident/outgoing edge(s) after per-node caps"
13834 ));
13835 }
13836 if skipped_by_node_cap > 0 {
13837 diagnostics.push(format!(
13838 "semantic-seeded expansion skipped {skipped_by_node_cap} lower-scoring node discovery edge(s) after the discovery cap"
13839 ));
13840 }
13841
13842 let mut nodes = nodes.into_values().collect::<Vec<_>>();
13843 nodes.sort_by(|left, right| {
13844 seed_rank
13845 .get(&left.id)
13846 .copied()
13847 .unwrap_or(usize::MAX)
13848 .cmp(&seed_rank.get(&right.id).copied().unwrap_or(usize::MAX))
13849 .then_with(|| {
13850 node_score_by_id
13851 .get(&right.id)
13852 .copied()
13853 .unwrap_or_default()
13854 .cmp(&node_score_by_id.get(&left.id).copied().unwrap_or_default())
13855 })
13856 .then(left.id.cmp(&right.id))
13857 });
13858
13859 let before_limit = nodes.len();
13860 let truncated = limit > 0 && nodes.len() > limit;
13861 if truncated {
13862 nodes.truncate(limit);
13863 diagnostics.push(format!(
13864 "semantic-seeded neighborhood truncated from {before_limit} to {limit} node(s)"
13865 ));
13866 }
13867
13868 let node_ids = nodes
13869 .iter()
13870 .map(|node| node.id.as_str())
13871 .collect::<BTreeSet<_>>();
13872 let mut edges = edges
13873 .into_values()
13874 .filter(|edge| {
13875 node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
13876 })
13877 .collect::<Vec<_>>();
13878 edges.sort_by_key(graph_db_edge_key);
13879
13880 Ok(GraphDbSemanticSeededSubgraph {
13881 nodes,
13882 edges,
13883 truncated,
13884 diagnostics,
13885 })
13886}
13887
13888#[allow(clippy::too_many_arguments)]
13889fn cmd_semantic_related(
13890 query: &str,
13891 path: &Path,
13892 scope: Option<&str>,
13893 limit: usize,
13894 kind: SemanticRelatedKind,
13895 json_output: bool,
13896 compact: bool,
13897 pretty: bool,
13898 terse: bool,
13899 schema: bool,
13900) -> Result<()> {
13901 let root = lint::resolve_project_root_or_canonical_path(path)?;
13902 write_traversal_graph_store(&root, path, scope)?;
13903 let graph_db = graph_substrate_db_path(&root, scope);
13904 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
13905 let mut report = semantic_related_report_from_store(&root, scope, query, limit, kind, &store)?;
13906 if let Some(recovery) = store.read_only_recovery() {
13907 report
13908 .warnings
13909 .push(graph_db_read_recovery_diagnostic(recovery));
13910 }
13911
13912 if json_output {
13913 println!("{}", to_json_schema(&report, pretty, terse, schema)?);
13914 } else if compact {
13915 for item in &report.items {
13916 println!(
13917 "{:.3}\t{}\t{}\t{}",
13918 item.score, item.kind, item.label, item.handle
13919 );
13920 }
13921 for warning in &report.warnings {
13922 eprintln!("warning: {warning}");
13923 }
13924 } else {
13925 println!(
13926 "Related semantic graph rows for {:?} ({})",
13927 report.query, report.embedding_model
13928 );
13929 for item in &report.items {
13930 println!(
13931 " {:.3} [{}] {} ({})",
13932 item.score, item.kind, item.label, item.handle
13933 );
13934 if let Some(detail) = &item.detail {
13935 println!(" {}", detail);
13936 }
13937 if let Some(file_path) = &item.file_path {
13938 println!(" file: {}", file_path);
13939 }
13940 println!(" expand: {}", item.expand);
13941 }
13942 for warning in &report.warnings {
13943 eprintln!("warning: {warning}");
13944 }
13945 }
13946
13947 Ok(())
13948}
13949
/// One line of a source preview, serialized as part of [`SourceReadReport`].
#[derive(Serialize)]
struct SourceLinePreview {
    /// Line number of this preview line.
    /// NOTE(review): presumably one-based like the other source-read line
    /// fields — confirm against the code that fills it.
    line: usize,
    /// Text of the line.
    text: String,
}
13955
/// The line range covered by a source preview, plus whether the file
/// continues beyond it on either side.
#[derive(Serialize)]
struct SourceRangePreview {
    /// First line of the previewed range.
    start: usize,
    /// Last line of the previewed range.
    /// NOTE(review): presumably inclusive — confirm against the producer.
    end: usize,
    /// Total number of lines in the file.
    total_lines: usize,
    /// Whether file content before `start` was left out of the preview.
    truncated_before: bool,
    /// Whether file content after `end` was left out of the preview.
    truncated_after: bool,
}
13964
/// Follow-up commands a reader can run to see more of a previewed file.
#[derive(Serialize)]
struct SourceExpandCommands {
    /// Command for the lines before the preview; omitted from the serialized
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    before: Option<String>,
    /// Command for the lines after the preview; omitted from the serialized
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    after: Option<String>,
    /// Command for the file as a whole.
    /// NOTE(review): exact scope is inferred from the field name — confirm.
    file: String,
}
13973
/// An indexed symbol that overlaps a previewed source range.
#[derive(Serialize)]
struct SourceSymbolRef {
    /// Stable handle identifying this symbol reference.
    handle: String,
    /// Symbol name (may be truncated to the response budget by the producer).
    name: String,
    /// Symbol kind as stored in the index.
    kind: String,
    /// Language recorded for the symbol in the index.
    language: String,
    /// Display path of the file that contains the symbol.
    file: String,
    /// One-based line where the symbol starts.
    line: usize,
    /// One-based line where the symbol ends; omitted from the serialized
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    end_line: Option<usize>,
    /// Symbol signature; omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    signature: Option<String>,
    /// Command that expands this symbol into a fuller explanation.
    expand: String,
}
13988
/// A stored summary attached to a source-read report.
#[derive(Serialize)]
struct SourceSummaryRef {
    /// Stable handle identifying this summary reference.
    handle: String,
    /// Name of the symbol the summary describes.
    symbol_name: String,
    /// Path of the file the summary belongs to.
    file_path: String,
    /// Summary text.
    summary: String,
    /// Command that expands this summary.
    expand: String,
}
13997
/// Serialized result of a source-read request: a line preview of one file plus
/// the symbols, summaries, and follow-up commands that go with it.
#[derive(Serialize)]
struct SourceReadReport {
    /// Stable handle identifying this report.
    handle: String,
    /// Project root the file was resolved against.
    root: String,
    /// File that was read.
    file: String,
    /// Line range covered by `preview` and its truncation flags.
    range: SourceRangePreview,
    /// The previewed lines.
    preview: Vec<SourceLinePreview>,
    /// Symbol references for the file.
    symbols: Vec<SourceSymbolRef>,
    /// Summary references for the file.
    summaries: Vec<SourceSummaryRef>,
    /// Commands for reading more of the file.
    expand: SourceExpandCommands,
    /// Non-fatal problems met while building the report; omitted from the
    /// serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
14011
14012fn resolve_source_file(root: &Path, file: &Path) -> Result<PathBuf> {
14013 let candidate = if file.is_absolute() {
14014 file.to_path_buf()
14015 } else {
14016 root.join(file)
14017 };
14018 let canonical = candidate
14019 .canonicalize()
14020 .with_context(|| format!("canonicalizing source file {}", candidate.display()))?;
14021 if !canonical.is_file() {
14022 bail!("source file is not a regular file: {}", canonical.display());
14023 }
14024 let canonical_root = root
14025 .canonicalize()
14026 .with_context(|| format!("canonicalizing project root {}", root.display()))?;
14027 if !canonical.starts_with(&canonical_root) {
14028 bail!(
14029 "source file {} is outside project root {}",
14030 canonical.display(),
14031 canonical_root.display()
14032 );
14033 }
14034 Ok(canonical)
14035}
14036
14037fn source_read_command(root: &Path, file: &str, start: usize, lines: usize) -> String {
14038 format!(
14039 "tsift source-read {} --path {} --start {} --lines {} --budget normal",
14040 shell_quote(file),
14041 shell_quote(&root.to_string_lossy()),
14042 start,
14043 lines
14044 )
14045}
14046
14047fn source_symbol_expand_command(root: &Path, symbol: &str) -> String {
14048 format!(
14049 "tsift --envelope explain {} --path {} --budget normal",
14050 shell_quote(symbol),
14051 shell_quote(&root.to_string_lossy())
14052 )
14053}
14054
14055fn source_summary_expand_command(root: &Path, symbol: &str) -> String {
14056 format!(
14057 "tsift summarize {} --path {} --json",
14058 shell_quote(symbol),
14059 shell_quote(&root.to_string_lossy())
14060 )
14061}
14062
14063fn source_symbol_line(symbol: &index::StoredSymbol) -> usize {
14064 usize::try_from(symbol.line)
14065 .ok()
14066 .and_then(|line| line.checked_add(1))
14067 .unwrap_or(1)
14068}
14069
14070fn source_symbol_end_line(symbol: &index::StoredSymbol) -> Option<usize> {
14071 symbol
14072 .end_line
14073 .and_then(|line| usize::try_from(line).ok())
14074 .and_then(|line| line.checked_add(1))
14075}
14076
14077fn source_symbol_intersects(symbol: &index::StoredSymbol, start: usize, end: usize) -> bool {
14078 if end == 0 {
14079 return false;
14080 }
14081 let symbol_start = source_symbol_line(symbol);
14082 let symbol_end = source_symbol_end_line(symbol).unwrap_or(symbol_start);
14083 symbol_start <= end && symbol_end >= start
14084}
14085
14086#[allow(clippy::too_many_arguments)]
14087fn load_source_symbols(
14088 root: &Path,
14089 file_abs: &Path,
14090 file_display: &str,
14091 scope: Option<&str>,
14092 start: usize,
14093 end: usize,
14094 limit: usize,
14095 max_bytes: usize,
14096 warnings: &mut Vec<String>,
14097) -> Vec<SourceSymbolRef> {
14098 let db_path = match resolve_query_db_path(root, file_abs, scope) {
14099 Ok(path) => path,
14100 Err(err) => {
14101 warnings.push(format!("index refs unavailable: {err:#}"));
14102 return Vec::new();
14103 }
14104 };
14105 if !db_path.exists() {
14106 warnings.push(format!(
14107 "index refs unavailable: no index found at {}",
14108 db_path.display()
14109 ));
14110 return Vec::new();
14111 }
14112
14113 let db = match index::IndexDb::open_read_only_resilient(&db_path) {
14114 Ok(db) => db,
14115 Err(err) => {
14116 warnings.push(format!("index refs unavailable: {err:#}"));
14117 return Vec::new();
14118 }
14119 };
14120
14121 let file_key = file_abs.to_string_lossy().to_string();
14122 let symbols = match db.symbols_for_file(&file_key) {
14123 Ok(symbols) => symbols,
14124 Err(err) => {
14125 warnings.push(format!("symbol refs unavailable: {err:#}"));
14126 return Vec::new();
14127 }
14128 };
14129
14130 symbols
14131 .into_iter()
14132 .filter(|symbol| source_symbol_intersects(symbol, start, end))
14133 .take(limit)
14134 .map(|symbol| {
14135 let line = source_symbol_line(&symbol);
14136 let end_line = source_symbol_end_line(&symbol);
14137 let handle = stable_handle(
14138 "ssym",
14139 &format!("{}:{}:{}", file_display, symbol.name, line),
14140 );
14141 SourceSymbolRef {
14142 handle,
14143 name: truncate_for_budget(&symbol.name, max_bytes),
14144 kind: symbol.kind,
14145 language: symbol.language,
14146 file: file_display.to_string(),
14147 line,
14148 end_line,
14149 signature: symbol
14150 .signature
14151 .map(|signature| truncate_for_budget(&signature, max_bytes)),
14152 expand: source_symbol_expand_command(root, &symbol.name),
14153 }
14154 })
14155 .collect()
14156}
14157
14158fn load_source_summaries(
14159 root: &Path,
14160 file_display: &str,
14161 limit: usize,
14162 max_bytes: usize,
14163 warnings: &mut Vec<String>,
14164) -> Vec<SourceSummaryRef> {
14165 let db_path = root.join(".tsift/summaries.db");
14166 if !db_path.exists() {
14167 return Vec::new();
14168 }
14169 let db = match summarize::SummaryDb::open_read_only_resilient(&db_path) {
14170 Ok(db) => db,
14171 Err(err) => {
14172 warnings.push(format!("summary refs unavailable: {err:#}"));
14173 return Vec::new();
14174 }
14175 };
14176 let summaries = match db.get_by_file(file_display) {
14177 Ok(summaries) => summaries,
14178 Err(err) => {
14179 warnings.push(format!("summary refs unavailable: {err:#}"));
14180 return Vec::new();
14181 }
14182 };
14183
14184 summaries
14185 .into_iter()
14186 .take(limit)
14187 .map(|summary| SourceSummaryRef {
14188 handle: stable_handle(
14189 "sum",
14190 &format!(
14191 "{}:{}:{}",
14192 summary.file_path, summary.symbol_name, summary.id
14193 ),
14194 ),
14195 symbol_name: truncate_for_budget(&summary.symbol_name, max_bytes),
14196 file_path: summary.file_path,
14197 summary: truncate_for_budget(&summary.summary, max_bytes),
14198 expand: source_summary_expand_command(root, &summary.symbol_name),
14199 })
14200 .collect()
14201}
14202
14203#[allow(clippy::too_many_arguments)]
14204fn cmd_source_read(
14205 file: &Path,
14206 path: &Path,
14207 start: usize,
14208 lines: usize,
14209 end: Option<usize>,
14210 scope: Option<&str>,
14211 format: OutputFormat,
14212 absolute: bool,
14213 budget: ResponseBudget,
14214) -> Result<()> {
14215 if start == 0 {
14216 bail!("--start is 1-based and must be greater than zero");
14217 }
14218 if lines == 0 {
14219 bail!("--lines must be greater than zero");
14220 }
14221 if let Some(end) = end
14222 && end < start
14223 {
14224 bail!("--end must be greater than or equal to --start");
14225 }
14226
14227 let root = lint::resolve_project_root_or_canonical_path(path)?;
14228 let file_abs = resolve_source_file(&root, file)?;
14229 let file_display = if absolute {
14230 file_abs.to_string_lossy().to_string()
14231 } else {
14232 relativize_pathbuf(&file_abs, &root)
14233 .to_string_lossy()
14234 .to_string()
14235 };
14236
14237 let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
14238 let text = String::from_utf8_lossy(&source);
14239 let all_lines: Vec<&str> = text.lines().collect();
14240 let total_lines = all_lines.len();
14241 if total_lines > 0 && start > total_lines {
14242 bail!(
14243 "--start {} is beyond end of {} ({} lines)",
14244 start,
14245 file_display,
14246 total_lines
14247 );
14248 }
14249 let requested_end = end.unwrap_or_else(|| start.saturating_add(lines).saturating_sub(1));
14250 let end_line = requested_end.min(total_lines);
14251 let max_bytes = budget.preview_bytes();
14252 let preview = if total_lines == 0 {
14253 Vec::new()
14254 } else {
14255 all_lines[(start - 1)..end_line]
14256 .iter()
14257 .enumerate()
14258 .map(|(idx, line)| SourceLinePreview {
14259 line: start + idx,
14260 text: truncate_for_budget(line, max_bytes),
14261 })
14262 .collect()
14263 };
14264
14265 let mut warnings = Vec::new();
14266 let max_items = budget.preview_items();
14267 let symbols = load_source_symbols(
14268 &root,
14269 &file_abs,
14270 &file_display,
14271 scope,
14272 start,
14273 end_line,
14274 max_items,
14275 max_bytes,
14276 &mut warnings,
14277 );
14278 let summaries =
14279 load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
14280
14281 let effective_lines = end_line.saturating_sub(start).saturating_add(1).max(1);
14282 let expand = SourceExpandCommands {
14283 before: (start > 1).then(|| {
14284 let before_start = start.saturating_sub(lines).max(1);
14285 source_read_command(&root, &file_display, before_start, start - before_start)
14286 }),
14287 after: (end_line < total_lines)
14288 .then(|| source_read_command(&root, &file_display, end_line + 1, lines)),
14289 file: source_read_command(&root, &file_display, 1, total_lines.max(effective_lines)),
14290 };
14291
14292 let report = SourceReadReport {
14293 handle: stable_handle("swin", &format!("{file_display}:{start}:{end_line}")),
14294 root: root.to_string_lossy().to_string(),
14295 file: file_display,
14296 range: SourceRangePreview {
14297 start,
14298 end: end_line,
14299 total_lines,
14300 truncated_before: start > 1,
14301 truncated_after: end_line < total_lines,
14302 },
14303 preview,
14304 symbols,
14305 summaries,
14306 expand,
14307 warnings,
14308 };
14309
14310 if format.json_output {
14311 let truncated = report.range.truncated_before || report.range.truncated_after;
14312 let follow_up = [
14313 report.expand.before.clone(),
14314 report.expand.after.clone(),
14315 Some(report.expand.file.clone()),
14316 ]
14317 .into_iter()
14318 .flatten()
14319 .collect::<Vec<_>>();
14320 print_json_or_envelope(
14321 &report,
14322 &format,
14323 "source-read",
14324 "window",
14325 ToolEnvelopeSummary {
14326 text: format!(
14327 "source window {}:{}-{}",
14328 report.file, report.range.start, report.range.end
14329 ),
14330 metrics: vec![
14331 envelope_metric("lines", report.preview.len()),
14332 envelope_metric("symbols", report.symbols.len()),
14333 envelope_metric("summaries", report.summaries.len()),
14334 ],
14335 },
14336 truncated,
14337 follow_up,
14338 )?;
14339 } else if format.compact {
14340 println!(
14341 "source {}:{}-{} / {} handle:{}",
14342 report.file,
14343 report.range.start,
14344 report.range.end,
14345 report.range.total_lines,
14346 report.handle
14347 );
14348 for line in &report.preview {
14349 println!("{:>5} {}", line.line, line.text);
14350 }
14351 if !report.symbols.is_empty() {
14352 println!("syms[{}]:", report.symbols.len());
14353 for symbol in &report.symbols {
14354 println!(" {} {}:{}", symbol.name, symbol.file, symbol.line);
14355 }
14356 }
14357 if report.range.truncated_before || report.range.truncated_after {
14358 println!("expand: {}", report.expand.file);
14359 }
14360 } else {
14361 println!(
14362 "Source window `{}` lines {}-{} of {} ({})",
14363 report.file,
14364 report.range.start,
14365 report.range.end,
14366 report.range.total_lines,
14367 report.handle
14368 );
14369 for line in &report.preview {
14370 println!("{:>5} | {}", line.line, line.text);
14371 }
14372 if !report.symbols.is_empty() {
14373 println!();
14374 println!("Symbol refs:");
14375 for symbol in &report.symbols {
14376 println!(
14377 " {} `{}` {}:{} — {}",
14378 symbol.handle, symbol.name, symbol.file, symbol.line, symbol.expand
14379 );
14380 }
14381 }
14382 if !report.summaries.is_empty() {
14383 println!();
14384 println!("Summary refs:");
14385 for summary in &report.summaries {
14386 println!(
14387 " {} `{}` — {}",
14388 summary.handle, summary.symbol_name, summary.expand
14389 );
14390 }
14391 }
14392 if report.range.truncated_before || report.range.truncated_after {
14393 println!();
14394 println!("Expand:");
14395 if let Some(before) = &report.expand.before {
14396 println!(" before: {}", before);
14397 }
14398 if let Some(after) = &report.expand.after {
14399 println!(" after: {}", after);
14400 }
14401 println!(" file: {}", report.expand.file);
14402 }
14403 for warning in &report.warnings {
14404 eprintln!("warning: {warning}");
14405 }
14406 }
14407
14408 Ok(())
14409}
14410
14411#[allow(clippy::too_many_arguments)]
14412#[derive(Serialize)]
14413struct ExplainBudgetDefinitionPreview {
14414 handle: String,
14415 #[serde(skip_serializing_if = "Option::is_none")]
14416 tag_alias: Option<String>,
14417 kind: String,
14418 name: String,
14419 file: String,
14420 line: i64,
14421 expand: String,
14422}
14423
/// One caller or callee row in an [`ExplainBudgetReport`].
///
/// Built by `build_explain_budget_report` from a stored call edge; `tag_alias` is omitted
/// from the serialized output when it is `None`.
#[derive(Serialize)]
struct ExplainBudgetEdgePreview {
    /// Compact handle produced for this edge.
    handle: String,
    /// Optional tag alias produced alongside the handle.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Caller or callee name as returned by the compact symbol ref.
    name: String,
    /// File of the calling side of the edge, truncated to the budget's byte limit.
    file: String,
    /// Line of the call site.
    line: i64,
    /// Follow-up `tsift explain` command for this edge.
    expand: String,
}
14434
/// Budgeted view of the community attached to an [`ExplainBudgetReport`].
#[derive(Serialize)]
struct ExplainBudgetCommunityPreview {
    /// Total number of members in the community, before any preview limit.
    size: usize,
    /// Names of the previewed members, each truncated to the budget's byte limit.
    members: Vec<String>,
}
14440
/// Budget-limited explain report produced by `build_explain_budget_report` and rendered by
/// `print_explain_budget_human`.
#[derive(Serialize)]
struct ExplainBudgetReport {
    /// The symbol that was explained.
    symbol: String,
    /// Maximum number of preview items per section, taken from the response budget.
    max_items: usize,
    /// Maximum number of preview bytes per text field, taken from the response budget.
    max_bytes: usize,
    /// Number of definitions supplied, before the preview limit.
    definition_total: usize,
    /// Total caller count as supplied by the caller of the builder.
    callers_total: usize,
    /// Passed through unchanged from the builder's arguments.
    callers_truncated_by_limit: bool,
    /// Total callee count as supplied by the caller of the builder.
    callees_total: usize,
    /// Passed through unchanged from the builder's arguments.
    callees_truncated_by_limit: bool,
    /// True when any section holds fewer preview rows than its total.
    truncated: bool,
    /// Previewed definitions, at most `max_items`.
    definitions: Vec<ExplainBudgetDefinitionPreview>,
    /// Previewed callers, at most `max_items`.
    callers: Vec<ExplainBudgetEdgePreview>,
    /// Previewed callees, at most `max_items`.
    callees: Vec<ExplainBudgetEdgePreview>,
    /// Community preview; omitted from the serialized output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    community: Option<ExplainBudgetCommunityPreview>,
}
14458
14459#[allow(clippy::too_many_arguments)]
14460pub(crate) fn build_explain_budget_report(
14461 symbol: &str,
14462 _root: &Path,
14463 symbols: &[index::StoredSymbol],
14464 callers: &[index::StoredEdge],
14465 callers_total: usize,
14466 callers_truncated_by_limit: bool,
14467 callees: &[index::StoredEdge],
14468 callees_total: usize,
14469 callees_truncated_by_limit: bool,
14470 community: Option<&graph::Community>,
14471 budget: ResponseBudget,
14472) -> ExplainBudgetReport {
14473 let max_items = budget.preview_items();
14474 let max_bytes = budget.preview_bytes();
14475 let definitions = symbols
14476 .iter()
14477 .take(max_items)
14478 .map(|entry| {
14479 let symbol_ref = build_compact_symbol_ref(
14480 "edef",
14481 &format!(
14482 "{}:{}:{}:{}",
14483 entry.kind, entry.name, entry.file, entry.line
14484 ),
14485 &entry.name,
14486 entry.tags.as_deref(),
14487 max_bytes,
14488 );
14489 ExplainBudgetDefinitionPreview {
14490 handle: symbol_ref.handle,
14491 tag_alias: symbol_ref.tag_alias,
14492 kind: entry.kind.clone(),
14493 name: symbol_ref.name,
14494 file: truncate_for_budget(&entry.file, max_bytes),
14495 line: entry.line,
14496 expand: format!(
14497 "tsift search {} --exact --path {} --limit 20",
14498 shell_quote(&entry.name),
14499 shell_quote(&entry.file)
14500 ),
14501 }
14502 })
14503 .collect();
14504 let callers_preview: Vec<ExplainBudgetEdgePreview> = callers
14505 .iter()
14506 .take(max_items)
14507 .map(|entry| {
14508 let symbol_ref = build_compact_symbol_ref(
14509 "ecall",
14510 &format!(
14511 "{}:{}:{}:{}",
14512 entry.caller_name, entry.caller_file, entry.call_site_line, symbol
14513 ),
14514 &entry.caller_name,
14515 None,
14516 max_bytes,
14517 );
14518 ExplainBudgetEdgePreview {
14519 handle: symbol_ref.handle,
14520 tag_alias: symbol_ref.tag_alias,
14521 name: symbol_ref.name,
14522 file: truncate_for_budget(&entry.caller_file, max_bytes),
14523 line: entry.call_site_line,
14524 expand: format!(
14525 "tsift explain {} --path {} --limit 0",
14526 shell_quote(&entry.caller_name),
14527 shell_quote(&entry.caller_file)
14528 ),
14529 }
14530 })
14531 .collect();
14532 let callees_preview: Vec<ExplainBudgetEdgePreview> = callees
14533 .iter()
14534 .take(max_items)
14535 .map(|entry| {
14536 let symbol_ref = build_compact_symbol_ref(
14537 "eces",
14538 &format!(
14539 "{}:{}:{}:{}",
14540 entry.callee_name, entry.caller_file, entry.call_site_line, symbol
14541 ),
14542 &entry.callee_name,
14543 None,
14544 max_bytes,
14545 );
14546 ExplainBudgetEdgePreview {
14547 handle: symbol_ref.handle,
14548 tag_alias: symbol_ref.tag_alias,
14549 name: symbol_ref.name,
14550 file: truncate_for_budget(&entry.caller_file, max_bytes),
14551 line: entry.call_site_line,
14552 expand: format!(
14553 "tsift explain {} --path {} --limit 0",
14554 shell_quote(&entry.callee_name),
14555 shell_quote(&entry.caller_file)
14556 ),
14557 }
14558 })
14559 .collect();
14560 let community_preview = community.map(|entry| ExplainBudgetCommunityPreview {
14561 size: entry.members.len(),
14562 members: entry
14563 .members
14564 .iter()
14565 .take(max_items)
14566 .map(|member| truncate_for_budget(&member.name, max_bytes))
14567 .collect(),
14568 });
14569
14570 ExplainBudgetReport {
14571 symbol: symbol.to_string(),
14572 max_items,
14573 max_bytes,
14574 definition_total: symbols.len(),
14575 callers_total,
14576 callers_truncated_by_limit,
14577 callees_total,
14578 callees_truncated_by_limit,
14579 truncated: symbols.len() > max_items
14580 || callers_total > callers_preview.len()
14581 || callees_total > callees_preview.len()
14582 || community
14583 .map(|entry| entry.members.len() > max_items)
14584 .unwrap_or(false),
14585 definitions,
14586 callers: callers_preview,
14587 callees: callees_preview,
14588 community: community_preview,
14589 }
14590}
14591
14592pub(crate) fn print_explain_budget_human(report: &ExplainBudgetReport) {
14593 println!(
14594 "explain-budget sym:{} defs:{}/{} crs:{}/{} ces:{}/{}",
14595 shell_quote(&report.symbol),
14596 report.definitions.len(),
14597 report.definition_total,
14598 report.callers.len(),
14599 report.callers_total,
14600 report.callees.len(),
14601 report.callees_total
14602 );
14603 for entry in &report.definitions {
14604 println!(
14605 "def {} {} {}:{} expand:{}",
14606 format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
14607 entry.kind,
14608 entry.file,
14609 entry.line,
14610 entry.expand
14611 );
14612 }
14613 for entry in &report.callers {
14614 println!(
14615 "caller {} {}:{} expand:{}",
14616 format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
14617 entry.file,
14618 entry.line,
14619 entry.expand
14620 );
14621 }
14622 for entry in &report.callees {
14623 println!(
14624 "callee {} {}:{} expand:{}",
14625 format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
14626 entry.file,
14627 entry.line,
14628 entry.expand
14629 );
14630 }
14631 if let Some(community) = &report.community {
14632 println!(
14633 "community size:{} members:{}",
14634 community.size,
14635 community.members.join(", ")
14636 );
14637 }
14638 if report.truncated {
14639 println!(
14640 "budget truncated items:{} bytes:{}",
14641 report.max_items, report.max_bytes
14642 );
14643 }
14644}
14645
/// Directory names that make `tagpath_audit_policy_hints` emit a `skip_dir:<name>` hint when
/// they appear among the parent components of a path.
const TAGPATH_AUDIT_SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    "__pycache__",
    ".venv",
    "vendor",
];
14663
/// Built-in file extensions (lowercase, without the leading dot) that seed the set returned
/// by `tagpath_audit_supported_extensions`.
const TAGPATH_AUDIT_SOURCE_EXTENSIONS: &[&str] = &[
    "rs", "py", "ts", "js", "go", "java", "rb", "c", "cpp", "h", "hpp", "cs", "swift", "kt",
    "scala", "zig", "nim", "ex", "exs", "erl", "hs", "ml", "clj", "r", "lua", "php", "pl", "d",
    "cr", "dart", "jl", "v", "odin", "gleam", "rkt", "scm", "lisp", "lsp", "f", "fs", "fsi", "fsx",
    "sh", "bash", "zsh", "sql", "css", "tsx",
];
14670
14671pub(crate) fn tagpath_audit_supported_extensions(root: &Path) -> BTreeSet<String> {
14672 let mut extensions = TAGPATH_AUDIT_SOURCE_EXTENSIONS
14673 .iter()
14674 .map(|ext| (*ext).to_string())
14675 .collect::<BTreeSet<_>>();
14676
14677 let config_path = root.join(".naming.toml");
14678 if !config_path.exists() {
14679 return extensions;
14680 }
14681
14682 match tagpath::config::resolve(&config_path) {
14683 Ok(config) => {
14684 if let Some(grammars) = config.grammars {
14685 for grammar in grammars.languages.values() {
14686 for ext in &grammar.extensions {
14687 if let Some(normalized) = normalize_extension(ext) {
14688 extensions.insert(normalized);
14689 }
14690 }
14691 }
14692 }
14693 }
14694 Err(err) => {
14695 eprintln!("tagpath_policy_hint_config_unreadable: {err}");
14696 }
14697 }
14698 extensions
14699}
14700
14701pub(crate) fn tagpath_audit_policy_hints(
14702 rel_path: &str,
14703 supported_extensions: &BTreeSet<String>,
14704) -> Vec<String> {
14705 let path = Path::new(rel_path);
14706 let mut hints = BTreeSet::new();
14707 if let Some(parent) = path.parent() {
14708 for component in parent.components() {
14709 if let std::path::Component::Normal(name) = component {
14710 let name = name.to_string_lossy();
14711 if TAGPATH_AUDIT_SKIP_DIRS.contains(&name.as_ref()) {
14712 hints.insert(format!("skip_dir:{name}"));
14713 }
14714 }
14715 }
14716 }
14717 if path
14718 .extension()
14719 .and_then(|ext| ext.to_str())
14720 .and_then(normalize_extension)
14721 .is_some_and(|ext| !supported_extensions.contains(&ext))
14722 {
14723 hints.insert("extension_unsupported".to_string());
14724 }
14725 hints.into_iter().collect()
14726}
14727
/// Normalizes a file extension: trims surrounding whitespace, strips leading dots, and
/// lowercases ASCII letters.
///
/// Returns `None` when nothing is left after trimming and stripping.
fn normalize_extension(ext: &str) -> Option<String> {
    let bare = ext.trim().trim_start_matches('.');
    // ASCII lowercasing never changes the length, so emptiness can be checked first.
    (!bare.is_empty()).then(|| bare.to_ascii_lowercase())
}
14736
14737pub(crate) fn diff_digest_status_label(status: diff_digest::DiffDigestFileStatus) -> &'static str {
14738 match status {
14739 diff_digest::DiffDigestFileStatus::Added => "added",
14740 diff_digest::DiffDigestFileStatus::Modified => "modified",
14741 diff_digest::DiffDigestFileStatus::Deleted => "deleted",
14742 }
14743}
14744
14745pub(crate) fn diff_digest_summary_label(
14746 state: diff_digest::DiffDigestSummaryState,
14747) -> &'static str {
14748 match state {
14749 diff_digest::DiffDigestSummaryState::Current => "current",
14750 diff_digest::DiffDigestSummaryState::Stale => "stale",
14751 diff_digest::DiffDigestSummaryState::Missing => "missing",
14752 diff_digest::DiffDigestSummaryState::Unavailable => "unavailable",
14753 }
14754}
14755
14756fn test_digest_summary_label(state: test_digest::TestDigestSummaryState) -> &'static str {
14757 match state {
14758 test_digest::TestDigestSummaryState::Current => "current",
14759 test_digest::TestDigestSummaryState::Stale => "stale",
14760 test_digest::TestDigestSummaryState::Missing => "missing",
14761 test_digest::TestDigestSummaryState::Unavailable => "unavailable",
14762 }
14763}
14764
14765fn log_digest_summary_label(state: log_digest::LogDigestSummaryState) -> &'static str {
14766 match state {
14767 log_digest::LogDigestSummaryState::Current => "current",
14768 log_digest::LogDigestSummaryState::Stale => "stale",
14769 log_digest::LogDigestSummaryState::Missing => "missing",
14770 log_digest::LogDigestSummaryState::Unavailable => "unavailable",
14771 }
14772}
14773
14774pub(crate) fn diff_digest_mode_label(mode: diff_digest::DiffDigestMode) -> &'static str {
14775 match mode {
14776 diff_digest::DiffDigestMode::WorkingTree => "worktree",
14777 diff_digest::DiffDigestMode::Cached => "cached",
14778 diff_digest::DiffDigestMode::Revision => "revision",
14779 }
14780}
14781
14782pub(crate) fn diff_digest_mode_display(report: &diff_digest::DiffDigestReport) -> String {
14783 match (&report.mode, &report.revision) {
14784 (diff_digest::DiffDigestMode::WorkingTree, _) => "working tree".to_string(),
14785 (diff_digest::DiffDigestMode::Cached, _) => "staged index".to_string(),
14786 (diff_digest::DiffDigestMode::Revision, Some(revision)) => {
14787 format!("revision {revision}")
14788 }
14789 (diff_digest::DiffDigestMode::Revision, None) => "revision".to_string(),
14790 }
14791}
14792
14793pub(crate) fn diff_digest_empty_message(report: &diff_digest::DiffDigestReport) -> String {
14794 match (&report.mode, &report.revision) {
14795 (diff_digest::DiffDigestMode::WorkingTree, _) => "No git changes found.".to_string(),
14796 (diff_digest::DiffDigestMode::Cached, _) => "No staged git changes found.".to_string(),
14797 (diff_digest::DiffDigestMode::Revision, Some(revision)) => {
14798 format!("No diff found for revision {revision}.")
14799 }
14800 (diff_digest::DiffDigestMode::Revision, None) => "No revision diff found.".to_string(),
14801 }
14802}
14803
14804fn cmd_impact(
14805 path: &Path,
14806 cached: bool,
14807 revision: Option<&str>,
14808 scope: Option<&str>,
14809 limit: usize,
14810 format: OutputFormat,
14811) -> Result<()> {
14812 let report = impact::compute(
14813 path,
14814 impact::ImpactOptions {
14815 cached,
14816 revision,
14817 scope,
14818 limit,
14819 },
14820 )?;
14821 if format.json_output {
14822 println!(
14823 "{}",
14824 to_json_schema(&report, format.pretty, format.terse, format.schema)?
14825 );
14826 return Ok(());
14827 }
14828
14829 if format.compact {
14830 println!(
14831 "impact mode:{} changed:{} symbols:{} tests:{}/{}",
14832 diff_digest_mode_label(report.mode),
14833 report.changed_files.len(),
14834 report.changed_symbols.len(),
14835 report.affected_tests.len(),
14836 report.affected_tests_total
14837 );
14838 for target in &report.affected_tests {
14839 println!(
14840 "{} reasons:{} command:{}",
14841 target.path,
14842 target.reasons.len(),
14843 target.commands.join(" && ")
14844 );
14845 }
14846 for warning in &report.warnings {
14847 println!("warning {warning}");
14848 }
14849 return Ok(());
14850 }
14851
14852 println!("Impact ({})", diff_digest_mode_label(report.mode));
14853 println!(" changed files: {}", report.changed_files.len());
14854 println!(" changed symbols: {}", report.changed_symbols.len());
14855 println!(
14856 " affected tests: {}/{}",
14857 report.affected_tests.len(),
14858 report.affected_tests_total
14859 );
14860 for target in &report.affected_tests {
14861 println!();
14862 println!("{}", target.path);
14863 for reason in &target.reasons {
14864 println!(" - {reason}");
14865 }
14866 if !target.symbols.is_empty() {
14867 println!(" symbols: {}", target.symbols.join(", "));
14868 }
14869 for command in &target.commands {
14870 println!(" run: {}", command);
14871 }
14872 }
14873 for warning in &report.warnings {
14874 println!("warning: {warning}");
14875 }
14876 Ok(())
14877}
14878
14879pub(crate) fn render_test_digest_from_input(
14880 path: &Path,
14881 input: &str,
14882 runner: Option<&str>,
14883 format: OutputFormat,
14884) -> Result<()> {
14885 let report = test_digest::compute(path, input, runner)?;
14886 if format.json_output {
14887 println!(
14888 "{}",
14889 to_json_schema(&report, format.pretty, format.terse, format.schema)?
14890 );
14891 return Ok(());
14892 }
14893
14894 if report.failure_groups.is_empty() {
14895 println!("No failures detected (runner: {}).", report.runner);
14896 for warning in &report.warnings {
14897 println!("warning: {warning}");
14898 }
14899 return Ok(());
14900 }
14901
14902 if format.compact {
14903 println!(
14904 "test runner:{} failures:{} groups:{} passed:{} failed:{} skipped:{}",
14905 report.runner,
14906 report.failures,
14907 report.grouped_failures,
14908 report.counts.passed.unwrap_or(0),
14909 report.counts.failed.unwrap_or(report.grouped_failures),
14910 report.counts.skipped.unwrap_or(0),
14911 );
14912 for failure in &report.failure_groups {
14913 let tests = truncate_for_compact(&failure.tests.join(","), 60);
14914 let location = match (&failure.path, failure.line) {
14915 (Some(path), Some(line)) => format!("{path}:{line}"),
14916 (Some(path), None) => path.clone(),
14917 _ => "-".to_string(),
14918 };
14919 println!(
14920 "{} tests:{} count:{} summaries:{} msg:{}",
14921 location,
14922 tests,
14923 failure.occurrences,
14924 test_digest_summary_label(failure.summary_state),
14925 truncate_for_compact(&failure.message, 80)
14926 );
14927 }
14928 for warning in &report.warnings {
14929 println!("warning: {warning}");
14930 }
14931 return Ok(());
14932 }
14933
14934 println!("Test digest ({})", report.runner);
14935 println!(" failures: {}", report.failures);
14936 println!(" failure groups: {}", report.grouped_failures);
14937 if let Some(passed) = report.counts.passed {
14938 println!(" passed: {}", passed);
14939 }
14940 if let Some(failed) = report.counts.failed {
14941 println!(" failed: {}", failed);
14942 }
14943 if let Some(skipped) = report.counts.skipped {
14944 println!(" skipped: {}", skipped);
14945 }
14946
14947 for failure in &report.failure_groups {
14948 println!();
14949 match (&failure.path, failure.line, failure.column) {
14950 (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
14951 (Some(path), Some(line), None) => println!("{path}:{line}"),
14952 (Some(path), None, _) => println!("{path}"),
14953 (None, _, _) => println!("(no file anchor)"),
14954 }
14955 println!(" tests: {}", failure.tests.join(", "));
14956 println!(" occurrences: {}", failure.occurrences);
14957 println!(" message: {}", failure.message);
14958 println!(
14959 " cached summaries: {}",
14960 test_digest_summary_label(failure.summary_state)
14961 );
14962 for summary in &failure.current_summaries {
14963 println!(
14964 " - {}: {}",
14965 summary.symbol,
14966 truncate_for_compact(&summary.summary, 160)
14967 );
14968 }
14969 }
14970 for warning in &report.warnings {
14971 println!("warning: {warning}");
14972 }
14973 Ok(())
14974}
14975
/// Risk level used throughout the conflict matrix.
///
/// The derived ordering follows declaration order, so
/// `Low < Medium < High < FailClosed`. Variants serialize in snake_case
/// (`low`, `medium`, `high`, `fail_closed`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
#[serde(rename_all = "snake_case")]
enum ConflictMatrixRisk {
    Low,
    Medium,
    High,
    FailClosed,
}
14984
/// Overlapping items grouped by category; the default value has every list empty.
// NOTE(review): what the overlap is measured against is not visible here; on
// `ConflictMatrixCandidate` it is stored as `staged_overlap`.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixOverlap {
    files: Vec<String>,
    symbols: Vec<String>,
    tests: Vec<String>,
    config_files: Vec<String>,
}
14992
/// Serializable reference to a line range of a source file, carried by
/// `ConflictMatrixCandidate::source_handles`.
// NOTE(review): presumably `start`/`end` are 1-based inclusive lines and `expand` is a
// follow-up command; confirm against the code that builds these handles.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixSourceHandle {
    handle: String,
    file: String,
    start: usize,
    end: usize,
    reason: String,
    expand: String,
}
15002
/// Serializable semantic reference carried by `ConflictMatrixCandidate::semantic_related`.
///
/// `source_file` and `source_symbol` are omitted from the serialized output when `None`.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixSemanticRef {
    handle: String,
    kind: String,
    label: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_file: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_symbol: Option<String>,
    expand: String,
}
15014
/// Size accounting attached to ownership blocks and worker prompt packets; the default value
/// has every counter at zero.
// NOTE(review): units are inferred from the field names (tokens, lines, bytes); confirm
// against the code that fills them in.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixTokenBudget {
    prompt_estimated_tokens: usize,
    max_prompt_tokens: usize,
    source_window_count: usize,
    source_window_lines: usize,
    max_context_bytes: usize,
}
15023
/// Context references attached to candidates and worker prompt packets as `required_context`;
/// the default value has every list empty.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixRequiredContext {
    read_only_files: Vec<String>,
    source_handles: Vec<String>,
    worker_context_handles: Vec<String>,
    semantic_handles: Vec<String>,
    expansion_commands: Vec<String>,
}
15032
/// Identifiers and handle lists attached to candidates and worker prompt packets as
/// `graph_handles`.
///
/// `projection_hash` is omitted from the serialized output when `None`; the default value has
/// empty strings and empty lists.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixGraphHandles {
    target_node_id: String,
    evidence_packet_id: String,
    worker_prompt_packet_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    source_handles: Vec<String>,
    worker_context_handles: Vec<String>,
    semantic_handles: Vec<String>,
}
15044
/// Aggregated worker feedback attached to candidates and worker prompt packets.
///
/// `warnings` is omitted from the serialized output when empty; the default value has zero
/// counters, `false` flags, and empty lists.
// NOTE(review): the meaning of the counters and of `closure_rank_score` is inferred from the
// field names only; confirm against the code that aggregates worker results.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixWorkerFeedback {
    total: usize,
    completed: usize,
    blocked: usize,
    touched_files: Vec<String>,
    expected_tests: Vec<String>,
    follow_up_ids: Vec<String>,
    outcome_history: Vec<String>,
    repeated_blockage: bool,
    stale_expected_tests: Vec<String>,
    follow_up_debt: Vec<String>,
    closure_rank_score: usize,
    closure_rank_reasons: Vec<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
15062
/// Ownership description stored on a `ConflictMatrixCandidate` as `ownership`.
///
/// `contract_version` is a compile-time string; the remaining fields are owned lists, the
/// token budget, and the prompt text.
// NOTE(review): presumably `owned_*` lists what a worker may edit and `forbidden_files`
// what it must not touch; confirm against the prompt builder.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixOwnershipBlock {
    contract_version: &'static str,
    title: String,
    owned_files: Vec<String>,
    owned_symbols: Vec<String>,
    read_only_context: Vec<String>,
    read_only_files: Vec<String>,
    forbidden_files: Vec<String>,
    expected_tests: Vec<String>,
    expansion_commands: Vec<String>,
    token_budget: ConflictMatrixTokenBudget,
    prompt: String,
}
15077
/// Serializable worker prompt packet for one conflict-matrix target.
///
/// Carries ranking and risk, blocking relations, required context, graph handles, ownership
/// lists, the token budget, worker feedback, and the prompt text. `projection_hash` is omitted
/// from the serialized output when `None`.
// NOTE(review): many fields mirror `ConflictMatrixCandidate` and
// `ConflictMatrixOwnershipBlock`; whether they are copied from those is not visible here.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixWorkerPromptPacket {
    contract_version: &'static str,
    packet_id: String,
    target: String,
    rank: usize,
    risk: ConflictMatrixRisk,
    previously_completed: bool,
    parallel_safe: bool,
    blocks: Vec<String>,
    blocked_by: Vec<String>,
    required_context: ConflictMatrixRequiredContext,
    graph_handles: ConflictMatrixGraphHandles,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    title: String,
    owned_files: Vec<String>,
    owned_symbols: Vec<String>,
    read_only_context: Vec<String>,
    forbidden_files: Vec<String>,
    expected_tests: Vec<String>,
    expansion_commands: Vec<String>,
    token_budget: ConflictMatrixTokenBudget,
    semantic_dispatch_score: usize,
    semantic_dispatch_reasons: Vec<String>,
    worker_feedback: ConflictMatrixWorkerFeedback,
    prompt: String,
}
15106
/// Serializable conflict-matrix entry for one target.
///
/// Combines ranking and risk data, blocking relations, owned files and symbols, affected
/// tests, semantic and source references, worker feedback, the staged overlap, and the
/// ownership block. `projection_hash` is omitted from the serialized output when `None`.
// NOTE(review): how `rank`, `risk_score`, and `semantic_dispatch_score` are computed is not
// visible in this part of the file.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixCandidate {
    rank: usize,
    target: String,
    evidence_packet_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    target_node_id: String,
    target_kind: String,
    target_label: String,
    risk: ConflictMatrixRisk,
    previously_completed: bool,
    parallel_safe: bool,
    blocks: Vec<String>,
    blocked_by: Vec<String>,
    required_context: ConflictMatrixRequiredContext,
    graph_handles: ConflictMatrixGraphHandles,
    risk_score: usize,
    risk_reasons: Vec<String>,
    owned_files: Vec<String>,
    owned_symbols: Vec<String>,
    config_files: Vec<String>,
    affected_tests: Vec<String>,
    worker_context: Vec<String>,
    semantic_related: Vec<ConflictMatrixSemanticRef>,
    semantic_dispatch_score: usize,
    semantic_dispatch_reasons: Vec<String>,
    worker_feedback: ConflictMatrixWorkerFeedback,
    source_handles: Vec<ConflictMatrixSourceHandle>,
    worker_context_handles: Vec<String>,
    staged_overlap: ConflictMatrixOverlap,
    ownership: ConflictMatrixOwnershipBlock,
}
15140
/// Serializable comparison of two conflict-matrix entries, named by `left` and `right`.
///
/// Holds the items the two sides share, grouped by category, together with a risk level,
/// a numeric risk score, and a textual verdict.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixPair {
    left: String,
    right: String,
    risk: ConflictMatrixRisk,
    risk_score: usize,
    shared_files: Vec<String>,
    shared_symbols: Vec<String>,
    shared_tests: Vec<String>,
    shared_config_files: Vec<String>,
    verdict: String,
}
15153
/// Serializable summary of the inputs behind a conflict matrix: evidence targets and
/// packets, preparation summaries and timings, and three command strings.
// NOTE(review): presumably the `*_command` fields hold the commands that reproduce each
// input; confirm against the code that builds this summary.
#[derive(Serialize)]
struct ConflictMatrixInputSummary {
    graph_db_evidence_targets: Vec<String>,
    evidence_packets: Vec<ConflictMatrixEvidencePacketSummary>,
    shared_preparation: ConflictMatrixSharedPreparationSummary,
    preparation_cache: ConflictMatrixPreparationCacheSummary,
    preparation_timings: Vec<GraphDbBackendEvalPhaseTiming>,
    context_pack_command: String,
    cached_diff_command: String,
    impact_command: String,
}
15165
/// File span copied from a context-pack exploration source window
/// (see `ConflictMatrixPreparedContext::from_context_pack`).
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedSourceWindow {
    file: String,
    // NOTE(review): `start`/`end` look like line numbers — confirm units and inclusivity.
    start: usize,
    end: usize,
}
15172
/// Serializable snapshot of the parts of a `ContextPackReport` that the
/// conflict matrix consumes; built by `from_context_pack`.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedContext {
    target: String,
    target_kind: String,
    status_reminders: Vec<String>,
    /// Copied from `next_context.prompt_targets`; scanned for `#id` references
    /// by `conflict_targets_from_context_pack`.
    prompt_targets: Vec<String>,
    /// Copied from `next_context.touched_files`.
    touched_files: Vec<String>,
    /// Copied from `next_context.touched_symbols`.
    touched_symbols: Vec<String>,
    /// Copied from `diff_digest.files_changed`.
    files_changed: usize,
    /// Summaries of the exploration worker-context entries; also scanned for
    /// `#id` references by `conflict_targets_from_context_pack`.
    worker_context: Vec<String>,
    /// Spans copied from the exploration source windows.
    source_windows: Vec<ConflictMatrixPreparedSourceWindow>,
}
15185
15186impl ConflictMatrixPreparedContext {
15187 fn from_context_pack(context_pack: &ContextPackReport) -> Self {
15188 Self {
15189 target: context_pack.target.clone(),
15190 target_kind: context_pack.target_kind.clone(),
15191 status_reminders: context_pack.status_reminders.clone(),
15192 prompt_targets: context_pack.next_context.prompt_targets.clone(),
15193 touched_files: context_pack.next_context.touched_files.clone(),
15194 touched_symbols: context_pack.next_context.touched_symbols.clone(),
15195 files_changed: context_pack.diff_digest.files_changed,
15196 worker_context: context_pack
15197 .exploration
15198 .worker_context
15199 .iter()
15200 .map(|worker| worker.summary.clone())
15201 .collect(),
15202 source_windows: context_pack
15203 .exploration
15204 .source_windows
15205 .iter()
15206 .map(|window| ConflictMatrixPreparedSourceWindow {
15207 file: window.file.clone(),
15208 start: window.start,
15209 end: window.end,
15210 })
15211 .collect(),
15212 }
15213 }
15214}
15215
/// Compact, serializable description of one evidence packet.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixEvidencePacketSummary {
    target: String,
    packet_id: String,
    target_node_id: String,
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    // NOTE(review): presumably the CLI command that regenerates this packet — confirm.
    replay_command: String,
}
15225
/// Counters describing the shared preparation step of a conflict-matrix run.
// NOTE(review): every `usize` field looks like a row/item count for the named
// category; the code that fills them is not visible here — confirm.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixSharedPreparationSummary {
    evidence_cache_status: String,
    graph_nodes: usize,
    graph_edges: usize,
    evidence_packets: usize,
    source_handles: usize,
    worker_context: usize,
    worker_results: usize,
    semantic_rows: usize,
    dispatch_trace_snapshot_nodes: usize,
    dispatch_trace_snapshot_edges: usize,
}
15239
/// Serializable description of the preparation cache entry used for a run.
// NOTE(review): the three `*_watermark` fields presumably feed cache
// invalidation; their format is not visible in this part of the file — confirm.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparationCacheSummary {
    version: String,
    key: String,
    status: String,
    source_watermark: String,
    document_watermark: String,
    staged_diff_watermark: String,
}
15249
/// Serialized `context_pack` section of `ConflictMatrixReport`.
///
/// Mirrors the fields of `ConflictMatrixPreparedContext`, except that
/// `source_windows` holds strings here rather than structured windows.
// NOTE(review): the string format of `source_windows` is produced elsewhere — confirm.
#[derive(Serialize)]
struct ConflictMatrixContextSummary {
    target: String,
    target_kind: String,
    prompt_targets: Vec<String>,
    touched_files: Vec<String>,
    touched_symbols: Vec<String>,
    files_changed: usize,
    worker_context: Vec<String>,
    source_windows: Vec<String>,
    status_reminders: Vec<String>,
}
15262
/// Summary of one candidate whose own risk is `FailClosed`; built by
/// `conflict_matrix_per_target_fail_closed`.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixPerTargetFailClosed {
    target: String,
    previously_completed: bool,
    risk_reasons: Vec<String>,
    owned_files: Vec<String>,
    /// Number of source handles attached to the candidate.
    source_handle_count: usize,
}
15271
/// Serialized `orchestration` section of `ConflictMatrixReport`.
// NOTE(review): field contents are assembled outside this part of the file;
// the names suggest flattened ids and decisions for orchestration tooling — confirm.
#[derive(Serialize)]
struct ConflictMatrixOrchestrationObservability {
    contract_version: &'static str,
    projection_freshness: GraphDbFreshnessReport,
    projection_hashes: Vec<String>,
    evidence_packet_ids: Vec<String>,
    conflict_matrix_decisions: Vec<String>,
    worker_ownership_blocks: Vec<String>,
    follow_up_commands: Vec<String>,
}
15282
/// Top-level serialized output of the conflict-matrix command.
#[derive(Serialize)]
struct ConflictMatrixReport {
    contract_version: &'static str,
    root: String,
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    targets: Vec<String>,
    // NOTE(review): how `can_parallel`, `fail_closed`, and
    // `cross_target_parallel_safe` relate is decided where the report is
    // assembled, outside this part of the file — confirm before relying on them.
    can_parallel: bool,
    fail_closed: bool,
    cross_target_parallel_safe: bool,
    /// One entry per candidate whose own risk is `FailClosed`.
    per_target_fail_closed: Vec<ConflictMatrixPerTargetFailClosed>,
    inputs: ConflictMatrixInputSummary,
    context_pack: ConflictMatrixContextSummary,
    cached_diff: diff_digest::DiffDigestReport,
    impact: impact::ImpactReport,
    candidates: Vec<ConflictMatrixCandidate>,
    worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
    /// Pairwise overlaps between candidates.
    conflicts: Vec<ConflictMatrixPair>,
    orchestration: ConflictMatrixOrchestrationObservability,
    next_commands: Vec<String>,
    /// Left out of the serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
15306
15307fn conflict_risk_label(risk: ConflictMatrixRisk) -> &'static str {
15308 match risk {
15309 ConflictMatrixRisk::Low => "low",
15310 ConflictMatrixRisk::Medium => "medium",
15311 ConflictMatrixRisk::High => "high",
15312 ConflictMatrixRisk::FailClosed => "fail_closed",
15313 }
15314}
15315
/// Copies a set into a `Vec`, keeping the set's ascending order.
fn sorted_set(values: &BTreeSet<String>) -> Vec<String> {
    let mut ordered = Vec::with_capacity(values.len());
    ordered.extend(values.iter().cloned());
    ordered
}
15319
/// Returns the values present in both sets, in ascending order.
fn sorted_intersection(left: &BTreeSet<String>, right: &BTreeSet<String>) -> Vec<String> {
    // Walking `left` in order and keeping what `right` also holds yields the
    // same ascending sequence as a set intersection.
    left.iter()
        .filter(|value| right.contains(*value))
        .cloned()
        .collect()
}
15323
/// Normalizes a raw target reference into a bare id.
///
/// Strips surrounding whitespace, backticks, and `,` `;` `.` punctuation, then
/// unwraps the `[#id]` form, and finally removes any leading `#`, stray square
/// brackets, and whitespace left around the id.
///
/// Returns `None` when nothing remains after stripping.
fn normalize_conflict_target(raw: &str) -> Option<String> {
    let trimmed = raw
        .trim()
        .trim_matches(|ch: char| matches!(ch, '`' | ',' | ';' | '.'));
    let bracketed = trimmed
        .strip_prefix("[#")
        .and_then(|value| value.strip_suffix(']'))
        .unwrap_or(trimmed);
    // `#`, brackets, and whitespace are stripped from the front in one pass.
    // Stripping `#` before the brackets would turn an unbalanced reference
    // such as `[#123` into `#123`, leaving the marker attached to the id.
    let normalized = bracketed
        .trim_start_matches(|ch: char| ch.is_whitespace() || matches!(ch, '#' | '[' | ']'))
        .trim_end_matches(|ch: char| ch.is_whitespace() || matches!(ch, '[' | ']'));
    (!normalized.is_empty()).then(|| normalized.to_string())
}
15338
15339fn extract_conflict_target_refs(input: &str) -> Vec<String> {
15340 input
15341 .split(|ch: char| {
15342 !(ch.is_ascii_alphanumeric()
15343 || ch == '#'
15344 || ch == '_'
15345 || ch == '-'
15346 || ch == '['
15347 || ch == ']')
15348 })
15349 .filter_map(|token| {
15350 let hash = token.find('#')?;
15351 normalize_conflict_target(&token[hash..])
15352 })
15353 .collect()
15354}
15355
15356fn conflict_targets_from_context_pack(
15357 store: &impl GraphStore,
15358 context_pack: &ConflictMatrixPreparedContext,
15359) -> Result<Vec<String>> {
15360 let mut candidates = Vec::new();
15361 for prompt in &context_pack.prompt_targets {
15362 candidates.extend(extract_conflict_target_refs(prompt));
15363 }
15364 for worker in &context_pack.worker_context {
15365 candidates.extend(extract_conflict_target_refs(worker));
15366 }
15367
15368 let mut targets = Vec::new();
15369 let mut seen = BTreeSet::new();
15370 for candidate in candidates {
15371 if !seen.insert(candidate.clone()) {
15372 continue;
15373 }
15374 if graph_db_resolve_evidence_target(store, &candidate)?.is_some() {
15375 targets.push(candidate);
15376 }
15377 }
15378 Ok(targets)
15379}
15380
15381fn resolve_conflict_matrix_targets(
15382 store: &impl GraphStore,
15383 raw_targets: &[String],
15384 context_pack: &ConflictMatrixPreparedContext,
15385) -> Result<Vec<String>> {
15386 let mut targets = raw_targets
15387 .iter()
15388 .filter_map(|target| normalize_conflict_target(target))
15389 .collect::<Vec<_>>();
15390 if targets.is_empty() {
15391 targets = conflict_targets_from_context_pack(store, context_pack)?;
15392 }
15393
15394 let mut seen = BTreeSet::new();
15395 targets.retain(|target| seen.insert(target.clone()));
15396 if targets.is_empty() {
15397 bail!(
15398 "conflict-matrix needs at least one resolvable backlog id, job handle, or graph node id"
15399 );
15400 }
15401 Ok(targets)
15402}
15403
/// Reports whether `path` is a planner config path.
///
/// Thin local alias for `resolution::is_planner_config_path`, which holds the
/// actual classification rules.
fn is_planner_config_path(path: &str) -> bool {
    resolution::is_planner_config_path(path)
}
15407
15408fn conflict_matrix_source_handle(node: &SubstrateGraphNode) -> Option<ConflictMatrixSourceHandle> {
15409 let file = node.properties.get("file")?.clone();
15410 let start = node
15411 .properties
15412 .get("start")
15413 .and_then(|value| value.parse::<usize>().ok())
15414 .unwrap_or(1);
15415 let end = node
15416 .properties
15417 .get("end")
15418 .and_then(|value| value.parse::<usize>().ok())
15419 .unwrap_or(start);
15420 Some(ConflictMatrixSourceHandle {
15421 handle: node
15422 .properties
15423 .get("handle")
15424 .cloned()
15425 .unwrap_or_else(|| node.id.clone()),
15426 file,
15427 start,
15428 end,
15429 reason: node.properties.get("reason").cloned().unwrap_or_default(),
15430 expand: node.properties.get("expand").cloned().unwrap_or_default(),
15431 })
15432}
15433
15434fn conflict_matrix_semantic_ref(
15435 root: &Path,
15436 node: &SubstrateGraphNode,
15437) -> ConflictMatrixSemanticRef {
15438 ConflictMatrixSemanticRef {
15439 handle: node
15440 .properties
15441 .get("handle")
15442 .cloned()
15443 .unwrap_or_else(|| node.id.clone()),
15444 kind: node.kind.clone(),
15445 label: node.label.clone(),
15446 source_file: node
15447 .properties
15448 .get("source_file")
15449 .or_else(|| node.properties.get("path"))
15450 .cloned(),
15451 source_symbol: node.properties.get("source_symbol").cloned(),
15452 expand: node
15453 .properties
15454 .get("expand")
15455 .cloned()
15456 .unwrap_or_else(|| traversal_expand_command(root, &node.id)),
15457 }
15458}
15459
/// Lookup table from file path to the symbol labels found in that file.
///
/// Built by `conflict_matrix_graph_index`, which sorts and de-duplicates each
/// label list.
#[derive(Clone)]
struct ConflictMatrixGraphIndex {
    symbols_by_file: BTreeMap<String, Vec<String>>,
}
15464
15465fn conflict_matrix_graph_index(graph_nodes: &[SubstrateGraphNode]) -> ConflictMatrixGraphIndex {
15466 let mut symbols_by_file = BTreeMap::<String, Vec<String>>::new();
15467 for node in graph_nodes {
15468 if node.kind != "symbol" {
15469 continue;
15470 }
15471 if let Some(path) = node.properties.get("path") {
15472 symbols_by_file
15473 .entry(path.clone())
15474 .or_default()
15475 .push(node.label.clone());
15476 }
15477 }
15478 for symbols in symbols_by_file.values_mut() {
15479 symbols.sort();
15480 symbols.dedup();
15481 }
15482 ConflictMatrixGraphIndex { symbols_by_file }
15483}
15484
15485fn conflict_matrix_symbols_for_files(
15486 graph_index: &ConflictMatrixGraphIndex,
15487 files: &BTreeSet<String>,
15488 target_node: &SubstrateGraphNode,
15489) -> BTreeSet<String> {
15490 let mut symbols = BTreeSet::new();
15491 if target_node.kind == "symbol" {
15492 symbols.insert(target_node.label.clone());
15493 }
15494 for file in files {
15495 if let Some(file_symbols) = graph_index.symbols_by_file.get(file) {
15496 symbols.extend(file_symbols.iter().cloned());
15497 }
15498 }
15499 symbols
15500}
15501
15502fn conflict_matrix_test_commands(target: &impact::ImpactTestTarget) -> Vec<String> {
15503 if target.commands.is_empty() {
15504 vec![target.path.clone()]
15505 } else {
15506 target.commands.clone()
15507 }
15508}
15509
15510fn conflict_matrix_affected_tests(
15511 impact_report: &impact::ImpactReport,
15512 files: &BTreeSet<String>,
15513 symbols: &BTreeSet<String>,
15514 staged_overlap: &ConflictMatrixOverlap,
15515) -> Vec<String> {
15516 let mut tests = BTreeSet::new();
15517 for target in &impact_report.affected_tests {
15518 let path_match = files.contains(&target.path);
15519 let symbol_match = target.symbols.iter().any(|symbol| symbols.contains(symbol));
15520 if path_match || symbol_match {
15521 tests.extend(conflict_matrix_test_commands(target));
15522 }
15523 }
15524
15525 if tests.is_empty()
15526 && (!staged_overlap.files.is_empty()
15527 || !staged_overlap.symbols.is_empty()
15528 || !staged_overlap.config_files.is_empty())
15529 {
15530 for target in &impact_report.affected_tests {
15531 tests.extend(conflict_matrix_test_commands(target));
15532 }
15533 }
15534 tests.into_iter().collect()
15535}
15536
15537fn conflict_matrix_semantic_dispatch_score(
15538 semantic_related: &[ConflictMatrixSemanticRef],
15539 files: &BTreeSet<String>,
15540 symbols: &BTreeSet<String>,
15541) -> (usize, Vec<String>) {
15542 let mut score = 0usize;
15543 let mut reasons = Vec::new();
15544 for semantic in semantic_related {
15545 let base = match semantic.kind.as_str() {
15546 "semantic_concept" => 8,
15547 "semantic_entity" => 6,
15548 _ => 3,
15549 };
15550 let mut points = base;
15551 let mut detail = vec![format!("{} {}", semantic.kind, semantic.label)];
15552 if semantic
15553 .source_file
15554 .as_ref()
15555 .is_some_and(|file| files.contains(file))
15556 {
15557 points += 4;
15558 detail.push("owned file".to_string());
15559 }
15560 if semantic
15561 .source_symbol
15562 .as_ref()
15563 .is_some_and(|symbol| symbols.contains(symbol))
15564 {
15565 points += 2;
15566 detail.push("owned symbol".to_string());
15567 }
15568 score += points;
15569 reasons.push(format!("+{points} {}", detail.join(" / ")));
15570 }
15571 (score, reasons)
15572}
15573
15574fn conflict_matrix_staged_overlap(
15575 files: &BTreeSet<String>,
15576 symbols: &BTreeSet<String>,
15577 cached_diff: &diff_digest::DiffDigestReport,
15578) -> ConflictMatrixOverlap {
15579 let staged_files = cached_diff
15580 .files
15581 .iter()
15582 .map(|file| file.path.clone())
15583 .collect::<BTreeSet<_>>();
15584 let staged_symbols = cached_diff
15585 .files
15586 .iter()
15587 .flat_map(|file| file.touched_symbols.iter().cloned())
15588 .collect::<BTreeSet<_>>();
15589 let file_overlap = sorted_intersection(files, &staged_files);
15590 let symbol_overlap = sorted_intersection(symbols, &staged_symbols);
15591 let config_files = file_overlap
15592 .iter()
15593 .filter(|file| is_planner_config_path(file))
15594 .cloned()
15595 .collect::<Vec<_>>();
15596 ConflictMatrixOverlap {
15597 files: file_overlap,
15598 symbols: symbol_overlap,
15599 tests: Vec::new(),
15600 config_files,
15601 }
15602}
15603
15604fn graph_node_list_property(node: &SubstrateGraphNode, key: &str) -> Vec<String> {
15605 node.properties
15606 .get(key)
15607 .map(|value| {
15608 value
15609 .split([',', ';'])
15610 .flat_map(|part| part.split("&&"))
15611 .map(str::trim)
15612 .filter(|part| !part.is_empty())
15613 .map(str::to_string)
15614 .collect()
15615 })
15616 .unwrap_or_default()
15617}
15618
15619fn conflict_matrix_worker_feedback(
15620 worker_results: &[SubstrateGraphNode],
15621) -> ConflictMatrixWorkerFeedback {
15622 let mut touched_files = BTreeSet::new();
15623 let mut expected_tests = BTreeSet::new();
15624 let mut follow_up_ids = BTreeSet::new();
15625 let mut outcome_history = Vec::new();
15626 let mut completed = 0usize;
15627 let mut blocked = 0usize;
15628
15629 let mut results = worker_results.iter().collect::<Vec<_>>();
15630 results.sort_by(|left, right| {
15631 left.properties
15632 .get("line")
15633 .and_then(|value| value.parse::<i64>().ok())
15634 .cmp(
15635 &right
15636 .properties
15637 .get("line")
15638 .and_then(|value| value.parse::<i64>().ok()),
15639 )
15640 .then(left.id.cmp(&right.id))
15641 });
15642
15643 for node in results {
15644 let status = node
15645 .properties
15646 .get("status")
15647 .map(String::as_str)
15648 .unwrap_or("unknown");
15649 match status {
15650 "completed" => completed += 1,
15651 "blocked" => blocked += 1,
15652 _ => {}
15653 }
15654 touched_files.extend(graph_node_list_property(node, "touched_files"));
15655 expected_tests.extend(graph_node_list_property(node, "expected_tests"));
15656 follow_up_ids.extend(graph_node_list_property(node, "follow_up_ids"));
15657 let location = match (node.properties.get("path"), node.properties.get("line")) {
15658 (Some(path), Some(line)) => format!("{path}:{line}"),
15659 (Some(path), None) => path.clone(),
15660 _ => node.id.clone(),
15661 };
15662 let detail = node
15663 .properties
15664 .get("detail")
15665 .cloned()
15666 .unwrap_or_else(|| node.label.clone());
15667 outcome_history.push(format!("{status} at {location}: {detail}"));
15668 }
15669
15670 let repeated_blockage = blocked > 1;
15671 let warnings = if repeated_blockage {
15672 vec![format!(
15673 "repeated blockage observed in {blocked} worker_result rows; inspect outcome_history before redispatch"
15674 )]
15675 } else {
15676 Vec::new()
15677 };
15678
15679 ConflictMatrixWorkerFeedback {
15680 total: worker_results.len(),
15681 completed,
15682 blocked,
15683 touched_files: touched_files.into_iter().collect(),
15684 expected_tests: expected_tests.into_iter().collect(),
15685 follow_up_ids: follow_up_ids.into_iter().collect(),
15686 outcome_history,
15687 repeated_blockage,
15688 stale_expected_tests: Vec::new(),
15689 follow_up_debt: Vec::new(),
15690 closure_rank_score: 0,
15691 closure_rank_reasons: Vec::new(),
15692 warnings,
15693 }
15694}
15695
/// Joins references with `,`, or returns `none` for an empty list.
fn feedback_ref_list(values: &[String]) -> String {
    match values {
        [] => "none".to_string(),
        refs => refs.join(","),
    }
}
15703
15704fn stale_expected_tests_for_candidate(candidate: &ConflictMatrixCandidate) -> Vec<String> {
15705 if candidate.worker_feedback.expected_tests.is_empty() {
15706 return Vec::new();
15707 }
15708 let current_tests = candidate
15709 .affected_tests
15710 .iter()
15711 .cloned()
15712 .collect::<BTreeSet<_>>();
15713 if current_tests.is_empty() {
15714 return candidate.worker_feedback.expected_tests.clone();
15715 }
15716 candidate
15717 .worker_feedback
15718 .expected_tests
15719 .iter()
15720 .filter(|test| !current_tests.contains(*test))
15721 .cloned()
15722 .collect()
15723}
15724
15725fn apply_conflict_matrix_worker_feedback_controls(candidates: &mut [ConflictMatrixCandidate]) {
15726 for candidate in candidates.iter_mut() {
15727 let stale_expected_tests = stale_expected_tests_for_candidate(candidate);
15728 let follow_up_debt = candidate.worker_feedback.follow_up_ids.clone();
15729 let mut score = 0usize;
15730 let mut reasons = Vec::new();
15731
15732 if candidate.worker_feedback.repeated_blockage {
15733 score += candidate.worker_feedback.blocked.saturating_mul(40);
15734 reasons.push(format!(
15735 "repeated blockage: {} blocked worker_result rows",
15736 candidate.worker_feedback.blocked
15737 ));
15738 }
15739 if !stale_expected_tests.is_empty() {
15740 score += stale_expected_tests.len().saturating_mul(25);
15741 let reason = if candidate.affected_tests.is_empty() {
15742 format!(
15743 "stale expected tests: {} no longer match current impact output",
15744 feedback_ref_list(&stale_expected_tests)
15745 )
15746 } else {
15747 format!(
15748 "stale expected tests: {} not in current impacted tests {}",
15749 feedback_ref_list(&stale_expected_tests),
15750 feedback_ref_list(&candidate.affected_tests)
15751 )
15752 };
15753 reasons.push(reason.clone());
15754 candidate.worker_feedback.warnings.push(format!(
15755 "{reason}; refresh impact or rerun the listed tests before redispatch"
15756 ));
15757 }
15758 if !follow_up_debt.is_empty() {
15759 score += follow_up_debt.len().saturating_mul(10);
15760 let reason = format!("follow-up debt: {}", feedback_ref_list(&follow_up_debt));
15761 reasons.push(reason.clone());
15762 candidate.worker_feedback.warnings.push(format!(
15763 "{reason}; include or resolve the referenced backlog ids before closing dispatch"
15764 ));
15765 }
15766
15767 candidate.worker_feedback.stale_expected_tests = stale_expected_tests;
15768 candidate.worker_feedback.follow_up_debt = follow_up_debt;
15769 candidate.worker_feedback.closure_rank_score = score;
15770 candidate.worker_feedback.closure_rank_reasons = reasons;
15771 candidate.worker_feedback.warnings =
15772 dedupe_preserve_order(std::mem::take(&mut candidate.worker_feedback.warnings));
15773 }
15774}
15775
15776fn empty_conflict_matrix_ownership(target: &str) -> ConflictMatrixOwnershipBlock {
15777 ConflictMatrixOwnershipBlock {
15778 contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
15779 title: format!("Worker ownership for {target}"),
15780 owned_files: Vec::new(),
15781 owned_symbols: Vec::new(),
15782 read_only_context: Vec::new(),
15783 read_only_files: Vec::new(),
15784 forbidden_files: Vec::new(),
15785 expected_tests: Vec::new(),
15786 expansion_commands: Vec::new(),
15787 token_budget: ConflictMatrixTokenBudget::default(),
15788 prompt: String::new(),
15789 }
15790}
15791
/// Builds a conflict-matrix candidate from one evidence report.
///
/// Derives the owned files, symbols, and config files, the overlap with the
/// staged diff, the affected tests, the worker feedback, and a risk level
/// with its score and reasons.
///
/// `rank`, `parallel_safe`, `blocks`, `blocked_by`, `required_context`,
/// `graph_handles`, and `ownership` are returned as placeholders.
fn conflict_matrix_candidate_from_evidence(
    root: &Path,
    evidence: &GraphDbEvidenceReport,
    graph_index: &ConflictMatrixGraphIndex,
    cached_diff: &diff_digest::DiffDigestReport,
    impact_report: &impact::ImpactReport,
) -> ConflictMatrixCandidate {
    // Owned files are collected as a side effect of converting source handles;
    // nodes without a `file` property are dropped by `filter_map`.
    let mut files = BTreeSet::new();
    let source_handles = evidence
        .source_handles
        .iter()
        .filter_map(|node| {
            let handle = conflict_matrix_source_handle(node)?;
            files.insert(handle.file.clone());
            Some(handle)
        })
        .collect::<Vec<_>>();
    // File, symbol, and route targets also own the path stored on the target node.
    if matches!(
        evidence.target_node.kind.as_str(),
        "file" | "symbol" | "route"
    ) && let Some(path) = evidence.target_node.properties.get("path")
    {
        files.insert(path.clone());
    }

    let symbols = conflict_matrix_symbols_for_files(graph_index, &files, &evidence.target_node);
    let config_files = files
        .iter()
        .filter(|file| is_planner_config_path(file))
        .cloned()
        .collect::<BTreeSet<_>>();
    // The overlap is computed first because the affected-test fallback depends
    // on it; its `tests` field is filled in afterwards.
    let mut staged_overlap = conflict_matrix_staged_overlap(&files, &symbols, cached_diff);
    let affected_tests =
        conflict_matrix_affected_tests(impact_report, &files, &symbols, &staged_overlap);
    staged_overlap.tests = affected_tests.clone();
    let mut worker_feedback = conflict_matrix_worker_feedback(&evidence.worker_results);
    let previously_completed = worker_feedback.completed > 0;

    // Each rule below adds to the score and records one reason, in this order.
    let mut risk_score = 0usize;
    let mut risk_reasons = Vec::new();
    if files.is_empty() && previously_completed {
        // Missing ownership on an already-completed target only warns.
        worker_feedback.warnings.push(format!(
            "previously completed: {} completed worker_result row(s) exist without source ownership evidence; treating no-owned-files as informational instead of per-target fail-closed",
            worker_feedback.completed
        ));
    } else if files.is_empty() {
        risk_score += 120;
        risk_reasons.push("no source ownership evidence; fail closed before dispatch".to_string());
    }
    if !config_files.is_empty() {
        risk_score += 80 * config_files.len();
        risk_reasons.push("candidate owns config or workflow files".to_string());
    }
    if !staged_overlap.config_files.is_empty() {
        risk_score += 100 * staged_overlap.config_files.len();
        risk_reasons.push("staged diff already touches candidate config files".to_string());
    }
    if !staged_overlap.files.is_empty() {
        risk_score += 70 * staged_overlap.files.len();
        risk_reasons.push("staged diff already touches candidate files".to_string());
    }
    if !staged_overlap.symbols.is_empty() {
        risk_score += 35 * staged_overlap.symbols.len();
        risk_reasons.push("staged diff already touches candidate symbols".to_string());
    }
    if affected_tests.len() > 1 {
        risk_score += affected_tests.len() * 5;
        risk_reasons.push("candidate fans into multiple affected test commands".to_string());
    }
    // The risk level is chosen from the same conditions, not from the score:
    // fail closed on missing ownership (unless previously completed) or any
    // staged file/config overlap; high on owned config files or staged symbol
    // overlap; medium on more than one affected test; otherwise low.
    let risk = if (files.is_empty() && !previously_completed)
        || !staged_overlap.config_files.is_empty()
        || !staged_overlap.files.is_empty()
    {
        ConflictMatrixRisk::FailClosed
    } else if !config_files.is_empty() || !staged_overlap.symbols.is_empty() {
        ConflictMatrixRisk::High
    } else if affected_tests.len() > 1 {
        ConflictMatrixRisk::Medium
    } else {
        ConflictMatrixRisk::Low
    };

    // Worker-context text: the `summary` property, else the node label.
    let worker_context = evidence
        .worker_context
        .iter()
        .map(|node| {
            node.properties
                .get("summary")
                .cloned()
                .unwrap_or_else(|| node.label.clone())
        })
        .collect::<Vec<_>>();
    // Worker-context handles: the `handle` property, else the node id.
    let worker_context_handles = evidence
        .worker_context
        .iter()
        .map(|node| {
            node.properties
                .get("handle")
                .cloned()
                .unwrap_or_else(|| node.id.clone())
        })
        .collect::<Vec<_>>();
    let semantic_related = evidence
        .semantic_related
        .iter()
        .map(|node| conflict_matrix_semantic_ref(root, node))
        .collect::<Vec<_>>();
    let (semantic_dispatch_score, semantic_dispatch_reasons) =
        conflict_matrix_semantic_dispatch_score(&semantic_related, &files, &symbols);

    ConflictMatrixCandidate {
        // Placeholder rank.
        rank: 0,
        target: evidence.target.clone(),
        evidence_packet_id: evidence.packet_id.clone(),
        projection_hash: evidence.projection_hash.clone(),
        target_node_id: evidence.target_node.id.clone(),
        target_kind: evidence.target_node.kind.clone(),
        target_label: evidence.target_node.label.clone(),
        risk,
        previously_completed,
        // The remaining scheduling fields start out as placeholders.
        parallel_safe: false,
        blocks: Vec::new(),
        blocked_by: Vec::new(),
        required_context: ConflictMatrixRequiredContext::default(),
        graph_handles: ConflictMatrixGraphHandles::default(),
        risk_score,
        risk_reasons,
        owned_files: sorted_set(&files),
        owned_symbols: sorted_set(&symbols),
        config_files: sorted_set(&config_files),
        affected_tests,
        worker_context,
        semantic_related,
        semantic_dispatch_score,
        semantic_dispatch_reasons,
        worker_feedback,
        source_handles,
        worker_context_handles,
        staged_overlap,
        ownership: empty_conflict_matrix_ownership(&evidence.target),
    }
}
15934
/// Copies a slice of strings into an ordered set, dropping duplicates.
fn set_from_vec(values: &[String]) -> BTreeSet<String> {
    let mut set = BTreeSet::new();
    set.extend(values.iter().cloned());
    set
}
15938
15939fn conflict_pair_risk(
15940 shared_files: &[String],
15941 shared_symbols: &[String],
15942 shared_tests: &[String],
15943 shared_config_files: &[String],
15944) -> (ConflictMatrixRisk, usize, String) {
15945 let score = shared_files.len() * 100
15946 + shared_config_files.len() * 100
15947 + shared_symbols.len() * 40
15948 + shared_tests.len() * 10;
15949 if !shared_files.is_empty() || !shared_config_files.is_empty() {
15950 (
15951 ConflictMatrixRisk::FailClosed,
15952 score,
15953 "serialize or assign one worker as the sole owner of the shared files".to_string(),
15954 )
15955 } else if !shared_symbols.is_empty() {
15956 (
15957 ConflictMatrixRisk::High,
15958 score,
15959 "split by file or serialize; shared symbols are not safe parallel ownership"
15960 .to_string(),
15961 )
15962 } else if !shared_tests.is_empty() {
15963 (
15964 ConflictMatrixRisk::Medium,
15965 score,
15966 "parallel work is possible, but keep a shared test gate after merge".to_string(),
15967 )
15968 } else {
15969 (
15970 ConflictMatrixRisk::Low,
15971 score,
15972 "no direct file, symbol, config, or test overlap found".to_string(),
15973 )
15974 }
15975}
15976
15977fn build_conflict_matrix_pairs(candidates: &[ConflictMatrixCandidate]) -> Vec<ConflictMatrixPair> {
15978 let mut pairs = Vec::new();
15979 for left_idx in 0..candidates.len() {
15980 for right_idx in (left_idx + 1)..candidates.len() {
15981 let left = &candidates[left_idx];
15982 let right = &candidates[right_idx];
15983 let left_files = set_from_vec(&left.owned_files);
15984 let right_files = set_from_vec(&right.owned_files);
15985 let left_symbols = set_from_vec(&left.owned_symbols);
15986 let right_symbols = set_from_vec(&right.owned_symbols);
15987 let left_tests = set_from_vec(&left.affected_tests);
15988 let right_tests = set_from_vec(&right.affected_tests);
15989 let left_config = set_from_vec(&left.config_files);
15990 let right_config = set_from_vec(&right.config_files);
15991 let shared_files = sorted_intersection(&left_files, &right_files);
15992 let shared_symbols = sorted_intersection(&left_symbols, &right_symbols);
15993 let shared_tests = sorted_intersection(&left_tests, &right_tests);
15994 let shared_config_files = sorted_intersection(&left_config, &right_config);
15995 let (risk, risk_score, verdict) = conflict_pair_risk(
15996 &shared_files,
15997 &shared_symbols,
15998 &shared_tests,
15999 &shared_config_files,
16000 );
16001 pairs.push(ConflictMatrixPair {
16002 left: left.target.clone(),
16003 right: right.target.clone(),
16004 risk,
16005 risk_score,
16006 shared_files,
16007 shared_symbols,
16008 shared_tests,
16009 shared_config_files,
16010 verdict,
16011 });
16012 }
16013 }
16014 pairs.sort_by(|left, right| {
16015 right
16016 .risk
16017 .cmp(&left.risk)
16018 .then_with(|| right.risk_score.cmp(&left.risk_score))
16019 .then_with(|| left.left.cmp(&right.left))
16020 .then_with(|| left.right.cmp(&right.right))
16021 });
16022 pairs
16023}
16024
16025fn conflict_matrix_per_target_fail_closed(
16026 candidates: &[ConflictMatrixCandidate],
16027) -> Vec<ConflictMatrixPerTargetFailClosed> {
16028 candidates
16029 .iter()
16030 .filter(|candidate| candidate.risk == ConflictMatrixRisk::FailClosed)
16031 .map(|candidate| ConflictMatrixPerTargetFailClosed {
16032 target: candidate.target.clone(),
16033 previously_completed: candidate.previously_completed,
16034 risk_reasons: candidate.risk_reasons.clone(),
16035 owned_files: candidate.owned_files.clone(),
16036 source_handle_count: candidate.source_handles.len(),
16037 })
16038 .collect()
16039}
16040
/// Renders values as a Markdown bullet list, one `- value` per line.
///
/// An empty slice renders as the single bullet `- none`. There is no trailing
/// newline.
fn markdown_list(values: &[String]) -> String {
    match values.split_first() {
        None => "- none".to_string(),
        Some((first, rest)) => {
            let mut rendered = format!("- {first}");
            for value in rest {
                rendered.push_str("\n- ");
                rendered.push_str(value);
            }
            rendered
        }
    }
}
16051
16052fn conflict_matrix_expansion_commands(candidate: &ConflictMatrixCandidate) -> Vec<String> {
16053 let mut commands = candidate
16054 .source_handles
16055 .iter()
16056 .filter(|handle| !handle.expand.trim().is_empty())
16057 .map(|handle| handle.expand.clone())
16058 .chain(
16059 candidate
16060 .semantic_related
16061 .iter()
16062 .map(|semantic| semantic.expand.clone()),
16063 )
16064 .chain(candidate.affected_tests.iter().cloned())
16065 .collect::<Vec<_>>();
16066 if commands.is_empty() {
16067 commands.push(format!(
16068 "tsift graph-db evidence {} --depth 3 --limit 8 --json",
16069 shell_quote(&candidate.target)
16070 ));
16071 }
16072 dedupe_preserve_order(commands)
16073}
16074
16075fn conflict_matrix_token_budget(
16076 prompt: &str,
16077 source_handles: &[ConflictMatrixSourceHandle],
16078) -> ConflictMatrixTokenBudget {
16079 let source_window_lines = source_handles
16080 .iter()
16081 .map(|handle| handle.end.saturating_sub(handle.start).saturating_add(1))
16082 .sum::<usize>();
16083 let max_context_bytes = source_window_lines.saturating_mul(120).max(prompt.len());
16084 ConflictMatrixTokenBudget {
16085 prompt_estimated_tokens: estimated_tokens_from_bytes(prompt.len()),
16086 max_prompt_tokens: estimated_tokens_from_bytes(max_context_bytes),
16087 source_window_count: source_handles.len(),
16088 source_window_lines,
16089 max_context_bytes,
16090 }
16091}
16092
16093fn conflict_matrix_worker_prompt_packet_id(candidate: &ConflictMatrixCandidate) -> String {
16094 stable_handle(
16095 "wpp",
16096 &format!(
16097 "{}:{}:{}:{}",
16098 WORKER_PROMPT_PACKET_CONTRACT_VERSION,
16099 candidate.target,
16100 candidate.target_node_id,
16101 candidate.projection_hash.as_deref().unwrap_or("no-hash")
16102 ),
16103 )
16104}
16105
16106fn conflict_matrix_required_context(
16107 candidate: &ConflictMatrixCandidate,
16108) -> ConflictMatrixRequiredContext {
16109 ConflictMatrixRequiredContext {
16110 read_only_files: candidate.ownership.read_only_files.clone(),
16111 source_handles: candidate
16112 .source_handles
16113 .iter()
16114 .map(|handle| handle.handle.clone())
16115 .collect(),
16116 worker_context_handles: candidate.worker_context_handles.clone(),
16117 semantic_handles: candidate
16118 .semantic_related
16119 .iter()
16120 .map(|semantic| semantic.handle.clone())
16121 .collect(),
16122 expansion_commands: candidate.ownership.expansion_commands.clone(),
16123 }
16124}
16125
16126fn conflict_matrix_graph_handles(
16127 candidate: &ConflictMatrixCandidate,
16128) -> ConflictMatrixGraphHandles {
16129 ConflictMatrixGraphHandles {
16130 target_node_id: candidate.target_node_id.clone(),
16131 evidence_packet_id: candidate.evidence_packet_id.clone(),
16132 worker_prompt_packet_id: conflict_matrix_worker_prompt_packet_id(candidate),
16133 projection_hash: candidate.projection_hash.clone(),
16134 source_handles: candidate
16135 .source_handles
16136 .iter()
16137 .map(|handle| handle.handle.clone())
16138 .collect(),
16139 worker_context_handles: candidate.worker_context_handles.clone(),
16140 semantic_handles: candidate
16141 .semantic_related
16142 .iter()
16143 .map(|semantic| semantic.handle.clone())
16144 .collect(),
16145 }
16146}
16147
/// Fills in `candidate.ownership` for every candidate.
///
/// For each candidate this computes:
/// - `read_only_files`: the union of files owned by every candidate with a
///   different `target`;
/// - `forbidden_files`: the read-only files plus the candidate's staged-overlap
///   files and config files;
/// - `read_only_context`: the read-only files followed by worker-context,
///   semantic, dispatch-reason, and worker-feedback lines, de-duplicated while
///   preserving first-seen order;
/// - the worker prompt text, with a trailing token-budget line derived from the
///   prompt body and the candidate's source handles.
fn apply_conflict_matrix_ownership_blocks(candidates: &mut [ConflictMatrixCandidate]) {
    // Snapshot (target, owned files) up front so the mutable loop below can
    // consult other candidates' files without borrowing `candidates` twice.
    let all_files_by_target = candidates
        .iter()
        .map(|candidate| {
            (
                candidate.target.clone(),
                candidate
                    .owned_files
                    .iter()
                    .cloned()
                    .collect::<BTreeSet<_>>(),
            )
        })
        .collect::<Vec<_>>();

    for candidate in candidates.iter_mut() {
        // Everything owned by a differently-named target is read-only here.
        let mut read_only = BTreeSet::new();
        for (target, files) in &all_files_by_target {
            if target != &candidate.target {
                read_only.extend(files.iter().cloned());
            }
        }
        // Forbidden = read-only files plus anything overlapping the staged diff.
        let mut forbidden = read_only.clone();
        forbidden.extend(candidate.staged_overlap.files.iter().cloned());
        forbidden.extend(candidate.staged_overlap.config_files.iter().cloned());
        let read_only_files = sorted_set(&read_only);
        let forbidden_files = sorted_set(&forbidden);
        let expected_tests = candidate.affected_tests.clone();
        // The context list starts with the read-only files; the order of the
        // sections appended below is kept by the order-preserving dedupe.
        let mut read_only_context = read_only_files.clone();
        read_only_context.extend(
            candidate
                .worker_context
                .iter()
                .map(|summary| format!("worker_context: {summary}")),
        );
        read_only_context.extend(candidate.semantic_related.iter().map(|semantic| {
            format!(
                "semantic:{}:{}{}",
                semantic.kind,
                semantic.label,
                semantic
                    .source_file
                    .as_ref()
                    .map(|file| format!(" ({file})"))
                    .unwrap_or_default()
            )
        }));
        read_only_context.extend(
            candidate
                .semantic_dispatch_reasons
                .iter()
                .map(|reason| format!("semantic_rank: {reason}")),
        );
        // Worker feedback is summarised only when some feedback was recorded.
        if candidate.worker_feedback.total > 0 {
            read_only_context.push(format!(
                "worker_feedback: completed={} blocked={} touched_files={} expected_tests={} follow_up_ids={}",
                candidate.worker_feedback.completed,
                candidate.worker_feedback.blocked,
                feedback_ref_list(&candidate.worker_feedback.touched_files),
                feedback_ref_list(&candidate.worker_feedback.expected_tests),
                feedback_ref_list(&candidate.worker_feedback.follow_up_ids),
            ));
        }
        if candidate.worker_feedback.closure_rank_score > 0 {
            read_only_context.push(format!(
                "worker_feedback_closure: score={} stale_expected_tests={} follow_up_debt={}",
                candidate.worker_feedback.closure_rank_score,
                feedback_ref_list(&candidate.worker_feedback.stale_expected_tests),
                feedback_ref_list(&candidate.worker_feedback.follow_up_debt),
            ));
        }
        read_only_context.extend(
            candidate
                .worker_feedback
                .warnings
                .iter()
                .map(|warning| format!("worker_feedback_warning: {warning}")),
        );
        read_only_context = dedupe_preserve_order(read_only_context);
        let expansion_commands = conflict_matrix_expansion_commands(candidate);
        let title = format!(
            "Worker {} owns {} ({})",
            candidate.rank, candidate.target, candidate.target_label
        );
        let prompt_body = format!(
            "{title}\n\nOwned files:\n{}\n\nOwned symbols:\n{}\n\nRead-only context:\n{}\n\nForbidden files:\n{}\n\nExpected tests:\n{}\n\nExpansion commands:\n{}\n\nSemantic dispatch score: {}\n{}\n\nFail closed if the task requires a forbidden/shared file, an unowned config file, or a public contract change outside this ownership block.",
            markdown_list(&candidate.owned_files),
            markdown_list(&candidate.owned_symbols),
            markdown_list(&read_only_context),
            markdown_list(&forbidden_files),
            markdown_list(&expected_tests),
            markdown_list(&expansion_commands),
            candidate.semantic_dispatch_score,
            markdown_list(&candidate.semantic_dispatch_reasons),
        );
        // The budget is estimated from the prompt body only; the budget line
        // appended below is not itself counted.
        let token_budget = conflict_matrix_token_budget(&prompt_body, &candidate.source_handles);
        let prompt = format!(
            "{prompt_body}\n\nToken budget: prompt_estimated_tokens={} max_prompt_tokens={} source_windows={} source_window_lines={} max_context_bytes={}",
            token_budget.prompt_estimated_tokens,
            token_budget.max_prompt_tokens,
            token_budget.source_window_count,
            token_budget.source_window_lines,
            token_budget.max_context_bytes,
        );
        candidate.ownership = ConflictMatrixOwnershipBlock {
            contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
            title,
            owned_files: candidate.owned_files.clone(),
            owned_symbols: candidate.owned_symbols.clone(),
            read_only_context,
            read_only_files,
            forbidden_files,
            expected_tests,
            expansion_commands,
            token_budget,
            prompt,
        };
    }
}
16267
16268fn conflict_matrix_pair_requires_serial(pair: &ConflictMatrixPair) -> bool {
16269 matches!(
16270 pair.risk,
16271 ConflictMatrixRisk::High | ConflictMatrixRisk::FailClosed
16272 )
16273}
16274
16275fn apply_conflict_matrix_scheduler_fields(
16276 candidates: &mut [ConflictMatrixCandidate],
16277 conflicts: &[ConflictMatrixPair],
16278) {
16279 let rank_by_target = candidates
16280 .iter()
16281 .map(|candidate| (candidate.target.clone(), candidate.rank))
16282 .collect::<BTreeMap<_, _>>();
16283 let mut blocks = BTreeMap::<String, BTreeSet<String>>::new();
16284 let mut blocked_by = BTreeMap::<String, BTreeSet<String>>::new();
16285
16286 for pair in conflicts {
16287 if !conflict_matrix_pair_requires_serial(pair) {
16288 continue;
16289 }
16290 let left_rank = rank_by_target
16291 .get(&pair.left)
16292 .copied()
16293 .unwrap_or(usize::MAX);
16294 let right_rank = rank_by_target
16295 .get(&pair.right)
16296 .copied()
16297 .unwrap_or(usize::MAX);
16298 let (blocker, blocked) = if left_rank <= right_rank {
16299 (&pair.left, &pair.right)
16300 } else {
16301 (&pair.right, &pair.left)
16302 };
16303 blocks
16304 .entry(blocker.clone())
16305 .or_default()
16306 .insert(blocked.clone());
16307 blocked_by
16308 .entry(blocked.clone())
16309 .or_default()
16310 .insert(blocker.clone());
16311 }
16312
16313 for candidate in candidates.iter() {
16314 for follow_up in &candidate.worker_feedback.follow_up_debt {
16315 blocks
16316 .entry(candidate.target.clone())
16317 .or_default()
16318 .insert(follow_up.clone());
16319 if rank_by_target.contains_key(follow_up) {
16320 blocked_by
16321 .entry(follow_up.clone())
16322 .or_default()
16323 .insert(candidate.target.clone());
16324 }
16325 }
16326 }
16327
16328 for candidate in candidates.iter_mut() {
16329 let candidate_blocks: Vec<String> = blocks
16330 .remove(&candidate.target)
16331 .map(|values| values.into_iter().collect())
16332 .unwrap_or_default();
16333 let candidate_blocked_by: Vec<String> = blocked_by
16334 .remove(&candidate.target)
16335 .map(|values| values.into_iter().collect())
16336 .unwrap_or_default();
16337 let has_serial_edges = !candidate_blocks.is_empty() || !candidate_blocked_by.is_empty();
16338 candidate.parallel_safe =
16339 candidate.risk != ConflictMatrixRisk::FailClosed && !has_serial_edges;
16340 candidate.blocks = candidate_blocks;
16341 candidate.blocked_by = candidate_blocked_by;
16342 candidate.required_context = conflict_matrix_required_context(candidate);
16343 candidate.graph_handles = conflict_matrix_graph_handles(candidate);
16344 }
16345}
16346
16347fn conflict_matrix_worker_prompt_packets(
16348 candidates: &[ConflictMatrixCandidate],
16349) -> Vec<ConflictMatrixWorkerPromptPacket> {
16350 candidates
16351 .iter()
16352 .map(|candidate| ConflictMatrixWorkerPromptPacket {
16353 contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
16354 packet_id: conflict_matrix_worker_prompt_packet_id(candidate),
16355 target: candidate.target.clone(),
16356 rank: candidate.rank,
16357 risk: candidate.risk,
16358 previously_completed: candidate.previously_completed,
16359 parallel_safe: candidate.parallel_safe,
16360 blocks: candidate.blocks.clone(),
16361 blocked_by: candidate.blocked_by.clone(),
16362 required_context: candidate.required_context.clone(),
16363 graph_handles: candidate.graph_handles.clone(),
16364 projection_hash: candidate.projection_hash.clone(),
16365 title: candidate.ownership.title.clone(),
16366 owned_files: candidate.ownership.owned_files.clone(),
16367 owned_symbols: candidate.ownership.owned_symbols.clone(),
16368 read_only_context: candidate.ownership.read_only_context.clone(),
16369 forbidden_files: candidate.ownership.forbidden_files.clone(),
16370 expected_tests: candidate.ownership.expected_tests.clone(),
16371 expansion_commands: candidate.ownership.expansion_commands.clone(),
16372 token_budget: candidate.ownership.token_budget.clone(),
16373 semantic_dispatch_score: candidate.semantic_dispatch_score,
16374 semantic_dispatch_reasons: candidate.semantic_dispatch_reasons.clone(),
16375 worker_feedback: candidate.worker_feedback.clone(),
16376 prompt: candidate.ownership.prompt.clone(),
16377 })
16378 .collect()
16379}
16380
16381fn conflict_matrix_orchestration_observability(
16382 freshness: &GraphDbFreshnessReport,
16383 candidates: &[ConflictMatrixCandidate],
16384 conflicts: &[ConflictMatrixPair],
16385 next_commands: &[String],
16386) -> ConflictMatrixOrchestrationObservability {
16387 let evidence_packet_ids = candidates
16388 .iter()
16389 .map(|candidate| candidate.evidence_packet_id.clone())
16390 .collect::<Vec<_>>();
16391 let projection_hashes = candidates
16392 .iter()
16393 .filter_map(|candidate| candidate.projection_hash.clone())
16394 .collect::<BTreeSet<_>>()
16395 .into_iter()
16396 .collect::<Vec<_>>();
16397 let mut conflict_matrix_decisions = candidates
16398 .iter()
16399 .map(|candidate| {
16400 format!(
16401 "candidate #{} {} risk={} previously_completed={} closure_score={} semantic_score={} owned_files={} forbidden_files={}",
16402 candidate.rank,
16403 candidate.target,
16404 conflict_risk_label(candidate.risk),
16405 candidate.previously_completed,
16406 candidate.worker_feedback.closure_rank_score,
16407 candidate.semantic_dispatch_score,
16408 candidate.ownership.owned_files.len(),
16409 candidate.ownership.forbidden_files.len()
16410 )
16411 })
16412 .collect::<Vec<_>>();
16413 conflict_matrix_decisions.extend(conflicts.iter().map(|pair| {
16414 format!(
16415 "pair {}<->{} risk={} verdict={}",
16416 pair.left,
16417 pair.right,
16418 conflict_risk_label(pair.risk),
16419 pair.verdict
16420 )
16421 }));
16422 let worker_ownership_blocks = candidates
16423 .iter()
16424 .map(|candidate| candidate.ownership.title.clone())
16425 .collect::<Vec<_>>();
16426 ConflictMatrixOrchestrationObservability {
16427 contract_version: CONFLICT_MATRIX_CONTRACT_VERSION,
16428 projection_freshness: freshness.clone(),
16429 projection_hashes,
16430 evidence_packet_ids,
16431 conflict_matrix_decisions,
16432 worker_ownership_blocks,
16433 follow_up_commands: next_commands.to_vec(),
16434 }
16435}
16436
16437fn conflict_matrix_context_summary(
16438 context_pack: &ConflictMatrixPreparedContext,
16439) -> ConflictMatrixContextSummary {
16440 ConflictMatrixContextSummary {
16441 target: context_pack.target.clone(),
16442 target_kind: context_pack.target_kind.clone(),
16443 prompt_targets: context_pack.prompt_targets.clone(),
16444 touched_files: context_pack.touched_files.clone(),
16445 touched_symbols: context_pack.touched_symbols.clone(),
16446 files_changed: context_pack.files_changed,
16447 worker_context: context_pack.worker_context.clone(),
16448 source_windows: context_pack
16449 .source_windows
16450 .iter()
16451 .map(|window| format!("{}:{}-{}", window.file, window.start, window.end))
16452 .collect(),
16453 status_reminders: context_pack.status_reminders.clone(),
16454 }
16455}
16456
16457fn conflict_matrix_next_commands(
16458 root: &Path,
16459 path: &Path,
16460 scope: Option<&str>,
16461 targets: &[String],
16462 depth: usize,
16463 limit: usize,
16464 impact_limit: usize,
16465) -> Vec<String> {
16466 let mut commands = Vec::new();
16467 for target in targets {
16468 commands.push(format!(
16469 "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
16470 shell_quote(root.to_string_lossy().as_ref()),
16471 graph_db_scope_arg(scope),
16472 shell_quote(target),
16473 depth,
16474 limit
16475 ));
16476 }
16477 commands.push(format!(
16478 "tsift --envelope context-pack {} --budget normal",
16479 shell_quote(path.to_string_lossy().as_ref())
16480 ));
16481 commands.push(format!(
16482 "tsift diff-digest --cached {} --json",
16483 shell_quote(root.to_string_lossy().as_ref())
16484 ));
16485 commands.push(format!(
16486 "tsift impact {} --cached{} --limit {} --json",
16487 shell_quote(root.to_string_lossy().as_ref()),
16488 scope
16489 .map(|scope| format!(" --scope {}", shell_quote(scope)))
16490 .unwrap_or_default(),
16491 impact_limit
16492 ));
16493 dedupe_preserve_order(commands)
16494}
16495
16496fn print_conflict_matrix_human(report: &ConflictMatrixReport, compact: bool) {
16497 if compact {
16498 println!(
16499 "conflict-matrix targets:{} candidates:{} conflicts:{} can_parallel:{} fail_closed:{} cross_safe:{} per_target_fail_closed:{}",
16500 report.targets.len(),
16501 report.candidates.len(),
16502 report.conflicts.len(),
16503 report.can_parallel,
16504 report.fail_closed,
16505 report.cross_target_parallel_safe,
16506 report.per_target_fail_closed.len()
16507 );
16508 } else {
16509 println!("Conflict matrix");
16510 println!(" targets: {}", report.targets.join(", "));
16511 println!(" can parallel: {}", report.can_parallel);
16512 println!(" fail closed: {}", report.fail_closed);
16513 println!(
16514 " cross target parallel safe: {}",
16515 report.cross_target_parallel_safe
16516 );
16517 println!(
16518 " per target fail closed: {}",
16519 report.per_target_fail_closed.len()
16520 );
16521 }
16522 for candidate in &report.candidates {
16523 println!(
16524 "candidate #{} {} risk:{} score:{} semantic:{} files:{} symbols:{} tests:{}",
16525 candidate.rank,
16526 candidate.target,
16527 conflict_risk_label(candidate.risk),
16528 candidate.risk_score,
16529 candidate.semantic_dispatch_score,
16530 candidate.owned_files.len(),
16531 candidate.owned_symbols.len(),
16532 candidate.affected_tests.len()
16533 );
16534 if candidate.previously_completed {
16535 println!(" previously completed: true");
16536 }
16537 for reason in &candidate.risk_reasons {
16538 println!(" reason: {reason}");
16539 }
16540 if candidate.worker_feedback.total > 0 {
16541 println!(
16542 " worker feedback: completed:{} blocked:{} files:{} tests:{} follow-ups:{} closure:{}",
16543 candidate.worker_feedback.completed,
16544 candidate.worker_feedback.blocked,
16545 candidate.worker_feedback.touched_files.len(),
16546 candidate.worker_feedback.expected_tests.len(),
16547 candidate.worker_feedback.follow_up_ids.len(),
16548 candidate.worker_feedback.closure_rank_score
16549 );
16550 for reason in &candidate.worker_feedback.closure_rank_reasons {
16551 println!(" closure: {reason}");
16552 }
16553 for warning in &candidate.worker_feedback.warnings {
16554 println!(" warning: {warning}");
16555 }
16556 }
16557 }
16558 for pair in &report.conflicts {
16559 println!(
16560 "conflict {} <-> {} risk:{} score:{} verdict:{}",
16561 pair.left,
16562 pair.right,
16563 conflict_risk_label(pair.risk),
16564 pair.risk_score,
16565 pair.verdict
16566 );
16567 for file in &pair.shared_files {
16568 println!(" shared file: {file}");
16569 }
16570 for symbol in &pair.shared_symbols {
16571 println!(" shared symbol: {symbol}");
16572 }
16573 }
16574 for command in &report.next_commands {
16575 println!("next: {command}");
16576 }
16577 for packet in &report.worker_prompt_packets {
16578 println!("worker-prompt #{} {}", packet.rank, packet.title);
16579 }
16580 for warning in &report.warnings {
16581 println!("warning: {warning}");
16582 }
16583 if !report.per_target_fail_closed.is_empty() {
16584 println!(
16585 "per-target fail closed: {} target(s)",
16586 report.per_target_fail_closed.len()
16587 );
16588 for target in &report.per_target_fail_closed {
16589 println!(
16590 " {} source_handles:{} owned_files:{} reasons:{}",
16591 target.target,
16592 target.source_handle_count,
16593 target.owned_files.len(),
16594 target.risk_reasons.join("; ")
16595 );
16596 }
16597 }
16598}
16599
/// Target-independent inputs produced by `prepare_conflict_matrix_inputs`.
///
/// The value is cloneable and serializable because it is stored both in the
/// in-process preparation cache and in the on-disk JSON cache.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedInputs {
    /// Prepared context derived from the context-pack report for the document path.
    context_pack: ConflictMatrixPreparedContext,
    /// Diff digest computed with `cached: true` (the staged diff).
    cached_diff: diff_digest::DiffDigestReport,
    /// Impact report computed with `cached: true` for the requested scope and limit.
    impact_report: impact::ImpactReport,
    /// Cache key, watermarks, and hit/miss status for this preparation.
    preparation_cache: ConflictMatrixPreparationCacheSummary,
    /// Per-phase timings; rewritten with zeroed phases when served from a cache.
    preparation_timings: Vec<GraphDbBackendEvalPhaseTiming>,
}
16608
/// In-memory copy of graph nodes and edges together with a lookup index built
/// over the nodes.
struct ConflictMatrixGraphSnapshot {
    /// Nodes captured from the graph store.
    nodes: Vec<SubstrateGraphNode>,
    /// Edges captured from the graph store.
    edges: Vec<SubstrateGraphEdge>,
    /// Index derived from `nodes` (see `conflict_matrix_graph_index`).
    index: ConflictMatrixGraphIndex,
}
16614
/// A full evidence report paired with its compact packet summary.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedEvidence {
    /// Complete graph-db evidence report.
    report: GraphDbEvidenceReport,
    /// Condensed identifiers and replay command describing `report`.
    summary: ConflictMatrixEvidencePacketSummary,
}
16620
/// Graph-side inputs prepared for a set of targets: the graph snapshot, the
/// prepared evidence entries, and aggregate preparation counters.
struct ConflictMatrixGraphPreparedInputs {
    /// Targets these inputs were prepared for.
    targets: Vec<String>,
    /// Graph nodes/edges plus their lookup index.
    graph: ConflictMatrixGraphSnapshot,
    /// Prepared evidence entries — presumably one per target; confirm against the builder.
    evidence: Vec<ConflictMatrixPreparedEvidence>,
    /// Counts describing the shared preparation work.
    shared_preparation: ConflictMatrixSharedPreparationSummary,
}
16627
/// Serializable cache record for prepared graph inputs.
///
/// NOTE(review): this appears to be the persisted counterpart of
/// `ConflictMatrixGraphPreparedInputs`, storing nodes and edges flat (without
/// the derived index) — confirm against the code that reads and writes it.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixGraphPreparedCache {
    /// Cache format version recorded with the entry.
    version: String,
    /// Cache key the entry was stored under.
    key: String,
    /// Targets the cached inputs were prepared for.
    targets: Vec<String>,
    /// Cached graph nodes.
    nodes: Vec<SubstrateGraphNode>,
    /// Cached graph edges.
    edges: Vec<SubstrateGraphEdge>,
    /// Cached evidence entries.
    evidence: Vec<ConflictMatrixPreparedEvidence>,
    /// Counts describing the shared preparation work.
    shared_preparation: ConflictMatrixSharedPreparationSummary,
}
16638
/// Process-wide, lazily initialised map from preparation cache key to prepared
/// inputs. Access it through `conflict_matrix_preparation_cache()`.
static CONFLICT_MATRIX_PREPARATION_CACHE: OnceLock<
    Mutex<BTreeMap<String, ConflictMatrixPreparedInputs>>,
> = OnceLock::new();
16642
16643fn conflict_matrix_preparation_cache()
16644-> &'static Mutex<BTreeMap<String, ConflictMatrixPreparedInputs>> {
16645 CONFLICT_MATRIX_PREPARATION_CACHE.get_or_init(|| Mutex::new(BTreeMap::new()))
16646}
16647
16648fn hash_bytes_hex(bytes: &[u8]) -> String {
16649 blake3::hash(bytes).to_hex().to_string()
16650}
16651
/// Returns the directory under `root` that holds on-disk conflict-matrix caches.
fn conflict_matrix_disk_cache_dir(root: &Path) -> PathBuf {
    let mut cache_dir = root.to_path_buf();
    cache_dir.push(".tsift/conflict-matrix-cache");
    cache_dir
}
16655
16656fn conflict_matrix_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
16657 conflict_matrix_disk_cache_dir(root)
16658 .join(kind)
16659 .join(format!("{key}.json"))
16660}
16661
16662fn conflict_matrix_read_disk_cache<T: for<'de> Deserialize<'de>>(
16663 root: &Path,
16664 kind: &str,
16665 key: &str,
16666) -> Option<T> {
16667 let path = conflict_matrix_disk_cache_path(root, kind, key);
16668 let bytes = fs::read(path).ok()?;
16669 serde_json::from_slice(&bytes).ok()
16670}
16671
16672fn conflict_matrix_write_disk_cache<T: Serialize>(root: &Path, kind: &str, key: &str, value: &T) {
16673 let path = conflict_matrix_disk_cache_path(root, kind, key);
16674 let Some(parent) = path.parent() else {
16675 return;
16676 };
16677 if fs::create_dir_all(parent).is_err() {
16678 return;
16679 }
16680 if let Ok(bytes) = serde_json::to_vec(value) {
16681 let _ = fs::write(path, bytes);
16682 }
16683}
16684
16685fn conflict_matrix_document_watermark(path: &Path) -> Result<String> {
16686 if path.is_dir() {
16687 let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
16688 return Ok(hash_bytes_hex(
16689 format!("directory:{}", canonical.display()).as_bytes(),
16690 ));
16691 }
16692 let bytes = fs::read(path)
16693 .with_context(|| format!("reading conflict-matrix document {}", path.display()))?;
16694 Ok(hash_bytes_hex(&bytes))
16695}
16696
16697fn conflict_matrix_staged_diff_watermark(root: &Path) -> String {
16698 match Command::new("git")
16699 .arg("-C")
16700 .arg(root)
16701 .args(["diff", "--cached", "--raw", "--no-ext-diff"])
16702 .output()
16703 {
16704 Ok(output) => {
16705 let mut bytes = Vec::new();
16706 bytes.extend_from_slice(output.status.to_string().as_bytes());
16707 bytes.extend_from_slice(&output.stdout);
16708 bytes.extend_from_slice(&output.stderr);
16709 hash_bytes_hex(&bytes)
16710 }
16711 Err(err) => hash_bytes_hex(format!("git-diff-cached-unavailable:{err:#}").as_bytes()),
16712 }
16713}
16714
16715fn conflict_matrix_preparation_cache_summary(
16716 root: &Path,
16717 path: &Path,
16718 scope: Option<&str>,
16719) -> Result<ConflictMatrixPreparationCacheSummary> {
16720 let source_watermark = traversal_source_watermark(root, path, scope, false)?
16721 .unwrap_or_else(|| "unavailable".to_string());
16722 let document_watermark = conflict_matrix_document_watermark(path)?;
16723 let staged_diff_watermark = conflict_matrix_staged_diff_watermark(root);
16724 let key = content_hash(&vec![
16725 format!("version:{CONFLICT_MATRIX_PREPARATION_CACHE_VERSION}"),
16726 format!("root:{}", root.display()),
16727 format!("path:{}", path.display()),
16728 format!("scope:{}", scope.unwrap_or("root")),
16729 format!("source:{source_watermark}"),
16730 format!("document:{document_watermark}"),
16731 format!("staged_diff:{staged_diff_watermark}"),
16732 ])?;
16733 Ok(ConflictMatrixPreparationCacheSummary {
16734 version: CONFLICT_MATRIX_PREPARATION_CACHE_VERSION.to_string(),
16735 key,
16736 status: "memory_miss".to_string(),
16737 source_watermark,
16738 document_watermark,
16739 staged_diff_watermark,
16740 })
16741}
16742
16743fn conflict_matrix_prepared_inputs_cache_hit(
16744 mut cached: ConflictMatrixPreparedInputs,
16745 status: &str,
16746 duration_micros: u128,
16747 detail: &str,
16748) -> ConflictMatrixPreparedInputs {
16749 cached.preparation_cache.status = status.to_string();
16750 let cached_detail = format!(
16751 "reused from {status} conflict-matrix preparation cache by source/document/staged-diff watermark; cost accounted in preparation_cache_lookup"
16752 );
16753 cached.preparation_timings = vec![
16754 graph_db_backend_eval_phase_timing("preparation_cache_lookup", duration_micros, detail),
16755 graph_db_backend_eval_phase_timing("session_review_compute", 0, &cached_detail),
16756 graph_db_backend_eval_phase_timing(
16757 "session_review_compute.target_context_build",
16758 0,
16759 &cached_detail,
16760 ),
16761 graph_db_backend_eval_phase_timing(
16762 "session_review_compute.session_discovery",
16763 0,
16764 &cached_detail,
16765 ),
16766 graph_db_backend_eval_phase_timing(
16767 "session_review_compute.session_digest_total",
16768 0,
16769 &cached_detail,
16770 ),
16771 graph_db_backend_eval_phase_timing(
16772 "session_review_compute.session_cost_total",
16773 0,
16774 &cached_detail,
16775 ),
16776 graph_db_backend_eval_phase_timing(
16777 "session_review_compute.session_aggregation",
16778 0,
16779 &cached_detail,
16780 ),
16781 graph_db_backend_eval_phase_timing(
16782 "session_review_compute.report_assembly",
16783 0,
16784 &cached_detail,
16785 ),
16786 graph_db_backend_eval_phase_timing("status_index_gate", 0, &cached_detail),
16787 graph_db_backend_eval_phase_timing(
16788 "status_index_gate.prepare_agent_doc_index_gate",
16789 0,
16790 &cached_detail,
16791 ),
16792 graph_db_backend_eval_phase_timing(
16793 "status_index_gate.context_pack_status_reminders",
16794 0,
16795 &cached_detail,
16796 ),
16797 graph_db_backend_eval_phase_timing(
16798 "status_index_gate.load_tag_ontology_preview_context",
16799 0,
16800 &cached_detail,
16801 ),
16802 graph_db_backend_eval_phase_timing("context_pack_diff", 0, &cached_detail),
16803 graph_db_backend_eval_phase_timing("exploration_materialization", 0, &cached_detail),
16804 graph_db_backend_eval_phase_timing("graph_orchestration", 0, &cached_detail),
16805 graph_db_backend_eval_phase_timing("staged_diff", 0, &cached_detail),
16806 graph_db_backend_eval_phase_timing("impact", 0, &cached_detail),
16807 graph_db_backend_eval_phase_timing("impact.context_resolution", 0, &cached_detail),
16808 graph_db_backend_eval_phase_timing("impact.diff_digest", 0, &cached_detail),
16809 graph_db_backend_eval_phase_timing("impact.test_path_scan", 0, &cached_detail),
16810 graph_db_backend_eval_phase_timing("impact.index_open", 0, &cached_detail),
16811 graph_db_backend_eval_phase_timing("impact.call_edge_impacts", 0, &cached_detail),
16812 graph_db_backend_eval_phase_timing("impact.route_handler_impacts", 0, &cached_detail),
16813 graph_db_backend_eval_phase_timing("impact.import_impacts", 0, &cached_detail),
16814 graph_db_backend_eval_phase_timing("impact.report_assembly", 0, &cached_detail),
16815 ];
16816 cached
16817}
16818
16819pub(crate) fn prepare_conflict_matrix_inputs(
16820 root: &Path,
16821 path: &Path,
16822 scope: Option<&str>,
16823 impact_limit: usize,
16824) -> Result<ConflictMatrixPreparedInputs> {
16825 let cache_lookup_started = Instant::now();
16826 let mut cache_summary = conflict_matrix_preparation_cache_summary(root, path, scope)?;
16827 if let Some(cached) = conflict_matrix_preparation_cache()
16828 .lock()
16829 .map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
16830 .get(&cache_summary.key)
16831 .cloned()
16832 {
16833 return Ok(conflict_matrix_prepared_inputs_cache_hit(
16834 cached,
16835 "memory_hit",
16836 cache_lookup_started.elapsed().as_micros(),
16837 "reused prepared context-pack, staged diff, and impact packet from memory by source/document/staged-diff watermark",
16838 ));
16839 }
16840 if let Some(cached) = conflict_matrix_read_disk_cache::<ConflictMatrixPreparedInputs>(
16841 root,
16842 "inputs",
16843 &cache_summary.key,
16844 ) {
16845 let cached = conflict_matrix_prepared_inputs_cache_hit(
16846 cached,
16847 "disk_hit",
16848 cache_lookup_started.elapsed().as_micros(),
16849 "reused prepared context-pack, staged diff, and impact packet from .tsift/conflict-matrix-cache by source/document/staged-diff watermark",
16850 );
16851 conflict_matrix_preparation_cache()
16852 .lock()
16853 .map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
16854 .insert(cached.preparation_cache.key.clone(), cached.clone());
16855 return Ok(cached);
16856 }
16857
16858 let mut preparation_timings = vec![graph_db_backend_eval_phase_timing(
16859 "preparation_cache_lookup",
16860 cache_lookup_started.elapsed().as_micros(),
16861 "no prepared packet matched the source/document/staged-diff watermark",
16862 )];
16863 cache_summary.status = "computed".to_string();
16864 let (context_pack_report, context_pack_timings) = build_context_pack_report_with_profile(
16865 path,
16866 None,
16867 None,
16868 None,
16869 ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Normal), false),
16870 )?;
16871 preparation_timings.extend(context_pack_timings);
16872 let context_pack = ConflictMatrixPreparedContext::from_context_pack(&context_pack_report);
16873 let cached_diff = graph_db_backend_eval_timed_phase(
16874 &mut preparation_timings,
16875 "staged_diff",
16876 "cached/staged diff digest used for ownership overlap checks",
16877 || {
16878 diff_digest::compute(
16879 root,
16880 diff_digest::DiffDigestOptions {
16881 cached: true,
16882 revision: None,
16883 max_parsed_files: None,
16884 },
16885 )
16886 .with_context(|| format!("computing cached diff digest for {}", root.display()))
16887 },
16888 )?;
16889 let impact_started = Instant::now();
16890 let (impact_report, impact_sub_phases) = impact::compute_with_phases(
16891 root,
16892 impact::ImpactOptions {
16893 cached: true,
16894 revision: None,
16895 scope,
16896 limit: impact_limit,
16897 },
16898 )
16899 .with_context(|| format!("computing cached impact report for {}", root.display()))?;
16900 let impact_total_micros = impact_started.elapsed().as_micros();
16901 preparation_timings.push(graph_db_backend_eval_phase_timing(
16902 "impact",
16903 impact_total_micros,
16904 "cached impact analysis used for affected-test ownership checks",
16905 ));
16906 for sub in &impact_sub_phases {
16907 preparation_timings.push(graph_db_backend_eval_phase_timing(
16908 &format!("impact.{}", sub.name),
16909 sub.duration_micros,
16910 &sub.detail,
16911 ));
16912 }
16913 let prepared = ConflictMatrixPreparedInputs {
16914 context_pack,
16915 cached_diff,
16916 impact_report,
16917 preparation_cache: cache_summary,
16918 preparation_timings,
16919 };
16920 conflict_matrix_preparation_cache()
16921 .lock()
16922 .map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
16923 .insert(prepared.preparation_cache.key.clone(), prepared.clone());
16924 conflict_matrix_write_disk_cache(root, "inputs", &prepared.preparation_cache.key, &prepared);
16925 Ok(prepared)
16926}
16927
16928fn conflict_matrix_evidence_packet_summary(
16929 root: &Path,
16930 scope: Option<&str>,
16931 target: &str,
16932 depth: usize,
16933 limit: usize,
16934 evidence: &GraphDbEvidenceReport,
16935) -> ConflictMatrixEvidencePacketSummary {
16936 ConflictMatrixEvidencePacketSummary {
16937 target: evidence.target.clone(),
16938 packet_id: evidence.packet_id.clone(),
16939 target_node_id: evidence.target_node.id.clone(),
16940 projection_hash: evidence.projection_hash.clone(),
16941 replay_command: evidence
16942 .replay_commands
16943 .first()
16944 .cloned()
16945 .unwrap_or_else(|| {
16946 format!(
16947 "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
16948 shell_quote(root.to_string_lossy().as_ref()),
16949 graph_db_scope_arg(scope),
16950 shell_quote(target),
16951 depth,
16952 limit
16953 )
16954 }),
16955 }
16956}
16957
16958fn conflict_matrix_shared_preparation_summary(
16959 graph: &ConflictMatrixGraphSnapshot,
16960 evidence: &[ConflictMatrixPreparedEvidence],
16961 evidence_cache_status: &str,
16962) -> ConflictMatrixSharedPreparationSummary {
16963 ConflictMatrixSharedPreparationSummary {
16964 evidence_cache_status: evidence_cache_status.to_string(),
16965 graph_nodes: graph.nodes.len(),
16966 graph_edges: graph.edges.len(),
16967 evidence_packets: evidence.len(),
16968 source_handles: evidence
16969 .iter()
16970 .map(|entry| entry.report.source_handles.len())
16971 .sum(),
16972 worker_context: evidence
16973 .iter()
16974 .map(|entry| entry.report.worker_context.len())
16975 .sum(),
16976 worker_results: evidence
16977 .iter()
16978 .map(|entry| entry.report.worker_results.len())
16979 .sum(),
16980 semantic_rows: evidence
16981 .iter()
16982 .map(|entry| entry.report.semantic_related.len())
16983 .sum(),
16984 dispatch_trace_snapshot_nodes: graph.nodes.len(),
16985 dispatch_trace_snapshot_edges: graph.edges.len(),
16986 }
16987}
16988
16989#[allow(dead_code)]
16990fn conflict_matrix_graph_snapshot(store: &impl GraphStore) -> Result<ConflictMatrixGraphSnapshot> {
16991 let nodes = store.all_nodes()?;
16992 let edges = store.all_edges()?;
16993 let index = conflict_matrix_graph_index(&nodes);
16994 Ok(ConflictMatrixGraphSnapshot {
16995 nodes,
16996 edges,
16997 index,
16998 })
16999}
17000
17001fn insert_conflict_graph_node(
17002 nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17003 node: SubstrateGraphNode,
17004) {
17005 nodes.entry(node.id.clone()).or_insert(node);
17006}
17007
17008fn insert_conflict_graph_edge(
17009 edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
17010 edge: SubstrateGraphEdge,
17011) {
17012 edges
17013 .entry((edge.from_id.clone(), edge.kind.clone(), edge.to_id.clone()))
17014 .or_insert(edge);
17015}
17016
17017fn conflict_matrix_files_from_evidence(evidence: &GraphDbEvidenceReport) -> BTreeSet<String> {
17018 let mut files = BTreeSet::new();
17019 if matches!(
17020 evidence.target_node.kind.as_str(),
17021 "file" | "symbol" | "route"
17022 ) && let Some(path) = evidence.target_node.properties.get("path")
17023 {
17024 files.insert(path.clone());
17025 }
17026 for node in &evidence.source_handles {
17027 if let Some(handle) = conflict_matrix_source_handle(node) {
17028 files.insert(handle.file);
17029 }
17030 }
17031 files
17032}
17033
17034fn conflict_matrix_add_path_nodes<S: GraphStore>(
17035 store: &S,
17036 nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17037 evidence: &GraphDbEvidenceReport,
17038) -> Result<()> {
17039 for path in &evidence.shortest_paths {
17040 let Some(graph_path) = &path.path else {
17041 continue;
17042 };
17043 for id in &graph_path.nodes {
17044 if nodes.contains_key(id) {
17045 continue;
17046 }
17047 if let Some(node) = store.node(id)? {
17048 insert_conflict_graph_node(nodes, node);
17049 }
17050 }
17051 }
17052 Ok(())
17053}
17054
17055fn conflict_matrix_add_file_symbol_nodes<S: GraphStore>(
17056 store: &S,
17057 nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17058 files: &BTreeSet<String>,
17059) -> Result<()> {
17060 for file in files {
17061 for kind in ["file", "route", "symbol"] {
17062 let page = store.paged_nodes_by_kind(
17063 kind,
17064 GraphQueryOptions {
17065 property_filters: vec![GraphPropertyFilter {
17066 key: "path".to_string(),
17067 value: file.clone(),
17068 }],
17069 ..GraphQueryOptions::default()
17070 },
17071 )?;
17072 for node in page.nodes {
17073 insert_conflict_graph_node(nodes, node);
17074 }
17075 }
17076 }
17077 Ok(())
17078}
17079
17080fn conflict_matrix_add_target_ref_nodes<S: GraphStore>(
17081 store: &S,
17082 nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17083 target_node: &SubstrateGraphNode,
17084) -> Result<()> {
17085 let Some(ref_id) = target_node.properties.get("ref_id") else {
17086 return Ok(());
17087 };
17088 for kind in ["backlog", "job_packet", "worker_result"] {
17089 let page = store.paged_nodes_by_kind(
17090 kind,
17091 GraphQueryOptions {
17092 property_filters: vec![GraphPropertyFilter {
17093 key: "ref_id".to_string(),
17094 value: ref_id.clone(),
17095 }],
17096 ..GraphQueryOptions::default()
17097 },
17098 )?;
17099 for node in page.nodes {
17100 insert_conflict_graph_node(nodes, node);
17101 }
17102 }
17103 Ok(())
17104}
17105
17106fn conflict_matrix_add_target_neighborhood<S: GraphStore>(
17107 store: &S,
17108 nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17109 edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
17110 target_node: &SubstrateGraphNode,
17111 depth: usize,
17112 limit: usize,
17113) -> Result<()> {
17114 let node_limit = if limit == 0 {
17115 None
17116 } else {
17117 Some(limit.saturating_mul(depth.max(1)).saturating_mul(8).max(64))
17118 };
17119 if let Some(page) = store.paged_neighborhood(
17120 &target_node.id,
17121 depth,
17122 None,
17123 GraphQueryOptions {
17124 limit: node_limit,
17125 ..GraphQueryOptions::default()
17126 },
17127 )? {
17128 for node in page.nodes {
17129 insert_conflict_graph_node(nodes, node);
17130 }
17131 for edge in page.edges {
17132 insert_conflict_graph_edge(edges, edge);
17133 }
17134 }
17135 Ok(())
17136}
17137
17138fn conflict_matrix_add_scoped_edges<S: GraphStore>(
17139 store: &S,
17140 nodes: &BTreeMap<String, SubstrateGraphNode>,
17141 edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
17142) -> Result<()> {
17143 let node_ids = nodes.keys().cloned().collect::<BTreeSet<_>>();
17144 for edge in store.edges_between_nodes(&node_ids)? {
17145 insert_conflict_graph_edge(edges, edge);
17146 }
17147 Ok(())
17148}
17149
17150fn conflict_matrix_target_scoped_graph_snapshot<S: GraphStore>(
17151 store: &S,
17152 evidence: &[ConflictMatrixPreparedEvidence],
17153 depth: usize,
17154 limit: usize,
17155) -> Result<ConflictMatrixGraphSnapshot> {
17156 let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
17157 let mut edges = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
17158 let mut files = BTreeSet::new();
17159
17160 for prepared in evidence {
17161 let report = &prepared.report;
17162 insert_conflict_graph_node(&mut nodes, report.target_node.clone());
17163 for node in report
17164 .worker_context
17165 .iter()
17166 .chain(report.source_handles.iter())
17167 .chain(report.worker_results.iter())
17168 .chain(report.semantic_related.iter())
17169 {
17170 insert_conflict_graph_node(&mut nodes, node.clone());
17171 }
17172 files.extend(conflict_matrix_files_from_evidence(report));
17173 conflict_matrix_add_target_ref_nodes(store, &mut nodes, &report.target_node)?;
17174 conflict_matrix_add_path_nodes(store, &mut nodes, report)?;
17175 conflict_matrix_add_target_neighborhood(
17176 store,
17177 &mut nodes,
17178 &mut edges,
17179 &report.target_node,
17180 depth,
17181 limit,
17182 )?;
17183 }
17184
17185 conflict_matrix_add_file_symbol_nodes(store, &mut nodes, &files)?;
17186 conflict_matrix_add_scoped_edges(store, &nodes, &mut edges)?;
17187
17188 let nodes = nodes.into_values().collect::<Vec<_>>();
17189 let edges = edges.into_values().collect::<Vec<_>>();
17190 let index = conflict_matrix_graph_index(&nodes);
17191 Ok(ConflictMatrixGraphSnapshot {
17192 nodes,
17193 edges,
17194 index,
17195 })
17196}
17197
17198#[allow(clippy::too_many_arguments)]
17199fn collect_conflict_matrix_evidence_packets<S: GraphStore>(
17200 root: &Path,
17201 scope: Option<&str>,
17202 backend: &str,
17203 targets: &[String],
17204 depth: usize,
17205 limit: usize,
17206 store: &S,
17207 freshness: GraphDbFreshnessReport,
17208) -> Result<Vec<ConflictMatrixPreparedEvidence>> {
17209 let mut evidence = Vec::new();
17210 for target in targets {
17211 let report = graph_db_evidence_report_from_store(GraphDbEvidenceInput {
17212 root,
17213 scope,
17214 backend,
17215 target,
17216 depth,
17217 limit,
17218 store,
17219 freshness: freshness.clone(),
17220 warnings: Vec::new(),
17221 })
17222 .with_context(|| format!("collecting graph-db evidence for {target}"))?;
17223 let summary =
17224 conflict_matrix_evidence_packet_summary(root, scope, target, depth, limit, &report);
17225 evidence.push(ConflictMatrixPreparedEvidence { report, summary });
17226 }
17227 Ok(evidence)
17228}
17229
17230fn conflict_matrix_graph_preparation_cache_key(
17231 prepared: &ConflictMatrixPreparedInputs,
17232 scope: Option<&str>,
17233 backend: &str,
17234 targets: &[String],
17235 depth: usize,
17236 limit: usize,
17237 freshness: &GraphDbFreshnessReport,
17238) -> Result<String> {
17239 content_hash(&serde_json::json!({
17240 "version": CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION,
17241 "prepared_inputs_key": prepared.preparation_cache.key.as_str(),
17242 "scope": scope.unwrap_or("root"),
17243 "backend": backend,
17244 "targets": targets,
17245 "depth": depth,
17246 "limit": limit,
17247 "projection_version": freshness.projection_version.as_deref(),
17248 "projection_hash": freshness.content_hash.as_deref(),
17249 "source_watermark": freshness.source_watermark.as_deref(),
17250 }))
17251}
17252
17253fn conflict_matrix_graph_prepared_cache_hit(
17254 cached: ConflictMatrixGraphPreparedCache,
17255 status: &str,
17256) -> ConflictMatrixGraphPreparedInputs {
17257 let mut shared_preparation = cached.shared_preparation;
17258 shared_preparation.evidence_cache_status = status.to_string();
17259 let index = conflict_matrix_graph_index(&cached.nodes);
17260 ConflictMatrixGraphPreparedInputs {
17261 targets: cached.targets,
17262 graph: ConflictMatrixGraphSnapshot {
17263 nodes: cached.nodes,
17264 edges: cached.edges,
17265 index,
17266 },
17267 evidence: cached.evidence,
17268 shared_preparation,
17269 }
17270}
17271
17272fn conflict_matrix_graph_prepared_cache_from_inputs(
17273 key: &str,
17274 prepared: &ConflictMatrixGraphPreparedInputs,
17275) -> ConflictMatrixGraphPreparedCache {
17276 ConflictMatrixGraphPreparedCache {
17277 version: CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION.to_string(),
17278 key: key.to_string(),
17279 targets: prepared.targets.clone(),
17280 nodes: prepared.graph.nodes.clone(),
17281 edges: prepared.graph.edges.clone(),
17282 evidence: prepared.evidence.clone(),
17283 shared_preparation: prepared.shared_preparation.clone(),
17284 }
17285}
17286
17287#[allow(clippy::too_many_arguments)]
17288fn prepare_conflict_matrix_graph_orchestration<S: GraphStore>(
17289 root: &Path,
17290 scope: Option<&str>,
17291 backend: &str,
17292 raw_targets: &[String],
17293 prepared: &ConflictMatrixPreparedInputs,
17294 depth: usize,
17295 limit: usize,
17296 store: &S,
17297 freshness: GraphDbFreshnessReport,
17298) -> Result<ConflictMatrixGraphPreparedInputs> {
17299 let targets = resolve_conflict_matrix_targets(store, raw_targets, &prepared.context_pack)?;
17300 let graph_cache_key = conflict_matrix_graph_preparation_cache_key(
17301 prepared, scope, backend, &targets, depth, limit, &freshness,
17302 )?;
17303 if let Some(cached) = conflict_matrix_read_disk_cache::<ConflictMatrixGraphPreparedCache>(
17304 root,
17305 "graph",
17306 &graph_cache_key,
17307 ) && cached.version == CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION
17308 && cached.key == graph_cache_key
17309 && cached.targets == targets
17310 {
17311 return Ok(conflict_matrix_graph_prepared_cache_hit(cached, "disk_hit"));
17312 }
17313 let evidence = collect_conflict_matrix_evidence_packets(
17314 root, scope, backend, &targets, depth, limit, store, freshness,
17315 )?;
17316 let graph = conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
17317 let shared_preparation =
17318 conflict_matrix_shared_preparation_summary(&graph, &evidence, "computed");
17319
17320 let prepared_graph = ConflictMatrixGraphPreparedInputs {
17321 targets,
17322 graph,
17323 evidence,
17324 shared_preparation,
17325 };
17326 let cache = conflict_matrix_graph_prepared_cache_from_inputs(&graph_cache_key, &prepared_graph);
17327 conflict_matrix_write_disk_cache(root, "graph", &graph_cache_key, &cache);
17328 Ok(prepared_graph)
17329}
17330
17331#[allow(clippy::too_many_arguments)]
17332fn build_conflict_matrix_report_from_prepared_graph(
17333 root: &Path,
17334 path: &Path,
17335 scope: Option<&str>,
17336 depth: usize,
17337 limit: usize,
17338 impact_limit: usize,
17339 freshness: GraphDbFreshnessReport,
17340 extra_warnings: Vec<String>,
17341 prepared: &ConflictMatrixPreparedInputs,
17342 graph_prepared: &ConflictMatrixGraphPreparedInputs,
17343) -> Result<ConflictMatrixReport> {
17344 let context_pack = &prepared.context_pack;
17345 let targets = graph_prepared.targets.clone();
17346 let graph_index = &graph_prepared.graph.index;
17347
17348 let mut warnings = context_pack.status_reminders.clone();
17349 warnings.extend(extra_warnings);
17350 let mut candidates = Vec::new();
17351 let mut evidence_packets = Vec::new();
17352 for prepared_evidence in &graph_prepared.evidence {
17353 let evidence = &prepared_evidence.report;
17354 warnings.extend(evidence.warnings.clone());
17355 evidence_packets.push(prepared_evidence.summary.clone());
17356 candidates.push(conflict_matrix_candidate_from_evidence(
17357 root,
17358 evidence,
17359 graph_index,
17360 &prepared.cached_diff,
17361 &prepared.impact_report,
17362 ));
17363 }
17364
17365 apply_conflict_matrix_worker_feedback_controls(&mut candidates);
17366 candidates.sort_by(|left, right| {
17367 left.risk
17368 .cmp(&right.risk)
17369 .then_with(|| left.risk_score.cmp(&right.risk_score))
17370 .then_with(|| {
17371 right
17372 .worker_feedback
17373 .closure_rank_score
17374 .cmp(&left.worker_feedback.closure_rank_score)
17375 })
17376 .then_with(|| {
17377 right
17378 .semantic_dispatch_score
17379 .cmp(&left.semantic_dispatch_score)
17380 })
17381 .then_with(|| left.target.cmp(&right.target))
17382 });
17383 for (idx, candidate) in candidates.iter_mut().enumerate() {
17384 candidate.rank = idx + 1;
17385 }
17386 warnings.extend(candidates.iter().flat_map(|candidate| {
17387 candidate
17388 .worker_feedback
17389 .warnings
17390 .iter()
17391 .map(|warning| format!("{}: {warning}", candidate.target))
17392 }));
17393 let conflicts = build_conflict_matrix_pairs(&candidates);
17394 apply_conflict_matrix_ownership_blocks(&mut candidates);
17395 apply_conflict_matrix_scheduler_fields(&mut candidates, &conflicts);
17396 let worker_prompt_packets = conflict_matrix_worker_prompt_packets(&candidates);
17397
17398 let per_target_fail_closed = conflict_matrix_per_target_fail_closed(&candidates);
17399 let cross_target_parallel_safe = conflicts
17400 .iter()
17401 .all(|pair| pair.risk <= ConflictMatrixRisk::Medium);
17402 let fail_closed = !per_target_fail_closed.is_empty()
17403 || conflicts
17404 .iter()
17405 .any(|pair| pair.risk == ConflictMatrixRisk::FailClosed);
17406 let can_parallel = !fail_closed && cross_target_parallel_safe;
17407 let next_commands =
17408 conflict_matrix_next_commands(root, path, scope, &targets, depth, limit, impact_limit);
17409 let orchestration = conflict_matrix_orchestration_observability(
17410 &freshness,
17411 &candidates,
17412 &conflicts,
17413 &next_commands,
17414 );
17415 let inputs = ConflictMatrixInputSummary {
17416 graph_db_evidence_targets: targets.clone(),
17417 evidence_packets,
17418 shared_preparation: graph_prepared.shared_preparation.clone(),
17419 preparation_cache: prepared.preparation_cache.clone(),
17420 preparation_timings: prepared.preparation_timings.clone(),
17421 context_pack_command: format!(
17422 "tsift --envelope context-pack {} --budget normal",
17423 shell_quote(path.to_string_lossy().as_ref())
17424 ),
17425 cached_diff_command: format!(
17426 "tsift diff-digest --cached {} --json",
17427 shell_quote(root.to_string_lossy().as_ref())
17428 ),
17429 impact_command: format!(
17430 "tsift impact {} --cached{} --limit {} --json",
17431 shell_quote(root.to_string_lossy().as_ref()),
17432 scope
17433 .map(|scope| format!(" --scope {}", shell_quote(scope)))
17434 .unwrap_or_default(),
17435 impact_limit
17436 ),
17437 };
17438 let context_summary = conflict_matrix_context_summary(context_pack);
17439 Ok(ConflictMatrixReport {
17440 contract_version: CONFLICT_MATRIX_CONTRACT_VERSION,
17441 root: root.to_string_lossy().to_string(),
17442 scope: scope.map(str::to_string),
17443 targets,
17444 can_parallel,
17445 fail_closed,
17446 cross_target_parallel_safe,
17447 per_target_fail_closed,
17448 inputs,
17449 context_pack: context_summary,
17450 cached_diff: prepared.cached_diff.clone(),
17451 impact: prepared.impact_report.clone(),
17452 candidates,
17453 worker_prompt_packets,
17454 conflicts,
17455 orchestration,
17456 next_commands,
17457 warnings,
17458 })
17459}
17460
17461#[allow(clippy::too_many_arguments)]
17462fn build_conflict_matrix_report_with_prepared<S: GraphStore>(
17463 root: &Path,
17464 path: &Path,
17465 scope: Option<&str>,
17466 raw_targets: &[String],
17467 depth: usize,
17468 limit: usize,
17469 impact_limit: usize,
17470 store: &S,
17471 freshness: GraphDbFreshnessReport,
17472 extra_warnings: Vec<String>,
17473 prepared: &ConflictMatrixPreparedInputs,
17474) -> Result<ConflictMatrixReport> {
17475 let graph_prepared = prepare_conflict_matrix_graph_orchestration(
17476 root,
17477 scope,
17478 "sqlite",
17479 raw_targets,
17480 prepared,
17481 depth,
17482 limit,
17483 store,
17484 freshness.clone(),
17485 )?;
17486 build_conflict_matrix_report_from_prepared_graph(
17487 root,
17488 path,
17489 scope,
17490 depth,
17491 limit,
17492 impact_limit,
17493 freshness,
17494 extra_warnings,
17495 prepared,
17496 &graph_prepared,
17497 )
17498}
17499
17500#[allow(clippy::too_many_arguments)]
17501fn build_conflict_matrix_report_with_store<S: GraphStore>(
17502 root: &Path,
17503 path: &Path,
17504 scope: Option<&str>,
17505 raw_targets: &[String],
17506 depth: usize,
17507 limit: usize,
17508 impact_limit: usize,
17509 store: &S,
17510 freshness: GraphDbFreshnessReport,
17511 extra_warnings: Vec<String>,
17512) -> Result<ConflictMatrixReport> {
17513 let prepared = prepare_conflict_matrix_inputs(root, path, scope, impact_limit)?;
17514 build_conflict_matrix_report_with_prepared(
17515 root,
17516 path,
17517 scope,
17518 raw_targets,
17519 depth,
17520 limit,
17521 impact_limit,
17522 store,
17523 freshness,
17524 extra_warnings,
17525 &prepared,
17526 )
17527}
17528
17529fn build_conflict_matrix_report(
17530 path: &Path,
17531 scope: Option<&str>,
17532 raw_targets: &[String],
17533 depth: usize,
17534 limit: usize,
17535 impact_limit: usize,
17536) -> Result<ConflictMatrixReport> {
17537 let root = lint::resolve_project_root_or_canonical_path(path)?;
17538 let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
17539 if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
17540 write_traversal_graph_store(&root, path, scope)
17541 .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
17542 }
17543 let graph_db = graph_substrate_db_path(&root, scope);
17544 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
17545 .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
17546 let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
17547 let mut warnings = Vec::new();
17548 if let Some(recovery) = store.read_only_recovery() {
17549 warnings.push(graph_db_read_recovery_diagnostic(recovery));
17550 }
17551 build_conflict_matrix_report_with_store(
17552 &root,
17553 path,
17554 scope,
17555 raw_targets,
17556 depth,
17557 limit,
17558 impact_limit,
17559 &store,
17560 freshness,
17561 warnings,
17562 )
17563}
17564
17565fn cmd_conflict_matrix(
17566 path: &Path,
17567 scope: Option<&str>,
17568 raw_targets: &[String],
17569 depth: usize,
17570 limit: usize,
17571 impact_limit: usize,
17572 format: OutputFormat,
17573) -> Result<()> {
17574 let report =
17575 build_conflict_matrix_report(path, scope, raw_targets, depth, limit, impact_limit)?;
17576 if format.json_output {
17577 print_json_or_envelope(
17578 &report,
17579 &format,
17580 "conflict-matrix",
17581 "parallel-planning",
17582 ToolEnvelopeSummary {
17583 text: format!(
17584 "Conflict matrix for {} target(s): can_parallel={} fail_closed={} cross_target_parallel_safe={} per_target_fail_closed={}",
17585 report.targets.len(),
17586 report.can_parallel,
17587 report.fail_closed,
17588 report.cross_target_parallel_safe,
17589 report.per_target_fail_closed.len()
17590 ),
17591 metrics: vec![
17592 envelope_metric("targets", report.targets.len()),
17593 envelope_metric("candidates", report.candidates.len()),
17594 envelope_metric("conflicts", report.conflicts.len()),
17595 envelope_metric("can_parallel", report.can_parallel),
17596 envelope_metric("fail_closed", report.fail_closed),
17597 envelope_metric(
17598 "cross_target_parallel_safe",
17599 report.cross_target_parallel_safe,
17600 ),
17601 envelope_metric(
17602 "per_target_fail_closed",
17603 report.per_target_fail_closed.len(),
17604 ),
17605 ],
17606 },
17607 report.fail_closed,
17608 report.next_commands.clone(),
17609 )
17610 } else {
17611 print_conflict_matrix_human(&report, format.compact);
17612 Ok(())
17613 }
17614}
17615
17616#[derive(Serialize)]
17617struct DispatchTraceSummary {
17618 backlog: usize,
17619 job_packet: usize,
17620 worker_result: usize,
17621 worker_context: usize,
17622 source_handle: usize,
17623 semantic_rows: usize,
17624}
17625
17626#[derive(Serialize)]
17627struct DispatchTraceReport {
17628 contract_version: &'static str,
17629 root: String,
17630 #[serde(skip_serializing_if = "Option::is_none")]
17631 scope: Option<String>,
17632 targets: Vec<String>,
17633 projection_freshness: GraphDbFreshnessReport,
17634 projection_hashes: Vec<String>,
17635 evidence_packet_ids: Vec<String>,
17636 shared_preparation: ConflictMatrixSharedPreparationSummary,
17637 worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
17638 worker_feedback: Vec<ConflictMatrixWorkerFeedback>,
17639 summary: DispatchTraceSummary,
17640 nodes: Vec<SubstrateGraphNode>,
17641 edges: Vec<SubstrateGraphEdge>,
17642 conflict_matrix_decisions: Vec<String>,
17643 replay_commands: Vec<String>,
17644 repair_commands: Vec<String>,
17645 truncated: bool,
17646 #[serde(skip_serializing_if = "Vec::is_empty", default)]
17647 warnings: Vec<String>,
17648}
17649
/// Returns `true` when nodes of `kind` may appear in a dispatch trace.
///
/// The comparison is exact (case-sensitive, no trimming).
fn dispatch_trace_allowed_node_kind(kind: &str) -> bool {
    const ALLOWED_KINDS: [&str; 11] = [
        "session",
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "semantic_concept",
        "semantic_entity",
        "file",
        "symbol",
        "route",
    ];
    ALLOWED_KINDS.contains(&kind)
}
17666
/// Returns the display rank of a node kind; lower ranks sort first.
///
/// Known kinds rank by their position in the table below (`backlog` = 0 …
/// `session` = 10). Every other kind ranks 99, i.e. after all known kinds.
fn dispatch_trace_kind_rank(kind: &str) -> usize {
    const UNKNOWN_KIND_RANK: usize = 99;
    const KINDS_IN_RANK_ORDER: [&str; 11] = [
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "file",
        "symbol",
        "route",
        "semantic_concept",
        "semantic_entity",
        "session",
    ];
    KINDS_IN_RANK_ORDER
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(UNKNOWN_KIND_RANK)
}
17683
17684fn dispatch_trace_summary(nodes: &[SubstrateGraphNode]) -> DispatchTraceSummary {
17685 DispatchTraceSummary {
17686 backlog: nodes.iter().filter(|node| node.kind == "backlog").count(),
17687 job_packet: nodes
17688 .iter()
17689 .filter(|node| node.kind == "job_packet")
17690 .count(),
17691 worker_result: nodes
17692 .iter()
17693 .filter(|node| node.kind == "worker_result")
17694 .count(),
17695 worker_context: nodes
17696 .iter()
17697 .filter(|node| node.kind == "worker_context")
17698 .count(),
17699 source_handle: nodes
17700 .iter()
17701 .filter(|node| node.kind == "source_handle")
17702 .count(),
17703 semantic_rows: nodes
17704 .iter()
17705 .filter(|node| matches!(node.kind.as_str(), "semantic_concept" | "semantic_entity"))
17706 .count(),
17707 }
17708}
17709
17710fn dispatch_trace_shared_preparation_summary(
17711 graph_nodes: &[SubstrateGraphNode],
17712 graph_edges: &[SubstrateGraphEdge],
17713 conflict: &ConflictMatrixReport,
17714) -> ConflictMatrixSharedPreparationSummary {
17715 ConflictMatrixSharedPreparationSummary {
17716 evidence_cache_status: conflict
17717 .inputs
17718 .shared_preparation
17719 .evidence_cache_status
17720 .clone(),
17721 graph_nodes: graph_nodes.len(),
17722 graph_edges: graph_edges.len(),
17723 evidence_packets: conflict.orchestration.evidence_packet_ids.len(),
17724 source_handles: conflict
17725 .candidates
17726 .iter()
17727 .map(|candidate| candidate.source_handles.len())
17728 .sum(),
17729 worker_context: conflict
17730 .candidates
17731 .iter()
17732 .map(|candidate| candidate.worker_context_handles.len())
17733 .sum(),
17734 worker_results: conflict
17735 .candidates
17736 .iter()
17737 .map(|candidate| candidate.worker_feedback.total)
17738 .sum(),
17739 semantic_rows: conflict
17740 .candidates
17741 .iter()
17742 .map(|candidate| candidate.semantic_related.len())
17743 .sum(),
17744 dispatch_trace_snapshot_nodes: graph_nodes.len(),
17745 dispatch_trace_snapshot_edges: graph_edges.len(),
17746 }
17747}
17748
17749fn dispatch_trace_collect_ids(
17750 targets: &[String],
17751 candidates: &[ConflictMatrixCandidate],
17752 graph_nodes: &[SubstrateGraphNode],
17753 graph_edges: &[SubstrateGraphEdge],
17754 depth: usize,
17755 limit: usize,
17756) -> (BTreeSet<String>, bool) {
17757 let target_refs = targets
17758 .iter()
17759 .map(|target| target.trim_start_matches('#').to_string())
17760 .collect::<BTreeSet<_>>();
17761 let mut ids = BTreeSet::new();
17762 for candidate in candidates {
17763 ids.insert(candidate.target_node_id.clone());
17764 for source in &candidate.source_handles {
17765 ids.insert(source.handle.clone());
17766 }
17767 for handle in &candidate.worker_context_handles {
17768 ids.insert(handle.clone());
17769 }
17770 for semantic in &candidate.semantic_related {
17771 ids.insert(semantic.handle.clone());
17772 }
17773 }
17774 for node in graph_nodes {
17775 if !dispatch_trace_allowed_node_kind(&node.kind) {
17776 continue;
17777 }
17778 if node
17779 .properties
17780 .get("ref_id")
17781 .is_some_and(|ref_id| target_refs.contains(ref_id))
17782 {
17783 ids.insert(node.id.clone());
17784 }
17785 }
17786
17787 let node_by_id = graph_nodes
17788 .iter()
17789 .map(|node| (node.id.as_str(), node))
17790 .collect::<BTreeMap<_, _>>();
17791 let max_nodes = if limit == 0 {
17792 usize::MAX
17793 } else {
17794 limit
17795 .saturating_mul(targets.len().max(1))
17796 .saturating_mul(12)
17797 .max(64)
17798 };
17799 let mut truncated = false;
17800 for _ in 0..depth.max(1) {
17801 let before = ids.len();
17802 let current_ids = ids.clone();
17803 for edge in graph_edges {
17804 if ids.len() >= max_nodes {
17805 truncated = true;
17806 break;
17807 }
17808 let touches = current_ids.contains(&edge.from_id) || current_ids.contains(&edge.to_id);
17809 if !touches {
17810 continue;
17811 }
17812 for endpoint in [&edge.from_id, &edge.to_id] {
17813 let Some(node) = node_by_id.get(endpoint.as_str()) else {
17814 continue;
17815 };
17816 if dispatch_trace_allowed_node_kind(&node.kind) {
17817 ids.insert(endpoint.clone());
17818 }
17819 }
17820 }
17821 if ids.len() == before || truncated {
17822 break;
17823 }
17824 }
17825 (ids, truncated)
17826}
17827
17828#[allow(clippy::too_many_arguments)]
17829fn build_dispatch_trace_report_from_conflict_snapshot(
17830 root: &Path,
17831 scope: Option<&str>,
17832 conflict: ConflictMatrixReport,
17833 graph_nodes: Vec<SubstrateGraphNode>,
17834 graph_edges: Vec<SubstrateGraphEdge>,
17835 depth: usize,
17836 limit: usize,
17837 extra_warnings: Vec<String>,
17838) -> Result<DispatchTraceReport> {
17839 let shared_preparation =
17840 dispatch_trace_shared_preparation_summary(&graph_nodes, &graph_edges, &conflict);
17841 let (ids, truncated) = dispatch_trace_collect_ids(
17842 &conflict.targets,
17843 &conflict.candidates,
17844 &graph_nodes,
17845 &graph_edges,
17846 depth,
17847 limit,
17848 );
17849 let mut nodes = graph_nodes
17850 .into_iter()
17851 .filter(|node| ids.contains(&node.id))
17852 .collect::<Vec<_>>();
17853 nodes.sort_by(|left, right| {
17854 dispatch_trace_kind_rank(&left.kind)
17855 .cmp(&dispatch_trace_kind_rank(&right.kind))
17856 .then(left.id.cmp(&right.id))
17857 });
17858 let node_ids = nodes
17859 .iter()
17860 .map(|node| node.id.as_str())
17861 .collect::<BTreeSet<_>>();
17862 let mut edges = graph_edges
17863 .into_iter()
17864 .filter(|edge| {
17865 node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
17866 })
17867 .collect::<Vec<_>>();
17868 edges.sort_by(|left, right| {
17869 left.from_id
17870 .cmp(&right.from_id)
17871 .then(left.kind.cmp(&right.kind))
17872 .then(left.to_id.cmp(&right.to_id))
17873 });
17874 let mut warnings = conflict.warnings;
17875 warnings.extend(extra_warnings);
17876
17877 Ok(DispatchTraceReport {
17878 contract_version: DISPATCH_TRACE_CONTRACT_VERSION,
17879 root: conflict.root,
17880 scope: conflict.scope,
17881 targets: conflict.targets,
17882 projection_freshness: conflict.orchestration.projection_freshness,
17883 projection_hashes: conflict.orchestration.projection_hashes,
17884 evidence_packet_ids: conflict.orchestration.evidence_packet_ids,
17885 shared_preparation,
17886 worker_prompt_packets: conflict.worker_prompt_packets,
17887 worker_feedback: conflict
17888 .candidates
17889 .iter()
17890 .map(|candidate| candidate.worker_feedback.clone())
17891 .collect(),
17892 summary: dispatch_trace_summary(&nodes),
17893 nodes,
17894 edges,
17895 conflict_matrix_decisions: conflict.orchestration.conflict_matrix_decisions,
17896 replay_commands: conflict.next_commands,
17897 repair_commands: graph_db_repair_commands(root, scope),
17898 truncated,
17899 warnings,
17900 })
17901}
17902
17903fn build_dispatch_trace_report(
17904 path: &Path,
17905 scope: Option<&str>,
17906 raw_targets: &[String],
17907 depth: usize,
17908 limit: usize,
17909 impact_limit: usize,
17910) -> Result<DispatchTraceReport> {
17911 let root = lint::resolve_project_root_or_canonical_path(path)?;
17912 let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
17913 if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
17914 write_traversal_graph_store(&root, path, scope)
17915 .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
17916 }
17917 let graph_db = graph_substrate_db_path(&root, scope);
17918 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
17919 .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
17920 let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
17921 let extra_warnings = store
17922 .read_only_recovery()
17923 .map(graph_db_read_recovery_diagnostic)
17924 .into_iter()
17925 .collect::<Vec<_>>();
17926 let prepared = prepare_conflict_matrix_inputs(&root, path, scope, impact_limit)?;
17927 let graph_prepared = prepare_conflict_matrix_graph_orchestration(
17928 &root,
17929 scope,
17930 "sqlite",
17931 raw_targets,
17932 &prepared,
17933 depth,
17934 limit,
17935 &store,
17936 freshness.clone(),
17937 )?;
17938 let conflict = build_conflict_matrix_report_from_prepared_graph(
17939 &root,
17940 path,
17941 scope,
17942 depth,
17943 limit,
17944 impact_limit,
17945 freshness,
17946 extra_warnings.clone(),
17947 &prepared,
17948 &graph_prepared,
17949 )?;
17950 build_dispatch_trace_report_from_conflict_snapshot(
17951 &root,
17952 scope,
17953 conflict,
17954 graph_prepared.graph.nodes,
17955 graph_prepared.graph.edges,
17956 depth,
17957 limit,
17958 extra_warnings,
17959 )
17960}
17961
17962fn dispatch_trace_html(report: &DispatchTraceReport) -> Result<String> {
17963 let json = serde_json::to_string(report)?.replace("</", "<\\/");
17964 let mut html = String::new();
17965 html.push_str(
17966 "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift dispatch trace</title>",
17967 );
17968 html.push_str(
17969 r#"<style>
17970:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#fff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e}
17971@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf}}
17972*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 360px;gap:14px}.panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.side{padding:14px;overflow:auto;max-height:720px}.side h2{font-size:15px;margin:12px 0 8px}.side h2:first-child{margin-top:0}.list{display:grid;gap:8px}.row{border:1px solid var(--line);border-radius:6px;padding:8px}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted);overflow-wrap:anywhere}svg{width:100%;height:680px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.node{stroke:var(--panel);stroke-width:2}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text)}@media(max-width:900px){.top{display:block}.layout{grid-template-columns:1fr}.side{max-height:none}svg{height:560px}}
17973</style>"#,
17974 );
17975 html.push_str("</head><body><div class=\"page\">");
17976 html.push_str(&format!(
17977 "<header class=\"top\"><div><h1>tsift dispatch trace</h1><div class=\"meta\">targets <code>{}</code> | evidence <code>{}</code> | nodes <code>{}</code> | worker_prompt_packets <code>{}</code></div></div><div class=\"meta\"><code>{}</code></div></header>",
17978 html_escape(&report.targets.join(", ")),
17979 report.evidence_packet_ids.len(),
17980 report.nodes.len(),
17981 report.worker_prompt_packets.len(),
17982 html_escape(report.contract_version)
17983 ));
17984 html.push_str(
17985 r#"<main class="layout"><section class="panel"><svg id="graph-canvas" role="img" aria-label="Dispatch trace graph"></svg></section><aside class="side"><h2>Worker Prompt Packets</h2><div id="packets" class="list"></div><h2>Worker Feedback</h2><div id="feedback" class="list"></div><h2>Nodes</h2><div id="nodes" class="list"></div></aside></main>"#,
17986 );
17987 html.push_str("<script id=\"trace-data\" type=\"application/json\">");
17988 html.push_str(&json);
17989 html.push_str(
17990 r##"</script><script>
17991const report = JSON.parse(document.getElementById("trace-data").textContent);
17992const svg = document.getElementById("graph-canvas");
17993const nodeList = document.getElementById("nodes");
17994const packets = document.getElementById("packets");
17995const feedback = document.getElementById("feedback");
17996const nodes = report.nodes.map((node, index) => ({...node, index}));
17997const nodeById = new Map(nodes.map(node => [node.id, node]));
17998const edges = report.edges.filter(edge => nodeById.has(edge.from_id) && nodeById.has(edge.to_id));
17999const colorByKind = new Map([["backlog","#dc2626"],["job_packet","#ea580c"],["worker_result","#15803d"],["worker_context","#475569"],["source_handle","#64748b"],["semantic_concept","#9a3412"],["semantic_entity","#b45309"],["file","#2563eb"],["symbol","#16a34a"],["route","#7c3aed"],["session","#0891b2"]]);
18000function color(kind){return colorByKind.get(kind)||"#6b7280";}
18001function text(value){return value == null ? "" : String(value);}
18002function escapeHtml(value){return text(value).replace(/[&<>"']/g, ch => ({"&":"&","<":"<",">":">","\"":""","'":"'"}[ch]));}
18003function layout(){
18004 const rect = svg.getBoundingClientRect();
18005 const width = rect.width || 900, height = rect.height || 680, cx = width / 2, cy = height / 2;
18006 const kinds = [...new Set(nodes.map(node => node.kind))].sort();
18007 const counts = new Map();
18008 for (const node of nodes) counts.set(node.kind, (counts.get(node.kind)||0)+1);
18009 const offsets = new Map();
18010 for (const node of nodes) {
18011 const group = kinds.indexOf(node.kind);
18012 const index = offsets.get(node.kind) || 0;
18013 offsets.set(node.kind, index + 1);
18014 const total = counts.get(node.kind) || 1;
18015 const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
18016 const angle = Math.PI * 2 * index / Math.max(total, 1) + group * 0.53;
18017 node.x = cx + Math.cos(angle) * ring;
18018 node.y = cy + Math.sin(angle) * ring;
18019 }
18020}
18021function draw(){
18022 svg.innerHTML = "";
18023 for (const edge of edges) {
18024 const from = nodeById.get(edge.from_id), to = nodeById.get(edge.to_id);
18025 const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
18026 line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
18027 line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
18028 line.setAttribute("class", "edge");
18029 line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.kind;
18030 svg.appendChild(line);
18031 }
18032 for (const node of nodes) {
18033 const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
18034 circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
18035 circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
18036 circle.setAttribute("fill", color(node.kind));
18037 circle.setAttribute("class", "node");
18038 circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
18039 svg.appendChild(circle);
18040 const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
18041 label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
18042 label.setAttribute("class", "node-label");
18043 label.textContent = node.label.length > 34 ? node.label.slice(0,31) + "..." : node.label;
18044 svg.appendChild(label);
18045 }
18046}
18047packets.innerHTML = report.worker_prompt_packets.map(packet => `<div class="row"><div class="kind">${escapeHtml(packet.contract_version)} - ${escapeHtml(packet.risk)} - parallel_safe ${packet.parallel_safe ? "true" : "false"} - closure ${packet.worker_feedback ? packet.worker_feedback.closure_rank_score : 0}</div><div class="label">${escapeHtml(packet.title)}</div><div class="handle">${escapeHtml(packet.packet_id)}</div><div class="handle">blocks ${escapeHtml((packet.blocks||[]).join(", ") || "none")} | blocked_by ${escapeHtml((packet.blocked_by||[]).join(", ") || "none")}</div></div>`).join("") || "<div class=\"meta\">No packets.</div>";
18048feedback.innerHTML = report.worker_feedback.map(item => `<div class="row"><div class="kind">completed ${item.completed} - blocked ${item.blocked} - closure ${item.closure_rank_score}</div><div>files ${escapeHtml((item.touched_files||[]).join(", ") || "none")}</div><div>tests ${escapeHtml((item.expected_tests||[]).join(" && ") || "none")}</div>${item.repeated_blockage ? "<div class=\"label\">Repeated blockage</div>" : ""}${(item.stale_expected_tests||[]).length ? `<div class="label">Stale tests: ${escapeHtml(item.stale_expected_tests.join(", "))}</div>` : ""}${(item.follow_up_debt||[]).length ? `<div class="label">Follow-up debt: ${escapeHtml(item.follow_up_debt.join(", "))}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No worker results.</div>";
18049nodeList.innerHTML = nodes.map(node => `<div class="row"><div class="kind">${escapeHtml(node.kind)}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${escapeHtml(node.id)}</div></div>`).join("");
18050window.addEventListener("resize", () => { layout(); draw(); });
18051layout(); draw();
18052</script></div></body></html>"##,
18053 );
18054 Ok(html)
18055}
18056
/// Arguments for `cmd_dispatch_trace`, bundled into one value so the command
/// takes a single options parameter plus the output format.
struct DispatchTraceOptions<'a> {
    /// Path forwarded to `build_dispatch_trace_report`.
    path: &'a Path,
    /// Optional scope name forwarded to the report builder.
    scope: Option<&'a str>,
    /// Targets exactly as supplied by the caller, before any resolution.
    raw_targets: &'a [String],
    /// Depth value forwarded to the report builder.
    depth: usize,
    /// Limit value forwarded to the report builder.
    limit: usize,
    /// Impact limit value forwarded to the report builder.
    impact_limit: usize,
    /// Selects whether the finished report is printed as JSON or as HTML.
    trace_format: DispatchTraceFormat,
}
18066
18067fn cmd_dispatch_trace(
18068 options: DispatchTraceOptions<'_>,
18069 output_format: OutputFormat,
18070) -> Result<()> {
18071 let report = build_dispatch_trace_report(
18072 options.path,
18073 options.scope,
18074 options.raw_targets,
18075 options.depth,
18076 options.limit,
18077 options.impact_limit,
18078 )?;
18079 match options.trace_format {
18080 DispatchTraceFormat::Json => {
18081 if output_format.envelope {
18082 print_json_or_envelope(
18083 &report,
18084 &output_format,
18085 "dispatch-trace",
18086 "operator-review",
18087 ToolEnvelopeSummary {
18088 text: format!(
18089 "Dispatch trace for {} target(s): {} graph node(s), {} worker prompt packet(s)",
18090 report.targets.len(),
18091 report.nodes.len(),
18092 report.worker_prompt_packets.len()
18093 ),
18094 metrics: vec![
18095 envelope_metric("targets", report.targets.len()),
18096 envelope_metric("nodes", report.nodes.len()),
18097 envelope_metric("edges", report.edges.len()),
18098 envelope_metric(
18099 "worker_prompt_packets",
18100 report.worker_prompt_packets.len(),
18101 ),
18102 ],
18103 },
18104 report.truncated,
18105 report.replay_commands.clone(),
18106 )
18107 } else {
18108 println!(
18109 "{}",
18110 to_json_schema(
18111 &report,
18112 output_format.pretty,
18113 output_format.terse,
18114 output_format.schema
18115 )?
18116 );
18117 Ok(())
18118 }
18119 }
18120 DispatchTraceFormat::Html => {
18121 println!("{}", dispatch_trace_html(&report)?);
18122 Ok(())
18123 }
18124 }
18125}
18126
/// Working profile of one backlog target, assembled by
/// `dependency_dag_node_profile` and later converted into a `DependencyDagNode`.
#[derive(Clone, Debug)]
struct DependencyDagProfile {
    /// DAG id of the target: the node's `ref_id` property, or its label with
    /// leading `#` characters removed when no `ref_id` is present.
    id: String,
    /// Id of the underlying graph node.
    graph_node_id: String,
    /// Label of the underlying graph node.
    label: String,
    /// The node's `path` property, if any.
    path: Option<String>,
    /// The node's `line` property parsed as `i64`, if present and parsable.
    line: Option<i64>,
    /// The node's `detail` property; scanned for dependency markers.
    detail: Option<String>,
    /// Paths of file/route/symbol nodes reached through `mentions` edges, plus
    /// the files of reachable `source_handle` nodes.
    source_files: BTreeSet<String>,
    /// Labels of symbol nodes reached through `mentions` edges.
    source_symbols: BTreeSet<String>,
    /// Subset of `source_files` accepted by `is_planner_config_path`.
    config_files: BTreeSet<String>,
    /// Copy of `worker_feedback.expected_tests`.
    expected_tests: BTreeSet<String>,
    /// Reachable semantic concepts/entities keyed by `"{kind}:{label}"`.
    semantic_refs: BTreeMap<String, ConflictMatrixSemanticRef>,
    /// Feedback derived from `worker_result` nodes whose `ref_id` equals `id`.
    worker_feedback: ConflictMatrixWorkerFeedback,
}
18142
/// Serialized form of a `DependencyDagProfile` as it appears in the report.
/// The set-valued profile fields are emitted as vectors.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagNode {
    /// DAG id of the target (see `dependency_dag_node_id`).
    id: String,
    /// Id of the underlying graph node.
    graph_node_id: String,
    /// Label of the underlying graph node.
    label: String,
    /// The node's `path` property; omitted from the output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<String>,
    /// The node's parsed `line` property; omitted from the output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    line: Option<i64>,
    /// The node's `detail` property; omitted from the output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    /// Profile `source_files`, converted with `sorted_set`.
    source_files: Vec<String>,
    /// Profile `source_symbols`, converted with `sorted_set`.
    source_symbols: Vec<String>,
    /// Profile `config_files`, converted with `sorted_set`.
    config_files: Vec<String>,
    /// Profile `expected_tests`, converted with `sorted_set`.
    expected_tests: Vec<String>,
    /// Values of the profile's `semantic_refs` map, in key order.
    semantic_refs: Vec<ConflictMatrixSemanticRef>,
    /// Worker feedback carried over unchanged from the profile.
    worker_feedback: ConflictMatrixWorkerFeedback,
}
18161
/// Directed edge between two DAG targets; `from` is scheduled before `to` by
/// `dependency_dag_topo_batches`.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagEdge {
    /// DAG id of the upstream target.
    from: String,
    /// DAG id of the downstream target.
    to: String,
    /// Edge category. The builders in this file emit `explicit_depends_on`,
    /// `explicit_before`, `worker_result_follow_up`, `shared_resource`, and
    /// `semantic_relation`.
    kind: String,
    /// Strength of the edge: 1000 / 900 / 700 for the explicit and follow-up
    /// kinds, or a sum weighted by the number of shared items for overlap edges.
    weight: usize,
    /// Human-readable explanations of why the edge exists.
    reasons: Vec<String>,
    /// Files both targets touch; omitted from the output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    shared_files: Vec<String>,
    /// Symbols both targets mention; omitted from the output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    shared_symbols: Vec<String>,
    /// Expected tests both targets share; omitted from the output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    shared_tests: Vec<String>,
    /// Config files both targets touch; omitted from the output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    shared_config_files: Vec<String>,
    /// Semantic reference keys both targets share; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    shared_semantic_refs: Vec<String>,
}
18180
/// One layer of the topological ordering: targets whose prerequisites were all
/// placed in earlier batches.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagTopoBatch {
    /// 1-based position of this batch in the ordering.
    batch: usize,
    /// DAG ids placed in this batch, in target order.
    targets: Vec<String>,
}
18186
/// Describes the targets that could not be placed in any topological batch.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagCycleDiagnostics {
    /// True when at least one target was left unplaced.
    has_cycles: bool,
    /// DAG ids that never reached in-degree zero.
    blocked_nodes: Vec<String>,
    /// Edges whose endpoints are both in `blocked_nodes`.
    cycle_edges: Vec<DependencyDagEdge>,
}
18193
/// Headline counts for a `DependencyDagReport`.
#[derive(Serialize)]
struct DependencyDagSummary {
    /// Number of nodes in the report.
    nodes: usize,
    /// Number of edges in the report.
    edges: usize,
    /// Number of topological batches in the report.
    topo_batches: usize,
    /// Mirrors `cycle_diagnostics.has_cycles`.
    has_cycles: bool,
}
18201
/// Complete output of the `dependency-dag` command, produced by
/// `build_dependency_dag_report`.
#[derive(Serialize)]
struct DependencyDagReport {
    /// Set to `DEPENDENCY_DAG_CONTRACT_VERSION`.
    contract_version: &'static str,
    /// Resolved project root, rendered lossily as a string.
    root: String,
    /// Scope the report was built for; omitted from the output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// The path argument as given, rendered lossily as a string.
    path: String,
    /// DAG ids of the resolved targets, in profile order.
    targets: Vec<String>,
    /// Freshness report of the graph projection the DAG was read from.
    projection_freshness: GraphDbFreshnessReport,
    /// Content hash values taken from the freshness report.
    projection_hashes: Vec<String>,
    /// One node per resolved target.
    nodes: Vec<DependencyDagNode>,
    /// All edges, sorted by `from`, then `to`, then `kind`.
    edges: Vec<DependencyDagEdge>,
    /// Topological layering of the targets.
    topo_batches: Vec<DependencyDagTopoBatch>,
    /// Targets and edges that could not be layered.
    cycle_diagnostics: DependencyDagCycleDiagnostics,
    /// Headline counts for the fields above.
    summary: DependencyDagSummary,
    /// Commands produced by `dependency_dag_replay_commands`.
    replay_commands: Vec<String>,
    /// Commands produced by `graph_db_repair_commands`.
    repair_commands: Vec<String>,
    /// Non-fatal diagnostics; omitted from the output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
18222
18223fn dependency_dag_backlog_node_for_target(
18224 store: &impl GraphStore,
18225 target: &str,
18226) -> Result<SubstrateGraphNode> {
18227 let resolved = graph_db_resolve_evidence_target(store, target)?
18228 .with_context(|| format!("dependency-dag target not found: {target}"))?;
18229 if resolved.kind == "backlog" {
18230 return Ok(resolved);
18231 }
18232 let Some(ref_id) = resolved.properties.get("ref_id").cloned() else {
18233 bail!(
18234 "dependency-dag target {} resolved to {} without a backlog ref_id",
18235 target,
18236 resolved.kind
18237 );
18238 };
18239 store
18240 .nodes_by_kind("backlog")?
18241 .into_iter()
18242 .filter(|node| node.properties.get("ref_id") == Some(&ref_id))
18243 .min_by(|left, right| {
18244 left.properties
18245 .get("line")
18246 .and_then(|value| value.parse::<i64>().ok())
18247 .cmp(
18248 &right
18249 .properties
18250 .get("line")
18251 .and_then(|value| value.parse::<i64>().ok()),
18252 )
18253 .then(left.id.cmp(&right.id))
18254 })
18255 .with_context(|| format!("dependency-dag backlog node not found for #{ref_id}"))
18256}
18257
18258fn dependency_dag_resolve_backlog_nodes(
18259 root: &Path,
18260 path: &Path,
18261 store: &impl GraphStore,
18262 raw_targets: &[String],
18263) -> Result<Vec<SubstrateGraphNode>> {
18264 let mut nodes = Vec::new();
18265 let mut seen = BTreeSet::new();
18266 if raw_targets.is_empty() {
18267 let hinted_path = if path.is_absolute() {
18268 path.to_path_buf()
18269 } else {
18270 root.join(path)
18271 };
18272 let hinted_markdown = hinted_path
18273 .extension()
18274 .and_then(|ext| ext.to_str())
18275 .is_some_and(|ext| ext.eq_ignore_ascii_case("md"));
18276 let hinted_rel = hinted_markdown.then(|| {
18277 relativize_pathbuf(&hinted_path, root)
18278 .to_string_lossy()
18279 .replace('\\', "/")
18280 });
18281 for node in store.nodes_by_kind("backlog")? {
18282 if let Some(expected_path) = &hinted_rel
18283 && node.properties.get("path") != Some(expected_path)
18284 {
18285 continue;
18286 }
18287 if seen.insert(node.id.clone()) {
18288 nodes.push(node);
18289 }
18290 }
18291 if nodes.is_empty() && hinted_rel.is_some() {
18292 for node in store.nodes_by_kind("backlog")? {
18293 if seen.insert(node.id.clone()) {
18294 nodes.push(node);
18295 }
18296 }
18297 }
18298 } else {
18299 for target in raw_targets {
18300 let normalized = normalize_conflict_target(target).unwrap_or_else(|| target.clone());
18301 let node = dependency_dag_backlog_node_for_target(store, &normalized)?;
18302 if seen.insert(node.id.clone()) {
18303 nodes.push(node);
18304 }
18305 }
18306 }
18307 if nodes.is_empty() {
18308 bail!("dependency-dag needs at least one resolvable backlog id");
18309 }
18310 nodes.sort_by(|left, right| {
18311 left.properties
18312 .get("line")
18313 .and_then(|value| value.parse::<i64>().ok())
18314 .cmp(
18315 &right
18316 .properties
18317 .get("line")
18318 .and_then(|value| value.parse::<i64>().ok()),
18319 )
18320 .then(left.id.cmp(&right.id))
18321 });
18322 Ok(nodes)
18323}
18324
18325fn dependency_dag_node_id(node: &SubstrateGraphNode) -> String {
18326 node.properties
18327 .get("ref_id")
18328 .cloned()
18329 .unwrap_or_else(|| node.label.trim_start_matches('#').to_string())
18330}
18331
18332fn dependency_dag_node_profile(
18333 root: &Path,
18334 store: &impl GraphStore,
18335 node: &SubstrateGraphNode,
18336 graph_nodes_by_id: &BTreeMap<String, SubstrateGraphNode>,
18337 graph_edges: &[SubstrateGraphEdge],
18338 depth: usize,
18339 limit: usize,
18340) -> Result<DependencyDagProfile> {
18341 let id = dependency_dag_node_id(node);
18342 let mut source_files = BTreeSet::new();
18343 let mut source_symbols = BTreeSet::new();
18344 for edge in graph_edges
18345 .iter()
18346 .filter(|edge| edge.from_id == node.id && edge.kind == "mentions")
18347 {
18348 let Some(target) = graph_nodes_by_id.get(&edge.to_id) else {
18349 continue;
18350 };
18351 match target.kind.as_str() {
18352 "file" | "route" => {
18353 if let Some(path) = target.properties.get("path") {
18354 source_files.insert(path.clone());
18355 }
18356 }
18357 "symbol" => {
18358 source_symbols.insert(target.label.clone());
18359 if let Some(path) = target.properties.get("path") {
18360 source_files.insert(path.clone());
18361 }
18362 }
18363 _ => {}
18364 }
18365 }
18366
18367 let max_rows = if limit == 0 { usize::MAX } else { limit };
18368 for (source, _) in
18369 graph_db_reachable_nodes_by_kind(store, &node.id, "source_handle", depth, max_rows)?
18370 {
18371 if let Some(handle) = conflict_matrix_source_handle(&source) {
18372 source_files.insert(handle.file);
18373 }
18374 }
18375
18376 let worker_results = graph_nodes_by_id
18377 .values()
18378 .filter(|candidate| {
18379 candidate.kind == "worker_result"
18380 && candidate.properties.get("ref_id").map(String::as_str) == Some(id.as_str())
18381 })
18382 .cloned()
18383 .collect::<Vec<_>>();
18384 let worker_feedback = conflict_matrix_worker_feedback(&worker_results);
18385 let expected_tests = worker_feedback.expected_tests.iter().cloned().collect();
18386 let config_files = source_files
18387 .iter()
18388 .filter(|file| is_planner_config_path(file))
18389 .cloned()
18390 .collect();
18391
18392 let mut semantic_refs = BTreeMap::new();
18393 for kind in ["semantic_concept", "semantic_entity"] {
18394 for (semantic, _) in
18395 graph_db_reachable_nodes_by_kind(store, &node.id, kind, depth, max_rows)?
18396 {
18397 let item = conflict_matrix_semantic_ref(root, &semantic);
18398 semantic_refs
18399 .entry(format!("{}:{}", item.kind, item.label))
18400 .or_insert(item);
18401 }
18402 }
18403
18404 Ok(DependencyDagProfile {
18405 id,
18406 graph_node_id: node.id.clone(),
18407 label: node.label.clone(),
18408 path: node.properties.get("path").cloned(),
18409 line: node
18410 .properties
18411 .get("line")
18412 .and_then(|value| value.parse::<i64>().ok()),
18413 detail: node.properties.get("detail").cloned(),
18414 source_files,
18415 source_symbols,
18416 config_files,
18417 expected_tests,
18418 semantic_refs,
18419 worker_feedback,
18420 })
18421}
18422
18423fn dependency_dag_marker_refs(text: &str, markers: &[&str]) -> Vec<String> {
18424 let lower = text.to_ascii_lowercase();
18425 let mut refs = Vec::new();
18426 for marker in markers {
18427 let mut offset = 0usize;
18428 while let Some(pos) = lower[offset..].find(marker) {
18429 let start = offset + pos + marker.len();
18430 let segment = text[start..]
18431 .split(['\n', '.'])
18432 .next()
18433 .unwrap_or(&text[start..]);
18434 refs.extend(extract_conflict_target_refs(segment));
18435 offset = start;
18436 }
18437 }
18438 dedupe_preserve_order(refs)
18439}
18440
18441fn dependency_dag_push_edge(
18442 edges: &mut Vec<DependencyDagEdge>,
18443 seen: &mut BTreeSet<(String, String, String)>,
18444 edge: DependencyDagEdge,
18445) {
18446 if edge.from == edge.to {
18447 return;
18448 }
18449 if seen.insert((edge.from.clone(), edge.to.clone(), edge.kind.clone())) {
18450 edges.push(edge);
18451 }
18452}
18453
18454fn dependency_dag_explicit_edges(
18455 profiles: &[DependencyDagProfile],
18456 target_ids: &BTreeSet<String>,
18457 edges: &mut Vec<DependencyDagEdge>,
18458 seen: &mut BTreeSet<(String, String, String)>,
18459) {
18460 for profile in profiles {
18461 let detail = profile.detail.as_deref().unwrap_or_default();
18462 for dep in dependency_dag_marker_refs(
18463 detail,
18464 &[
18465 "depends on",
18466 "depends-on",
18467 "deps:",
18468 "after",
18469 "blocked by",
18470 "requires",
18471 ],
18472 ) {
18473 if target_ids.contains(&dep) {
18474 dependency_dag_push_edge(
18475 edges,
18476 seen,
18477 DependencyDagEdge {
18478 from: dep.clone(),
18479 to: profile.id.clone(),
18480 kind: "explicit_depends_on".to_string(),
18481 weight: 1000,
18482 reasons: vec![format!("{} declares dependency on #{dep}", profile.id)],
18483 shared_files: Vec::new(),
18484 shared_symbols: Vec::new(),
18485 shared_tests: Vec::new(),
18486 shared_config_files: Vec::new(),
18487 shared_semantic_refs: Vec::new(),
18488 },
18489 );
18490 }
18491 }
18492 for downstream in dependency_dag_marker_refs(detail, &["before", "unblocks"]) {
18493 if target_ids.contains(&downstream) {
18494 dependency_dag_push_edge(
18495 edges,
18496 seen,
18497 DependencyDagEdge {
18498 from: profile.id.clone(),
18499 to: downstream.clone(),
18500 kind: "explicit_before".to_string(),
18501 weight: 900,
18502 reasons: vec![format!(
18503 "{} declares it should run before #{downstream}",
18504 profile.id
18505 )],
18506 shared_files: Vec::new(),
18507 shared_symbols: Vec::new(),
18508 shared_tests: Vec::new(),
18509 shared_config_files: Vec::new(),
18510 shared_semantic_refs: Vec::new(),
18511 },
18512 );
18513 }
18514 }
18515 }
18516}
18517
18518fn dependency_dag_worker_follow_up_edges(
18519 profiles: &[DependencyDagProfile],
18520 target_ids: &BTreeSet<String>,
18521 edges: &mut Vec<DependencyDagEdge>,
18522 seen: &mut BTreeSet<(String, String, String)>,
18523) {
18524 for profile in profiles {
18525 for follow_up in &profile.worker_feedback.follow_up_ids {
18526 if target_ids.contains(follow_up) {
18527 dependency_dag_push_edge(
18528 edges,
18529 seen,
18530 DependencyDagEdge {
18531 from: profile.id.clone(),
18532 to: follow_up.clone(),
18533 kind: "worker_result_follow_up".to_string(),
18534 weight: 700,
18535 reasons: vec![format!(
18536 "worker_result for #{} references follow-up #{}",
18537 profile.id, follow_up
18538 )],
18539 shared_files: Vec::new(),
18540 shared_symbols: Vec::new(),
18541 shared_tests: Vec::new(),
18542 shared_config_files: Vec::new(),
18543 shared_semantic_refs: Vec::new(),
18544 },
18545 );
18546 }
18547 }
18548 }
18549}
18550
18551fn dependency_dag_overlap_edges(
18552 profiles: &[DependencyDagProfile],
18553 edges: &mut Vec<DependencyDagEdge>,
18554 seen: &mut BTreeSet<(String, String, String)>,
18555) {
18556 for left_idx in 0..profiles.len() {
18557 for right_idx in (left_idx + 1)..profiles.len() {
18558 let left = &profiles[left_idx];
18559 let right = &profiles[right_idx];
18560 let shared_files = sorted_intersection(&left.source_files, &right.source_files);
18561 let shared_symbols = sorted_intersection(&left.source_symbols, &right.source_symbols);
18562 let shared_tests = sorted_intersection(&left.expected_tests, &right.expected_tests);
18563 let shared_config_files = sorted_intersection(&left.config_files, &right.config_files);
18564 let left_semantic = left.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
18565 let right_semantic = right.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
18566 let shared_semantic_refs = sorted_intersection(&left_semantic, &right_semantic);
18567 if shared_files.is_empty()
18568 && shared_symbols.is_empty()
18569 && shared_tests.is_empty()
18570 && shared_config_files.is_empty()
18571 && shared_semantic_refs.is_empty()
18572 {
18573 continue;
18574 }
18575 let kind = if shared_files.is_empty()
18576 && shared_symbols.is_empty()
18577 && shared_tests.is_empty()
18578 && shared_config_files.is_empty()
18579 {
18580 "semantic_relation"
18581 } else {
18582 "shared_resource"
18583 };
18584 let mut reasons = Vec::new();
18585 if !shared_files.is_empty() {
18586 reasons.push(format!("shared files: {}", shared_files.join(", ")));
18587 }
18588 if !shared_symbols.is_empty() {
18589 reasons.push(format!("shared symbols: {}", shared_symbols.join(", ")));
18590 }
18591 if !shared_tests.is_empty() {
18592 reasons.push(format!("shared tests: {}", shared_tests.join(" && ")));
18593 }
18594 if !shared_config_files.is_empty() {
18595 reasons.push(format!(
18596 "shared config files: {}",
18597 shared_config_files.join(", ")
18598 ));
18599 }
18600 if !shared_semantic_refs.is_empty() {
18601 reasons.push(format!(
18602 "shared semantic refs: {}",
18603 shared_semantic_refs.join(", ")
18604 ));
18605 }
18606 let weight = shared_files.len() * 100
18607 + shared_config_files.len() * 100
18608 + shared_symbols.len() * 40
18609 + shared_tests.len() * 10
18610 + shared_semantic_refs.len() * 5;
18611 dependency_dag_push_edge(
18612 edges,
18613 seen,
18614 DependencyDagEdge {
18615 from: left.id.clone(),
18616 to: right.id.clone(),
18617 kind: kind.to_string(),
18618 weight,
18619 reasons,
18620 shared_files,
18621 shared_symbols,
18622 shared_tests,
18623 shared_config_files,
18624 shared_semantic_refs,
18625 },
18626 );
18627 }
18628 }
18629}
18630
18631fn dependency_dag_topo_batches(
18632 targets: &[String],
18633 edges: &[DependencyDagEdge],
18634) -> (Vec<DependencyDagTopoBatch>, DependencyDagCycleDiagnostics) {
18635 let target_set = targets.iter().cloned().collect::<BTreeSet<_>>();
18636 let order = targets
18637 .iter()
18638 .enumerate()
18639 .map(|(idx, id)| (id.clone(), idx))
18640 .collect::<BTreeMap<_, _>>();
18641 let mut indegree = targets
18642 .iter()
18643 .map(|id| (id.clone(), 0usize))
18644 .collect::<BTreeMap<_, _>>();
18645 let mut outgoing = BTreeMap::<String, Vec<String>>::new();
18646 let mut seen_pairs = BTreeSet::<(String, String)>::new();
18647 for edge in edges {
18648 if !target_set.contains(&edge.from) || !target_set.contains(&edge.to) {
18649 continue;
18650 }
18651 if !seen_pairs.insert((edge.from.clone(), edge.to.clone())) {
18652 continue;
18653 }
18654 *indegree.entry(edge.to.clone()).or_default() += 1;
18655 outgoing
18656 .entry(edge.from.clone())
18657 .or_default()
18658 .push(edge.to.clone());
18659 }
18660 for values in outgoing.values_mut() {
18661 values.sort_by_key(|id| order.get(id).copied().unwrap_or(usize::MAX));
18662 values.dedup();
18663 }
18664
18665 let mut processed = BTreeSet::new();
18666 let mut batches = Vec::new();
18667 loop {
18668 let mut ready = targets
18669 .iter()
18670 .filter(|id| !processed.contains(*id))
18671 .filter(|id| indegree.get(*id).copied().unwrap_or(0) == 0)
18672 .cloned()
18673 .collect::<Vec<_>>();
18674 ready.sort_by_key(|id| order.get(id).copied().unwrap_or(usize::MAX));
18675 if ready.is_empty() {
18676 break;
18677 }
18678 for id in &ready {
18679 processed.insert(id.clone());
18680 for next in outgoing.get(id).into_iter().flatten() {
18681 if let Some(value) = indegree.get_mut(next) {
18682 *value = value.saturating_sub(1);
18683 }
18684 }
18685 }
18686 batches.push(DependencyDagTopoBatch {
18687 batch: batches.len() + 1,
18688 targets: ready,
18689 });
18690 }
18691
18692 let blocked_nodes = targets
18693 .iter()
18694 .filter(|id| !processed.contains(*id))
18695 .cloned()
18696 .collect::<Vec<_>>();
18697 let blocked_set = blocked_nodes.iter().cloned().collect::<BTreeSet<_>>();
18698 let cycle_edges = edges
18699 .iter()
18700 .filter(|edge| blocked_set.contains(&edge.from) && blocked_set.contains(&edge.to))
18701 .cloned()
18702 .collect::<Vec<_>>();
18703 (
18704 batches,
18705 DependencyDagCycleDiagnostics {
18706 has_cycles: !blocked_nodes.is_empty(),
18707 blocked_nodes,
18708 cycle_edges,
18709 },
18710 )
18711}
18712
18713fn dependency_dag_replay_commands(
18714 path: &Path,
18715 scope: Option<&str>,
18716 targets: &[String],
18717 depth: usize,
18718 limit: usize,
18719) -> Vec<String> {
18720 let target_args = targets
18721 .iter()
18722 .map(|target| shell_quote(target))
18723 .collect::<Vec<_>>()
18724 .join(" ");
18725 let mut command = format!(
18726 "tsift dependency-dag --path {}{} --depth {} --limit {} --json",
18727 shell_quote(path.to_string_lossy().as_ref()),
18728 scope
18729 .map(|scope| format!(" --scope {}", shell_quote(scope)))
18730 .unwrap_or_default(),
18731 depth,
18732 limit
18733 );
18734 if !target_args.is_empty() {
18735 command.push(' ');
18736 command.push_str(&target_args);
18737 }
18738 vec![command]
18739}
18740
18741fn build_dependency_dag_report(
18742 path: &Path,
18743 scope: Option<&str>,
18744 raw_targets: &[String],
18745 depth: usize,
18746 limit: usize,
18747) -> Result<DependencyDagReport> {
18748 let root = lint::resolve_project_root_or_canonical_path(path)?;
18749 write_traversal_graph_store(&root, path, scope)
18750 .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
18751 let graph_db = graph_substrate_db_path(&root, scope);
18752 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
18753 .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
18754 let mut warnings = Vec::new();
18755 if let Some(recovery) = store.read_only_recovery() {
18756 warnings.push(graph_db_read_recovery_diagnostic(recovery));
18757 }
18758 let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
18759 if freshness.fail_closed {
18760 bail!(
18761 "dependency-dag graph projection failed closed: {}; repair: {}",
18762 freshness.diagnostics.join("; "),
18763 graph_db_repair_commands(&root, scope).join("; ")
18764 );
18765 }
18766
18767 let target_nodes = dependency_dag_resolve_backlog_nodes(&root, path, &store, raw_targets)?;
18768 let graph_nodes = store.all_nodes()?;
18769 let graph_edges = store.all_edges()?;
18770 let graph_nodes_by_id = graph_nodes
18771 .into_iter()
18772 .map(|node| (node.id.clone(), node))
18773 .collect::<BTreeMap<_, _>>();
18774 let profiles = target_nodes
18775 .iter()
18776 .map(|node| {
18777 dependency_dag_node_profile(
18778 &root,
18779 &store,
18780 node,
18781 &graph_nodes_by_id,
18782 &graph_edges,
18783 depth,
18784 limit,
18785 )
18786 })
18787 .collect::<Result<Vec<_>>>()?;
18788 let targets = profiles
18789 .iter()
18790 .map(|profile| profile.id.clone())
18791 .collect::<Vec<_>>();
18792 let target_ids = targets.iter().cloned().collect::<BTreeSet<_>>();
18793
18794 let mut edges = Vec::new();
18795 let mut seen_edges = BTreeSet::new();
18796 dependency_dag_explicit_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
18797 dependency_dag_worker_follow_up_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
18798 dependency_dag_overlap_edges(&profiles, &mut edges, &mut seen_edges);
18799 edges.sort_by(|left, right| {
18800 left.from
18801 .cmp(&right.from)
18802 .then(left.to.cmp(&right.to))
18803 .then(left.kind.cmp(&right.kind))
18804 });
18805 let (topo_batches, cycle_diagnostics) = dependency_dag_topo_batches(&targets, &edges);
18806
18807 let nodes = profiles
18808 .into_iter()
18809 .map(|profile| DependencyDagNode {
18810 id: profile.id,
18811 graph_node_id: profile.graph_node_id,
18812 label: profile.label,
18813 path: profile.path,
18814 line: profile.line,
18815 detail: profile.detail,
18816 source_files: sorted_set(&profile.source_files),
18817 source_symbols: sorted_set(&profile.source_symbols),
18818 config_files: sorted_set(&profile.config_files),
18819 expected_tests: sorted_set(&profile.expected_tests),
18820 semantic_refs: profile.semantic_refs.into_values().collect(),
18821 worker_feedback: profile.worker_feedback,
18822 })
18823 .collect::<Vec<_>>();
18824 let projection_hashes = freshness
18825 .content_hash
18826 .clone()
18827 .into_iter()
18828 .collect::<Vec<_>>();
18829 let replay_commands = dependency_dag_replay_commands(path, scope, &targets, depth, limit);
18830 let repair_commands = graph_db_repair_commands(&root, scope);
18831 let summary = DependencyDagSummary {
18832 nodes: nodes.len(),
18833 edges: edges.len(),
18834 topo_batches: topo_batches.len(),
18835 has_cycles: cycle_diagnostics.has_cycles,
18836 };
18837
18838 Ok(DependencyDagReport {
18839 contract_version: DEPENDENCY_DAG_CONTRACT_VERSION,
18840 root: root.to_string_lossy().to_string(),
18841 scope: scope.map(str::to_string),
18842 path: path.to_string_lossy().to_string(),
18843 targets,
18844 projection_freshness: freshness,
18845 projection_hashes,
18846 nodes,
18847 edges,
18848 topo_batches,
18849 cycle_diagnostics,
18850 summary,
18851 replay_commands,
18852 repair_commands,
18853 warnings,
18854 })
18855}
18856
18857fn print_dependency_dag_human(report: &DependencyDagReport, compact: bool) {
18858 if compact {
18859 println!(
18860 "dependency-dag targets:{} edges:{} batches:{} cycles:{}",
18861 report.targets.len(),
18862 report.edges.len(),
18863 report.topo_batches.len(),
18864 report.cycle_diagnostics.has_cycles
18865 );
18866 } else {
18867 println!("Dependency DAG");
18868 println!(" targets: {}", report.targets.join(", "));
18869 println!(" edges: {}", report.edges.len());
18870 println!(" cycles: {}", report.cycle_diagnostics.has_cycles);
18871 }
18872 for batch in &report.topo_batches {
18873 println!("batch #{}: {}", batch.batch, batch.targets.join(", "));
18874 }
18875 for edge in &report.edges {
18876 println!(
18877 "edge {} -> {} kind:{} weight:{}",
18878 edge.from, edge.to, edge.kind, edge.weight
18879 );
18880 for reason in &edge.reasons {
18881 println!(" reason: {reason}");
18882 }
18883 }
18884 if report.cycle_diagnostics.has_cycles {
18885 println!(
18886 "cycle blocked nodes: {}",
18887 report.cycle_diagnostics.blocked_nodes.join(", ")
18888 );
18889 }
18890 for command in &report.replay_commands {
18891 println!("replay: {command}");
18892 }
18893 for command in &report.repair_commands {
18894 println!("repair: {command}");
18895 }
18896 for warning in &report.warnings {
18897 println!("warning: {warning}");
18898 }
18899}
18900
18901fn cmd_dependency_dag(
18902 path: &Path,
18903 scope: Option<&str>,
18904 raw_targets: &[String],
18905 depth: usize,
18906 limit: usize,
18907 format: OutputFormat,
18908) -> Result<()> {
18909 let report = build_dependency_dag_report(path, scope, raw_targets, depth, limit)?;
18910 if format.json_output {
18911 print_json_or_envelope(
18912 &report,
18913 &format,
18914 "dependency-dag",
18915 "topological-planning",
18916 ToolEnvelopeSummary {
18917 text: format!(
18918 "Dependency DAG for {} target(s): edges={} batches={} cycles={}",
18919 report.targets.len(),
18920 report.edges.len(),
18921 report.topo_batches.len(),
18922 report.cycle_diagnostics.has_cycles
18923 ),
18924 metrics: vec![
18925 envelope_metric("targets", report.targets.len()),
18926 envelope_metric("edges", report.edges.len()),
18927 envelope_metric("topo_batches", report.topo_batches.len()),
18928 envelope_metric("has_cycles", report.cycle_diagnostics.has_cycles),
18929 ],
18930 },
18931 report.cycle_diagnostics.has_cycles,
18932 report.replay_commands.clone(),
18933 )
18934 } else {
18935 print_dependency_dag_human(&report, format.compact);
18936 Ok(())
18937 }
18938}
18939
18940pub(crate) fn render_log_digest_from_input(
18941 path: &Path,
18942 input: &str,
18943 format: OutputFormat,
18944) -> Result<()> {
18945 let report = log_digest::compute(path, input)?;
18946 if format.json_output {
18947 println!(
18948 "{}",
18949 to_json_schema(&report, format.pretty, format.terse, format.schema)?
18950 );
18951 return Ok(());
18952 }
18953
18954 if format.compact {
18955 println!(
18956 "log lines:{} signals:{} repeats:{} files:{} syms:{} stacks:{}",
18957 report.non_empty_lines,
18958 report.signal_groups,
18959 report.repeated_line_groups,
18960 report.file_ref_groups,
18961 report.symbol_ref_groups,
18962 report.stack_groups
18963 );
18964 for signal in &report.signals {
18965 let location = match (&signal.path, signal.line) {
18966 (Some(path), Some(line)) => format!("{path}:{line}"),
18967 (Some(path), None) => path.clone(),
18968 _ => "-".to_string(),
18969 };
18970 println!(
18971 "{} sev:{} count:{} sums:{} msg:{}",
18972 location,
18973 signal.severity,
18974 signal.occurrences,
18975 log_digest_summary_label(signal.summary_state),
18976 truncate_for_compact(&signal.message, 80)
18977 );
18978 }
18979 for repeated in &report.repeated_lines {
18980 println!(
18981 "repeat count:{} line:{}",
18982 repeated.occurrences,
18983 truncate_for_compact(&repeated.line, 80)
18984 );
18985 }
18986 for symbol in &report.symbol_refs {
18987 println!(
18988 "sym:{} count:{} sums:{}",
18989 symbol.symbol,
18990 symbol.occurrences,
18991 log_digest_summary_label(symbol.summary_state)
18992 );
18993 }
18994 for warning in &report.warnings {
18995 println!("warning: {warning}");
18996 }
18997 return Ok(());
18998 }
18999
19000 println!("Log digest");
19001 println!(" lines: {}", report.total_lines);
19002 println!(" non-empty lines: {}", report.non_empty_lines);
19003 println!(" signal groups: {}", report.signal_groups);
19004 println!(
19005 " repeated lines: {}",
19006 report.repeated_line_groups
19007 );
19008 println!(
19009 " repeated line instances: {}",
19010 report.repeated_line_occurrences
19011 );
19012 println!(" file refs: {}", report.file_ref_groups);
19013 println!(" symbol refs: {}", report.symbol_ref_groups);
19014 println!(" stack groups: {}", report.stack_groups);
19015
19016 if !report.signals.is_empty() {
19017 println!();
19018 println!("Signals:");
19019 for signal in &report.signals {
19020 match (&signal.path, signal.line, signal.column) {
19021 (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
19022 (Some(path), Some(line), None) => println!("{path}:{line}"),
19023 (Some(path), None, _) => println!("{path}"),
19024 (None, _, _) => println!("(no file anchor)"),
19025 }
19026 println!(" severity: {}", signal.severity);
19027 println!(" occurrences: {}", signal.occurrences);
19028 println!(" message: {}", signal.message);
19029 println!(
19030 " cached summaries: {}",
19031 log_digest_summary_label(signal.summary_state)
19032 );
19033 for summary in &signal.current_summaries {
19034 println!(
19035 " - {}: {}",
19036 summary.symbol,
19037 truncate_for_compact(&summary.summary, 160)
19038 );
19039 }
19040 }
19041 }
19042
19043 if !report.repeated_lines.is_empty() {
19044 println!();
19045 println!("Repeated lines:");
19046 for repeated in &report.repeated_lines {
19047 println!(
19048 " {}x {}",
19049 repeated.occurrences,
19050 truncate_for_compact(&repeated.line, 180)
19051 );
19052 }
19053 }
19054
19055 if !report.file_refs.is_empty() {
19056 println!();
19057 println!("Anchored files:");
19058 for file_ref in &report.file_refs {
19059 match (file_ref.line, file_ref.column) {
19060 (Some(line), Some(column)) => println!("{}:{}:{}", file_ref.path, line, column),
19061 (Some(line), None) => println!("{}:{}", file_ref.path, line),
19062 (None, _) => println!("{}", file_ref.path),
19063 }
19064 println!(" occurrences: {}", file_ref.occurrences);
19065 println!(
19066 " cached summaries: {}",
19067 log_digest_summary_label(file_ref.summary_state)
19068 );
19069 for summary in &file_ref.current_summaries {
19070 println!(
19071 " - {}: {}",
19072 summary.symbol,
19073 truncate_for_compact(&summary.summary, 160)
19074 );
19075 }
19076 }
19077 }
19078
19079 if !report.symbol_refs.is_empty() {
19080 println!();
19081 println!("Symbol candidates:");
19082 for symbol in &report.symbol_refs {
19083 println!("{}", symbol.symbol);
19084 println!(" occurrences: {}", symbol.occurrences);
19085 println!(
19086 " cached summaries: {}",
19087 log_digest_summary_label(symbol.summary_state)
19088 );
19089 for summary in &symbol.current_summaries {
19090 println!(
19091 " - {}: {}",
19092 summary.symbol,
19093 truncate_for_compact(&summary.summary, 160)
19094 );
19095 }
19096 }
19097 }
19098
19099 if !report.stack_traces.is_empty() {
19100 println!();
19101 println!("Stack groups:");
19102 for stack in &report.stack_traces {
19103 println!(" occurrences: {}", stack.occurrences);
19104 for frame in &stack.frames {
19105 println!(" - {}", frame);
19106 }
19107 }
19108 }
19109
19110 for warning in &report.warnings {
19111 println!("warning: {warning}");
19112 }
19113 Ok(())
19114}
19115
19116pub(crate) fn metric_digest_trend_label(trend: metric_digest::MetricDigestTrend) -> &'static str {
19117 match trend {
19118 metric_digest::MetricDigestTrend::Improved => "improved",
19119 metric_digest::MetricDigestTrend::Regressed => "regressed",
19120 metric_digest::MetricDigestTrend::Flat => "flat",
19121 metric_digest::MetricDigestTrend::Unknown => "changed",
19122 }
19123}
19124
19125pub(crate) fn metric_digest_gate_label(
19126 decision: metric_digest::CommunitySearchGateDecision,
19127) -> &'static str {
19128 match decision {
19129 metric_digest::CommunitySearchGateDecision::Pass => "pass",
19130 metric_digest::CommunitySearchGateDecision::Block => "block",
19131 }
19132}
19133
19134fn cmd_dci_benchmark(fixture_path: &Path, format: OutputFormat) -> Result<()> {
19135 let input = fs::read_to_string(fixture_path)
19136 .with_context(|| format!("reading dci-benchmark fixture: {}", fixture_path.display()))?;
19137 let report = dci_benchmark::compute(&input)?;
19138
19139 if format.json_output {
19140 println!(
19141 "{}",
19142 to_json_schema(&report, format.pretty, format.terse, format.schema)?
19143 );
19144 return Ok(());
19145 }
19146
19147 if format.compact {
19148 println!(
19149 "dci tasks:{} strategies:{} warnings:{}",
19150 report.tasks_loaded,
19151 report.strategies_compared,
19152 report.warnings.len()
19153 );
19154 for summary in &report.strategy_summaries {
19155 println!(
19156 "{} rank:{} loc:{}/{} rate:{} calls:{} latency_ms:{} tokens:{}",
19157 summary.strategy,
19158 summary.rank,
19159 summary.localized,
19160 summary.task_runs,
19161 dci_benchmark::format_number(summary.localization_rate * 100.0),
19162 dci_benchmark::format_number(summary.avg_tool_calls),
19163 dci_benchmark::format_number(summary.avg_latency_ms),
19164 dci_benchmark::format_number(summary.avg_estimated_tokens)
19165 );
19166 }
19167 for warning in &report.warnings {
19168 println!("warning: {warning}");
19169 }
19170 return Ok(());
19171 }
19172
19173 println!("DCI benchmark");
19174 if let Some(description) = &report.description {
19175 println!(" description: {}", description);
19176 }
19177 println!(" tasks loaded: {}", report.tasks_loaded);
19178 println!(" strategies compared: {}", report.strategies_compared);
19179
19180 println!();
19181 println!("Strategy summary:");
19182 for summary in &report.strategy_summaries {
19183 println!(
19184 " #{} {}: localization {}/{} ({:.1}%), avg calls {}, avg latency {}ms, avg tokens {}",
19185 summary.rank,
19186 summary.strategy,
19187 summary.localized,
19188 summary.task_runs,
19189 summary.localization_rate * 100.0,
19190 dci_benchmark::format_number(summary.avg_tool_calls),
19191 dci_benchmark::format_number(summary.avg_latency_ms),
19192 dci_benchmark::format_number(summary.avg_estimated_tokens)
19193 );
19194 }
19195
19196 println!();
19197 println!("Task winners:");
19198 for row in &report.task_rows {
19199 let label = row
19200 .label
19201 .as_ref()
19202 .map(|value| format!(" ({value})"))
19203 .unwrap_or_default();
19204 println!(" {}{}", row.task_id, label);
19205 println!(" localized: {}", row.best_localization.join(", "));
19206 println!(
19207 " lowest calls: {}, lowest latency: {}, lowest tokens: {}",
19208 row.lowest_tool_calls.as_deref().unwrap_or("-"),
19209 row.lowest_latency.as_deref().unwrap_or("-"),
19210 row.lowest_token_budget.as_deref().unwrap_or("-")
19211 );
19212 }
19213
19214 for warning in &report.warnings {
19215 println!("warning: {warning}");
19216 }
19217 Ok(())
19218}
19219
/// Budget-limited preview of one session in a session-review report.
///
/// Populated by `build_session_review_budget_report`.
#[derive(Clone, Serialize)]
struct SessionReviewBudgetSessionPreview {
    /// Stable handle with the `srev` prefix, keyed on source, path, and total tokens.
    handle: String,
    /// Session source identifier, copied from the review entry.
    source: String,
    /// Session path, truncated to the byte budget.
    path: String,
    /// Match reasons, capped at the item budget and each truncated to the byte budget.
    matched_by: Vec<String>,
    /// Total tokens reported for the session.
    total_tokens: u64,
    /// Total tokens of the session's largest turn.
    largest_turn_total_tokens: u64,
    /// Number of prompt targets in the session.
    prompt_targets: usize,
    /// Number of failure groups in the session.
    failures: usize,
    /// `tsift session-digest ...` command that expands this session.
    expand: String,
}
19232
/// Budget-limited preview of one prompt target in a session-review report.
#[derive(Clone, Serialize)]
struct SessionReviewBudgetPromptPreview {
    /// Stable handle with the `spt` prefix, keyed on the untruncated prompt text.
    handle: String,
    /// Prompt text, truncated to the byte budget.
    text: String,
    /// How many times the prompt target occurred.
    occurrences: usize,
    /// `tsift session-review ... --json` command that expands the full review.
    expand: String,
}
19240
/// Budget-limited preview of one failure group.
///
/// Used both for the review-level failure list (handle prefix `sfl`) and for
/// the next-context unresolved failures (handle prefix `snf`).
#[derive(Clone, Serialize)]
struct SessionReviewBudgetFailurePreview {
    /// Stable handle keyed on the failure kind and untruncated message.
    handle: String,
    /// Failure kind, copied from the review entry.
    kind: String,
    /// Failure message, truncated to the byte budget.
    message: String,
    /// How many times the failure occurred.
    occurrences: usize,
    /// Associated command, truncated to the byte budget; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    command: Option<String>,
    /// Associated session path, truncated to the byte budget; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    session_path: Option<String>,
    /// `tsift session-review ...` command that expands the full data.
    expand: String,
}
19253
/// Budget-limited view of a session-review report.
///
/// Built by `build_session_review_budget_report`; every list is capped at
/// `max_items` entries and string previews are truncated to `max_bytes`.
#[derive(Clone, Serialize)]
struct SessionReviewBudgetReport {
    /// Review target, copied from the source report.
    target: String,
    /// Kind of the review target, copied from the source report.
    target_kind: String,
    /// Item cap applied to every list in this report.
    max_items: usize,
    /// Byte cap applied to truncated strings in this report.
    max_bytes: usize,
    /// Number of sessions matched by the source report (before capping).
    sessions_matched: usize,
    /// Aggregate prompt tokens from the source report.
    prompt_tokens: u64,
    /// Aggregate cached input tokens from the source report.
    cached_input_tokens: u64,
    /// Aggregate total tokens from the source report.
    total_tokens: u64,
    /// Total tokens of the latest session; omitted from output when no latest cost is known.
    #[serde(skip_serializing_if = "Option::is_none")]
    latest_session_total_tokens: Option<u64>,
    /// Largest-turn tokens of the latest session; omitted from output when no latest cost is known.
    #[serde(skip_serializing_if = "Option::is_none")]
    latest_session_largest_turn_total_tokens: Option<u64>,
    /// True when any source list held more than `max_items` entries.
    truncated: bool,
    /// Session previews, capped at `max_items`.
    sessions: Vec<SessionReviewBudgetSessionPreview>,
    /// Prompt-target previews, capped at `max_items`.
    prompt_targets: Vec<SessionReviewBudgetPromptPreview>,
    /// Failure previews, capped at `max_items`.
    failures: Vec<SessionReviewBudgetFailurePreview>,
    /// Guardrail messages, capped at `max_items` and truncated to `max_bytes`.
    guardrails: Vec<String>,
    /// Warnings, capped at `max_items` and truncated to `max_bytes`.
    warnings: Vec<String>,
}
19275
/// A token-saving follow-up action derived from a session-review guardrail.
///
/// Built by `build_next_token_actions` for guardrail kinds that
/// `token_action_priority` recognizes.
#[derive(Clone, Serialize)]
struct SessionReviewNextTokenAction {
    /// Rank from `token_action_priority`; lower values sort first.
    priority: usize,
    /// Guardrail kind this action was derived from.
    kind: String,
    /// Guardrail severity, copied from the guardrail.
    severity: String,
    /// Guardrail message, truncated to the byte budget.
    message: String,
    /// Guardrail guidance, truncated to the byte budget.
    guidance: String,
    /// `agent-doc compact ... --commit` command; only set when the review
    /// target is a `.md` file, and omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    compact_command: Option<String>,
    /// `agent-doc start ...` command; only set when the review target is a
    /// `.md` file, and omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    restart_command: Option<String>,
    /// `tsift --envelope ...` digest commands to run next.
    digest_commands: Vec<String>,
}
19289
/// Budget-limited view of a session review's next-context section.
///
/// Built by `build_session_review_next_context_budget_report`. The `*_total`
/// fields hold the uncapped counts so callers can tell how much was cut.
#[derive(Clone, Serialize)]
struct SessionReviewNextContextBudgetReport {
    /// Version tag of the follow-up contract.
    contract_version: &'static str,
    /// Next-context target, copied from the source report.
    target: String,
    /// Item cap applied to the preview lists.
    max_items: usize,
    /// Byte cap applied to truncated strings.
    max_bytes: usize,
    /// Uncapped number of active prompt targets.
    prompt_target_total: usize,
    /// Uncapped number of touched files.
    touched_file_total: usize,
    /// Uncapped number of touched symbols.
    touched_symbol_total: usize,
    /// Number of unresolved failures left after removing those already
    /// covered by a token action.
    unresolved_failure_total: usize,
    /// True when any list exceeded its cap.
    truncated: bool,
    /// Active prompt targets, capped and truncated.
    prompt_targets: Vec<String>,
    /// Touched file paths, capped and truncated.
    touched_files: Vec<String>,
    /// Touched symbol names, capped and truncated.
    touched_symbols: Vec<String>,
    /// Compact references for the touched symbols; skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    touched_symbol_refs: Vec<CompactSymbolRefPreview>,
    /// Unresolved failure previews, capped at `max_items`.
    unresolved_failures: Vec<SessionReviewBudgetFailurePreview>,
    /// Guardrail-derived token actions; skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    next_token_actions: Vec<SessionReviewNextTokenAction>,
    /// Follow-up digest commands, capped at the budget's follow-up item count.
    next_digest_commands: Vec<String>,
}
19311
/// Graph-orchestration section of a context pack (see
/// `ContextPackReport::graph_orchestration`).
// NOTE(review): the builder is outside this chunk; field meanings beyond
// their names and types should be confirmed against it.
#[derive(Clone, Serialize)]
struct ContextPackGraphOrchestration {
    /// Version tag of this section's contract.
    contract_version: &'static str,
    graph_db_command: String,
    projection_freshness: GraphDbFreshnessReport,
    projection_hashes: Vec<String>,
    evidence_packet_ids: Vec<String>,
    conflict_matrix_decisions: Vec<String>,
    worker_ownership_blocks: Vec<String>,
    follow_up_commands: Vec<String>,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
19325
/// Top-level serialized context-pack report, combining the next-context
/// preview with diff, test, and log digest sections plus exploration and
/// graph-orchestration data.
// NOTE(review): the builder is outside this chunk; confirm per-field
// semantics there.
#[derive(Clone, Serialize)]
struct ContextPackReport {
    root: String,
    target: String,
    target_kind: String,
    max_items: usize,
    max_bytes: usize,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    status_reminders: Vec<String>,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ontology_refs: Vec<CompactOntologyRefPreview>,
    next_context: SessionReviewNextContextBudgetReport,
    diff_digest: ContextPackDiffPreview,
    /// Optional section: carries a status and command even when no report is attached.
    test_digest: ContextPackOptionalSection<ContextPackTestPreview>,
    /// Optional section: carries a status and command even when no report is attached.
    log_digest: ContextPackOptionalSection<ContextPackLogPreview>,
    exploration: ExplorationPacket,
    graph_orchestration: ContextPackGraphOrchestration,
    resume_commands: Vec<String>,
}
19345
/// Wrapper for a context-pack section whose report of type `T` may be absent.
#[derive(Clone, Serialize)]
struct ContextPackOptionalSection<T> {
    status: String,
    command: String,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    source: Option<String>,
    /// The section payload; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    report: Option<T>,
}
19355
/// Diff-digest section of a context pack: aggregate counters plus per-file
/// previews.
#[derive(Clone, Serialize)]
struct ContextPackDiffPreview {
    mode: String,
    files_changed: usize,
    files_with_current_summaries: usize,
    symbols_touched: usize,
    call_edges_added: usize,
    call_edges_removed: usize,
    // NOTE(review): presumably set when `files` was cut to fit the budget — confirm in the builder.
    truncated: bool,
    /// Per-file previews.
    files: Vec<ContextPackDiffFilePreview>,
}
19367
/// Preview of one changed file inside `ContextPackDiffPreview`.
#[derive(Clone, Serialize)]
struct ContextPackDiffFilePreview {
    path: String,
    status: String,
    touched_symbols: Vec<String>,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    touched_symbol_refs: Vec<CompactSymbolRefPreview>,
    summary_state: String,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    summary_refs: Vec<ContextPackSummaryRefPreview>,
    added_call_edges: usize,
    removed_call_edges: usize,
    warnings: Vec<String>,
}
19382
/// Compact reference to a cached symbol summary, with a command to expand it.
#[derive(Clone, Serialize)]
struct ContextPackSummaryRefPreview {
    /// Stable handle identifying this summary reference.
    handle: String,
    /// Symbol name the summary belongs to.
    symbol: String,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ontology_refs: Vec<CompactOntologyRefPreview>,
    /// Summary text.
    summary: String,
    /// Command that expands the full summary.
    expand: String,
}
19394
/// Test-digest section of a context pack: runner name, counters, and grouped
/// failure previews.
#[derive(Clone, Serialize)]
struct ContextPackTestPreview {
    runner: String,
    failures: usize,
    grouped_failures: usize,
    counts: ContextPackTestCounts,
    // NOTE(review): presumably set when `failure_groups` was cut to fit the budget — confirm in the builder.
    truncated: bool,
    failure_groups: Vec<ContextPackTestFailurePreview>,
    warnings: Vec<String>,
}
19405
/// Optional pass/fail/skip counters for a test run; each counter is left out
/// of the serialized output when it is `None`.
#[derive(Clone, Serialize)]
struct ContextPackTestCounts {
    #[serde(skip_serializing_if = "Option::is_none")]
    passed: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    failed: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    skipped: Option<usize>,
}
19415
/// Preview of one grouped test failure inside `ContextPackTestPreview`.
#[derive(Clone, Serialize)]
struct ContextPackTestFailurePreview {
    tests: Vec<String>,
    message: String,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<String>,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    line: Option<usize>,
    occurrences: usize,
    summary_state: String,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    summary_refs: Vec<ContextPackSummaryRefPreview>,
}
19429
/// Log-digest section of a context pack: line and group counters plus
/// previews of signals, repeated lines, file references, and symbol
/// references.
#[derive(Clone, Serialize)]
struct ContextPackLogPreview {
    total_lines: usize,
    non_empty_lines: usize,
    signal_groups: usize,
    repeated_line_groups: usize,
    file_ref_groups: usize,
    symbol_ref_groups: usize,
    stack_groups: usize,
    // NOTE(review): presumably set when any preview list was cut to fit the budget — confirm in the builder.
    truncated: bool,
    signals: Vec<ContextPackLogSignalPreview>,
    repeated_lines: Vec<ContextPackLogRepeatedLinePreview>,
    file_refs: Vec<ContextPackLogFileRefPreview>,
    symbol_refs: Vec<ContextPackLogSymbolRefPreview>,
    warnings: Vec<String>,
}
19446
/// Preview of one log signal group inside `ContextPackLogPreview`.
#[derive(Clone, Serialize)]
struct ContextPackLogSignalPreview {
    severity: String,
    message: String,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<String>,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    line: Option<usize>,
    occurrences: usize,
    summary_state: String,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    summary_refs: Vec<ContextPackSummaryRefPreview>,
}
19460
/// Preview of one repeated log line and how often it occurred.
#[derive(Clone, Serialize)]
struct ContextPackLogRepeatedLinePreview {
    /// The repeated line's text.
    line: String,
    /// Number of occurrences of the line.
    occurrences: usize,
}
19466
/// Preview of one file reference found in a log inside `ContextPackLogPreview`.
#[derive(Clone, Serialize)]
struct ContextPackLogFileRefPreview {
    path: String,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    line: Option<usize>,
    occurrences: usize,
    summary_state: String,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    summary_refs: Vec<ContextPackSummaryRefPreview>,
}
19477
/// Preview of one symbol reference found in a log inside `ContextPackLogPreview`.
#[derive(Clone, Serialize)]
struct ContextPackLogSymbolRefPreview {
    /// Stable handle identifying this symbol reference.
    handle: String,
    symbol: String,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ontology_refs: Vec<CompactOntologyRefPreview>,
    occurrences: usize,
    summary_state: String,
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    summary_refs: Vec<ContextPackSummaryRefPreview>,
}
19491
/// Maps an internal session source identifier to the value passed to the
/// `--source` flag of `tsift session-digest`.
///
/// Any identifier that is not one of the three known sources falls back to
/// `"markdown"`.
fn session_review_source_flag(source: &str) -> &'static str {
    // (internal identifier, CLI flag value)
    const KNOWN_SOURCES: [(&str, &str); 3] = [
        ("claude_jsonl", "claude-jsonl"),
        ("codex_jsonl", "codex-jsonl"),
        ("agent_doc_log", "agent-doc-log"),
    ];

    KNOWN_SOURCES
        .iter()
        .find(|(identifier, _)| *identifier == source)
        .map_or("markdown", |&(_, flag)| flag)
}
19500
19501pub(crate) fn build_session_review_budget_report(
19502 report: &session_review::SessionReviewReport,
19503 budget: ResponseBudget,
19504) -> SessionReviewBudgetReport {
19505 let max_items = budget.preview_items();
19506 let max_bytes = budget.preview_bytes();
19507 let review_expand = format!(
19508 "tsift session-review {} --json",
19509 shell_quote(&report.target)
19510 );
19511 let sessions = report
19512 .sessions
19513 .iter()
19514 .take(max_items)
19515 .map(|entry| SessionReviewBudgetSessionPreview {
19516 handle: stable_handle(
19517 "srev",
19518 &format!("{}:{}:{}", entry.source, entry.path, entry.total_tokens),
19519 ),
19520 source: entry.source.clone(),
19521 path: truncate_for_budget(&entry.path, max_bytes),
19522 matched_by: entry
19523 .matched_by
19524 .iter()
19525 .take(max_items)
19526 .map(|value| truncate_for_budget(value, max_bytes))
19527 .collect(),
19528 total_tokens: entry.total_tokens,
19529 largest_turn_total_tokens: entry.largest_turn_total_tokens,
19530 prompt_targets: entry.prompt_target_count,
19531 failures: entry.failure_groups,
19532 expand: format!(
19533 "tsift session-digest --path {} --input {} --source {}",
19534 shell_quote(&report.root),
19535 shell_quote(&entry.path),
19536 session_review_source_flag(&entry.source)
19537 ),
19538 })
19539 .collect();
19540 let prompt_targets = report
19541 .prompt_targets
19542 .iter()
19543 .take(max_items)
19544 .map(|entry| SessionReviewBudgetPromptPreview {
19545 handle: stable_handle("spt", &entry.text),
19546 text: truncate_for_budget(&entry.text, max_bytes),
19547 occurrences: entry.occurrences,
19548 expand: review_expand.clone(),
19549 })
19550 .collect();
19551 let failures = report
19552 .failures
19553 .iter()
19554 .take(max_items)
19555 .map(|entry| SessionReviewBudgetFailurePreview {
19556 handle: stable_handle("sfl", &format!("{}:{}", entry.kind, entry.message)),
19557 kind: entry.kind.clone(),
19558 message: truncate_for_budget(&entry.message, max_bytes),
19559 occurrences: entry.occurrences,
19560 command: entry
19561 .command
19562 .as_ref()
19563 .map(|command| truncate_for_budget(command, max_bytes)),
19564 session_path: entry
19565 .session_path
19566 .as_ref()
19567 .map(|path| truncate_for_budget(path, max_bytes)),
19568 expand: review_expand.clone(),
19569 })
19570 .collect();
19571 let guardrails = report
19572 .guardrails
19573 .iter()
19574 .take(max_items)
19575 .map(|entry| truncate_for_budget(&entry.message, max_bytes))
19576 .collect();
19577 let warnings = report
19578 .warnings
19579 .iter()
19580 .take(max_items)
19581 .map(|entry| truncate_for_budget(entry, max_bytes))
19582 .collect();
19583
19584 SessionReviewBudgetReport {
19585 target: report.target.clone(),
19586 target_kind: report.target_kind.clone(),
19587 max_items,
19588 max_bytes,
19589 sessions_matched: report.sessions_matched,
19590 prompt_tokens: report.prompt_tokens,
19591 cached_input_tokens: report.cached_input_tokens,
19592 total_tokens: report.total_tokens,
19593 latest_session_total_tokens: report
19594 .latest_session_cost
19595 .as_ref()
19596 .map(|cost| cost.total_tokens),
19597 latest_session_largest_turn_total_tokens: report
19598 .latest_session_cost
19599 .as_ref()
19600 .map(|cost| cost.largest_turn_total_tokens),
19601 truncated: report.sessions.len() > max_items
19602 || report.prompt_targets.len() > max_items
19603 || report.failures.len() > max_items
19604 || report.guardrails.len() > max_items
19605 || report.warnings.len() > max_items,
19606 sessions,
19607 prompt_targets,
19608 failures,
19609 guardrails,
19610 warnings,
19611 }
19612}
19613
19614pub(crate) fn build_session_review_next_context_budget_report(
19615 report: &session_review::SessionReviewReport,
19616 budget: ResponseBudget,
19617 ontology: Option<&TagOntologyPreviewContext>,
19618) -> SessionReviewNextContextBudgetReport {
19619 let max_items = budget.preview_items();
19620 let max_bytes = budget.preview_bytes();
19621 let follow_up_items = budget.follow_up_items();
19622 let next_token_actions = build_next_token_actions(report, max_items, max_bytes);
19623 let actionable_guardrail_failures = next_token_actions
19624 .iter()
19625 .map(|action| format!("guardrail:{}", action.kind))
19626 .collect::<BTreeSet<_>>();
19627 let unresolved_failures = report
19628 .next_context
19629 .unresolved_failures
19630 .iter()
19631 .filter(|entry| !actionable_guardrail_failures.contains(&entry.kind))
19632 .collect::<Vec<_>>();
19633 let unresolved_failure_total = unresolved_failures.len();
19634 SessionReviewNextContextBudgetReport {
19635 contract_version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
19636 target: report.next_context.target.clone(),
19637 max_items,
19638 max_bytes,
19639 prompt_target_total: report.next_context.active_prompt_targets.len(),
19640 touched_file_total: report.next_context.touched_files.len(),
19641 touched_symbol_total: report.next_context.touched_symbols.len(),
19642 unresolved_failure_total,
19643 truncated: report.next_context.active_prompt_targets.len() > max_items
19644 || report.next_context.touched_files.len() > max_items
19645 || report.next_context.touched_symbols.len() > max_items
19646 || unresolved_failure_total > max_items
19647 || report.next_context.next_digest_commands.len() > follow_up_items,
19648 prompt_targets: report
19649 .next_context
19650 .active_prompt_targets
19651 .iter()
19652 .take(max_items)
19653 .map(|entry| truncate_for_budget(entry, max_bytes))
19654 .collect(),
19655 touched_files: report
19656 .next_context
19657 .touched_files
19658 .iter()
19659 .take(max_items)
19660 .map(|entry| truncate_for_budget(entry, max_bytes))
19661 .collect(),
19662 touched_symbols: report
19663 .next_context
19664 .touched_symbols
19665 .iter()
19666 .take(max_items)
19667 .map(|entry| truncate_for_budget(entry, max_bytes))
19668 .collect(),
19669 touched_symbol_refs: report
19670 .next_context
19671 .touched_symbols
19672 .iter()
19673 .take(max_items)
19674 .map(|entry| {
19675 build_compact_symbol_ref_with_ontology(
19676 "ncsym",
19677 &format!("{}:{}", report.next_context.target, entry),
19678 entry,
19679 None,
19680 max_bytes,
19681 ontology,
19682 )
19683 })
19684 .collect(),
19685 unresolved_failures: unresolved_failures
19686 .iter()
19687 .take(max_items)
19688 .map(|entry| SessionReviewBudgetFailurePreview {
19689 handle: stable_handle("snf", &format!("{}:{}", entry.kind, entry.message)),
19690 kind: entry.kind.clone(),
19691 message: truncate_for_budget(&entry.message, max_bytes),
19692 occurrences: entry.occurrences,
19693 command: entry
19694 .command
19695 .as_ref()
19696 .map(|command| truncate_for_budget(command, max_bytes)),
19697 session_path: entry
19698 .session_path
19699 .as_ref()
19700 .map(|path| truncate_for_budget(path, max_bytes)),
19701 expand: format!(
19702 "tsift session-review {} --next-context --json",
19703 shell_quote(&report.target)
19704 ),
19705 })
19706 .collect(),
19707 next_token_actions,
19708 next_digest_commands: report
19709 .next_context
19710 .next_digest_commands
19711 .iter()
19712 .take(follow_up_items)
19713 .cloned()
19714 .collect(),
19715 }
19716}
19717
19718fn build_next_token_actions(
19719 report: &session_review::SessionReviewReport,
19720 max_items: usize,
19721 max_bytes: usize,
19722) -> Vec<SessionReviewNextTokenAction> {
19723 let target = shell_quote(&report.target);
19724 let doc_command_target =
19725 (report.target_kind == "file" && report.target.ends_with(".md")).then_some(target.clone());
19726 let mut actions = report
19727 .guardrails
19728 .iter()
19729 .filter_map(|guardrail| {
19730 let priority = token_action_priority(&guardrail.kind)?;
19731 let compact_command = doc_command_target
19732 .as_ref()
19733 .map(|target| format!("agent-doc compact {target} --commit"));
19734 let restart_command = doc_command_target
19735 .as_ref()
19736 .map(|target| format!("agent-doc start {target}"));
19737 Some(SessionReviewNextTokenAction {
19738 priority,
19739 kind: guardrail.kind.clone(),
19740 severity: guardrail.severity.clone(),
19741 message: truncate_for_budget(&guardrail.message, max_bytes),
19742 guidance: truncate_for_budget(&guardrail.guidance, max_bytes),
19743 compact_command,
19744 restart_command,
19745 digest_commands: vec![
19746 format!(
19747 "tsift --envelope session-review {target} --next-context --budget normal"
19748 ),
19749 format!("tsift --envelope context-pack {target} --budget normal"),
19750 ],
19751 })
19752 })
19753 .collect::<Vec<_>>();
19754 actions.sort_by(|left, right| {
19755 left.priority
19756 .cmp(&right.priority)
19757 .then(left.kind.cmp(&right.kind))
19758 });
19759 actions.dedup_by(|left, right| left.kind == right.kind);
19760 actions.truncate(max_items);
19761 actions
19762}
19763
/// Returns the 1-based priority of a guardrail kind that maps to a token
/// action, or `None` for any other kind.
///
/// Lower numbers sort first: `prompt_budget` (1), `cache_resend` (2),
/// `restart_loop` (3), `noop_closeout` (4).
fn token_action_priority(kind: &str) -> Option<usize> {
    // Ordered from highest to lowest priority; the priority is index + 1.
    const RANKED_KINDS: [&str; 4] = [
        "prompt_budget",
        "cache_resend",
        "restart_loop",
        "noop_closeout",
    ];

    RANKED_KINDS
        .iter()
        .position(|candidate| *candidate == kind)
        .map(|index| index + 1)
}
19773
19774pub(crate) fn print_session_review_budget_human(report: &SessionReviewBudgetReport) {
19775 let latest_total = report
19776 .latest_session_total_tokens
19777 .map(format_compact_count)
19778 .unwrap_or_else(|| "-".to_string());
19779 let latest_largest_turn = report
19780 .latest_session_largest_turn_total_tokens
19781 .map(format_compact_count)
19782 .unwrap_or_else(|| "-".to_string());
19783 println!(
19784 "session-review-budget target:{} kind:{} sessions:{}/{} aggregate_prompt:{} aggregate_cached:{} aggregate_total:{} latest_total:{} latest_largest_turn:{}",
19785 shell_quote(&report.target),
19786 report.target_kind,
19787 report.sessions.len(),
19788 report.sessions_matched,
19789 format_compact_count(report.prompt_tokens),
19790 format_compact_count(report.cached_input_tokens),
19791 format_compact_count(report.total_tokens),
19792 latest_total,
19793 latest_largest_turn
19794 );
19795 for session in &report.sessions {
19796 println!(
19797 "session {} {} total:{} largest_turn:{} prompts:{} fails:{} expand:{}",
19798 session.handle,
19799 session.path,
19800 format_compact_count(session.total_tokens),
19801 format_compact_count(session.largest_turn_total_tokens),
19802 session.prompt_targets,
19803 session.failures,
19804 session.expand
19805 );
19806 }
19807 for prompt in &report.prompt_targets {
19808 println!(
19809 "prompt {} count:{} {} expand:{}",
19810 prompt.handle, prompt.occurrences, prompt.text, prompt.expand
19811 );
19812 }
19813 for failure in &report.failures {
19814 println!(
19815 "fail {} {} count:{} {}{}{} expand:{}",
19816 failure.handle,
19817 failure.kind,
19818 failure.occurrences,
19819 failure.message,
19820 failure
19821 .command
19822 .as_ref()
19823 .map(|command| format!(" command:{command}"))
19824 .unwrap_or_default(),
19825 failure
19826 .session_path
19827 .as_ref()
19828 .map(|path| format!(" session:{path}"))
19829 .unwrap_or_default(),
19830 failure.expand
19831 );
19832 }
19833 for guardrail in &report.guardrails {
19834 println!("guardrail {guardrail}");
19835 }
19836 for warning in &report.warnings {
19837 println!("warning {warning}");
19838 }
19839 if report.truncated {
19840 println!(
19841 "budget truncated items:{} bytes:{}",
19842 report.max_items, report.max_bytes
19843 );
19844 }
19845}
19846
19847pub(crate) fn print_session_review_next_context_budget_human(
19848 report: &SessionReviewNextContextBudgetReport,
19849) {
19850 println!(
19851 "next-context-budget target:{} prompts:{}/{} files:{}/{} symbols:{}/{} failures:{}/{}",
19852 shell_quote(&report.target),
19853 report.prompt_targets.len(),
19854 report.prompt_target_total,
19855 report.touched_files.len(),
19856 report.touched_file_total,
19857 report.touched_symbols.len(),
19858 report.touched_symbol_total,
19859 report.unresolved_failures.len(),
19860 report.unresolved_failure_total
19861 );
19862 for prompt in &report.prompt_targets {
19863 println!("prompt {prompt}");
19864 }
19865 for file in &report.touched_files {
19866 println!("file {file}");
19867 }
19868 for symbol in &report.touched_symbols {
19869 if let Some(symbol_ref) = report
19870 .touched_symbol_refs
19871 .iter()
19872 .find(|entry| entry.name == *symbol)
19873 {
19874 println!(
19875 "symbol {}",
19876 format_symbol_preview_line(
19877 &symbol_ref.handle,
19878 &symbol_ref.name,
19879 symbol_ref.tag_alias.as_deref()
19880 )
19881 );
19882 } else {
19883 println!("symbol {symbol}");
19884 }
19885 }
19886 for failure in &report.unresolved_failures {
19887 println!(
19888 "fail {} {} count:{} {}{}{} expand:{}",
19889 failure.handle,
19890 failure.kind,
19891 failure.occurrences,
19892 failure.message,
19893 failure
19894 .command
19895 .as_ref()
19896 .map(|command| format!(" command:{command}"))
19897 .unwrap_or_default(),
19898 failure
19899 .session_path
19900 .as_ref()
19901 .map(|path| format!(" session:{path}"))
19902 .unwrap_or_default(),
19903 failure.expand
19904 );
19905 }
19906 for action in &report.next_token_actions {
19907 println!(
19908 "token-action {} {} severity:{} {} guidance:{}",
19909 action.priority, action.kind, action.severity, action.message, action.guidance
19910 );
19911 if let Some(command) = &action.compact_command {
19912 println!("token-action-command {} compact {}", action.kind, command);
19913 }
19914 if let Some(command) = &action.restart_command {
19915 println!("token-action-command {} restart {}", action.kind, command);
19916 }
19917 for command in &action.digest_commands {
19918 println!("token-action-command {} digest {}", action.kind, command);
19919 }
19920 }
19921 for command in &report.next_digest_commands {
19922 println!("next {command}");
19923 }
19924 if report.truncated {
19925 println!(
19926 "budget truncated items:{} bytes:{}",
19927 report.max_items, report.max_bytes
19928 );
19929 }
19930}
19931
19932fn effective_context_budget(budget: ResponseBudget) -> ResponseBudget {
19933 ResponseBudget::new(Some(budget.preview_items()), Some(budget.preview_bytes()))
19934}
19935
19936fn build_context_summary_refs<'a>(
19937 prefix: &str,
19938 key_scope: &str,
19939 file_path: Option<&str>,
19940 snippets: impl Iterator<Item = (&'a str, &'a str)>,
19941 budget: ResponseBudget,
19942 ontology: Option<&TagOntologyPreviewContext>,
19943) -> Vec<ContextPackSummaryRefPreview> {
19944 let max_items = budget.preview_items();
19945 let max_bytes = budget.preview_bytes();
19946 snippets
19947 .take(max_items)
19948 .map(|(symbol, summary)| {
19949 let tag_alias = tag_alias_from_name(symbol);
19950 let ontology_refs = tag_alias
19951 .as_deref()
19952 .map(|alias| ontology_refs_for_alias(ontology, alias))
19953 .unwrap_or_default();
19954 let expand = match file_path {
19955 Some(path) => format!("tsift summarize --file {}", shell_quote(path)),
19956 None => format!("tsift summarize {}", shell_quote(symbol)),
19957 };
19958 ContextPackSummaryRefPreview {
19959 handle: stable_handle(prefix, &format!("{key_scope}:{symbol}:{summary}")),
19960 symbol: truncate_for_budget(symbol, max_bytes),
19961 tag_alias: tag_alias.map(|alias| truncate_for_budget(&alias, max_bytes)),
19962 ontology_refs,
19963 summary: truncate_for_budget(summary, max_bytes),
19964 expand,
19965 }
19966 })
19967 .collect()
19968}
19969
19970fn build_context_pack_diff_preview(
19971 report: &diff_digest::DiffDigestReport,
19972 budget: ResponseBudget,
19973 ontology: Option<&TagOntologyPreviewContext>,
19974) -> ContextPackDiffPreview {
19975 let max_items = budget.preview_items();
19976 let max_bytes = budget.preview_bytes();
19977 ContextPackDiffPreview {
19978 mode: diff_digest_mode_label(report.mode).to_string(),
19979 files_changed: report.files_changed,
19980 files_with_current_summaries: report.files_with_current_summaries,
19981 symbols_touched: report.symbols_touched,
19982 call_edges_added: report.call_edges_added,
19983 call_edges_removed: report.call_edges_removed,
19984 truncated: report.files.len() > max_items,
19985 files: report
19986 .files
19987 .iter()
19988 .take(max_items)
19989 .map(|file| ContextPackDiffFilePreview {
19990 path: truncate_for_budget(&file.path, max_bytes),
19991 status: diff_digest_status_label(file.status).to_string(),
19992 touched_symbols: file
19993 .touched_symbols
19994 .iter()
19995 .take(max_items)
19996 .map(|symbol| truncate_for_budget(symbol, max_bytes))
19997 .collect(),
19998 touched_symbol_refs: file
19999 .touched_symbols
20000 .iter()
20001 .take(max_items)
20002 .map(|symbol| {
20003 build_compact_symbol_ref_with_ontology(
20004 "cdsym",
20005 &format!("{}:{}", file.path, symbol),
20006 symbol,
20007 None,
20008 max_bytes,
20009 ontology,
20010 )
20011 })
20012 .collect(),
20013 summary_state: diff_digest_summary_label(file.summary_state).to_string(),
20014 summary_refs: build_context_summary_refs(
20015 "cdsum",
20016 &file.path,
20017 Some(&file.path),
20018 file.current_summaries
20019 .iter()
20020 .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20021 budget,
20022 ontology,
20023 ),
20024 added_call_edges: file.added_call_edges.len(),
20025 removed_call_edges: file.removed_call_edges.len(),
20026 warnings: file
20027 .warnings
20028 .iter()
20029 .take(max_items)
20030 .map(|warning| truncate_for_budget(warning, max_bytes))
20031 .collect(),
20032 })
20033 .collect(),
20034 }
20035}
20036
20037fn enrich_next_context_with_diff_symbols(
20038 next_context: &mut SessionReviewNextContextBudgetReport,
20039 diff_digest: &ContextPackDiffPreview,
20040 ontology: Option<&TagOntologyPreviewContext>,
20041) {
20042 let mut symbols = next_context.touched_symbols.clone();
20043 for file in &diff_digest.files {
20044 for symbol in &file.touched_symbol_refs {
20045 if !symbols.iter().any(|existing| existing == &symbol.name) {
20046 symbols.push(symbol.name.clone());
20047 }
20048 }
20049 }
20050
20051 if symbols.is_empty() {
20052 return;
20053 }
20054
20055 let max_items = next_context.max_items;
20056 let max_bytes = next_context.max_bytes;
20057 next_context.touched_symbol_total = next_context.touched_symbol_total.max(symbols.len());
20058 next_context.truncated |= symbols.len() > max_items;
20059 next_context.touched_symbols = symbols
20060 .iter()
20061 .take(max_items)
20062 .map(|entry| truncate_for_budget(entry, max_bytes))
20063 .collect();
20064 next_context.touched_symbol_refs = symbols
20065 .iter()
20066 .take(max_items)
20067 .map(|entry| {
20068 build_compact_symbol_ref_with_ontology(
20069 "ncsym",
20070 &format!("{}:{}", next_context.target, entry),
20071 entry,
20072 None,
20073 max_bytes,
20074 ontology,
20075 )
20076 })
20077 .collect();
20078}
20079
20080fn context_exploration_source_window(
20081 root: &Path,
20082 file: &str,
20083 reason: String,
20084 budget: &ExplorationBudget,
20085) -> ExplorationSourceWindow {
20086 let start = 1;
20087 let end = budget.lines_per_window;
20088 ExplorationSourceWindow {
20089 handle: stable_handle("xwin", &format!("context:{file}:{start}:{end}:{reason}")),
20090 file: file.to_string(),
20091 start,
20092 end,
20093 reason,
20094 expand: source_read_command(root, file, start, budget.lines_per_window),
20095 }
20096}
20097
20098fn build_context_pack_exploration_packet(
20099 root: &Path,
20100 next_context: &SessionReviewNextContextBudgetReport,
20101 diff_digest: &ContextPackDiffPreview,
20102) -> ExplorationPacket {
20103 let node_count = diff_digest
20104 .files_changed
20105 .saturating_add(next_context.touched_file_total)
20106 .saturating_add(next_context.touched_symbol_total);
20107 let edge_count = diff_digest
20108 .call_edges_added
20109 .saturating_add(diff_digest.call_edges_removed)
20110 .saturating_add(
20111 diff_digest
20112 .files
20113 .iter()
20114 .map(|file| file.touched_symbol_refs.len())
20115 .sum::<usize>(),
20116 );
20117 let budget = exploration_budget_for_counts(node_count, edge_count);
20118
20119 let mut relationship_map = Vec::new();
20120 for file in &diff_digest.files {
20121 for symbol in &file.touched_symbol_refs {
20122 if relationship_map.len() >= budget.relationship_limit {
20123 break;
20124 }
20125 relationship_map.push(ExplorationRelation {
20126 from: format!("file:{}", file.path),
20127 relation: "touches_symbol".to_string(),
20128 to: format!("symbol:{}", symbol.name),
20129 label: Some(format!("{} diff", file.status)),
20130 });
20131 }
20132 }
20133 for symbol in &next_context.touched_symbol_refs {
20134 if relationship_map.len() >= budget.relationship_limit {
20135 break;
20136 }
20137 relationship_map.push(ExplorationRelation {
20138 from: format!("context:{}", next_context.target),
20139 relation: "mentions_symbol".to_string(),
20140 to: format!("symbol:{}", symbol.name),
20141 label: Some("session next-context symbol".to_string()),
20142 });
20143 }
20144
20145 let mut source_windows = Vec::new();
20146 let mut seen_files = BTreeSet::new();
20147 for file in &diff_digest.files {
20148 if source_windows.len() >= budget.max_source_windows {
20149 break;
20150 }
20151 if seen_files.insert(file.path.clone()) {
20152 source_windows.push(context_exploration_source_window(
20153 root,
20154 &file.path,
20155 format!("changed file ({})", file.status),
20156 &budget,
20157 ));
20158 }
20159 }
20160 for file in &next_context.touched_files {
20161 if source_windows.len() >= budget.max_source_windows {
20162 break;
20163 }
20164 if seen_files.insert(file.clone()) {
20165 source_windows.push(context_exploration_source_window(
20166 root,
20167 file,
20168 "session touched file".to_string(),
20169 &budget,
20170 ));
20171 }
20172 }
20173
20174 let worker_seeds = if next_context.prompt_targets.is_empty() {
20175 next_context.next_digest_commands.clone()
20176 } else {
20177 next_context.prompt_targets.clone()
20178 };
20179 let mut worker_context = Vec::new();
20180 for (idx, prompt) in worker_seeds
20181 .iter()
20182 .take(budget.relationship_limit)
20183 .enumerate()
20184 {
20185 let summary = truncate_for_budget(prompt, next_context.max_bytes);
20186 worker_context.push(ExplorationWorkerContext {
20187 handle: stable_handle(
20188 "xwrk",
20189 &format!("{}:{}:{}", next_context.target, idx, prompt),
20190 ),
20191 target: next_context.target.clone(),
20192 summary,
20193 expand: format!(
20194 "tsift --envelope context-pack {} --budget normal",
20195 shell_quote(&next_context.target)
20196 ),
20197 });
20198 }
20199
20200 ExplorationPacket {
20201 budget,
20202 relationship_map,
20203 source_windows,
20204 worker_context,
20205 no_reread_guidance:
20206 "Use worker_context for bounded handoff scope, then source_windows expand commands before broad file reads; relationship_map explains why each window is in the handoff."
20207 .to_string(),
20208 }
20209}
20210
20211fn exploration_ref_id(label: &str) -> String {
20212 stable_handle("xref", label)
20213}
20214
/// Converts an exploration packet into a graph projection of nodes and edges.
///
/// Three node kinds are produced: `exploration_ref` (one per distinct relation
/// endpoint, file ref, or context ref label), `source_handle` (one per source
/// window), and `worker_context` (one per worker entry). Nodes are collected in a
/// `BTreeMap` keyed by id, so the returned node list is ordered by id and holds at
/// most one node per id. Edges are returned in the order they are created.
///
/// # Errors
///
/// Propagates any error from `edge_with_content_freshness` or
/// `node_with_content_freshness`.
fn context_pack_exploration_projection(packet: &ExplorationPacket) -> Result<GraphProjection> {
    let provenance = GraphProvenance::new("tsift.context-pack", "exploration");
    let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
    let mut edges = Vec::new();

    // Relationship map: one `exploration_ref` node per endpoint label plus one edge
    // per relation, using the relation name as the edge kind.
    for relation in &packet.relationship_map {
        for label in [&relation.from, &relation.to] {
            let id = exploration_ref_id(label);
            // Keep the first node registered for an id; repeated labels are no-ops.
            nodes.entry(id.clone()).or_insert_with(|| {
                SubstrateGraphNode::new(id, "exploration_ref", label.clone())
                    .with_property("label", label.clone())
                    .with_provenance(provenance.clone())
            });
        }
        let mut edge = SubstrateGraphEdge::new(
            exploration_ref_id(&relation.from),
            exploration_ref_id(&relation.to),
            relation.relation.clone(),
        )
        .with_provenance(provenance.clone());
        if let Some(label) = &relation.label {
            edge = edge.with_property("label", label.clone());
        }
        edges.push(edge_with_content_freshness(edge)?);
    }

    // Source windows: a `source_handle` node carrying every window field as a string
    // property, linked to the `file:<path>` ref node by an `expands_source` edge.
    for window in &packet.source_windows {
        let label = format!("{}:{}-{}", window.file, window.start, window.end);
        let node = SubstrateGraphNode::new(window.handle.clone(), "source_handle", label)
            .with_property("handle", window.handle.clone())
            .with_property("file", window.file.clone())
            .with_property("start", window.start.to_string())
            .with_property("end", window.end.to_string())
            .with_property("reason", window.reason.clone())
            .with_property("expand", window.expand.clone())
            .with_provenance(provenance.clone());
        // `insert` replaces any node already stored under this handle.
        nodes.insert(window.handle.clone(), node_with_content_freshness(node)?);

        let file_ref = format!("file:{}", window.file);
        let file_ref_id = exploration_ref_id(&file_ref);
        nodes.entry(file_ref_id.clone()).or_insert_with(|| {
            SubstrateGraphNode::new(file_ref_id.clone(), "exploration_ref", file_ref.clone())
                .with_property("label", file_ref.clone())
                .with_provenance(provenance.clone())
        });
        let edge = SubstrateGraphEdge::new(window.handle.clone(), file_ref_id, "expands_source")
            .with_property("label", window.reason.clone())
            .with_provenance(provenance.clone());
        edges.push(edge_with_content_freshness(edge)?);
    }

    // Worker context: a `worker_context` node per entry, a `scopes_context` edge to
    // its `context:<target>` ref node, and a `scopes_source` edge to every source
    // window in the packet.
    for worker in &packet.worker_context {
        let node = SubstrateGraphNode::new(
            worker.handle.clone(),
            "worker_context",
            worker.summary.clone(),
        )
        .with_property("handle", worker.handle.clone())
        .with_property("target", worker.target.clone())
        .with_property("summary", worker.summary.clone())
        .with_property("expand", worker.expand.clone())
        .with_provenance(provenance.clone());
        nodes.insert(worker.handle.clone(), node_with_content_freshness(node)?);

        let target_ref = format!("context:{}", worker.target);
        let target_ref_id = exploration_ref_id(&target_ref);
        nodes.entry(target_ref_id.clone()).or_insert_with(|| {
            SubstrateGraphNode::new(target_ref_id.clone(), "exploration_ref", target_ref.clone())
                .with_property("label", target_ref.clone())
                .with_provenance(provenance.clone())
        });
        edges.push(edge_with_content_freshness(
            SubstrateGraphEdge::new(worker.handle.clone(), target_ref_id, "scopes_context")
                .with_property("label", "bounded worker context".to_string())
                .with_provenance(provenance.clone()),
        )?);

        for window in &packet.source_windows {
            edges.push(edge_with_content_freshness(
                SubstrateGraphEdge::new(
                    worker.handle.clone(),
                    window.handle.clone(),
                    "scopes_source",
                )
                .with_property("label", window.reason.clone())
                .with_provenance(provenance.clone()),
            )?);
        }
    }

    // The `exploration_ref` nodes above were built without going through
    // `node_with_content_freshness`; back-fill every node whose freshness is unset.
    let mut nodes = nodes.into_values().collect::<Vec<_>>();
    for node in &mut nodes {
        if node.freshness.is_none() {
            let fresh = node_with_content_freshness(node.clone())?;
            *node = fresh;
        }
    }

    Ok(GraphProjection { nodes, edges })
}
20315
20316fn source_window_from_graph_node(node: SubstrateGraphNode) -> Result<ExplorationSourceWindow> {
20317 let file = node
20318 .properties
20319 .get("file")
20320 .cloned()
20321 .with_context(|| format!("source handle {} missing file property", node.id))?;
20322 let start = node
20323 .properties
20324 .get("start")
20325 .with_context(|| format!("source handle {} missing start property", node.id))?
20326 .parse::<usize>()
20327 .with_context(|| format!("source handle {} has invalid start", node.id))?;
20328 let end = node
20329 .properties
20330 .get("end")
20331 .with_context(|| format!("source handle {} missing end property", node.id))?
20332 .parse::<usize>()
20333 .with_context(|| format!("source handle {} has invalid end", node.id))?;
20334 Ok(ExplorationSourceWindow {
20335 handle: node
20336 .properties
20337 .get("handle")
20338 .cloned()
20339 .unwrap_or_else(|| node.id.clone()),
20340 file,
20341 start,
20342 end,
20343 reason: node
20344 .properties
20345 .get("reason")
20346 .cloned()
20347 .unwrap_or_else(|| "source context".to_string()),
20348 expand: node.properties.get("expand").cloned().unwrap_or_default(),
20349 })
20350}
20351
20352fn materialize_context_pack_exploration_packet(
20353 root: &Path,
20354 packet: ExplorationPacket,
20355) -> Result<ExplorationPacket> {
20356 let projection = context_pack_exploration_projection(&packet)?;
20357 let graph_db = graph_substrate_db_path(root, None);
20358 let mut store = SqliteGraphStore::open(&graph_db)?;
20359 store.upsert_projection(&projection)?;
20360
20361 let mut source_windows = Vec::new();
20362 for window in &packet.source_windows {
20363 let node = store
20364 .node(&window.handle)?
20365 .with_context(|| format!("source handle {} was not materialized", window.handle))?;
20366 source_windows.push(source_window_from_graph_node(node)?);
20367 }
20368
20369 let mut relationship_map = Vec::new();
20370 for relation in &packet.relationship_map {
20371 let from_id = exploration_ref_id(&relation.from);
20372 let to_id = exploration_ref_id(&relation.to);
20373 let from = store
20374 .node(&from_id)?
20375 .with_context(|| format!("exploration ref {} was not materialized", relation.from))?;
20376 let to = store
20377 .node(&to_id)?
20378 .with_context(|| format!("exploration ref {} was not materialized", relation.to))?;
20379 let edge = store
20380 .outgoing_edges(&from_id, Some(&relation.relation))?
20381 .into_iter()
20382 .find(|edge| edge.to_id == to_id)
20383 .with_context(|| {
20384 format!(
20385 "exploration relation {} -> {} ({}) was not materialized",
20386 relation.from, relation.to, relation.relation
20387 )
20388 })?;
20389 relationship_map.push(ExplorationRelation {
20390 from: from.label,
20391 relation: edge.kind,
20392 to: to.label,
20393 label: edge.properties.get("label").cloned(),
20394 });
20395 }
20396
20397 Ok(ExplorationPacket {
20398 budget: packet.budget,
20399 relationship_map,
20400 source_windows,
20401 worker_context: packet.worker_context,
20402 no_reread_guidance: packet.no_reread_guidance,
20403 })
20404}
20405
20406fn build_context_pack_test_preview(
20407 report: &test_digest::TestDigestReport,
20408 budget: ResponseBudget,
20409 ontology: Option<&TagOntologyPreviewContext>,
20410) -> ContextPackTestPreview {
20411 let max_items = budget.preview_items();
20412 let max_bytes = budget.preview_bytes();
20413 ContextPackTestPreview {
20414 runner: report.runner.clone(),
20415 failures: report.failures,
20416 grouped_failures: report.grouped_failures,
20417 counts: ContextPackTestCounts {
20418 passed: report.counts.passed,
20419 failed: report.counts.failed,
20420 skipped: report.counts.skipped,
20421 },
20422 truncated: report.failure_groups.len() > max_items || report.warnings.len() > max_items,
20423 failure_groups: report
20424 .failure_groups
20425 .iter()
20426 .take(max_items)
20427 .map(|failure| ContextPackTestFailurePreview {
20428 tests: failure
20429 .tests
20430 .iter()
20431 .take(max_items)
20432 .map(|test| truncate_for_budget(test, max_bytes))
20433 .collect(),
20434 message: truncate_for_budget(&failure.message, max_bytes),
20435 path: failure
20436 .path
20437 .as_ref()
20438 .map(|path| truncate_for_budget(path, max_bytes)),
20439 line: failure.line,
20440 occurrences: failure.occurrences,
20441 summary_state: test_digest_summary_label(failure.summary_state).to_string(),
20442 summary_refs: build_context_summary_refs(
20443 "ctsum",
20444 failure.path.as_deref().unwrap_or("test-failure"),
20445 failure.path.as_deref(),
20446 failure
20447 .current_summaries
20448 .iter()
20449 .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20450 budget,
20451 ontology,
20452 ),
20453 })
20454 .collect(),
20455 warnings: report
20456 .warnings
20457 .iter()
20458 .take(max_items)
20459 .map(|warning| truncate_for_budget(warning, max_bytes))
20460 .collect(),
20461 }
20462}
20463
20464fn build_context_pack_log_preview(
20465 report: &log_digest::LogDigestReport,
20466 budget: ResponseBudget,
20467 ontology: Option<&TagOntologyPreviewContext>,
20468) -> ContextPackLogPreview {
20469 let max_items = budget.preview_items();
20470 let max_bytes = budget.preview_bytes();
20471 ContextPackLogPreview {
20472 total_lines: report.total_lines,
20473 non_empty_lines: report.non_empty_lines,
20474 signal_groups: report.signal_groups,
20475 repeated_line_groups: report.repeated_line_groups,
20476 file_ref_groups: report.file_ref_groups,
20477 symbol_ref_groups: report.symbol_ref_groups,
20478 stack_groups: report.stack_groups,
20479 truncated: report.signals.len() > max_items
20480 || report.repeated_lines.len() > max_items
20481 || report.file_refs.len() > max_items
20482 || report.symbol_refs.len() > max_items
20483 || report.warnings.len() > max_items,
20484 signals: report
20485 .signals
20486 .iter()
20487 .take(max_items)
20488 .map(|signal| ContextPackLogSignalPreview {
20489 severity: signal.severity.clone(),
20490 message: truncate_for_budget(&signal.message, max_bytes),
20491 path: signal
20492 .path
20493 .as_ref()
20494 .map(|path| truncate_for_budget(path, max_bytes)),
20495 line: signal.line,
20496 occurrences: signal.occurrences,
20497 summary_state: log_digest_summary_label(signal.summary_state).to_string(),
20498 summary_refs: build_context_summary_refs(
20499 "clsum",
20500 signal.path.as_deref().unwrap_or("log-signal"),
20501 signal.path.as_deref(),
20502 signal
20503 .current_summaries
20504 .iter()
20505 .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20506 budget,
20507 ontology,
20508 ),
20509 })
20510 .collect(),
20511 repeated_lines: report
20512 .repeated_lines
20513 .iter()
20514 .take(max_items)
20515 .map(|line| ContextPackLogRepeatedLinePreview {
20516 line: truncate_for_budget(&line.line, max_bytes),
20517 occurrences: line.occurrences,
20518 })
20519 .collect(),
20520 file_refs: report
20521 .file_refs
20522 .iter()
20523 .take(max_items)
20524 .map(|file| ContextPackLogFileRefPreview {
20525 path: truncate_for_budget(&file.path, max_bytes),
20526 line: file.line,
20527 occurrences: file.occurrences,
20528 summary_state: log_digest_summary_label(file.summary_state).to_string(),
20529 summary_refs: build_context_summary_refs(
20530 "clfsum",
20531 &file.path,
20532 Some(&file.path),
20533 file.current_summaries
20534 .iter()
20535 .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20536 budget,
20537 ontology,
20538 ),
20539 })
20540 .collect(),
20541 symbol_refs: report
20542 .symbol_refs
20543 .iter()
20544 .take(max_items)
20545 .map(|symbol| ContextPackLogSymbolRefPreview {
20546 handle: stable_handle("clsym", &symbol.symbol),
20547 symbol: truncate_for_budget(&symbol.symbol, max_bytes),
20548 tag_alias: tag_alias_from_name(&symbol.symbol)
20549 .map(|alias| truncate_for_budget(&alias, max_bytes)),
20550 ontology_refs: tag_alias_from_name(&symbol.symbol)
20551 .as_deref()
20552 .map(|alias| ontology_refs_for_alias(ontology, alias))
20553 .unwrap_or_default(),
20554 occurrences: symbol.occurrences,
20555 summary_state: log_digest_summary_label(symbol.summary_state).to_string(),
20556 summary_refs: build_context_summary_refs(
20557 "clssum",
20558 &symbol.symbol,
20559 None,
20560 symbol
20561 .current_summaries
20562 .iter()
20563 .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20564 budget,
20565 ontology,
20566 ),
20567 })
20568 .collect(),
20569 warnings: report
20570 .warnings
20571 .iter()
20572 .take(max_items)
20573 .map(|warning| truncate_for_budget(warning, max_bytes))
20574 .collect(),
20575 }
20576}
20577
20578fn enrich_log_preview_with_diff_symbols(
20579 log_preview: &mut ContextPackLogPreview,
20580 diff_digest: &ContextPackDiffPreview,
20581 ontology: Option<&TagOntologyPreviewContext>,
20582) {
20583 if !log_preview.symbol_refs.is_empty() {
20584 return;
20585 }
20586
20587 let mut symbols = Vec::new();
20588 for file in &diff_digest.files {
20589 for symbol in &file.touched_symbol_refs {
20590 if !symbols
20591 .iter()
20592 .any(|existing: &String| existing == &symbol.name)
20593 {
20594 symbols.push(symbol.name.clone());
20595 }
20596 }
20597 }
20598
20599 if symbols.is_empty() {
20600 return;
20601 }
20602
20603 log_preview.symbol_ref_groups = log_preview.symbol_ref_groups.max(symbols.len());
20604 log_preview.symbol_refs = symbols
20605 .into_iter()
20606 .map(|symbol| ContextPackLogSymbolRefPreview {
20607 handle: stable_handle("clsym", &symbol),
20608 symbol: symbol.clone(),
20609 tag_alias: tag_alias_from_name(&symbol),
20610 ontology_refs: tag_alias_from_name(&symbol)
20611 .as_deref()
20612 .map(|alias| ontology_refs_for_alias(ontology, alias))
20613 .unwrap_or_default(),
20614 occurrences: 1,
20615 summary_state: "unavailable".to_string(),
20616 summary_refs: Vec::new(),
20617 })
20618 .collect();
20619}
20620
20621fn insert_ontology_refs(
20622 refs: &mut BTreeMap<String, CompactOntologyRefPreview>,
20623 candidates: &[CompactOntologyRefPreview],
20624) {
20625 for candidate in candidates {
20626 refs.entry(candidate.handle.clone())
20627 .or_insert_with(|| candidate.clone());
20628 }
20629}
20630
20631fn collect_context_pack_ontology_refs(
20632 next_context: &SessionReviewNextContextBudgetReport,
20633 diff_digest: &ContextPackDiffPreview,
20634 test_digest: &ContextPackOptionalSection<ContextPackTestPreview>,
20635 log_digest: &ContextPackOptionalSection<ContextPackLogPreview>,
20636) -> Vec<CompactOntologyRefPreview> {
20637 let mut refs = BTreeMap::new();
20638 for symbol in &next_context.touched_symbol_refs {
20639 insert_ontology_refs(&mut refs, &symbol.ontology_refs);
20640 }
20641 for file in &diff_digest.files {
20642 for symbol in &file.touched_symbol_refs {
20643 insert_ontology_refs(&mut refs, &symbol.ontology_refs);
20644 }
20645 for summary in &file.summary_refs {
20646 insert_ontology_refs(&mut refs, &summary.ontology_refs);
20647 }
20648 }
20649 if let Some(test) = &test_digest.report {
20650 for failure in &test.failure_groups {
20651 for summary in &failure.summary_refs {
20652 insert_ontology_refs(&mut refs, &summary.ontology_refs);
20653 }
20654 }
20655 }
20656 if let Some(log) = &log_digest.report {
20657 for signal in &log.signals {
20658 for summary in &signal.summary_refs {
20659 insert_ontology_refs(&mut refs, &summary.ontology_refs);
20660 }
20661 }
20662 for file in &log.file_refs {
20663 for summary in &file.summary_refs {
20664 insert_ontology_refs(&mut refs, &summary.ontology_refs);
20665 }
20666 }
20667 for symbol in &log.symbol_refs {
20668 insert_ontology_refs(&mut refs, &symbol.ontology_refs);
20669 for summary in &symbol.summary_refs {
20670 insert_ontology_refs(&mut refs, &summary.ontology_refs);
20671 }
20672 }
20673 }
20674 refs.into_values().collect()
20675}
20676
20677pub(crate) fn build_context_pack_report(
20678 path: &Path,
20679 test_input: Option<&Path>,
20680 runner: Option<&str>,
20681 log_input: Option<&Path>,
20682 budget: ResponseBudget,
20683) -> Result<ContextPackReport> {
20684 Ok(build_context_pack_report_with_profile(path, test_input, runner, log_input, budget)?.0)
20685}
20686
/// Builds the full context-pack report for `path` together with per-phase timings.
///
/// * `path` - target passed to the session review, the index gate, and graph
///   orchestration.
/// * `test_input` - optional file holding captured test output; when given, a test
///   digest preview is included.
/// * `runner` - optional test runner name forwarded to the test digest and echoed
///   in the suggested command.
/// * `log_input` - optional file holding captured log output; when given, a log
///   digest preview is included.
/// * `budget` - response budget; normalized through `effective_context_budget`
///   before use.
///
/// Returns the report plus the list of phase timings recorded while building it.
///
/// # Errors
///
/// Fails when the session review, diff digest, test digest, log digest, exploration
/// materialization, or graph orchestration fails, when a provided input file cannot
/// be read, or when a provided input file contains only whitespace.
fn build_context_pack_report_with_profile(
    path: &Path,
    test_input: Option<&Path>,
    runner: Option<&str>,
    log_input: Option<&Path>,
    budget: ResponseBudget,
) -> Result<(ContextPackReport, Vec<GraphDbBackendEvalPhaseTiming>)> {
    // Guard held until the function returns.
    // NOTE(review): presumably scopes index inspection state; confirm against `index`.
    let _inspect_scope = index::InspectScopeGuard::new();
    let budget = effective_context_budget(budget);
    let mut phases = Vec::new();

    // Phase: session review, recorded as one total plus its reported sub-phases.
    let session_review_started = Instant::now();
    let (review, session_review_sub_phases) = session_review::compute_with_phases(path)?;
    let session_review_total_micros = session_review_started.elapsed().as_micros();
    phases.push(graph_db_backend_eval_phase_timing(
        "session_review_compute",
        session_review_total_micros,
        "session-review prompt/touched-file/touched-symbol/failure aggregation for the context-pack handoff",
    ));
    for sub_phase in &session_review_sub_phases {
        phases.push(graph_db_backend_eval_phase_timing(
            &format!("session_review_compute.{}", sub_phase.name),
            sub_phase.duration_micros,
            &sub_phase.detail,
        ));
    }
    let root = PathBuf::from(&review.root);

    // Phase: status/index gate. Three steps are timed individually and also as one
    // enclosing total.
    let status_index_gate_started = Instant::now();
    let mut status_index_gate_sub_phases: Vec<(String, u128, String)> = Vec::with_capacity(3);
    let index_gate_started = Instant::now();
    let (gate, gate_cache_detail) =
        prepare_agent_doc_index_gate_cached(&root, path, None, "context-pack handoff");
    let index_gate_micros = index_gate_started.elapsed().as_micros();
    status_index_gate_sub_phases.push((
        "prepare_agent_doc_index_gate".to_string(),
        index_gate_micros,
        gate_cache_detail,
    ));

    // Status reminders start from the gate diagnostics, followed by `tsift status`
    // reminders.
    let reminders_started = Instant::now();
    let mut status_reminders = gate.diagnostics.clone();
    status_reminders.extend(context_pack_status_reminders(&root));
    let reminders_micros = reminders_started.elapsed().as_micros();
    status_index_gate_sub_phases.push((
        "context_pack_status_reminders".to_string(),
        reminders_micros,
        "tsift status reminders for the cached preparation context".to_string(),
    ));

    let ontology_started = Instant::now();
    let ontology = load_tag_ontology_preview_context(&root);
    let ontology_micros = ontology_started.elapsed().as_micros();
    status_index_gate_sub_phases.push((
        "load_tag_ontology_preview_context".to_string(),
        ontology_micros,
        "tag ontology preview context load".to_string(),
    ));

    let status_index_gate_total_micros = status_index_gate_started.elapsed().as_micros();
    phases.push(graph_db_backend_eval_phase_timing(
        "status_index_gate",
        status_index_gate_total_micros,
        "agent-doc index gate, tsift status reminders, and ontology preview loading",
    ));
    for (name, micros, detail) in &status_index_gate_sub_phases {
        phases.push(graph_db_backend_eval_phase_timing(
            &format!("status_index_gate.{name}"),
            *micros,
            detail,
        ));
    }
    let ontology_ref = ontology.as_ref();
    let mut next_context =
        build_session_review_next_context_budget_report(&review, budget, ontology_ref);

    // Phase: working-tree diff digest. The number of parsed files is capped at the
    // preview item budget.
    let diff_parse_budget = budget.preview_items();
    let diff_digest = graph_db_backend_eval_timed_phase(
        &mut phases,
        "context_pack_diff",
        "working-tree diff digest preview used to enrich next-context symbols",
        || {
            Ok(build_context_pack_diff_preview(
                &diff_digest::compute(
                    &root,
                    diff_digest::DiffDigestOptions {
                        cached: false,
                        revision: None,
                        max_parsed_files: Some(diff_parse_budget),
                    },
                )
                .with_context(|| {
                    format!("computing context-pack diff digest for {}", root.display())
                })?,
                budget,
                ontology_ref,
            ))
        },
    )?;
    enrich_next_context_with_diff_symbols(&mut next_context, &diff_digest, ontology_ref);

    // Optional test section: included only when a test output file was supplied.
    // This step is not recorded as a timed phase.
    let test_digest = match test_input {
        Some(file_path) => {
            let input = fs::read_to_string(file_path)
                .with_context(|| format!("reading test output: {}", file_path.display()))?;
            if input.trim().is_empty() {
                bail!("no test output provided in {}", file_path.display());
            }
            let report = test_digest::compute(&root, &input, runner)?;
            ContextPackOptionalSection {
                status: "included".to_string(),
                // NOTE(review): a path that is not valid UTF-8 passes an empty string
                // to `shell_quote` here, while `source` below uses the lossy display
                // form. Confirm whether the command should use the lossy form too.
                command: format!(
                    "tsift test-digest --path . --input {}{}",
                    shell_quote(file_path.to_str().unwrap_or_default()),
                    runner
                        .map(|value| format!(" --runner {}", shell_quote(value)))
                        .unwrap_or_default()
                ),
                source: Some(file_path.display().to_string()),
                report: Some(build_context_pack_test_preview(
                    &report,
                    budget,
                    ontology_ref,
                )),
            }
        }
        None => ContextPackOptionalSection {
            status: "not_provided".to_string(),
            command: "tsift test-digest --path . < test.log".to_string(),
            source: None,
            report: None,
        },
    };

    // Optional log section: included only when a log file was supplied. When the
    // log itself yielded no symbol refs, the diff's touched symbols are used.
    let log_digest = match log_input {
        Some(file_path) => {
            let input = fs::read_to_string(file_path)
                .with_context(|| format!("reading log output: {}", file_path.display()))?;
            if input.trim().is_empty() {
                bail!("no log output provided in {}", file_path.display());
            }
            let report = log_digest::compute(&root, &input)?;
            let mut preview = build_context_pack_log_preview(&report, budget, ontology_ref);
            enrich_log_preview_with_diff_symbols(&mut preview, &diff_digest, ontology_ref);
            ContextPackOptionalSection {
                status: "included".to_string(),
                // NOTE(review): same non-UTF-8 path caveat as the test-digest command.
                command: format!(
                    "tsift log-digest --path . --input {}",
                    shell_quote(file_path.to_str().unwrap_or_default())
                ),
                source: Some(file_path.display().to_string()),
                report: Some(preview),
            }
        }
        None => ContextPackOptionalSection {
            status: "not_provided".to_string(),
            command: "tsift log-digest --path . < build.log".to_string(),
            source: None,
            report: None,
        },
    };

    let ontology_refs =
        collect_context_pack_ontology_refs(&next_context, &diff_digest, &test_digest, &log_digest);

    // Phase: build the exploration packet and materialize it in the graph store.
    let exploration = graph_db_backend_eval_timed_phase(
        &mut phases,
        "exploration_materialization",
        "context-pack source-window and worker-context exploration packet projection",
        || {
            materialize_context_pack_exploration_packet(
                &root,
                build_context_pack_exploration_packet(&root, &next_context, &diff_digest),
            )
        },
    )?;
    // Phase: graph orchestration, run after the exploration packet is materialized.
    let graph_orchestration = graph_db_backend_eval_timed_phase(
        &mut phases,
        "graph_orchestration",
        "context-pack graph freshness, evidence packet ids, and conflict-matrix follow-up commands",
        || context_pack_graph_orchestration(&root, path, &next_context, &exploration),
    )?;

    Ok((
        ContextPackReport {
            root: review.root,
            target: review.target,
            target_kind: review.target_kind,
            max_items: budget.preview_items(),
            max_bytes: budget.preview_bytes(),
            status_reminders,
            ontology_refs,
            next_context,
            diff_digest,
            test_digest,
            log_digest,
            exploration,
            graph_orchestration,
            // Resume commands come from the original review, not from the
            // budget-limited `next_context` report above.
            resume_commands: review.next_context.next_digest_commands,
        },
        phases,
    ))
}
20897
20898fn context_pack_status_reminders(root: &Path) -> Vec<String> {
20899 status::check_status(root)
20900 .map(|report| report.reminders)
20901 .unwrap_or_default()
20902}
20903
20904fn context_pack_graph_orchestration(
20905 root: &Path,
20906 path: &Path,
20907 next_context: &SessionReviewNextContextBudgetReport,
20908 exploration: &ExplorationPacket,
20909) -> Result<ContextPackGraphOrchestration> {
20910 let graph_db = graph_substrate_db_path(root, None);
20911 let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
20912 .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
20913 let projection_freshness = sqlite_graph_freshness(&store, "root")?;
20914 let mut warnings = projection_freshness.diagnostics.clone();
20915 if let Some(recovery) = store.read_only_recovery() {
20916 warnings.push(graph_db_read_recovery_diagnostic(recovery));
20917 }
20918 let mut targets = next_context
20919 .prompt_targets
20920 .iter()
20921 .flat_map(|prompt| extract_conflict_target_refs(prompt))
20922 .collect::<Vec<_>>();
20923 if targets.is_empty() {
20924 targets.extend(
20925 exploration
20926 .worker_context
20927 .iter()
20928 .flat_map(|worker| extract_conflict_target_refs(&worker.summary)),
20929 );
20930 }
20931 targets = dedupe_preserve_order(targets);
20932
20933 let mut evidence_packet_ids = Vec::new();
20934 let mut resolvable_targets = Vec::new();
20935 for target in &targets {
20936 match graph_db_resolve_evidence_target(&store, target)? {
20937 Some(node) => {
20938 evidence_packet_ids.push(graph_db_evidence_packet_id(
20939 target,
20940 &node,
20941 &projection_freshness,
20942 ));
20943 resolvable_targets.push(target.clone());
20944 }
20945 None => warnings.push(format!("graph evidence target not found: {target}")),
20946 }
20947 }
20948
20949 let mut follow_up_commands = vec![format!(
20950 "tsift graph-db --path {} status --json",
20951 shell_quote(root.to_string_lossy().as_ref())
20952 )];
20953 for target in &resolvable_targets {
20954 follow_up_commands.push(format!(
20955 "tsift graph-db --path {} evidence {} --depth 3 --limit 8 --json",
20956 shell_quote(root.to_string_lossy().as_ref()),
20957 shell_quote(target)
20958 ));
20959 }
20960 if !resolvable_targets.is_empty() {
20961 follow_up_commands.push(format!(
20962 "tsift conflict-matrix --path {} {} --json",
20963 shell_quote(path.to_string_lossy().as_ref()),
20964 resolvable_targets
20965 .iter()
20966 .map(|target| shell_quote(target))
20967 .collect::<Vec<_>>()
20968 .join(" ")
20969 ));
20970 }
20971
20972 let conflict_matrix_decisions = if resolvable_targets.is_empty() {
20973 vec!["no resolvable backlog/job targets found for conflict-matrix".to_string()]
20974 } else {
20975 vec![format!(
20976 "run conflict-matrix before parallel dispatch for {} target(s)",
20977 resolvable_targets.len()
20978 )]
20979 };
20980 let worker_ownership_blocks = exploration
20981 .worker_context
20982 .iter()
20983 .map(|worker| format!("{} scopes {}", worker.handle, worker.summary))
20984 .collect::<Vec<_>>();
20985 let projection_hashes = projection_freshness
20986 .content_hash
20987 .clone()
20988 .into_iter()
20989 .collect();
20990
20991 Ok(ContextPackGraphOrchestration {
20992 contract_version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
20993 graph_db_command: format!(
20994 "tsift graph-db --path {} status --json",
20995 shell_quote(root.to_string_lossy().as_ref())
20996 ),
20997 projection_freshness,
20998 projection_hashes,
20999 evidence_packet_ids,
21000 conflict_matrix_decisions,
21001 worker_ownership_blocks,
21002 follow_up_commands: dedupe_preserve_order(follow_up_commands),
21003 warnings,
21004 })
21005}
21006
/// Prints a context-pack report to stdout in human-readable form.
///
/// With `compact` set, the report is emitted as terse single-line records
/// (one summary line, then one line per reminder, prompt, token action and
/// diff file, followed by test, log, exploration and graph-orchestration
/// summaries) and the function returns early. Otherwise a sectioned,
/// multi-line layout is printed: header, next context, diff digest, test
/// digest, log digest, exploration packet, graph orchestration and resume
/// commands.
pub(crate) fn print_context_pack_human(report: &ContextPackReport, compact: bool) {
    if compact {
        // Headline: counts are printed as "shown/total" pairs.
        println!(
            "context-pack target:{} prompts:{}/{} diff:{}/{} test:{} log:{}",
            shell_quote(&report.target),
            report.next_context.prompt_targets.len(),
            report.next_context.prompt_target_total,
            report.diff_digest.files.len(),
            report.diff_digest.files_changed,
            report.test_digest.status,
            report.log_digest.status
        );
        for reminder in &report.status_reminders {
            println!("reminder {reminder}");
        }
        for prompt in &report.next_context.prompt_targets {
            println!("prompt {prompt}");
        }
        for action in &report.next_context.next_token_actions {
            // The command count adds the optional compact/restart commands to the digest commands.
            println!(
                "token-action {} {} commands:{}",
                action.priority,
                action.kind,
                action.digest_commands.len()
                    + usize::from(action.compact_command.is_some())
                    + usize::from(action.restart_command.is_some())
            );
        }
        for file in &report.diff_digest.files {
            // Empty symbol/summary lists are rendered as "-".
            println!(
                "diff {} status:{} syms:{} sums:{}",
                file.path,
                file.status,
                if file.touched_symbol_refs.is_empty() {
                    "-".to_string()
                } else {
                    file.touched_symbol_refs
                        .iter()
                        .map(compact_symbol_ref_token)
                        .collect::<Vec<_>>()
                        .join(",")
                },
                if file.summary_refs.is_empty() {
                    "-".to_string()
                } else {
                    file.summary_refs
                        .iter()
                        .map(|summary| summary.handle.as_str())
                        .collect::<Vec<_>>()
                        .join(",")
                }
            );
        }
        // When no test/log report is attached, print the section's command instead.
        if let Some(test) = &report.test_digest.report {
            println!(
                "test runner:{} failures:{} groups:{}",
                test.runner, test.failures, test.grouped_failures
            );
        } else {
            println!("test {}", report.test_digest.command);
        }
        if let Some(log) = &report.log_digest.report {
            println!(
                "log lines:{} signals:{} files:{} syms:{}",
                log.non_empty_lines, log.signal_groups, log.file_ref_groups, log.symbol_ref_groups
            );
        } else {
            println!("log {}", report.log_digest.command);
        }
        println!(
            "explore windows:{} relations:{} budget:{}",
            report.exploration.source_windows.len(),
            report.exploration.relationship_map.len(),
            report.exploration.budget.project_size
        );
        println!(
            "graph-orchestration freshness:{} evidence:{} ownership:{}",
            report.graph_orchestration.projection_freshness.status,
            report.graph_orchestration.evidence_packet_ids.len(),
            report.graph_orchestration.worker_ownership_blocks.len()
        );
        return;
    }

    // --- Header ---
    println!("Context pack");
    println!(" target: {}", report.target);
    println!(" target kind: {}", report.target_kind);
    println!(" root: {}", report.root);
    println!(
        " preview budget: {} items / {} bytes",
        report.max_items, report.max_bytes
    );
    if !report.status_reminders.is_empty() {
        println!(" status reminders:");
        for reminder in &report.status_reminders {
            println!(" - {reminder}");
        }
    }
    // --- Next context ---
    println!();
    println!("Next context");
    println!(
        " prompt targets: {}/{}",
        report.next_context.prompt_targets.len(),
        report.next_context.prompt_target_total
    );
    println!(
        " touched files: {}/{}",
        report.next_context.touched_files.len(),
        report.next_context.touched_file_total
    );
    println!(
        " touched symbols: {}/{}",
        report.next_context.touched_symbols.len(),
        report.next_context.touched_symbol_total
    );
    println!(
        " unresolved failures: {}/{}",
        report.next_context.unresolved_failures.len(),
        report.next_context.unresolved_failure_total
    );
    if !report.next_context.prompt_targets.is_empty() {
        for prompt in &report.next_context.prompt_targets {
            println!(" - prompt: {prompt}");
        }
    }
    if !report.next_context.touched_files.is_empty() {
        for path in &report.next_context.touched_files {
            println!(" - file: {path}");
        }
    }
    // NOTE(review): the guard checks `touched_symbols` but the loop iterates
    // `touched_symbol_refs`; presumably the two are kept in sync — confirm.
    if !report.next_context.touched_symbols.is_empty() {
        for symbol in &report.next_context.touched_symbol_refs {
            println!(
                " - symbol: {}",
                format_symbol_preview_line(
                    &symbol.handle,
                    &symbol.name,
                    symbol.tag_alias.as_deref()
                )
            );
        }
    }
    if !report.next_context.next_token_actions.is_empty() {
        println!(" token actions:");
        for action in &report.next_context.next_token_actions {
            println!(
                " - [{}:{}] {} | guidance: {}",
                action.priority, action.kind, action.message, action.guidance
            );
            if let Some(command) = &action.compact_command {
                println!(" compact: {command}");
            }
            if let Some(command) = &action.restart_command {
                println!(" restart: {command}");
            }
            for command in &action.digest_commands {
                println!(" digest: {command}");
            }
        }
    }

    // --- Diff digest ---
    println!();
    println!("Diff digest");
    println!(" mode: {}", report.diff_digest.mode);
    println!(
        " files changed: {}/{}",
        report.diff_digest.files.len(),
        report.diff_digest.files_changed
    );
    println!(
        " touched symbols: {}",
        report.diff_digest.symbols_touched
    );
    println!(
        " call edges: +{} / -{}",
        report.diff_digest.call_edges_added, report.diff_digest.call_edges_removed
    );
    for file in &report.diff_digest.files {
        println!(" - {} [{}]", file.path, file.status);
        if !file.touched_symbol_refs.is_empty() {
            println!(
                " symbols: {}",
                file.touched_symbol_refs
                    .iter()
                    .map(|symbol| format_symbol_preview_line(
                        &symbol.handle,
                        &symbol.name,
                        symbol.tag_alias.as_deref()
                    ))
                    .collect::<Vec<_>>()
                    .join(" | ")
            );
        }
        if !file.warnings.is_empty() {
            println!(" warnings: {}", file.warnings.join(" | "));
        }
        if !file.summary_refs.is_empty() {
            println!(
                " summaries: {}",
                file.summary_refs
                    .iter()
                    .map(format_summary_ref_line)
                    .collect::<Vec<_>>()
                    .join(" | ")
            );
        }
    }

    // --- Test digest ---
    println!();
    println!("Test digest");
    println!(" status: {}", report.test_digest.status);
    match &report.test_digest.report {
        Some(test) => {
            println!(" runner: {}", test.runner);
            println!(" failures: {}", test.failures);
            println!(" failure groups: {}", test.grouped_failures);
            for failure in &test.failure_groups {
                // A line number is only shown together with a path.
                let location = match (&failure.path, failure.line) {
                    (Some(path), Some(line)) => format!("{path}:{line}"),
                    (Some(path), None) => path.clone(),
                    _ => "(no file anchor)".to_string(),
                };
                println!(
                    " - {} count:{} msg:{}",
                    location, failure.occurrences, failure.message
                );
                if !failure.summary_refs.is_empty() {
                    println!(
                        " summaries: {}",
                        failure
                            .summary_refs
                            .iter()
                            .map(format_summary_ref_line)
                            .collect::<Vec<_>>()
                            .join(" | ")
                    );
                }
            }
        }
        // No report attached: show the command stored on the section instead.
        None => println!(" capture: {}", report.test_digest.command),
    }

    // --- Log digest ---
    println!();
    println!("Log digest");
    println!(" status: {}", report.log_digest.status);
    match &report.log_digest.report {
        Some(log) => {
            println!(" non-empty lines: {}", log.non_empty_lines);
            println!(" signal groups: {}", log.signal_groups);
            println!(" file refs: {}", log.file_ref_groups);
            println!(" symbol refs: {}", log.symbol_ref_groups);
            for signal in &log.signals {
                // A line number is only shown together with a path.
                let location = match (&signal.path, signal.line) {
                    (Some(path), Some(line)) => format!("{path}:{line}"),
                    (Some(path), None) => path.clone(),
                    _ => "(no file anchor)".to_string(),
                };
                println!(
                    " - {} {} count:{} msg:{}",
                    location, signal.severity, signal.occurrences, signal.message
                );
                if !signal.summary_refs.is_empty() {
                    println!(
                        " summaries: {}",
                        signal
                            .summary_refs
                            .iter()
                            .map(format_summary_ref_line)
                            .collect::<Vec<_>>()
                            .join(" | ")
                    );
                }
            }
            for symbol in &log.symbol_refs {
                println!(
                    " - symbol: {} count:{} state:{}",
                    format_symbol_preview_line(
                        &symbol.handle,
                        &symbol.symbol,
                        symbol.tag_alias.as_deref()
                    ),
                    symbol.occurrences,
                    symbol.summary_state
                );
                if !symbol.summary_refs.is_empty() {
                    println!(
                        " summaries: {}",
                        symbol
                            .summary_refs
                            .iter()
                            .map(format_summary_ref_line)
                            .collect::<Vec<_>>()
                            .join(" | ")
                    );
                }
            }
        }
        // No report attached: show the command stored on the section instead.
        None => println!(" capture: {}", report.log_digest.command),
    }

    // --- Exploration packet ---
    println!();
    println!("Exploration packet");
    println!(
        " budget: {} ({} windows x {} lines)",
        report.exploration.budget.project_size,
        report.exploration.budget.max_source_windows,
        report.exploration.budget.lines_per_window
    );
    for window in &report.exploration.source_windows {
        println!(
            " - window {}:{}-{} ({})",
            window.file, window.start, window.end, window.reason
        );
        println!(" expand: {}", window.expand);
    }
    for relation in &report.exploration.relationship_map {
        println!(
            " - relation {} -{}-> {}",
            relation.from, relation.relation, relation.to
        );
    }

    // --- Graph orchestration ---
    println!();
    println!("Graph orchestration");
    println!(
        " projection freshness: {}",
        report.graph_orchestration.projection_freshness.status
    );
    for evidence in &report.graph_orchestration.evidence_packet_ids {
        println!(" - evidence: {evidence}");
    }
    for decision in &report.graph_orchestration.conflict_matrix_decisions {
        println!(" - decision: {decision}");
    }
    for block in &report.graph_orchestration.worker_ownership_blocks {
        println!(" - ownership: {block}");
    }
    for command in &report.graph_orchestration.follow_up_commands {
        println!(" - next: {command}");
    }

    // --- Resume commands ---
    println!();
    println!("Resume commands:");
    for command in &report.resume_commands {
        println!(" - {}", command);
    }
}
21354
/// Formats `value` as a short human-readable count.
///
/// Values below 1,000 are printed verbatim. Larger values are scaled to
/// thousands (`K`) or millions (`M`) and printed with one decimal place.
///
/// Counts just below one million whose one-decimal rounding would carry into
/// four integer digits (for example 999,999) are reported as `1.0M` rather
/// than the misleading `1000.0K`.
pub(crate) fn format_compact_count(value: u64) -> String {
    if value >= 1_000_000 {
        return format!("{:.1}M", value as f64 / 1_000_000.0);
    }
    if value < 1_000 {
        return value.to_string();
    }

    let thousands = format!("{:.1}", value as f64 / 1_000.0);
    if thousands == "1000.0" {
        // Rounding to one decimal carried over into the next unit; promote it.
        "1.0M".to_string()
    } else {
        format!("{thousands}K")
    }
}
21364
/// Runs `shell_command` through the digest runner and reports a digest of its output.
///
/// `kind` is parsed into a `DigestRunnerKind` (test or log). The command's
/// captured stdout is digested with the matching digest module.
///
/// * When `format` requests a JSON envelope, non-empty output is persisted as
///   a transcript artifact and a JSON report (command, exit code, filter,
///   artifact and digest) is printed together with follow-up commands.
/// * Otherwise the digest is rendered through the regular test/log digest
///   renderers, or a "No ... output captured." line is printed when nothing
///   was captured.
///
/// After reporting, the wrapped command's outcome is mirrored: `Ok(())` on
/// success, otherwise the process exits with the command's exit code.
///
/// # Errors
///
/// Fails when `kind` cannot be parsed, the artifact root cannot be resolved,
/// the command cannot be run, digesting or printing fails, or the command
/// ended without an exit code.
fn cmd_digest_runner(
    kind: &str,
    path: &Path,
    runner: Option<&str>,
    shell_command: &str,
    format: OutputFormat,
) -> Result<()> {
    let digest_kind = DigestRunnerKind::parse(kind)?;
    let root = transcript_artifact_root(path)?;
    let execution = run_digest_runner_command(shell_command)?;
    let output = &execution.output;
    let captured = String::from_utf8_lossy(&output.stdout).into_owned();
    // -1 stands in for a missing exit code in the reported metrics.
    let exit_code = output.status.code().unwrap_or(-1);
    if format.json_output && format.envelope {
        // The artifact key combines the kind, both command forms and the captured output.
        let artifact_key = format!(
            "{}:{}:{}:{}",
            digest_kind.as_str(),
            shell_command,
            execution.executed_command,
            captured
        );
        // Only non-blank output is persisted as an artifact.
        let artifact = if captured.trim().is_empty() {
            None
        } else {
            // `expand` is the command that re-digests the persisted artifact file.
            let (suffix, expand) = match digest_kind {
                DigestRunnerKind::Test => (
                    "test.log",
                    format!(
                        "tsift test-digest --path {} --input {}{} --json",
                        shell_quote(root.to_string_lossy().as_ref()),
                        shell_quote(
                            root.join(".tsift/artifacts")
                                .join(format!("{}.test.log", stable_handle("tart", &artifact_key)))
                                .to_string_lossy()
                                .as_ref()
                        ),
                        runner
                            .map(|value| format!(" --runner {}", shell_quote(value)))
                            .unwrap_or_default()
                    ),
                ),
                DigestRunnerKind::Log => (
                    "log",
                    format!(
                        "tsift log-digest --path {} --input {} --json",
                        shell_quote(root.to_string_lossy().as_ref()),
                        shell_quote(
                            root.join(".tsift/artifacts")
                                .join(format!("{}.log", stable_handle("tart", &artifact_key)))
                                .to_string_lossy()
                                .as_ref()
                        )
                    ),
                ),
            };
            Some(persist_transcript_artifact(
                &root,
                "tart",
                suffix,
                &artifact_key,
                &captured,
                expand,
            )?)
        };
        let filter_report = execution.filter.as_ref().map(DigestRunnerFilter::to_json);

        match digest_kind {
            DigestRunnerKind::Test => {
                let digest_report = test_digest::compute(path, &captured, runner)?;
                let report = serde_json::json!({
                    "kind": digest_kind.as_str(),
                    "command": shell_command,
                    "executed_command": execution.executed_command,
                    "exit_code": exit_code,
                    "success": output.status.success(),
                    "filter": filter_report,
                    "artifact": artifact,
                    "digest": digest_report,
                });
                // Follow-ups: the artifact's expand command (if any), then a rewrite hint.
                let mut follow_up = artifact
                    .as_ref()
                    .map(|entry| vec![entry.expand.clone()])
                    .unwrap_or_default();
                follow_up.push(format!(
                    "tsift rewrite --run {}",
                    shell_quote(shell_command)
                ));
                let summary_text = if output.status.success() && digest_report.failures == 0 {
                    format!("test run passed for {}", runner.unwrap_or("auto"))
                } else {
                    format!("test run captured {} failure(s)", digest_report.failures)
                };
                print_json_or_envelope(
                    &report,
                    &format,
                    "digest-runner",
                    "test-run",
                    ToolEnvelopeSummary {
                        text: summary_text,
                        metrics: vec![
                            envelope_metric("runner", &digest_report.runner),
                            envelope_metric("exit_code", exit_code),
                            envelope_metric("filter", execution.filter_label()),
                            envelope_metric("failures", digest_report.failures),
                            envelope_metric("groups", digest_report.grouped_failures),
                            envelope_metric(
                                "artifact",
                                artifact
                                    .as_ref()
                                    .map(|entry| entry.handle.as_str())
                                    .unwrap_or("-"),
                            ),
                        ],
                    },
                    false,
                    follow_up,
                )?;
            }
            DigestRunnerKind::Log => {
                let digest_report = log_digest::compute(path, &captured)?;
                let report = serde_json::json!({
                    "kind": digest_kind.as_str(),
                    "command": shell_command,
                    "executed_command": execution.executed_command,
                    "exit_code": exit_code,
                    "success": output.status.success(),
                    "filter": filter_report,
                    "artifact": artifact,
                    "digest": digest_report,
                });
                // Follow-ups: the artifact's expand command (if any), then a rewrite hint.
                let mut follow_up = artifact
                    .as_ref()
                    .map(|entry| vec![entry.expand.clone()])
                    .unwrap_or_default();
                follow_up.push(format!(
                    "tsift rewrite --run {}",
                    shell_quote(shell_command)
                ));
                let summary_text = if output.status.success() && digest_report.signal_groups == 0 {
                    "command finished without log signals".to_string()
                } else {
                    format!(
                        "command emitted {} log signal group(s)",
                        digest_report.signal_groups
                    )
                };
                print_json_or_envelope(
                    &report,
                    &format,
                    "digest-runner",
                    "command-run",
                    ToolEnvelopeSummary {
                        text: summary_text,
                        metrics: vec![
                            envelope_metric("exit_code", exit_code),
                            envelope_metric("filter", execution.filter_label()),
                            envelope_metric("signals", digest_report.signal_groups),
                            envelope_metric("file_refs", digest_report.file_ref_groups),
                            envelope_metric(
                                "artifact",
                                artifact
                                    .as_ref()
                                    .map(|entry| entry.handle.as_str())
                                    .unwrap_or("-"),
                            ),
                        ],
                    },
                    false,
                    follow_up,
                )?;
            }
        }

        // Mirror the wrapped command's outcome once the report has been printed.
        if output.status.success() {
            return Ok(());
        }
        if let Some(code) = output.status.code() {
            std::process::exit(code);
        }
        bail!("digest-wrapped command terminated by signal: {shell_command}");
    }

    // Non-envelope path: render the digest directly.
    if captured.trim().is_empty() {
        let label = match digest_kind {
            DigestRunnerKind::Test => "test",
            DigestRunnerKind::Log => "log",
        };
        println!("No {label} output captured.");
    } else {
        match digest_kind {
            DigestRunnerKind::Test => {
                render_test_digest_from_input(path, &captured, runner, format)?
            }
            DigestRunnerKind::Log => render_log_digest_from_input(path, &captured, format)?,
        }
    }

    // Mirror the wrapped command's outcome, same as in the envelope path.
    if output.status.success() {
        return Ok(());
    }
    if let Some(code) = output.status.code() {
        std::process::exit(code);
    }
    bail!("digest-wrapped command terminated by signal: {shell_command}");
}
21570
21571struct DigestRunnerExecution {
21572 output: std::process::Output,
21573 executed_command: String,
21574 filter: Option<DigestRunnerFilter>,
21575}
21576
21577impl DigestRunnerExecution {
21578 fn filter_label(&self) -> &'static str {
21579 self.filter
21580 .as_ref()
21581 .map(|filter| filter.tool)
21582 .unwrap_or("none")
21583 }
21584}
21585
21586struct DigestRunnerFilter {
21587 tool: &'static str,
21588 command: String,
21589}
21590
21591impl DigestRunnerFilter {
21592 fn to_json(&self) -> serde_json::Value {
21593 serde_json::json!({
21594 "tool": self.tool,
21595 "command": self.command,
21596 })
21597 }
21598}
21599
21600fn run_digest_runner_command(shell_command: &str) -> Result<DigestRunnerExecution> {
21601 let filter = rtk_rewrite_for_digest_runner(shell_command);
21602 let executed_command = filter
21603 .as_ref()
21604 .map(|filter| filter.command.as_str())
21605 .unwrap_or(shell_command);
21606 let output = Command::new("sh")
21607 .arg("-lc")
21608 .arg(format!("({executed_command}) 2>&1"))
21609 .stdout(Stdio::piped())
21610 .output()
21611 .with_context(|| format!("running digest-wrapped command: {executed_command}"))?;
21612
21613 Ok(DigestRunnerExecution {
21614 output,
21615 executed_command: executed_command.to_string(),
21616 filter,
21617 })
21618}
21619
21620fn rtk_rewrite_for_digest_runner(shell_command: &str) -> Option<DigestRunnerFilter> {
21621 if shell_command.trim_start().starts_with("rtk ") || find_command_on_path("rtk").is_none() {
21622 return None;
21623 }
21624 let output = Command::new("rtk")
21625 .arg("rewrite")
21626 .arg(shell_command)
21627 .output()
21628 .ok()?;
21629 if !output.status.success() {
21630 return None;
21631 }
21632 let rewritten = String::from_utf8_lossy(&output.stdout).trim().to_string();
21633 if rewritten.is_empty() || rewritten == shell_command {
21634 return None;
21635 }
21636 Some(DigestRunnerFilter {
21637 tool: "rtk",
21638 command: rewritten,
21639 })
21640}
21641
/// Looks up `command` in the directories listed in the `PATH` environment variable.
///
/// Returns the first `<dir>/<command>` candidate that is a regular file, or
/// `None` when `PATH` is unset or no directory contains such a file.
fn find_command_on_path(command: &str) -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    for dir in std::env::split_paths(&search_path) {
        let candidate = dir.join(command);
        if candidate.is_file() {
            return Some(candidate);
        }
    }
    None
}
21648
21649pub(crate) fn open_existing_summary_db_read_only(db_path: &Path) -> Result<summarize::SummaryDb> {
21650 if !db_path.exists() {
21651 bail!("no summaries.db found — run `tsift summarize --extract <path>` first");
21652 }
21653 summarize::SummaryDb::open_read_only_resilient(db_path)
21654}
21655
21656fn status_index_needs_fix(report: &status::StatusReport) -> bool {
21657 !matches!(report.index, status::IndexStatus::Fresh { .. })
21658}
21659
21660fn status_instructions_need_fix(report: &status::StatusReport) -> bool {
21661 !matches!(report.instructions, init::InstructionStatus::Current { .. })
21662}
21663
21664pub(crate) fn apply_status_fixes(root: &Path, report: &status::StatusReport) -> Result<()> {
21665 if status_instructions_need_fix(report) {
21666 eprintln!("status fix: refreshing tsift instructions");
21667 init::init(root, false, false)?;
21668 }
21669
21670 if !status_index_needs_fix(report) {
21671 return Ok(());
21672 }
21673
21674 let scopes = config::Config::submodule_dirs(root)?;
21675 if scopes.is_empty() {
21676 eprintln!("status fix: refreshing index");
21677 run_index_update(
21678 &root.join(".tsift/index.db"),
21679 root,
21680 "status --fix refreshing index".to_string(),
21681 root,
21682 None,
21683 false,
21684 false,
21685 )?;
21686 return Ok(());
21687 }
21688
21689 let cfg = config::Config::load(root)?;
21690 for scope in scopes {
21691 if !scope.source_root.exists() {
21692 eprintln!(
21693 "status fix: skipping missing submodule `{}` ({})",
21694 scope.id,
21695 scope.source_root.display()
21696 );
21697 continue;
21698 }
21699 eprintln!("status fix: refreshing submodule `{}` index", scope.id);
21700 run_index_update(
21701 &cfg.db_path_for(root, &scope.id),
21702 &scope.source_root,
21703 format!("status --fix refreshing submodule `{}` index", scope.id),
21704 root,
21705 Some(scope.id.as_str()),
21706 false,
21707 false,
21708 )?;
21709 }
21710
21711 Ok(())
21712}
21713
21714pub(crate) fn status_missing_workspace_scopes(report: &status::StatusReport) -> bool {
21715 match &report.index {
21716 status::IndexStatus::Fresh { missing_scopes, .. }
21717 | status::IndexStatus::Stale { missing_scopes, .. }
21718 | status::IndexStatus::Missing { missing_scopes } => !missing_scopes.is_empty(),
21719 }
21720}
21721
21722pub(crate) fn autoindex_missing_workspace_scopes(
21723 root: &Path,
21724 report: &status::StatusReport,
21725) -> Result<()> {
21726 let missing_scopes = match &report.index {
21727 status::IndexStatus::Fresh { missing_scopes, .. }
21728 | status::IndexStatus::Stale { missing_scopes, .. }
21729 | status::IndexStatus::Missing { missing_scopes } => missing_scopes,
21730 };
21731 if missing_scopes.is_empty() {
21732 return Ok(());
21733 }
21734
21735 let missing_scope_ids = missing_scopes
21736 .iter()
21737 .map(|scope| scope.scope.as_str())
21738 .collect::<std::collections::HashSet<_>>();
21739 let cfg = config::Config::load(root)?;
21740 for scope in config::Config::submodule_dirs(root)? {
21741 if !missing_scope_ids.contains(scope.id.as_str()) || !scope.source_root.exists() {
21742 continue;
21743 }
21744 let db_path = cfg.db_path_for(root, &scope.id);
21745 run_index_update(
21746 &db_path,
21747 &scope.source_root,
21748 format!(
21749 "autoindexing missing submodule `{}` during status",
21750 scope.id
21751 ),
21752 root,
21753 Some(scope.id.as_str()),
21754 false,
21755 false,
21756 )?;
21757 }
21758 Ok(())
21759}
21760
21761pub(crate) fn emit_summary_stats_warnings(stats: &summarize::SummaryStats, root: &Path) {
21762 for warning in &stats.warnings {
21763 let rel_path = relativize_pathbuf(&warning.path, root);
21764 eprintln!(
21765 "warning: summarize stats {}: {}",
21766 rel_path.display(),
21767 warning.message
21768 );
21769 }
21770}
21771
21772fn contextualize_error(err: anyhow::Error, context: String) -> anyhow::Error {
21773 Result::<(), anyhow::Error>::Err(err)
21774 .context(context)
21775 .unwrap_err()
21776}
21777
21778fn should_attach_lock_diagnostics(err: &anyhow::Error) -> bool {
21779 let message = err.to_string();
21780 message.contains("another tsift index writer is already active")
21781 || substrate::error_mentions_locked_db(err)
21782}
21783
21784fn add_write_lock_context(
21785 err: anyhow::Error,
21786 action: String,
21787 root: &std::path::Path,
21788 scope: Option<&str>,
21789) -> anyhow::Error {
21790 if !should_attach_lock_diagnostics(&err) {
21791 return contextualize_error(err, action);
21792 }
21793
21794 let Ok(report) = status::check_locks(root, None, scope) else {
21795 return contextualize_error(err, action);
21796 };
21797
21798 contextualize_error(
21799 err,
21800 format!(
21801 "{}\n\nlock diagnostics:\n{}",
21802 action,
21803 status::format_locks_human(&report, false).trim_end()
21804 ),
21805 )
21806}
21807
21808pub(crate) fn run_index_update(
21809 db_path: &std::path::Path,
21810 source_root: &std::path::Path,
21811 action: String,
21812 root: &std::path::Path,
21813 scope: Option<&str>,
21814 rebuild: bool,
21815 prune: bool,
21816) -> Result<index::IndexSummary> {
21817 let result = (|| {
21818 let db = index::IndexDb::open(db_path)?;
21819 if rebuild {
21820 db.rebuild(source_root)
21821 } else if prune {
21822 db.apply_changes_pruned(source_root)
21823 } else {
21824 db.apply_changes(source_root)
21825 }
21826 })();
21827
21828 let summary = result.map_err(|err| add_write_lock_context(err, action, root, scope))?;
21829 emit_index_warnings(&summary, source_root, scope);
21830 Ok(summary)
21831}
21832
21833pub(crate) fn relativize_index_summary(summary: &mut index::IndexSummary, root: &Path) {
21834 for change in &mut summary.changes {
21835 change.path = relativize_pathbuf(&change.path, root);
21836 }
21837 for warning in &mut summary.warnings {
21838 warning.path = relativize_pathbuf(&warning.path, root);
21839 }
21840}
21841
21842fn emit_index_warnings(summary: &index::IndexSummary, root: &Path, scope: Option<&str>) {
21843 for warning in &summary.warnings {
21844 let rel_path = relativize_pathbuf(&warning.path, root);
21845 let stage = match warning.stage {
21846 index::IndexWarningStage::ReadSource => "read failed",
21847 index::IndexWarningStage::ExtractSymbols => "symbol extraction failed",
21848 index::IndexWarningStage::ExtractCallSites => "call extraction failed",
21849 index::IndexWarningStage::ExtractRoutes => "route extraction failed",
21850 };
21851 let scope_prefix = scope.map(|name| format!("[{}] ", name)).unwrap_or_default();
21852 let lang_suffix = warning
21853 .language
21854 .as_deref()
21855 .map(|lang| format!(" [{}]", lang))
21856 .unwrap_or_default();
21857 eprintln!(
21858 "warning: {}{}{}: {}: {}",
21859 scope_prefix,
21860 rel_path.display(),
21861 lang_suffix,
21862 stage,
21863 warning.message
21864 );
21865 }
21866}
21867
21868pub(crate) fn load_summarize_config(root: &std::path::Path) -> summarize::SummarizeConfig {
21869 let config_path = root.join(".tsift/config.toml");
21870 if !config_path.exists() {
21871 return summarize::SummarizeConfig::default();
21872 }
21873 #[derive(serde::Deserialize, Default)]
21874 struct RawConfig {
21875 #[serde(default)]
21876 summarize: Option<RawSummarize>,
21877 }
21878 #[derive(serde::Deserialize)]
21879 struct RawSummarize {
21880 model: Option<String>,
21881 max_file_tokens: Option<usize>,
21882 api_key_env: Option<String>,
21883 }
21884 let content = std::fs::read_to_string(&config_path).unwrap_or_default();
21885 let raw: RawConfig = toml::from_str(&content).unwrap_or_default();
21886 let defaults = summarize::SummarizeConfig::default();
21887 match raw.summarize {
21888 Some(s) => summarize::SummarizeConfig {
21889 model: s.model.unwrap_or(defaults.model),
21890 max_file_tokens: s.max_file_tokens.unwrap_or(defaults.max_file_tokens),
21891 api_key_env: s.api_key_env.unwrap_or(defaults.api_key_env),
21892 },
21893 None => defaults,
21894 }
21895}
21896
/// Index database and source root that together cover a given source file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExtractSymbolContext {
    /// Path of the index database covering the file.
    db_path: PathBuf,
    /// Source root that the database was matched against.
    source_root: PathBuf,
}
21902
21903pub(crate) fn find_symbols_db_for_file(
21904 root: &Path,
21905 file_path: &Path,
21906) -> Result<Option<ExtractSymbolContext>> {
21907 let cfg = config::Config::load(root)?;
21908 let mut submodules = config::Config::submodule_dirs(root)?;
21909 submodules.sort_by(|left, right| {
21910 right
21911 .source_root
21912 .components()
21913 .count()
21914 .cmp(&left.source_root.components().count())
21915 });
21916
21917 for scope in submodules {
21918 if !file_path.starts_with(&scope.source_root) {
21919 continue;
21920 }
21921 let db_path = cfg.db_path_for(root, &scope.id);
21922 if db_path.exists() {
21923 return Ok(Some(ExtractSymbolContext {
21924 db_path,
21925 source_root: scope.source_root,
21926 }));
21927 }
21928 }
21929
21930 let single = root.join(".tsift/index.db");
21931 if single.exists() && file_path.starts_with(root) {
21932 return Ok(Some(ExtractSymbolContext {
21933 db_path: single,
21934 source_root: root.to_path_buf(),
21935 }));
21936 }
21937
21938 Ok(None)
21939}
21940
21941pub(crate) fn resolve_extract_base(path: &Path) -> Result<PathBuf> {
21942 let canonical = path
21943 .canonicalize()
21944 .with_context(|| format!("canonicalizing {}", path.display()))?;
21945
21946 Ok(if canonical.is_dir() {
21947 canonical
21948 } else {
21949 canonical
21950 .parent()
21951 .map(Path::to_path_buf)
21952 .unwrap_or(canonical)
21953 })
21954}
21955
21956fn normalize_extract_scope_path(path: &Path) -> Result<PathBuf> {
21957 if path.exists() {
21958 return path
21959 .canonicalize()
21960 .with_context(|| format!("canonicalizing extract scope {}", path.display()));
21961 }
21962
21963 Ok(summarize::normalize_lexical_path(path))
21964}
21965
21966pub(crate) fn resolve_extract_scope(root: &Path, extract_path: &Path) -> Result<PathBuf> {
21967 let scope = if extract_path.is_absolute() {
21968 extract_path.to_path_buf()
21969 } else {
21970 root.join(extract_path)
21971 };
21972 normalize_extract_scope_path(&scope)
21973}
21974
21975pub(crate) fn summarize_diff_matches_scope(changed_path: &Path, extract_scope: &Path) -> bool {
21976 normalize_extract_scope_path(changed_path)
21977 .unwrap_or_else(|_| summarize::normalize_lexical_path(changed_path))
21978 .starts_with(extract_scope)
21979}
21980
21981pub(crate) fn summarize_relative_file_path(root: &Path, file_path: &Path) -> String {
21982 summarize::normalize_summary_file_key(file_path.strip_prefix(root).unwrap_or(file_path))
21983}
21984
21985pub(crate) fn summarize_full_extract_deleted_summary_paths(
21986 summary_db: &summarize::SummaryDb,
21987 root: &Path,
21988 extract_scope: &Path,
21989 files_to_extract: &[PathBuf],
21990) -> Result<BTreeSet<String>> {
21991 let live_paths = files_to_extract
21992 .iter()
21993 .map(|file_path| summarize_relative_file_path(root, file_path))
21994 .collect::<BTreeSet<_>>();
21995 let mut deleted = BTreeSet::new();
21996
21997 for cached_path in summary_db.cached_file_paths()? {
21998 if !summarize_diff_matches_scope(&root.join(&cached_path), extract_scope) {
21999 continue;
22000 }
22001 if !live_paths.contains(&cached_path) {
22002 deleted.insert(cached_path);
22003 }
22004 }
22005
22006 Ok(deleted)
22007}
22008
/// One index database that a search should consult.
#[derive(Debug, Clone)]
struct SearchIndexTarget {
    /// Human-readable name used in diagnostics (e.g. "index" or
    /// "submodule `<id>` index").
    label: String,
    /// Location of the index database file.
    db_path: PathBuf,
    /// Source tree the index describes.
    source_root: PathBuf,
    /// Submodule scope id, or `None` for the shared root index.
    scope_name: Option<String>,
    /// Command line suggested to the user for rebuilding this index.
    reindex_cmd: String,
}
22017
/// Freshness of a search index as reported by `inspect_search_index`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchIndexState {
    /// The source root or the database file does not exist.
    Missing,
    /// The inspection found no new, modified, or deleted files.
    Fresh,
    /// The index is out of date; `stale_files` is the sum of new, modified,
    /// and deleted files.
    Stale { stale_files: usize },
}
22024
22025fn resolve_search_index_targets(
22026 root: &Path,
22027 path_hint: &Path,
22028 scope: Option<&str>,
22029 federated: bool,
22030) -> Result<Vec<SearchIndexTarget>> {
22031 if let Some(scope_name) = scope {
22032 let scope = config::Config::resolve_submodule(root, scope_name)?;
22033 let cfg = config::Config::load(root)?;
22034 return Ok(vec![SearchIndexTarget {
22035 label: format!("submodule `{}` index", scope.id),
22036 db_path: cfg.db_path_for(root, &scope.id),
22037 source_root: scope.source_root.clone(),
22038 scope_name: Some(scope.id.clone()),
22039 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
22040 }]);
22041 }
22042
22043 if federated {
22044 let cfg = config::Config::load(root)?;
22045 let mut targets = Vec::new();
22046 for scope in config::Config::submodule_dirs(root)? {
22047 if !cfg.federation_for_scope(&scope) {
22048 continue;
22049 }
22050 targets.push(SearchIndexTarget {
22051 label: format!("submodule `{}` index", scope.id),
22052 db_path: cfg.db_path_for(root, &scope.id),
22053 source_root: scope.source_root.clone(),
22054 scope_name: Some(scope.id.clone()),
22055 reindex_cmd: format!("tsift index --workspace {}", root.display()),
22056 });
22057 }
22058 return Ok(targets);
22059 }
22060
22061 if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
22062 let cfg = config::Config::load(root)?;
22063 return Ok(vec![SearchIndexTarget {
22064 label: format!("submodule `{}` index", scope.id),
22065 db_path: cfg.db_path_for(root, &scope.id),
22066 source_root: scope.source_root.clone(),
22067 scope_name: Some(scope.id.clone()),
22068 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
22069 }]);
22070 }
22071
22072 if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
22073 let cfg = config::Config::load(root)?;
22074 return Ok(vec![SearchIndexTarget {
22075 label: format!("submodule `{}` index", scope.id),
22076 db_path: cfg.db_path_for(root, &scope.id),
22077 source_root: scope.source_root.clone(),
22078 scope_name: Some(scope.id.clone()),
22079 reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
22080 }]);
22081 }
22082
22083 let scopes = config::Config::submodule_dirs(root)?;
22084 if !scopes.is_empty() {
22085 let root_db = root.join(".tsift/index.db");
22086 if !root_db.exists() {
22087 let available_scopes = scopes
22088 .iter()
22089 .map(|scope| scope.id.as_str())
22090 .collect::<Vec<_>>()
22091 .join(", ");
22092 let cfg = config::Config::load(root)?;
22093 let indexed_scopes = scopes
22094 .iter()
22095 .filter(|scope| cfg.db_path_for(root, &scope.id).exists())
22096 .map(|scope| scope.id.as_str())
22097 .collect::<Vec<_>>();
22098 let indexed_label = if indexed_scopes.is_empty() {
22099 "none".to_string()
22100 } else {
22101 indexed_scopes.join(", ")
22102 };
22103 bail!(
22104 "workspace root {} has no shared root index at {}. Default search requires `--scope <scope>` or `--federated` when the workspace uses scoped `.tsift/indexes/*/index.db` files. Available scopes: {}. Indexed scopes: {}.",
22105 root.display(),
22106 root_db.display(),
22107 available_scopes,
22108 indexed_label,
22109 );
22110 }
22111 }
22112
22113 Ok(vec![SearchIndexTarget {
22114 label: "index".to_string(),
22115 db_path: root.join(".tsift/index.db"),
22116 source_root: root.to_path_buf(),
22117 scope_name: None,
22118 reindex_cmd: format!("tsift index {}", root.display()),
22119 }])
22120}
22121
22122fn inspect_search_index(target: &SearchIndexTarget) -> Result<SearchIndexState> {
22123 if !target.source_root.exists() || !target.db_path.exists() {
22124 return Ok(SearchIndexState::Missing);
22125 }
22126
22127 let inspection =
22128 index::IndexDb::inspect_read_only(&target.db_path, &target.source_root, false)?;
22129 let stale_files =
22130 inspection.summary.new + inspection.summary.modified + inspection.summary.deleted;
22131 if stale_files == 0 {
22132 Ok(SearchIndexState::Fresh)
22133 } else {
22134 Ok(SearchIndexState::Stale { stale_files })
22135 }
22136}
22137
/// An index that needs rebuilding, with the data needed to tell the user so.
#[derive(Debug, Clone, PartialEq, Eq)]
struct RebuildSearchTarget {
    /// Human-readable index name, copied from the `SearchIndexTarget`.
    label: String,
    /// Why the index needs a rebuild.
    reason: RebuildSearchReason,
    /// Command line suggested for rebuilding the index.
    reindex_cmd: String,
}
22144
/// Reason an index is not usable as-is.
#[derive(Debug, Clone, PartialEq, Eq)]
enum RebuildSearchReason {
    /// The index (or its source root) does not exist.
    Missing,
    /// The index exists but `stale_files` files are out of date.
    Stale { stale_files: usize },
}
22150
/// An index whose automatic update was skipped because another index writer
/// was active, so the search continues in a degraded mode.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DegradedSearchTarget {
    /// Human-readable index name, copied from the `SearchIndexTarget`.
    label: String,
    /// State the index was in when the update was skipped.
    reason: RebuildSearchReason,
    /// Command line suggested for rebuilding the index later.
    reindex_cmd: String,
}
22157
/// How a search proceeds when some indexes could not be refreshed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DegradedSearchMode {
    /// Search continues read-only against the current (possibly stale) index
    /// snapshot; chosen when at least one degraded target is stale.
    ReadOnly,
    /// Search continues as an exact live-file search; chosen when every
    /// degraded target is missing.
    Exact,
}
22163
/// Outcome of `precheck_search_indexes`.
#[derive(Debug)]
struct SearchPrecheck {
    /// All index targets resolved for the search.
    targets: Vec<SearchIndexTarget>,
    /// Targets whose update was skipped because an index writer was active.
    degraded_targets: Vec<DegradedSearchTarget>,
}
22169
22170fn is_active_writer_lock_error(err: &anyhow::Error) -> bool {
22171 err.chain().any(|cause| {
22172 cause
22173 .to_string()
22174 .contains("another tsift index writer is already active")
22175 })
22176}
22177
22178fn infer_agent_doc_task_submodule(
22179 root: &Path,
22180 path_hint: &Path,
22181) -> Result<Option<config::WorkspaceScope>> {
22182 let hinted_path = if path_hint.is_absolute() {
22183 path_hint.to_path_buf()
22184 } else {
22185 root.join(path_hint)
22186 };
22187 let Ok(relative) = hinted_path.strip_prefix(root) else {
22188 return Ok(None);
22189 };
22190 let mut components = relative.components();
22191 let Some(std::path::Component::Normal(first)) = components.next() else {
22192 return Ok(None);
22193 };
22194 if first != "tasks" {
22195 return Ok(None);
22196 }
22197 let Some(file_stem) = relative.file_stem().and_then(|stem| stem.to_str()) else {
22198 return Ok(None);
22199 };
22200 config::Config::find_submodule(root, file_stem)
22201}
22202
22203fn degraded_search_target(
22204 target: &SearchIndexTarget,
22205 reason: RebuildSearchReason,
22206) -> DegradedSearchTarget {
22207 DegradedSearchTarget {
22208 label: target.label.clone(),
22209 reason,
22210 reindex_cmd: target.reindex_cmd.clone(),
22211 }
22212}
22213
22214fn apply_search_index_update(
22215 root: &Path,
22216 target: &SearchIndexTarget,
22217) -> Result<index::IndexSummary> {
22218 run_index_update(
22219 &target.db_path,
22220 &target.source_root,
22221 format!("autoindexing {}", target.label),
22222 root,
22223 target.scope_name.as_deref(),
22224 false,
22225 false,
22226 )
22227}
22228
22229fn collect_rebuild_search_targets(
22230 targets: &[SearchIndexTarget],
22231) -> Result<Vec<RebuildSearchTarget>> {
22232 let mut rebuild_targets = Vec::new();
22233 for target in targets {
22234 let reason = match inspect_search_index(target)? {
22235 SearchIndexState::Missing => RebuildSearchReason::Missing,
22236 SearchIndexState::Fresh => continue,
22237 SearchIndexState::Stale { stale_files } => RebuildSearchReason::Stale { stale_files },
22238 };
22239 rebuild_targets.push(RebuildSearchTarget {
22240 label: target.label.clone(),
22241 reason,
22242 reindex_cmd: target.reindex_cmd.clone(),
22243 });
22244 }
22245 Ok(rebuild_targets)
22246}
22247
22248fn rebuild_search_target_detail(target: &RebuildSearchTarget) -> String {
22249 match target.reason {
22250 RebuildSearchReason::Missing => format!("{} is missing", target.label),
22251 RebuildSearchReason::Stale { stale_files } => {
22252 let file_suffix = if stale_files == 1 { "" } else { "s" };
22253 format!(
22254 "{} is stale ({} file{})",
22255 target.label, stale_files, file_suffix
22256 )
22257 }
22258 }
22259}
22260
22261fn rebuild_search_targets_message(rebuild_targets: &[RebuildSearchTarget]) -> String {
22262 if rebuild_targets.len() == 1 {
22263 let target = &rebuild_targets[0];
22264 return format!(
22265 "{}. Run `{}` to rebuild before retrying.",
22266 rebuild_search_target_detail(target),
22267 target.reindex_cmd
22268 );
22269 }
22270
22271 let summary: Vec<String> = rebuild_targets
22272 .iter()
22273 .take(3)
22274 .map(rebuild_search_target_detail)
22275 .collect();
22276 let overflow = rebuild_targets.len().saturating_sub(summary.len());
22277 let mut details = summary.join(", ");
22278 if overflow > 0 {
22279 details.push_str(&format!(", +{} more", overflow));
22280 }
22281 let reindex_cmd = rebuild_targets[0].reindex_cmd.clone();
22282 format!(
22283 "{} indexes need rebuild: {}. Run `{}` to rebuild before retrying.",
22284 rebuild_targets.len(),
22285 details,
22286 reindex_cmd
22287 )
22288}
22289
22290pub(crate) fn precheck_search_indexes(
22291 root: &Path,
22292 path_hint: &Path,
22293 scope: Option<&str>,
22294 federated: bool,
22295 autoindex: bool,
22296) -> Result<SearchPrecheck> {
22297 let targets = resolve_search_index_targets(root, path_hint, scope, federated)?;
22298 let mut stale_targets = Vec::new();
22299 let mut degraded_targets = Vec::new();
22300
22301 for target in &targets {
22302 match inspect_search_index(target)? {
22303 SearchIndexState::Missing => {
22304 if autoindex && let Err(err) = apply_search_index_update(root, target) {
22305 if is_active_writer_lock_error(&err) {
22306 degraded_targets
22307 .push(degraded_search_target(target, RebuildSearchReason::Missing));
22308 } else {
22309 return Err(err);
22310 }
22311 }
22312 }
22313 SearchIndexState::Fresh => {}
22314 SearchIndexState::Stale { stale_files } => {
22315 if autoindex {
22316 if let Err(err) = apply_search_index_update(root, target) {
22317 if is_active_writer_lock_error(&err) {
22318 degraded_targets.push(degraded_search_target(
22319 target,
22320 RebuildSearchReason::Stale { stale_files },
22321 ));
22322 } else {
22323 return Err(err);
22324 }
22325 }
22326 } else {
22327 stale_targets.push(RebuildSearchTarget {
22328 label: target.label.clone(),
22329 reason: RebuildSearchReason::Stale { stale_files },
22330 reindex_cmd: target.reindex_cmd.clone(),
22331 });
22332 }
22333 }
22334 }
22335 }
22336
22337 if stale_targets.is_empty() {
22338 return Ok(SearchPrecheck {
22339 targets,
22340 degraded_targets,
22341 });
22342 }
22343
22344 bail!(
22345 "tsift search aborted: {} \
22346 or re-run without `--no-autoindex`.",
22347 rebuild_search_targets_message(&stale_targets),
22348 );
22349}
22350
22351pub(crate) fn degraded_search_mode(targets: &[DegradedSearchTarget]) -> Option<DegradedSearchMode> {
22352 if targets.is_empty() {
22353 return None;
22354 }
22355
22356 if targets
22357 .iter()
22358 .all(|target| matches!(target.reason, RebuildSearchReason::Missing))
22359 {
22360 Some(DegradedSearchMode::Exact)
22361 } else {
22362 Some(DegradedSearchMode::ReadOnly)
22363 }
22364}
22365
22366fn degraded_search_targets_summary(targets: &[DegradedSearchTarget]) -> String {
22367 if targets.len() == 1 {
22368 let target = &targets[0];
22369 return match target.reason {
22370 RebuildSearchReason::Missing => format!("{} is missing", target.label),
22371 RebuildSearchReason::Stale { stale_files } => {
22372 let file_suffix = if stale_files == 1 { "" } else { "s" };
22373 format!(
22374 "{} is stale ({} file{})",
22375 target.label, stale_files, file_suffix
22376 )
22377 }
22378 };
22379 }
22380
22381 let missing = targets
22382 .iter()
22383 .filter(|target| matches!(target.reason, RebuildSearchReason::Missing))
22384 .count();
22385 let stale = targets.len().saturating_sub(missing);
22386 let mut parts = Vec::new();
22387 if stale > 0 {
22388 let suffix = if stale == 1 { "" } else { "es" };
22389 parts.push(format!("{stale} stale index{suffix}"));
22390 }
22391 if missing > 0 {
22392 let suffix = if missing == 1 { "" } else { "es" };
22393 parts.push(format!("{missing} missing index{suffix}"));
22394 }
22395 parts.join(", ")
22396}
22397
22398pub(crate) fn emit_degraded_search_note(
22399 targets: &[DegradedSearchTarget],
22400 mode: DegradedSearchMode,
22401) {
22402 let summary = degraded_search_targets_summary(targets);
22403 let reindex_cmd = &targets[0].reindex_cmd;
22404 match mode {
22405 DegradedSearchMode::ReadOnly => eprintln!(
22406 "note: active tsift writer detected; skipping autoindex because {}. \
22407 Continuing with read-only search and the current index snapshot; symbol hits may lag. \
22408 Retry `{}` after the active writer finishes for fresh index results.",
22409 summary, reindex_cmd
22410 ),
22411 DegradedSearchMode::Exact => eprintln!(
22412 "note: active tsift writer detected; skipping autoindex because {}. \
22413 Continuing with exact live-file search. Retry `{}` after the active writer finishes \
22414 for indexed symbol hits.",
22415 summary, reindex_cmd
22416 ),
22417 }
22418}
22419
22420fn search_timeout_message(
22421 timeout_secs: u64,
22422 strategy: &str,
22423 targets: &[SearchIndexTarget],
22424) -> Result<String> {
22425 let rebuild_targets = collect_rebuild_search_targets(targets)?;
22426 if rebuild_targets.is_empty() {
22427 return Ok(format!(
22428 "tsift search timed out after {}s (strategy: {}). \
22429 The search root looks fresh, so reindexing is unlikely to help. \
22430 Re-run with `--timeout 0` to disable the timeout, narrow `--path` / `--scope`, \
22431 or try a different strategy.",
22432 timeout_secs, strategy,
22433 ));
22434 }
22435
22436 Ok(format!(
22437 "tsift search timed out after {}s (strategy: {}). {}",
22438 timeout_secs,
22439 strategy,
22440 rebuild_search_targets_message(&rebuild_targets),
22441 ))
22442}
22443
/// Returns `true` for punctuation that typically appears inside identifiers,
/// paths, and qualified names: `-`, `_`, `/`, `\`, `.`, `:`, `#`, `@`.
fn is_exact_preferring_query_char(ch: char) -> bool {
    const MARKERS: [char; 8] = ['-', '_', '/', '\\', '.', ':', '#', '@'];
    MARKERS.contains(&ch)
}

/// Decides whether `query` looks like a single identifier-or-path token that
/// is better served by exact search.
///
/// After trimming, the query must consist solely of alphanumeric characters
/// and exact-preferring punctuation, and contain at least one of each. Empty
/// queries and queries with inner whitespace or other symbols return `false`.
fn query_prefers_exact_search(query: &str) -> bool {
    let mut saw_alphanumeric = false;
    let mut saw_marker = false;

    for ch in query.trim().chars() {
        if ch.is_alphanumeric() {
            saw_alphanumeric = true;
        } else if is_exact_preferring_query_char(ch) {
            saw_marker = true;
        } else {
            // Whitespace or any other symbol disqualifies the query.
            return false;
        }
    }

    saw_alphanumeric && saw_marker
}
22458
22459pub(crate) fn resolve_search_strategy(query: &str, strategy: Option<String>) -> String {
22460 strategy.unwrap_or_else(|| {
22461 if query_prefers_exact_search(query) {
22462 "exact".to_string()
22463 } else {
22464 "lexical".to_string()
22465 }
22466 })
22467}
22468
/// Budget-limited preview of one symbol family in a search report.
#[derive(Serialize)]
struct SearchBudgetSymbolPreview {
    /// Stable handle identifying this family in the report.
    handle: String,
    /// Canonical tag alias of the family, when one exists (omitted from the
    /// serialized form when `None`).
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Match type of the representative (first-seen) symbol hit.
    match_type: String,
    /// Kind of the representative symbol hit.
    kind: String,
    /// Representative name, with a `(+N variant[s])` suffix when the family
    /// has more than one surface name; truncated to the byte budget.
    name: String,
    /// Representative file, with a `(+N file[s])` suffix when the family
    /// spans more than one file; truncated to the byte budget.
    file: String,
    /// Line of the representative symbol hit.
    line: i64,
    /// Score of the representative symbol hit.
    score: f64,
    /// Number of raw symbol hits grouped into this family.
    match_count: usize,
    /// Number of distinct surface names in the family.
    surface_count: usize,
    /// Number of distinct files in the family.
    file_count: usize,
    /// A few example surface names (omitted from the serialized form when
    /// empty).
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    surface_examples: Vec<String>,
    /// Follow-up command that expands this family.
    expand: String,
}
22487
/// Budget-limited preview of one search hit in a search report.
#[derive(Serialize)]
struct SearchBudgetHitPreview {
    /// Stable handle identifying this hit in the report.
    handle: String,
    /// Rank of the hit as reported by the search response.
    rank: usize,
    /// Display path of the hit, truncated to the byte budget.
    path: String,
    /// Debug rendering of the hit's confidence value.
    confidence: String,
    /// Score of the hit.
    score: f64,
    /// Compacted, truncated snippet; empty when no snippet is available.
    preview: String,
    /// Follow-up command that re-runs the search narrowed to this hit's path.
    expand: String,
}
22498
/// Raw numbers behind a `SearchScaleGuard` warning.
#[derive(Serialize)]
struct SearchScaleSignals {
    /// Number of symbol previews included in the report.
    preview_symbols: usize,
    /// Total number of symbol families found.
    symbol_families: usize,
    /// Total number of raw symbol hits before grouping into families.
    raw_symbol_matches: usize,
    /// Number of hit previews included in the report.
    preview_hits: usize,
    /// Total number of hits in the search response.
    returned_hits: usize,
    /// Indexed artifact count from the search response.
    indexed_artifacts: usize,
    /// Skipped artifact count from the search response.
    skipped_artifacts: usize,
    /// Item budget the preview was limited to.
    max_items: usize,
    /// Byte budget individual preview strings were truncated to.
    max_bytes: usize,
}
22511
/// Warning attached to a search report when the result surface is broad.
#[derive(Serialize)]
struct SearchScaleGuard {
    /// `"high-hit"` when symbols or hits exceed the item budget, otherwise
    /// `"corpus-size"`.
    level: String,
    /// Human-readable warning text.
    warning: String,
    /// Numbers that triggered the guard.
    signals: SearchScaleSignals,
    /// Suggested commands for narrowing the search, deduplicated in order.
    narrow_commands: Vec<String>,
}
22519
/// Budget-limited summary of a search, as built by
/// `build_search_budget_report`.
#[derive(Serialize)]
struct SearchBudgetReport {
    /// The search query.
    query: String,
    /// The search strategy that was used.
    strategy: String,
    /// Indexed artifact count from the search response.
    indexed_artifacts: usize,
    /// Skipped artifact count from the search response.
    skipped_artifacts: usize,
    /// Maximum number of previews per section.
    max_items: usize,
    /// Byte budget individual preview strings were truncated to.
    max_bytes: usize,
    /// Total number of symbol families (before the item limit).
    symbol_total: usize,
    /// Total number of raw symbol hits (before grouping).
    raw_symbol_total: usize,
    /// Total number of hits in the response (before the item limit).
    hit_total: usize,
    /// `true` when families or hits exceeded `max_items`.
    truncated: bool,
    /// Broad-search warning, when one applies (omitted from the serialized
    /// form when `None`).
    #[serde(skip_serializing_if = "Option::is_none")]
    scale_guard: Option<SearchScaleGuard>,
    /// Symbol family previews, at most `max_items`.
    symbols: Vec<SearchBudgetSymbolPreview>,
    /// Hit previews, at most `max_items`.
    hits: Vec<SearchBudgetHitPreview>,
}
22537
/// Maximum number of distinct surface names kept as examples per symbol
/// family in a search budget report.
const SEARCH_BUDGET_SURFACE_PREVIEW_LIMIT: usize = 3;
22539
/// Accumulator for symbol hits that belong to the same family while a search
/// budget report is being built. The `representative_*` fields come from the
/// first hit seen for the family.
struct SearchBudgetSymbolFamily {
    /// Canonical family name, when the symbol maps to a tag family.
    canonical_family: Option<String>,
    /// Tag alias of the canonical family, when available.
    canonical_tag_alias: Option<String>,
    /// Name of the first hit in the family.
    representative_name: String,
    /// Kind of the first hit in the family.
    representative_kind: String,
    /// Match type of the first hit in the family.
    representative_match_type: String,
    /// Display file of the first hit in the family.
    representative_file: String,
    /// Line of the first hit in the family.
    representative_line: i64,
    /// Score of the first hit in the family.
    representative_score: f64,
    /// Distinct symbol names seen in the family.
    seen_surfaces: HashSet<String>,
    /// Distinct display files seen in the family.
    seen_files: HashSet<String>,
    /// First few distinct symbol names, truncated to the byte budget.
    surface_examples: Vec<String>,
    /// Number of raw hits added to the family.
    match_count: usize,
}
22554
22555fn search_budget_family_query(tag_alias: Option<&str>, fallback_name: &str) -> String {
22556 if let Some(alias) = tag_alias
22557 && let Some(query) = family_query_from_tag_alias(alias)
22558 {
22559 return query;
22560 }
22561 fallback_name.to_string()
22562}
22563
22564fn build_search_budget_family_expand(
22565 strategy: &str,
22566 path: &str,
22567 tag_alias: Option<&str>,
22568 fallback_name: &str,
22569) -> String {
22570 let query = search_budget_family_query(tag_alias, fallback_name);
22571 let effective_strategy = if strategy == "exact" {
22572 "lexical"
22573 } else {
22574 strategy
22575 };
22576 build_search_budget_follow_up(&query, effective_strategy, path)
22577}
22578
22579fn format_search_budget_symbol_name(name: &str, surface_count: usize, max_bytes: usize) -> String {
22580 let preview = if surface_count > 1 {
22581 let extra = surface_count - 1;
22582 let label = if extra == 1 { "variant" } else { "variants" };
22583 format!("{name} (+{extra} {label})")
22584 } else {
22585 name.to_string()
22586 };
22587 truncate_for_budget(&preview, max_bytes)
22588}
22589
22590fn format_search_budget_symbol_file(file: &str, file_count: usize, max_bytes: usize) -> String {
22591 let preview = if file_count > 1 {
22592 let extra = file_count - 1;
22593 let label = if extra == 1 { "file" } else { "files" };
22594 format!("{file} (+{extra} {label})")
22595 } else {
22596 file.to_string()
22597 };
22598 truncate_for_budget(&preview, max_bytes)
22599}
22600
22601pub(crate) fn build_search_budget_follow_up(query: &str, strategy: &str, path: &str) -> String {
22602 let mut command = format!(
22603 "tsift search {} --path {} --limit 20",
22604 shell_quote(query),
22605 shell_quote(path)
22606 );
22607 if strategy == "exact" {
22608 command.push_str(" --exact");
22609 } else if strategy != "lexical" {
22610 command.push_str(&format!(" --strategy {}", shell_quote(strategy)));
22611 }
22612 command
22613}
22614
22615fn build_search_exact_narrow_command(query: &str, path: &str, max_items: usize) -> String {
22616 format!(
22617 "tsift search {} --path {} --limit {} --exact",
22618 shell_quote(query),
22619 shell_quote(path),
22620 max_items.max(1)
22621 )
22622}
22623
22624fn build_search_path_narrow_command(query: &str, strategy: &str, path: &str) -> String {
22625 let mut command = format!(
22626 "tsift search {} --path {} --limit 20",
22627 shell_quote(query),
22628 shell_quote(path)
22629 );
22630 if strategy == "exact" {
22631 command.push_str(" --exact");
22632 } else if strategy != "lexical" {
22633 command.push_str(&format!(" --strategy {}", shell_quote(strategy)));
22634 }
22635 command
22636}
22637
22638#[allow(clippy::too_many_arguments)]
22639fn build_search_scale_guard(
22640 query: &str,
22641 strategy: &str,
22642 root: &Path,
22643 response: &sift::SearchResponse,
22644 symbol_total: usize,
22645 raw_symbol_total: usize,
22646 hit_total: usize,
22647 max_items: usize,
22648 max_bytes: usize,
22649 symbols: &[SearchBudgetSymbolPreview],
22650 hits: &[SearchBudgetHitPreview],
22651) -> Option<SearchScaleGuard> {
22652 let broad_symbols = symbol_total > max_items || raw_symbol_total > max_items;
22653 let broad_hits = hit_total > max_items;
22654 let broad_corpus = response
22655 .indexed_artifacts
22656 .saturating_add(response.skipped_artifacts)
22657 >= 250;
22658 if !broad_symbols && !broad_hits && !broad_corpus {
22659 return None;
22660 }
22661
22662 let mut narrow_commands = Vec::new();
22663 let root_path = root.to_string_lossy();
22664 if strategy != "exact" {
22665 narrow_commands.push(build_search_exact_narrow_command(
22666 query,
22667 root_path.as_ref(),
22668 max_items,
22669 ));
22670 }
22671 if let Some(symbol) = symbols.first() {
22672 narrow_commands.push(symbol.expand.clone());
22673 }
22674 if let Some(hit) = hits.first() {
22675 narrow_commands.push(build_search_path_narrow_command(query, strategy, &hit.path));
22676 }
22677 narrow_commands.push(
22678 "tsift workflow search --json # preserve handles, expand only cited parents".to_string(),
22679 );
22680
22681 Some(SearchScaleGuard {
22682 level: if broad_hits || broad_symbols {
22683 "high-hit".to_string()
22684 } else {
22685 "corpus-size".to_string()
22686 },
22687 warning: "Broad search surface: inspect the preview first and run a narrowing command before dispatching parallel agents."
22688 .to_string(),
22689 signals: SearchScaleSignals {
22690 preview_symbols: symbols.len(),
22691 symbol_families: symbol_total,
22692 raw_symbol_matches: raw_symbol_total,
22693 preview_hits: hits.len(),
22694 returned_hits: hit_total,
22695 indexed_artifacts: response.indexed_artifacts,
22696 skipped_artifacts: response.skipped_artifacts,
22697 max_items,
22698 max_bytes,
22699 },
22700 narrow_commands: dedupe_preserve_order(narrow_commands),
22701 })
22702}
22703
/// Builds the budget-limited report for a finished search.
///
/// Symbol hits are grouped into families (by canonical tag family when one
/// can be derived from the symbol, otherwise by symbol name), keeping the
/// first hit of each family as its representative. At most
/// `budget.preview_items()` families and hits are previewed, and preview
/// strings are truncated to `budget.preview_bytes()`. Paths are shown
/// relative to `root` unless `absolute` is set. A scale guard is attached
/// when `build_search_scale_guard` decides the result surface is broad.
pub(crate) fn build_search_budget_report(
    query: &str,
    strategy: &str,
    root: &Path,
    response: &sift::SearchResponse,
    symbol_hits: &[index::SymbolHit],
    absolute: bool,
    budget: ResponseBudget,
) -> SearchBudgetReport {
    let max_items = budget.preview_items();
    let max_bytes = budget.preview_bytes();
    let raw_symbol_total = symbol_hits.len();
    let hit_total = response.hits.len();
    // Maps a family key to its index in `families`, so families keep the
    // order in which they were first seen.
    let mut family_positions = HashMap::new();
    let mut families = Vec::new();

    for hit in symbol_hits {
        let display_file = if absolute {
            hit.file.clone()
        } else {
            relativize(&hit.file, root)
        };
        let canonical_family = canonical_tag_family_from_symbol(&hit.name, hit.tags.as_deref());
        // Hits without a canonical family are grouped by their own name.
        let family_key = canonical_family
            .as_ref()
            .map(|family| family.canonical.clone())
            .unwrap_or_else(|| hit.name.clone());
        let position = *family_positions.entry(family_key).or_insert_with(|| {
            // First hit of a new family becomes its representative.
            families.push(SearchBudgetSymbolFamily {
                canonical_family: canonical_family
                    .as_ref()
                    .map(|family| family.canonical.clone()),
                canonical_tag_alias: canonical_family
                    .as_ref()
                    .map(|family| family.tag_alias.clone()),
                representative_name: hit.name.clone(),
                representative_kind: hit.kind.clone(),
                representative_match_type: hit.match_type.clone(),
                representative_file: display_file.clone(),
                representative_line: hit.line,
                representative_score: hit.score,
                seen_surfaces: HashSet::new(),
                seen_files: HashSet::new(),
                surface_examples: Vec::new(),
                match_count: 0,
            });
            families.len() - 1
        });

        let family = &mut families[position];
        family.match_count += 1;
        // Record each distinct surface name once; keep only the first few as
        // examples.
        if family.seen_surfaces.insert(hit.name.clone())
            && family.surface_examples.len() < SEARCH_BUDGET_SURFACE_PREVIEW_LIMIT
        {
            family
                .surface_examples
                .push(truncate_for_budget(&hit.name, max_bytes));
        }
        family.seen_files.insert(display_file);
    }

    // Total is taken before the item limit is applied below.
    let symbol_total = families.len();
    let symbols: Vec<SearchBudgetSymbolPreview> = families
        .into_iter()
        .take(max_items)
        .map(|family| {
            let file_count = family.seen_files.len();
            let surface_count = family.seen_surfaces.len();
            // Handle input: family identity plus representative location,
            // query, and strategy.
            let key = format!(
                "{}:{}:{}:{}:{}:{}:{}",
                family
                    .canonical_family
                    .as_deref()
                    .or(family.canonical_tag_alias.as_deref())
                    .unwrap_or(&family.representative_name),
                family.canonical_tag_alias.as_deref().unwrap_or(""),
                family.representative_kind,
                family.representative_file,
                family.representative_line,
                query,
                strategy
            );
            SearchBudgetSymbolPreview {
                handle: stable_handle("sfam", &key),
                tag_alias: family
                    .canonical_tag_alias
                    .as_deref()
                    .map(|alias| truncate_for_budget(alias, max_bytes)),
                match_type: family.representative_match_type,
                kind: family.representative_kind,
                name: format_search_budget_symbol_name(
                    &family.representative_name,
                    surface_count,
                    max_bytes,
                ),
                file: format_search_budget_symbol_file(
                    &family.representative_file,
                    file_count,
                    max_bytes,
                ),
                line: family.representative_line,
                score: family.representative_score,
                match_count: family.match_count,
                surface_count,
                file_count,
                surface_examples: family.surface_examples,
                expand: build_search_budget_family_expand(
                    strategy,
                    root.to_string_lossy().as_ref(),
                    family.canonical_tag_alias.as_deref(),
                    &family.representative_name,
                ),
            }
        })
        .collect();

    let hits: Vec<SearchBudgetHitPreview> = response
        .hits
        .iter()
        .take(max_items)
        .map(|hit| {
            let display_path = if absolute {
                hit.path.clone()
            } else {
                relativize(&hit.path, root)
            };
            // Handle input: display path, rank, score, and query.
            let key = format!("{}:{}:{}:{}", display_path, hit.rank, hit.score, query);
            let preview = compact_snippet(&hit.snippet)
                .map(|snippet| truncate_for_budget(&snippet, max_bytes))
                .unwrap_or_default();
            SearchBudgetHitPreview {
                handle: stable_handle("shit", &key),
                rank: hit.rank,
                path: truncate_for_budget(&display_path, max_bytes),
                confidence: format!("{:?}", hit.confidence),
                score: hit.score,
                preview,
                // The expand command uses the untruncated display path.
                expand: build_search_budget_follow_up(query, strategy, &display_path),
            }
        })
        .collect();

    let scale_guard = build_search_scale_guard(
        query,
        strategy,
        root,
        response,
        symbol_total,
        raw_symbol_total,
        hit_total,
        max_items,
        max_bytes,
        &symbols,
        &hits,
    );

    SearchBudgetReport {
        query: query.to_string(),
        strategy: strategy.to_string(),
        indexed_artifacts: response.indexed_artifacts,
        skipped_artifacts: response.skipped_artifacts,
        max_items,
        max_bytes,
        symbol_total,
        raw_symbol_total,
        hit_total,
        truncated: symbol_total > max_items || hit_total > max_items,
        scale_guard,
        symbols,
        hits,
    }
}
22876
22877pub(crate) fn print_search_budget_human(report: &SearchBudgetReport) {
22878 println!(
22879 "search-budget q:{} strategy:{} symbols:{}/{} raw-symbols:{} hits:{}/{} indexed:{} skipped:{}",
22880 shell_quote(&report.query),
22881 report.strategy,
22882 report.symbols.len(),
22883 report.symbol_total,
22884 report.raw_symbol_total,
22885 report.hits.len(),
22886 report.hit_total,
22887 report.indexed_artifacts,
22888 report.skipped_artifacts
22889 );
22890 for symbol in &report.symbols {
22891 let variants = if symbol.surface_examples.is_empty() {
22892 String::new()
22893 } else {
22894 format!(" variants:{}", symbol.surface_examples.join(", "))
22895 };
22896 println!(
22897 "sym {} [{}] {} {}:{} sc:{} matches:{} files:{}{} expand:{}",
22898 format_symbol_preview_line(&symbol.handle, &symbol.name, symbol.tag_alias.as_deref()),
22899 symbol.match_type,
22900 symbol.kind,
22901 symbol.file,
22902 symbol.line,
22903 format_score(symbol.score, true),
22904 symbol.match_count,
22905 symbol.file_count,
22906 variants,
22907 symbol.expand
22908 );
22909 }
22910 for hit in &report.hits {
22911 if hit.preview.is_empty() {
22912 println!(
22913 "hit {} #{} {} [{} {}] expand:{}",
22914 hit.handle,
22915 hit.rank,
22916 hit.path,
22917 hit.confidence,
22918 format_score(hit.score, true),
22919 hit.expand
22920 );
22921 } else {
22922 println!(
22923 "hit {} #{} {} [{} {}] {} expand:{}",
22924 hit.handle,
22925 hit.rank,
22926 hit.path,
22927 hit.confidence,
22928 format_score(hit.score, true),
22929 hit.preview,
22930 hit.expand
22931 );
22932 }
22933 }
22934 if report.truncated {
22935 println!(
22936 "budget truncated items:{} bytes:{}",
22937 report.max_items, report.max_bytes
22938 );
22939 }
22940 if let Some(guard) = &report.scale_guard {
22941 println!("scale guard [{}]: {}", guard.level, guard.warning);
22942 println!(
22943 "signals preview-symbols:{} symbol-families:{} raw-symbols:{} preview-hits:{} hits:{} indexed:{} skipped:{} budget-items:{} budget-bytes:{}",
22944 guard.signals.preview_symbols,
22945 guard.signals.symbol_families,
22946 guard.signals.raw_symbol_matches,
22947 guard.signals.preview_hits,
22948 guard.signals.returned_hits,
22949 guard.signals.indexed_artifacts,
22950 guard.signals.skipped_artifacts,
22951 guard.signals.max_items,
22952 guard.signals.max_bytes
22953 );
22954 for command in &guard.narrow_commands {
22955 println!("narrow: {command}");
22956 }
22957 }
22958}
22959
22960pub(crate) fn collect_source_files(path: &std::path::Path) -> Result<Vec<PathBuf>> {
22961 let mut files = Vec::new();
22962 if path.is_file() {
22963 files.push(path.to_path_buf());
22964 return Ok(files);
22965 }
22966 let walker = ignore::WalkBuilder::new(path)
22967 .hidden(true)
22968 .git_ignore(true)
22969 .build();
22970 for entry in walker {
22971 let entry = entry?;
22972 if entry.file_type().is_some_and(|ft| ft.is_file()) {
22973 let p = entry.path();
22974 if let Some(ext) = p.extension() {
22975 let ext = ext.to_string_lossy();
22976 if matches!(
22977 ext.as_ref(),
22978 "rs" | "py"
22979 | "ts"
22980 | "tsx"
22981 | "js"
22982 | "jsx"
22983 | "kt"
22984 | "kts"
22985 | "zig"
22986 | "sh"
22987 | "bash"
22988 | "zsh"
22989 ) {
22990 files.push(p.to_path_buf());
22991 }
22992 }
22993 }
22994 }
22995 Ok(files)
22996}
22997
22998#[cfg(test)]
22999mod tests {
23000 use super::*;
23001
23002 use std::cell::RefCell;
23003 use substrate::{ConvexEdgeRow, ConvexGraphClient, ConvexGraphStore, ConvexNodeRow};
23004 fn parse_cli<I, T>(itr: I) -> Cli
23005 where
23006 I: IntoIterator<Item = T> + Send + 'static,
23007 T: Into<std::ffi::OsString> + Clone + Send + 'static,
23008 {
23009 std::thread::Builder::new()
23010 .name("cli-parse".to_string())
23011 .stack_size(16 * 1024 * 1024)
23012 .spawn(move || Cli::parse_from(itr))
23013 .unwrap()
23014 .join()
23015 .unwrap()
23016 }
23017
23018 fn try_parse_cli<I, T>(itr: I) -> std::result::Result<Cli, clap::Error>
23019 where
23020 I: IntoIterator<Item = T> + Send + 'static,
23021 T: Into<std::ffi::OsString> + Clone + Send + 'static,
23022 {
23023 std::thread::Builder::new()
23024 .name("cli-try-parse".to_string())
23025 .stack_size(16 * 1024 * 1024)
23026 .spawn(move || Cli::try_parse_from(itr))
23027 .unwrap()
23028 .join()
23029 .unwrap()
23030 }
23031
23032 #[derive(Default)]
23033 struct MemoryConvexGraphClient {
23034 nodes: RefCell<BTreeMap<String, ConvexNodeRow>>,
23035 edges: RefCell<BTreeMap<String, ConvexEdgeRow>>,
23036 }
23037
23038 impl ConvexGraphClient for MemoryConvexGraphClient {
23039 fn upsert_node_row(&self, row: &ConvexNodeRow) -> Result<()> {
23040 self.nodes
23041 .borrow_mut()
23042 .insert(row.external_id.clone(), row.clone());
23043 Ok(())
23044 }
23045
23046 fn upsert_edge_row(&self, row: &ConvexEdgeRow) -> Result<()> {
23047 self.edges
23048 .borrow_mut()
23049 .insert(row.edge_key.clone(), row.clone());
23050 Ok(())
23051 }
23052
23053 fn delete_node_row(&self, external_id: &str) -> Result<usize> {
23054 Ok(usize::from(
23055 self.nodes.borrow_mut().remove(external_id).is_some(),
23056 ))
23057 }
23058
23059 fn delete_edge_row(&self, edge_key: &str) -> Result<usize> {
23060 Ok(usize::from(
23061 self.edges.borrow_mut().remove(edge_key).is_some(),
23062 ))
23063 }
23064
23065 fn node_row(&self, external_id: &str) -> Result<Option<ConvexNodeRow>> {
23066 Ok(self.nodes.borrow().get(external_id).cloned())
23067 }
23068
23069 fn node_rows(&self) -> Result<Vec<ConvexNodeRow>> {
23070 Ok(self.nodes.borrow().values().cloned().collect())
23071 }
23072
23073 fn edge_rows(&self) -> Result<Vec<ConvexEdgeRow>> {
23074 Ok(self.edges.borrow().values().cloned().collect())
23075 }
23076
23077 fn node_rows_by_kind(&self, kind: &str) -> Result<Vec<ConvexNodeRow>> {
23078 Ok(self
23079 .nodes
23080 .borrow()
23081 .values()
23082 .filter(|row| row.kind == kind)
23083 .cloned()
23084 .collect())
23085 }
23086
23087 fn outgoing_edge_rows(
23088 &self,
23089 from_external_id: &str,
23090 kind: Option<&str>,
23091 ) -> Result<Vec<ConvexEdgeRow>> {
23092 Ok(self
23093 .edges
23094 .borrow()
23095 .values()
23096 .filter(|row| row.from_external_id == from_external_id)
23097 .filter(|row| kind.is_none_or(|kind| row.kind == kind))
23098 .cloned()
23099 .collect())
23100 }
23101 }
23102
23103 fn init_git_repo(path: &Path) {
23104 let status = std::process::Command::new("git")
23105 .args(["init"])
23106 .current_dir(path)
23107 .status()
23108 .unwrap();
23109 assert!(status.success(), "git init failed");
23110
23111 let status = std::process::Command::new("git")
23112 .args(["add", "."])
23113 .current_dir(path)
23114 .status()
23115 .unwrap();
23116 assert!(status.success(), "git add failed");
23117
23118 let status = std::process::Command::new("git")
23119 .args([
23120 "-c",
23121 "user.name=tsift-tests",
23122 "-c",
23123 "user.email=tsift-tests@example.com",
23124 "commit",
23125 "--quiet",
23126 "-m",
23127 "init",
23128 ])
23129 .current_dir(path)
23130 .status()
23131 .unwrap();
23132 assert!(status.success(), "git commit failed");
23133 }
23134
23135 fn write_empty_root_index(root: &Path) {
23136 let index_dir = root.join(".tsift");
23137 fs::create_dir_all(&index_dir).unwrap();
23138 fs::write(index_dir.join("index.db"), "").unwrap();
23139 }
23140
23141 fn write_repeated_lines(path: &Path, line: &str, lines: usize) -> PathBuf {
23142 if let Some(parent) = path.parent() {
23143 fs::create_dir_all(parent).unwrap();
23144 }
23145 let body = std::iter::repeat_n(line, lines)
23146 .collect::<Vec<_>>()
23147 .join("\n");
23148 fs::write(path, format!("{body}\n")).unwrap();
23149 path.to_path_buf()
23150 }
23151
23152 #[test]
23155 fn route_search_defaults_to_haiku() {
23156 let (tier, model) = classify_task("find all uses of authenticate");
23157 assert_eq!(tier, "haiku");
23158 assert!(
23159 model.contains("haiku"),
23160 "expected haiku model, got {}",
23161 model
23162 );
23163 }
23164
23165 #[test]
23166 fn route_edit_keywords_to_sonnet() {
23167 for kw in &[
23168 "edit the file",
23169 "fix the bug",
23170 "update the config",
23171 "remove dead code",
23172 "create a new module",
23173 ] {
23174 let (tier, _) = classify_task(kw);
23175 assert_eq!(tier, "sonnet", "expected sonnet for {:?}", kw);
23176 }
23177 }
23178
23179 #[test]
23180 fn route_architecture_keywords_to_opus() {
23181 for kw in &[
23182 "design the API",
23183 "architecture review",
23184 "plan the migration",
23185 "analyze the system",
23186 "evaluate trade-offs",
23187 ] {
23188 let (tier, _) = classify_task(kw);
23189 assert_eq!(tier, "opus", "expected opus for {:?}", kw);
23190 }
23191 }
23192
23193 #[test]
23194 fn route_architecture_beats_edit() {
23195 let (tier, _) = classify_task("design and implement the new auth service");
23197 assert_eq!(tier, "opus");
23198 }
23199
23200 #[test]
23201 fn cli_accepts_global_compact_flag() {
23202 let cli = parse_cli(["tsift", "--compact", "status"]);
23203 assert!(cli.compact);
23204 assert!(matches!(cli.command, Some(Commands::Status { .. })));
23205 }
23206
23207 #[test]
23208 fn summarize_diff_scope_matches_relative_directory() {
23209 let root = Path::new("/repo");
23210 let extract_scope = resolve_extract_scope(root, Path::new("src/feature")).unwrap();
23211
23212 assert!(summarize_diff_matches_scope(
23213 Path::new("/repo/src/feature/main.rs"),
23214 &extract_scope
23215 ));
23216 assert!(!summarize_diff_matches_scope(
23217 Path::new("/repo/src/other/main.rs"),
23218 &extract_scope
23219 ));
23220 }
23221
23222 #[test]
23223 fn summarize_diff_scope_matches_relative_file() {
23224 let root = Path::new("/repo");
23225 let extract_scope = resolve_extract_scope(root, Path::new("src/feature/main.rs")).unwrap();
23226
23227 assert!(summarize_diff_matches_scope(
23228 Path::new("/repo/src/feature/main.rs"),
23229 &extract_scope
23230 ));
23231 assert!(!summarize_diff_matches_scope(
23232 Path::new("/repo/src/feature/lib.rs"),
23233 &extract_scope
23234 ));
23235 }
23236
23237 #[test]
23238 fn summarize_extract_scope_walks_relative_paths_from_root() {
23239 let dir = tempfile::tempdir().unwrap();
23240 let source_dir = dir.path().join("src");
23241 std::fs::create_dir_all(&source_dir).unwrap();
23242 let main_rs = source_dir.join("main.rs");
23243 std::fs::write(&main_rs, "fn alpha() {}\n").unwrap();
23244
23245 let extract_scope = resolve_extract_scope(dir.path(), Path::new("src")).unwrap();
23246 let files = collect_source_files(&extract_scope).unwrap();
23247
23248 assert_eq!(files, vec![main_rs]);
23249 }
23250
23251 #[test]
23252 fn summarize_extract_base_uses_nested_path_instead_of_project_root() {
23253 let dir = tempfile::tempdir().unwrap();
23254 let nested = dir.path().join("src/nested");
23255 std::fs::create_dir_all(&nested).unwrap();
23256 std::fs::write(dir.path().join("root.rs"), "fn root_level() {}\n").unwrap();
23257 let nested_file = nested.join("main.rs");
23258 std::fs::write(&nested_file, "fn nested_only() {}\n").unwrap();
23259
23260 let extract_base = resolve_extract_base(&nested).unwrap();
23261 let extract_scope = resolve_extract_scope(&extract_base, Path::new(".")).unwrap();
23262 let files = collect_source_files(&extract_scope).unwrap();
23263
23264 assert_eq!(extract_scope, nested);
23265 assert_eq!(files, vec![nested_file]);
23266 }
23267
23268 #[test]
23269 fn summarize_extract_base_uses_parent_of_file_path() {
23270 let dir = tempfile::tempdir().unwrap();
23271 let nested = dir.path().join("src/nested");
23272 std::fs::create_dir_all(&nested).unwrap();
23273 let file_path = nested.join("main.rs");
23274 std::fs::write(&file_path, "fn nested_only() {}\n").unwrap();
23275
23276 let extract_base = resolve_extract_base(&file_path).unwrap();
23277
23278 assert_eq!(extract_base, nested);
23279 }
23280
23281 #[test]
23282 fn summarize_extract_scope_normalizes_dotdot_segments() {
23283 let dir = tempfile::tempdir().unwrap();
23284 let source_dir = dir.path().join("src");
23285 std::fs::create_dir_all(&source_dir).unwrap();
23286
23287 let extract_scope = resolve_extract_scope(dir.path(), Path::new("src/../src")).unwrap();
23288
23289 assert_eq!(extract_scope, source_dir.canonicalize().unwrap());
23290 assert!(summarize_diff_matches_scope(
23291 &source_dir.join("main.rs"),
23292 &extract_scope
23293 ));
23294 }
23295
23296 #[cfg(unix)]
23297 #[test]
23298 fn summarize_extract_scope_canonicalizes_absolute_symlink_paths() {
23299 use std::os::unix::fs::symlink;
23300
23301 let dir = tempfile::tempdir().unwrap();
23302 let real_root = dir.path().join("real");
23303 let source_dir = real_root.join("src");
23304 std::fs::create_dir_all(&source_dir).unwrap();
23305 let symlink_scope = dir.path().join("scope-link");
23306 symlink(&source_dir, &symlink_scope).unwrap();
23307
23308 let extract_scope = resolve_extract_scope(&real_root, &symlink_scope).unwrap();
23309
23310 assert_eq!(extract_scope, source_dir.canonicalize().unwrap());
23311 assert!(summarize_diff_matches_scope(
23312 &source_dir.join("lib.rs"),
23313 &extract_scope
23314 ));
23315 }
23316
23317 #[test]
23318 fn summarize_diff_extract_includes_untracked_files() {
23319 let dir = tempfile::tempdir().unwrap();
23320 std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
23321 init_git_repo(dir.path());
23322
23323 let source_dir = dir.path().join("src");
23324 std::fs::create_dir_all(&source_dir).unwrap();
23325 let new_file = source_dir.join("new.rs");
23326 std::fs::write(&new_file, "fn alpha_helper() {}\n").unwrap();
23327
23328 let files = summarize::git_changed_files(dir.path()).unwrap();
23329
23330 assert_eq!(files.existing, vec![new_file]);
23331 assert!(files.deleted.is_empty());
23332 }
23333
23334 #[test]
23335 fn summarize_diff_extract_treats_unborn_head_as_untracked_only() {
23336 let dir = tempfile::tempdir().unwrap();
23337 let status = std::process::Command::new("git")
23338 .args(["init"])
23339 .current_dir(dir.path())
23340 .status()
23341 .unwrap();
23342 assert!(status.success(), "git init failed");
23343
23344 let source_dir = dir.path().join("src");
23345 std::fs::create_dir_all(&source_dir).unwrap();
23346 let new_file = source_dir.join("new.rs");
23347 std::fs::write(&new_file, "fn alpha_helper() {}\n").unwrap();
23348
23349 let files = summarize::git_changed_files(dir.path()).unwrap();
23350
23351 assert_eq!(files.existing, vec![new_file]);
23352 assert!(files.deleted.is_empty());
23353 }
23354
23355 #[test]
23356 fn summarize_diff_extract_tracks_deleted_files() {
23357 let dir = tempfile::tempdir().unwrap();
23358 let source_dir = dir.path().join("src");
23359 std::fs::create_dir_all(&source_dir).unwrap();
23360 let deleted_file = source_dir.join("gone.rs");
23361 std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
23362 init_git_repo(dir.path());
23363
23364 std::fs::remove_file(&deleted_file).unwrap();
23365
23366 let files = summarize::git_changed_files(dir.path()).unwrap();
23367
23368 assert!(files.existing.is_empty());
23369 assert_eq!(files.deleted, vec![deleted_file]);
23370 }
23371
23372 #[test]
23373 fn summarize_diff_extract_tracks_git_renames() {
23374 let dir = tempfile::tempdir().unwrap();
23375 let source_dir = dir.path().join("src");
23376 std::fs::create_dir_all(&source_dir).unwrap();
23377 let old_file = source_dir.join("old.rs");
23378 let new_file = source_dir.join("new.rs");
23379 std::fs::write(&old_file, "fn stale() {}\n").unwrap();
23380 init_git_repo(dir.path());
23381
23382 let status = std::process::Command::new("git")
23383 .args(["mv", "src/old.rs", "src/new.rs"])
23384 .current_dir(dir.path())
23385 .status()
23386 .unwrap();
23387 assert!(status.success(), "git mv failed");
23388
23389 let files = summarize::git_changed_files(dir.path()).unwrap();
23390
23391 assert_eq!(files.existing, vec![new_file]);
23392 assert_eq!(files.deleted, vec![old_file]);
23393 }
23394
23395 #[test]
23396 fn summarize_diff_extract_deletes_removed_summary_rows() {
23397 let dir = tempfile::tempdir().unwrap();
23398 let source_dir = dir.path().join("src");
23399 std::fs::create_dir_all(&source_dir).unwrap();
23400 let deleted_file = source_dir.join("gone.rs");
23401 std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
23402 std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
23403 init_git_repo(dir.path());
23404
23405 let summary_db =
23406 summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23407 summary_db
23408 .insert(&summarize::Summary {
23409 id: 0,
23410 symbol_name: "stale".to_string(),
23411 file_path: "src/gone.rs".to_string(),
23412 content_hash: "hash1".to_string(),
23413 summary: "stale summary".to_string(),
23414 entities: None,
23415 relationships: None,
23416 concept_labels: None,
23417 extracted_at: "1700000000".to_string(),
23418 model: "test".to_string(),
23419 tokens_input: Some(100),
23420 tokens_output: Some(50),
23421 })
23422 .unwrap();
23423
23424 std::fs::remove_file(&deleted_file).unwrap();
23425
23426 cmd_summarize(
23427 None,
23428 None,
23429 Some(PathBuf::from("src")),
23430 true,
23431 false,
23432 dir.path(),
23433 false,
23434 true,
23435 false,
23436 false,
23437 false,
23438 )
23439 .unwrap();
23440
23441 assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
23442 }
23443
23444 #[test]
23445 fn summarize_diff_extract_deletes_renamed_summary_rows() {
23446 let dir = tempfile::tempdir().unwrap();
23447 let source_dir = dir.path().join("src");
23448 std::fs::create_dir_all(&source_dir).unwrap();
23449 let old_file = source_dir.join("old.rs");
23450 std::fs::write(&old_file, "fn stale() {}\n").unwrap();
23451 std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
23452 init_git_repo(dir.path());
23453
23454 let summary_db =
23455 summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23456 summary_db
23457 .insert(&summarize::Summary {
23458 id: 0,
23459 symbol_name: "stale".to_string(),
23460 file_path: "src/old.rs".to_string(),
23461 content_hash: "hash1".to_string(),
23462 summary: "stale summary".to_string(),
23463 entities: None,
23464 relationships: None,
23465 concept_labels: None,
23466 extracted_at: "1700000000".to_string(),
23467 model: "test".to_string(),
23468 tokens_input: Some(100),
23469 tokens_output: Some(50),
23470 })
23471 .unwrap();
23472
23473 let status = std::process::Command::new("git")
23474 .args(["mv", "src/old.rs", "src/new.rs"])
23475 .current_dir(dir.path())
23476 .status()
23477 .unwrap();
23478 assert!(status.success(), "git mv failed");
23479
23480 cmd_summarize(
23481 None,
23482 None,
23483 Some(PathBuf::from("src")),
23484 true,
23485 false,
23486 dir.path(),
23487 false,
23488 true,
23489 false,
23490 false,
23491 false,
23492 )
23493 .unwrap();
23494
23495 assert!(summary_db.get_by_file("src/old.rs").unwrap().is_empty());
23496 }
23497
23498 #[test]
23499 fn summarize_full_extract_deletes_removed_summary_rows_when_scope_is_empty() {
23500 let dir = tempfile::tempdir().unwrap();
23501 let source_dir = dir.path().join("src");
23502 std::fs::create_dir_all(&source_dir).unwrap();
23503 let deleted_file = source_dir.join("gone.rs");
23504 std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
23505
23506 let summary_db =
23507 summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23508 summary_db
23509 .insert(&summarize::Summary {
23510 id: 0,
23511 symbol_name: "stale".to_string(),
23512 file_path: "src/gone.rs".to_string(),
23513 content_hash: "hash1".to_string(),
23514 summary: "stale summary".to_string(),
23515 entities: None,
23516 relationships: None,
23517 concept_labels: None,
23518 extracted_at: "1700000000".to_string(),
23519 model: "test".to_string(),
23520 tokens_input: Some(100),
23521 tokens_output: Some(50),
23522 })
23523 .unwrap();
23524
23525 std::fs::remove_file(&deleted_file).unwrap();
23526
23527 cmd_summarize(
23528 None,
23529 None,
23530 Some(PathBuf::from("src")),
23531 false,
23532 false,
23533 dir.path(),
23534 false,
23535 true,
23536 false,
23537 false,
23538 false,
23539 )
23540 .unwrap();
23541
23542 assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
23543 }
23544
23545 #[test]
23546 fn summarize_extract_fails_fast_when_summary_writer_lock_is_live() {
23547 let dir = tempfile::tempdir().unwrap();
23548 let source_dir = dir.path().join("src");
23549 std::fs::create_dir_all(&source_dir).unwrap();
23550 let file = source_dir.join("lib.rs");
23551 std::fs::write(&file, "fn helper() {}\n").unwrap();
23552
23553 let content = std::fs::read(&file).unwrap();
23554 let summary_db =
23555 summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23556 summary_db
23557 .insert(&summarize::Summary {
23558 id: 0,
23559 symbol_name: "lib.rs".to_string(),
23560 file_path: "src/lib.rs".to_string(),
23561 content_hash: summarize::content_hash(&content),
23562 summary: "cached summary".to_string(),
23563 entities: None,
23564 relationships: None,
23565 concept_labels: None,
23566 extracted_at: "1700000000".to_string(),
23567 model: "test".to_string(),
23568 tokens_input: Some(100),
23569 tokens_output: Some(50),
23570 })
23571 .unwrap();
23572 drop(summary_db);
23573
23574 let lock_path = summarize::writer_lock_path(&dir.path().join(".tsift/summaries.db"));
23575 let _lock = hold_writer_lock(&lock_path);
23576
23577 let err = cmd_summarize(
23578 None,
23579 None,
23580 Some(PathBuf::from("src")),
23581 false,
23582 false,
23583 dir.path(),
23584 false,
23585 true,
23586 false,
23587 false,
23588 false,
23589 )
23590 .unwrap_err();
23591 let message = err.to_string();
23592
23593 assert!(message.contains("another tsift summarize extractor is already active"));
23594 assert!(message.contains("tsift summarize --extract"));
23595 }
23596
23597 #[test]
23598 fn summarize_stats_fails_closed_when_cache_missing() {
23599 let dir = tempfile::tempdir().unwrap();
23600 let err = cmd_summarize(
23601 None,
23602 None,
23603 None,
23604 false,
23605 true,
23606 dir.path(),
23607 false,
23608 false,
23609 false,
23610 false,
23611 false,
23612 )
23613 .unwrap_err();
23614
23615 assert!(
23616 err.to_string().contains("no summaries.db found"),
23617 "got: {err}"
23618 );
23619 assert!(!dir.path().join(".tsift/summaries.db").exists());
23620 }
23621
23622 #[test]
23623 fn summarize_stats_uses_snapshot_fallback_when_rollback_journal_is_locked() {
23624 let dir = tempfile::tempdir().unwrap();
23625 let summary_db =
23626 summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23627 summary_db
23628 .insert(&summarize::Summary {
23629 id: 0,
23630 symbol_name: "alpha_helper".to_string(),
23631 file_path: "src/lib.rs".to_string(),
23632 content_hash: "hash1".to_string(),
23633 summary: "cached summary".to_string(),
23634 entities: None,
23635 relationships: None,
23636 concept_labels: None,
23637 extracted_at: "1700000000".to_string(),
23638 model: "claude-haiku-4-5-20251001".to_string(),
23639 tokens_input: Some(100),
23640 tokens_output: Some(40),
23641 })
23642 .unwrap();
23643 drop(summary_db);
23644 let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/summaries.db"));
23645
23646 let result = cmd_summarize(
23647 None,
23648 None,
23649 None,
23650 false,
23651 true,
23652 dir.path(),
23653 false,
23654 false,
23655 false,
23656 false,
23657 false,
23658 );
23659
23660 assert!(result.is_ok());
23661 }
23662
23663 #[test]
23664 fn summarize_symbol_query_uses_snapshot_fallback_when_rollback_journal_is_locked() {
23665 let dir = tempfile::tempdir().unwrap();
23666 let summary_db =
23667 summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23668 summary_db
23669 .insert(&summarize::Summary {
23670 id: 0,
23671 symbol_name: "alpha_helper".to_string(),
23672 file_path: "src/lib.rs".to_string(),
23673 content_hash: "hash1".to_string(),
23674 summary: "cached summary".to_string(),
23675 entities: None,
23676 relationships: None,
23677 concept_labels: None,
23678 extracted_at: "1700000000".to_string(),
23679 model: "claude-haiku-4-5-20251001".to_string(),
23680 tokens_input: Some(100),
23681 tokens_output: Some(40),
23682 })
23683 .unwrap();
23684 drop(summary_db);
23685 let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/summaries.db"));
23686
23687 let result = cmd_summarize(
23688 Some("alpha_helper".to_string()),
23689 None,
23690 None,
23691 false,
23692 false,
23693 dir.path(),
23694 false,
23695 true,
23696 false,
23697 false,
23698 false,
23699 );
23700
23701 assert!(result.is_ok());
23702 }
23703
23704 #[test]
23705 fn summarize_cmd_uses_ancestor_project_root_for_nested_paths() {
23706 let dir = tempfile::tempdir().unwrap();
23707 let nested = dir.path().join("src/nested");
23708 std::fs::create_dir_all(&nested).unwrap();
23709
23710 let summary_db =
23711 summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23712 summary_db
23713 .insert(&summarize::Summary {
23714 id: 0,
23715 symbol_name: "alpha_helper".to_string(),
23716 file_path: "src/lib.rs".to_string(),
23717 content_hash: "hash1".to_string(),
23718 summary: "cached summary".to_string(),
23719 entities: None,
23720 relationships: None,
23721 concept_labels: None,
23722 extracted_at: "1700000000".to_string(),
23723 model: "claude-haiku-4-5-20251001".to_string(),
23724 tokens_input: Some(100),
23725 tokens_output: Some(40),
23726 })
23727 .unwrap();
23728
23729 let result = cmd_summarize(
23730 Some("alpha_helper".to_string()),
23731 None,
23732 None,
23733 false,
23734 false,
23735 &nested,
23736 false,
23737 true,
23738 false,
23739 false,
23740 false,
23741 );
23742
23743 assert!(result.is_ok());
23744 assert!(!nested.join(".tsift/summaries.db").exists());
23745 }
23746
23747 #[test]
23748 fn summarize_extract_uses_matching_scoped_index_for_workspace_file() {
23749 let dir = tempfile::tempdir().unwrap();
23750 std::fs::write(
23751 dir.path().join(".gitmodules"),
23752 r#"[submodule "src/alpha"]
23753 path = src/alpha
23754 url = https://example.com/alpha
23755[submodule "src/beta"]
23756 path = src/beta
23757 url = https://example.com/beta
23758"#,
23759 )
23760 .unwrap();
23761
23762 let alpha_root = dir.path().join("src/alpha");
23763 let beta_root = dir.path().join("src/beta");
23764 std::fs::create_dir_all(alpha_root.join("src")).unwrap();
23765 std::fs::create_dir_all(beta_root.join("src")).unwrap();
23766 std::fs::create_dir_all(dir.path().join(".tsift/indexes/alpha")).unwrap();
23767 std::fs::create_dir_all(dir.path().join(".tsift/indexes/beta")).unwrap();
23768 std::fs::write(alpha_root.join("src/lib.rs"), "fn alpha_helper() {}\n").unwrap();
23769 let beta_file = beta_root.join("src/lib.rs");
23770 std::fs::write(&beta_file, "fn beta_helper() {}\n").unwrap();
23771 std::fs::write(dir.path().join(".tsift/indexes/alpha/index.db"), "").unwrap();
23772 std::fs::write(dir.path().join(".tsift/indexes/beta/index.db"), "").unwrap();
23773
23774 let context = find_symbols_db_for_file(dir.path(), &beta_file)
23775 .unwrap()
23776 .expect("expected matching scoped index");
23777
23778 assert_eq!(
23779 context.db_path,
23780 dir.path().join(".tsift/indexes/beta/index.db")
23781 );
23782 assert_eq!(context.source_root, beta_root);
23783 }
23784
23785 fn make_op(old: &str, new: &str, replace_all: bool) -> EditOp {
23788 EditOp {
23789 file: PathBuf::from("dummy.txt"),
23790 old: old.to_string(),
23791 new: new.to_string(),
23792 replace_all,
23793 }
23794 }
23795
23796 #[test]
23797 fn edit_replaces_single_occurrence() {
23798 let content = "hello world";
23799 let op = make_op("world", "rust", false);
23800 let (result, count) = apply_edit_op(content, &op).unwrap();
23801 assert_eq!(result, "hello rust");
23802 assert_eq!(count, 1);
23803 }
23804
23805 #[test]
23806 fn edit_replace_all_replaces_every_occurrence() {
23807 let content = "foo foo foo";
23808 let op = make_op("foo", "bar", true);
23809 let (result, count) = apply_edit_op(content, &op).unwrap();
23810 assert_eq!(result, "bar bar bar");
23811 assert_eq!(count, 3);
23812 }
23813
23814 #[test]
23815 fn edit_fails_when_old_not_found() {
23816 let content = "hello world";
23817 let op = make_op("missing", "x", false);
23818 assert!(apply_edit_op(content, &op).is_err());
23819 }
23820
23821 #[test]
23822 fn edit_fails_when_ambiguous_without_replace_all() {
23823 let content = "foo foo";
23824 let op = make_op("foo", "bar", false);
23825 let err = apply_edit_op(content, &op).unwrap_err();
23826 assert!(err.to_string().contains("2 times"), "got: {}", err);
23827 }
23828
23829 #[test]
23830 fn edit_fails_when_old_equals_new() {
23831 let content = "hello";
23832 let op = make_op("hello", "hello", false);
23833 assert!(apply_edit_op(content, &op).is_err());
23834 }
23835
23836 #[test]
23837 fn edit_batch_rolls_back_when_later_swap_fails() {
23838 let dir = tempfile::tempdir().unwrap();
23839 let alpha = dir.path().join("alpha.txt");
23840 let beta = dir.path().join("beta.txt");
23841 fs::write(&alpha, "alpha old\n").unwrap();
23842 fs::write(&beta, "beta old\n").unwrap();
23843
23844 let batch = EditBatch {
23845 edits: vec![
23846 EditOp {
23847 file: alpha.clone(),
23848 old: "old".to_string(),
23849 new: "new".to_string(),
23850 replace_all: false,
23851 },
23852 EditOp {
23853 file: beta.clone(),
23854 old: "old".to_string(),
23855 new: "new".to_string(),
23856 replace_all: false,
23857 },
23858 ],
23859 };
23860
23861 let plan = build_edit_plan(&batch).unwrap();
23862 let err = match apply_edit_plan_atomically_inner(plan, |commit_index, _| {
23863 if commit_index == 1 {
23864 bail!("simulated swap failure");
23865 }
23866 Ok(())
23867 }) {
23868 Ok(_) => panic!("expected simulated swap failure"),
23869 Err(err) => err,
23870 };
23871
23872 assert!(err.to_string().contains("simulated swap failure"));
23873 assert_eq!(fs::read_to_string(&alpha).unwrap(), "alpha old\n");
23874 assert_eq!(fs::read_to_string(&beta).unwrap(), "beta old\n");
23875 }
23876
23877 fn setup_test_db() -> (tempfile::NamedTempFile, Connection) {
23880 let tmp = tempfile::NamedTempFile::new().unwrap();
23881 let conn = Connection::open(tmp.path()).unwrap();
23882 conn.execute_batch(
23883 "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT);
23884 INSERT INTO users VALUES (1, 'Alice', 'alice@example.com');
23885 INSERT INTO users VALUES (2, 'Bob', NULL);
23886 CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER NOT NULL, title TEXT NOT NULL, body TEXT,
23887 FOREIGN KEY(user_id) REFERENCES users(id));
23888 INSERT INTO posts VALUES (1, 1, 'Hello World', 'First post');
23889 INSERT INTO posts VALUES (2, 1, 'Second', NULL);
23890 INSERT INTO posts VALUES (3, 2, 'Bob post', 'Content here');"
23891 ).unwrap();
23892 (tmp, conn)
23893 }
23894
23895 #[test]
23898 fn rewrite_rg_simple_pattern() {
23899 let result = rewrite_command("rg authenticate");
23900 assert_eq!(
23901 result,
23902 Some("tsift --envelope search \"authenticate\" --exact --budget normal".to_string(),)
23903 );
23904 }
23905
23906 #[test]
23907 fn rewrite_rg_with_path() {
23908 let result = rewrite_command("rg authenticate src/");
23909 assert_eq!(
23910 result,
23911 Some(
23912 "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\""
23913 .to_string()
23914 )
23915 );
23916 }
23917
23918 #[test]
23919 fn rewrite_rg_with_flags_ignored() {
23920 let result = rewrite_command("rg -i authenticate src/");
23921 assert_eq!(
23922 result,
23923 Some(
23924 "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\""
23925 .to_string()
23926 )
23927 );
23928 }
23929
23930 #[test]
23931 fn rewrite_rg_with_type_flag() {
23932 let result = rewrite_command("rg -t rs authenticate");
23934 assert_eq!(
23935 result,
23936 Some("tsift --envelope search \"authenticate\" --exact --budget normal".to_string())
23937 );
23938 }
23939
23940 #[test]
23941 fn rewrite_rg_pipe_passthrough() {
23942 let result = rewrite_command("rg authenticate | head -5");
23944 assert_eq!(result, None);
23945 }
23946
23947 #[test]
23948 fn rewrite_rg_files_passthrough() {
23949 let result = rewrite_command("rg --files src/tsift .agent-doc logs");
23950 assert_eq!(result, None);
23951 }
23952
23953 #[test]
23954 fn rewrite_find_passthrough() {
23955 let result = rewrite_command("find src/tsift .agent-doc -type f -name '*.rs'");
23956 assert_eq!(result, None);
23957 }
23958
23959 #[test]
23960 fn rewrite_grep_recursive() {
23961 let result = rewrite_command("grep -r authenticate src/");
23962 assert_eq!(
23963 result,
23964 Some(
23965 "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\""
23966 .to_string()
23967 )
23968 );
23969 }
23970
23971 #[test]
23972 fn rewrite_grep_non_recursive_passthrough() {
23973 let result = rewrite_command("grep authenticate file.txt");
23974 assert_eq!(result, None);
23975 }
23976
23977 #[test]
23978 fn rewrite_tsift_passthrough() {
23979 let result = rewrite_command("tsift search \"foo\"");
23980 assert_eq!(result, Some("tsift search \"foo\"".to_string()));
23981 }
23982
23983 #[test]
23984 fn rewrite_run_tsift_search_disables_timeout_by_default() {
23985 let result = effective_rewrite_run_command("tsift search hookcaps --exact --path /tmp/x");
23986 assert_eq!(
23987 result,
23988 "tsift search hookcaps --exact --path /tmp/x --timeout 0"
23989 );
23990 }
23991
23992 #[test]
23993 fn rewrite_run_preserves_explicit_search_timeout() {
23994 let result = effective_rewrite_run_command(
23995 "tsift search hookcaps --exact --path /tmp/x --timeout 5",
23996 );
23997 assert_eq!(
23998 result,
23999 "tsift search hookcaps --exact --path /tmp/x --timeout 5"
24000 );
24001 }
24002
24003 #[test]
24004 fn rewrite_unrelated_passthrough() {
24005 let result = rewrite_command("echo cargo build");
24006 assert_eq!(result, None);
24007 }
24008
24009 #[test]
24010 fn rewrite_rg_quoted_pattern() {
24011 let result = rewrite_command("rg \"fn main\"");
24012 assert_eq!(
24013 result,
24014 Some("tsift --envelope search \"fn main\" --exact --budget normal".to_string())
24015 );
24016 }
24017
/// Bare `git diff` maps to `tsift diff-digest .`.
#[test]
fn rewrite_git_diff_to_diff_digest() {
    assert_eq!(
        rewrite_command("git diff").as_deref(),
        Some("tsift diff-digest .")
    );
}
24023
/// `git diff --cached` maps to `tsift diff-digest --cached .`.
#[test]
fn rewrite_git_diff_cached_to_diff_digest() {
    assert_eq!(
        rewrite_command("git diff --cached").as_deref(),
        Some("tsift diff-digest --cached .")
    );
}
24029
/// A pathspec after `--` becomes the quoted target of `tsift diff-digest`.
#[test]
fn rewrite_git_diff_with_path_to_diff_digest() {
    assert_eq!(
        rewrite_command("git diff -- src/").as_deref(),
        Some(r#"tsift diff-digest "src/""#)
    );
}
24035
/// `git diff` given a revision argument is not rewritten.
#[test]
fn rewrite_git_diff_with_revision_passthrough() {
    assert_eq!(rewrite_command("git diff HEAD~1"), None);
}
24041
/// `git show REV` maps to `tsift diff-digest --revision "REV" .`.
#[test]
fn rewrite_git_show_to_revision_diff_digest() {
    let rewritten = rewrite_command("git show HEAD~1");
    assert_eq!(
        rewritten.as_deref(),
        Some(r#"tsift diff-digest --revision "HEAD~1" ."#)
    );
}
24050
/// `git log -p -1 REV` maps to the same revision digest form as `git show REV`.
#[test]
fn rewrite_git_log_patch_history_to_revision_diff_digest() {
    let rewritten = rewrite_command("git log -p -1 HEAD~2");
    assert_eq!(
        rewritten.as_deref(),
        Some(r#"tsift diff-digest --revision "HEAD~2" ."#)
    );
}
24059
/// `cat` on a markdown file with `agent_doc_session` front matter and 90 prompt lines
/// is rewritten to `tsift session-digest ... --source markdown`.
#[test]
fn rewrite_cat_long_agent_doc_session_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("tsift.md");
    let prompts: String = (0..90)
        .map(|index| format!("❯ prompt {index}?\n"))
        .collect();
    fs::write(
        &session,
        format!("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n{prompts}"),
    )
    .unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let result = rewrite_command(&format!("cat {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source markdown"
    );
    assert_eq!(result, Some(expected));
}
24080
/// `head -n 120` on a 120-line JSONL file of assistant `message` records is rewritten
/// to `tsift session-digest ... --source claude-jsonl`.
#[test]
fn rewrite_head_long_claude_jsonl_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("session.jsonl");
    let line =
        r#"{"message":{"role":"assistant","content":[{"type":"text","text":"❯ do [#yyhd]"}]}}"#;
    // 120 identical records, each terminated by a newline.
    fs::write(&session, format!("{line}\n").repeat(120)).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let result = rewrite_command(&format!("head -n 120 {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source claude-jsonl"
    );
    assert_eq!(result, Some(expected));
}
24105
/// `head -n 120` on a 120-line JSONL file of `event_msg` records is rewritten
/// to `tsift session-digest ... --source codex-jsonl`.
#[test]
fn rewrite_head_long_codex_jsonl_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("codex.jsonl");
    let line = r#"{"type":"event_msg","payload":{"type":"user_message","message":"do [#cdxlog]. spec-test-build-install-commit-push"}}"#;
    // 120 identical records, each terminated by a newline.
    fs::write(&session, format!("{line}\n").repeat(120)).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let result = rewrite_command(&format!("head -n 120 {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source codex-jsonl"
    );
    assert_eq!(result, Some(expected));
}
24129
/// A 20-line `tail` window over a 120-line JSONL transcript is not rewritten.
#[test]
fn rewrite_small_transcript_window_passthrough() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("session.jsonl");
    let line = r#"{"message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}"#;
    // 120 identical records, each terminated by a newline.
    fs::write(&session, format!("{line}\n").repeat(120)).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("tail -n 20 {session_arg}")), None);
}
24146
/// `sed -n '1,120p'` on an agent-doc markdown session is rewritten to
/// `tsift session-digest ... --source markdown`.
#[test]
fn rewrite_sed_large_agent_doc_range_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("tsift.md");
    let headings: String = (0..120)
        .map(|index| format!("### Re: topic {index}\n"))
        .collect();
    fs::write(
        &session,
        format!("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n{headings}"),
    )
    .unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let result = rewrite_command(&format!("sed -n '1,120p' {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source markdown"
    );
    assert_eq!(result, Some(expected));
}
24170
/// `cat` on a 120-line `.log` file of `claude_start` entries is rewritten to
/// `tsift session-digest ... --source agent-doc-log`.
#[test]
fn rewrite_cat_large_agent_doc_log_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("tsift.log");
    let line = "[1776528398] claude_start mode=fresh_restart restart_count=1";
    // 120 identical entries, each terminated by a newline.
    fs::write(&session, format!("{line}\n").repeat(120)).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let result = rewrite_command(&format!("cat {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source agent-doc-log"
    );
    assert_eq!(result, Some(expected));
}
24191
/// When the session file lives inside a directory declared as a submodule, the
/// rewritten `session-digest` command uses the submodule directory as `--path`.
#[test]
fn rewrite_session_reads_prefer_submodule_root_for_digest_path() {
    let dir = tempfile::tempdir().unwrap();
    // Declare `src/tsift` as a submodule of the temporary root.
    fs::write(
        dir.path().join(".gitmodules"),
        r#"[submodule "src/tsift"]
	path = src/tsift
	url = https://example.com/tsift
"#,
    )
    .unwrap();
    let submodule = dir.path().join("src/tsift");
    fs::create_dir_all(submodule.join("tasks")).unwrap();
    // Here `.git` is a plain file holding a `gitdir:` pointer, not a directory.
    fs::write(
        submodule.join(".git"),
        "gitdir: ../../.git/modules/src/tsift\n",
    )
    .unwrap();
    // Agent-doc session with 90 prompt lines, stored under the submodule.
    let session = submodule.join("tasks/plan.md");
    let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
    for index in 0..90 {
        body.push_str(&format!("❯ prompt {index}?\n"));
    }
    fs::write(&session, body).unwrap();

    let result = rewrite_command(&format!("cat {}", shell_quote(session.to_str().unwrap())));

    // `--path` must be the submodule directory, not the outer temp root.
    assert_eq!(
        result,
        Some(format!(
            "tsift session-digest --path {} --input {} --source markdown",
            shell_quote(submodule.to_str().unwrap()),
            shell_quote(session.to_str().unwrap())
        ))
    );
}
24228
/// `cat` on a 120-line markdown file without agent-doc front matter is not rewritten.
#[test]
fn rewrite_regular_markdown_read_passthrough() {
    let dir = tempfile::tempdir().unwrap();
    let readme = dir.path().join("README.md");
    // 120 identical lines, each terminated by a newline.
    fs::write(&readme, "plain markdown\n".repeat(120)).unwrap();

    let readme_arg = shell_quote(readme.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("cat {readme_arg}")), None);
}
24241
/// With a root index present, `cat` on a 120-line Rust file becomes an enveloped
/// `source-read` starting at line 1 and limited to 80 lines.
#[test]
fn rewrite_cat_large_source_to_source_read_in_indexed_repo() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    write_empty_root_index(root);
    let source = write_repeated_lines(&root.join("src/lib.rs"), "fn demo() {}", 120);

    let source_arg = shell_quote(source.to_str().unwrap());
    let result = rewrite_command(&format!("cat {source_arg}"));

    let root_arg = shell_quote(&root.to_string_lossy());
    let expected = format!(
        r#"tsift --envelope source-read "src/lib.rs" --path {root_arg} --start 1 --lines 80 --budget normal"#
    );
    assert_eq!(result, Some(expected));
}
24258
/// A 20-line `head` window over an indexed source file is not rewritten.
#[test]
fn rewrite_head_small_source_window_passthrough() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    write_empty_root_index(root);
    let source = write_repeated_lines(&root.join("src/lib.rs"), "fn demo() {}", 120);

    let source_arg = shell_quote(source.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("head -n 20 {source_arg}")), None);
}
24272
/// `sed -n '40,160p'` on an indexed source file becomes a `source-read` with
/// `--start 40 --lines 121` (the inclusive range length).
#[test]
fn rewrite_sed_large_source_range_to_source_read() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    write_empty_root_index(root);
    let source = write_repeated_lines(&root.join("src/lib.rs"), "fn demo() {}", 200);

    let source_arg = shell_quote(source.to_str().unwrap());
    let result = rewrite_command(&format!("sed -n '40,160p' {source_arg}"));

    let root_arg = shell_quote(&root.to_string_lossy());
    let expected = format!(
        r#"tsift --envelope source-read "src/lib.rs" --path {root_arg} --start 40 --lines 121 --budget normal"#
    );
    assert_eq!(result, Some(expected));
}
24292
/// `tail -n 120` on a 200-line indexed source file becomes a `source-read` with
/// `--start 81 --lines 120`, i.e. the window stays anchored at the end of the file.
#[test]
fn rewrite_tail_large_source_window_preserves_tail_anchor() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    write_empty_root_index(root);
    let source = write_repeated_lines(&root.join("src/lib.rs"), "fn demo() {}", 200);

    let source_arg = shell_quote(source.to_str().unwrap());
    let result = rewrite_command(&format!("tail -n 120 {source_arg}"));

    let root_arg = shell_quote(&root.to_string_lossy());
    let expected = format!(
        r#"tsift --envelope source-read "src/lib.rs" --path {root_arg} --start 81 --lines 120 --budget normal"#
    );
    assert_eq!(result, Some(expected));
}
24312
/// A 120-line `.txt` file is not rewritten even though a root index exists.
#[test]
fn rewrite_large_non_source_read_passthrough_even_when_indexed() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    write_empty_root_index(root);
    let text = write_repeated_lines(&root.join("notes.txt"), "plain text", 120);

    let text_arg = shell_quote(text.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("cat {text_arg}")), None);
}
24323
/// Without a root index, `cat` on a 120-line source file is not rewritten.
#[test]
fn rewrite_large_source_read_passthrough_without_index() {
    let dir = tempfile::tempdir().unwrap();
    let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);

    let source_arg = shell_quote(source.to_str().unwrap());
    assert_eq!(rewrite_command(&format!("cat {source_arg}")), None);
}
24333
/// `cargo test` is wrapped in the `__digest-runner` with kind `test` and runner `cargo`.
#[test]
fn rewrite_cargo_test_to_digest_runner() {
    let expected = r#"tsift --envelope __digest-runner --kind "test" --path "." --shell-command "cargo test --lib" --runner "cargo""#;
    assert_eq!(
        rewrite_command("cargo test --lib").as_deref(),
        Some(expected)
    );
}
24344
/// `pytest` is wrapped in the `__digest-runner` with kind `test` and runner `pytest`.
#[test]
fn rewrite_pytest_to_digest_runner() {
    let expected = r#"tsift --envelope __digest-runner --kind "test" --path "." --shell-command "pytest -q tests/test_cli.py" --runner "pytest""#;
    assert_eq!(
        rewrite_command("pytest -q tests/test_cli.py").as_deref(),
        Some(expected)
    );
}
24355
/// `python -m pytest` is recognised as a pytest run and tagged with runner `pytest`.
#[test]
fn rewrite_python_m_pytest_to_digest_runner() {
    let expected = r#"tsift --envelope __digest-runner --kind "test" --path "." --shell-command "python -m pytest tests/test_cli.py" --runner "pytest""#;
    assert_eq!(
        rewrite_command("python -m pytest tests/test_cli.py").as_deref(),
        Some(expected)
    );
}
24366
/// `cargo build` is wrapped in the `__digest-runner` with kind `log` and no `--runner`.
#[test]
fn rewrite_cargo_build_to_log_digest_runner() {
    let expected = r#"tsift --envelope __digest-runner --kind "log" --path "." --shell-command "cargo build --release""#;
    assert_eq!(
        rewrite_command("cargo build --release").as_deref(),
        Some(expected)
    );
}
24377
/// `cargo install` is wrapped in the `__digest-runner` with kind `log`.
#[test]
fn rewrite_cargo_install_to_log_digest_runner() {
    let expected = r#"tsift --envelope __digest-runner --kind "log" --path "." --shell-command "cargo install --path . --force""#;
    assert_eq!(
        rewrite_command("cargo install --path . --force").as_deref(),
        Some(expected)
    );
}
24388
/// A command containing a shell pipe is not rewritten.
#[test]
fn rewrite_metacharacter_command_passthrough() {
    assert_eq!(rewrite_command("cargo test | head"), None);
}
24394
/// A global flag before `search` does not hide the subcommand: the cap is
/// 50 lines with the `Strategy:` header stripped.
#[test]
fn rewrite_output_cap_detects_search_even_with_global_flag() {
    let cap = rewrite_output_cap("tsift --compact search foo").expect("cap");
    assert_eq!(
        (cap.max_lines, cap.strip_prefix),
        (50, Some("Strategy:"))
    );
}
24401
/// No output cap is produced for commands using `--json`, `--schema` or `--envelope`.
#[test]
fn rewrite_output_cap_skips_structured_output() {
    let structured = [
        "tsift search foo --json",
        "tsift --schema graph foo",
        "tsift --envelope search foo",
    ];
    for command in structured {
        assert!(rewrite_output_cap(command).is_none());
    }
}
24408
/// With `envelope` (and `json_output`) set, the digest-runner command comes back
/// with a single `--envelope` flag and nothing else added.
#[test]
fn rewrite_output_format_forwards_envelope_to_digest_runner() {
    let format = OutputFormat {
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        schema: false,
        envelope: true,
    };
    let command = rewrite_command("cargo test --lib").expect("rewrite");

    let forwarded = apply_rewrite_output_format(&command, format);

    assert_eq!(
        forwarded,
        r#"tsift --envelope __digest-runner --kind "test" --path "." --shell-command "cargo test --lib" --runner "cargo""#
    );
}
24428
/// With `json_output` and `pretty` set (no `envelope`), the forwarded command gains
/// `--pretty` ahead of the existing `--envelope`.
#[test]
fn rewrite_output_format_forwards_json_when_requested() {
    let format = OutputFormat {
        json_output: true,
        compact: false,
        pretty: true,
        terse: false,
        schema: false,
        envelope: false,
    };
    let command = rewrite_command("cargo build --release").expect("rewrite");

    let forwarded = apply_rewrite_output_format(&command, format);

    assert_eq!(
        forwarded,
        r#"tsift --pretty --envelope __digest-runner --kind "log" --path "." --shell-command "cargo build --release""#
    );
}
24448
/// The `Strategy:` header and the blank line after it are dropped, output is cut to
/// `max_lines`, and a trailer reports how many lines were omitted.
#[test]
fn output_cap_strips_search_header_and_truncates() {
    let raw = b"Strategy: exact | Indexed: 0 | Skipped: 0\n\nline1\nline2\nline3\n";
    let cap = OutputCap {
        max_lines: 2,
        strip_prefix: Some("Strategy:"),
    };

    let capped = apply_output_cap(raw, cap);

    assert_eq!(
        capped,
        "line1\nline2\n... (+1 more lines; rerun the underlying tsift command directly for the full output)\n"
    );
}
24463
/// `schema_overview` reports the fixture tables as `posts`, then `users`.
#[test]
fn sql_schema_overview_lists_tables() {
    let (_tmp, conn) = setup_test_db();
    let overview = schema_overview(&conn).unwrap();
    let table_names: Vec<&str> = overview
        .iter()
        .map(|table| table.name.as_str())
        .collect();
    assert_eq!(table_names, &["posts", "users"]);
}
24471
/// `schema_overview` reports 2 rows for `users` and 3 for `posts` in the fixture database.
#[test]
fn sql_schema_overview_row_counts() {
    let (_tmp, conn) = setup_test_db();
    let overview = schema_overview(&conn).unwrap();
    let users = overview
        .iter()
        .find(|table| table.name == "users")
        .unwrap();
    let posts = overview
        .iter()
        .find(|table| table.name == "posts")
        .unwrap();
    assert_eq!((users.row_count, posts.row_count), (2, 3));
}
24481
/// `table_columns` on `users` yields `id` (primary key), `name` (NOT NULL) and
/// `email` (nullable), in that order.
#[test]
fn sql_table_columns_metadata() {
    let (_tmp, conn) = setup_test_db();
    let cols = table_columns(&conn, "users").unwrap();
    assert_eq!(cols.len(), 3);

    let (id, name, email) = (&cols[0], &cols[1], &cols[2]);
    assert_eq!(id.name, "id");
    assert!(id.pk);
    assert_eq!(name.name, "name");
    assert!(name.notnull);
    assert_eq!(email.name, "email");
    assert!(!email.notnull);
}
24494
/// `execute_query` returns the selected column names plus JSON row values; a NULL
/// column surfaces as `serde_json::Value::Null`.
#[test]
fn sql_execute_query_returns_rows() {
    let (_tmp, conn) = setup_test_db();
    let sql = "SELECT name, email FROM users ORDER BY id";
    let (columns, rows) = execute_query(&conn, sql).unwrap();

    assert_eq!(columns, &["name", "email"]);
    assert_eq!(rows.len(), 2);

    let (first, second) = (&rows[0], &rows[1]);
    assert_eq!(first[0], serde_json::json!("Alice"));
    assert_eq!(first[1], serde_json::json!("alice@example.com"));
    assert_eq!(second[1], serde_json::Value::Null);
}
24506
/// An aliased `COUNT(*)` comes back under its alias with the count as a JSON number.
#[test]
fn sql_execute_query_aggregate() {
    let (_tmp, conn) = setup_test_db();
    let sql = "SELECT COUNT(*) as cnt FROM posts";
    let (columns, rows) = execute_query(&conn, sql).unwrap();
    assert_eq!(columns, &["cnt"]);
    let count = &rows[0][0];
    assert_eq!(*count, serde_json::json!(3));
}
24514
/// A join across `users` and `posts` returns three rows ordered by post id, the
/// first authored by Alice and the last by Bob.
#[test]
fn sql_execute_query_join() {
    let (_tmp, conn) = setup_test_db();
    let sql =
        "SELECT u.name, p.title FROM users u JOIN posts p ON u.id = p.user_id ORDER BY p.id";
    let (_cols, rows) = execute_query(&conn, sql).unwrap();

    assert_eq!(rows.len(), 3);
    let (first, last) = (&rows[0], &rows[2]);
    assert_eq!(first[0], serde_json::json!("Alice"));
    assert_eq!(last[0], serde_json::json!("Bob"));
}
24527
/// A connection obtained through `open_db` rejects an `INSERT`.
#[test]
fn sql_open_db_read_only() {
    let (tmp, setup_conn) = setup_test_db();
    // Close the fixture connection before reopening the same file via `open_db`.
    drop(setup_conn);

    let read_only = open_db(tmp.path()).unwrap();
    let insert = read_only.execute("INSERT INTO users VALUES (99, 'Fail', NULL)", []);
    assert!(insert.is_err(), "read-only connection should reject writes");
}
24536
/// A freshly created table with no rows is reported with `row_count == 0` and
/// both of its declared columns.
#[test]
fn sql_empty_table_schema() {
    let tmp = tempfile::NamedTempFile::new().unwrap();
    let conn = Connection::open(tmp.path()).unwrap();
    let ddl = "CREATE TABLE empty_tbl (id INTEGER PRIMARY KEY, data BLOB)";
    conn.execute_batch(ddl).unwrap();

    let overview = schema_overview(&conn).unwrap();
    let table = &overview[0];
    assert_eq!(table.row_count, 0);
    assert_eq!(table.columns.len(), 2);
}
24547
24548 fn setup_graph_index() -> tempfile::TempDir {
24551 let dir = tempfile::tempdir().unwrap();
24552 std::fs::write(
24553 dir.path().join("main.rs"),
24554 "fn helper() { println!(\"hi\"); }\nfn main() { helper(); Vec::new(); }",
24555 )
24556 .unwrap();
24557 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
24558 db.apply_changes(dir.path()).unwrap();
24559 dir
24560 }
24561
24562 fn setup_traversal_project() -> tempfile::TempDir {
24563 let dir = setup_graph_index();
24564 let task_dir = dir.path().join("tasks/software");
24565 std::fs::create_dir_all(&task_dir).unwrap();
24566 std::fs::write(
24567 task_dir.join("tsift.md"),
24568 r#"---
24569agent_doc_session: tsift-v0.1
24570agent_doc_format: template
24571---
24572
24573## Exchange
24574
24575<!-- agent:exchange patch=append -->
24576❯ do [#kgnv]
24577Completed `#kgnv`; touched files `main.rs`; tests `cargo test traversal_graph`; follow-up `#gfix`.
24578<!-- /agent:exchange -->
24579
24580<!-- agent:queue -->
24581dispatch #spec-test-build-install-commit-push
24582- do [#kgnv]
24583<!-- /agent:queue -->
24584
24585## Backlog
24586
24587<!-- agent:backlog -->
24588- [ ] [#kgnv] Fix helper traversal handles while preserving graph navigation.
24589<!-- /agent:backlog -->
24590"#,
24591 )
24592 .unwrap();
24593 dir
24594 }
24595
24596 fn setup_dependency_dag_project() -> tempfile::TempDir {
24597 let dir = tempfile::tempdir().unwrap();
24598 std::fs::write(
24599 dir.path().join("main.rs"),
24600 "fn shared_helper() {}\nfn main() { shared_helper(); }\n",
24601 )
24602 .unwrap();
24603 std::fs::write(
24604 dir.path().join("Cargo.toml"),
24605 "[package]\nname = \"dag-fixture\"\n",
24606 )
24607 .unwrap();
24608 let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
24609 db.apply_changes(dir.path()).unwrap();
24610
24611 let task_dir = dir.path().join("tasks/software");
24612 std::fs::create_dir_all(&task_dir).unwrap();
24613 std::fs::write(
24614 task_dir.join("tsift.md"),
24615 r#"---
24616agent_doc_session: tsift-dag
24617agent_doc_format: template
24618---
24619
24620## Exchange
24621
24622<!-- agent:exchange patch=append -->
24623Completed `#alpha`; touched files `main.rs`; tests `cargo test dependency_dag`; follow-up `#gamma`.
24624<!-- /agent:exchange -->
24625
24626## Backlog
24627
24628<!-- agent:backlog -->
24629- [ ] [#prep] Prepare Cargo.toml configuration before shared helper work.
24630- [ ] [#alpha] Update shared_helper in main.rs after #prep.
24631- [ ] [#beta] Refactor shared_helper tests in main.rs.
24632- [ ] [#gamma] Follow-up review for graph navigation.
24633<!-- /agent:backlog -->
24634"#,
24635 )
24636 .unwrap();
24637 dir
24638 }
24639
24640 fn setup_dependency_dag_cycle_project() -> tempfile::TempDir {
24641 let dir = setup_graph_index();
24642 let task_dir = dir.path().join("tasks/software");
24643 std::fs::create_dir_all(&task_dir).unwrap();
24644 std::fs::write(
24645 task_dir.join("tsift.md"),
24646 r#"---
24647agent_doc_session: tsift-dag-cycle
24648agent_doc_format: template
24649---
24650
24651## Backlog
24652
24653<!-- agent:backlog -->
24654- [ ] [#left] Left side depends on #right.
24655- [ ] [#right] Right side depends on #left.
24656<!-- /agent:backlog -->
24657"#,
24658 )
24659 .unwrap();
24660 dir
24661 }
24662
24663 fn seed_traversal_semantic_summaries(dir: &Path) {
24664 let summary_db = summarize::SummaryDb::open(&dir.join(".tsift/summaries.db")).unwrap();
24665 summary_db
24666 .insert(&summarize::Summary {
24667 id: 0,
24668 symbol_name: "helper".to_string(),
24669 file_path: "main.rs".to_string(),
24670 content_hash: "hash-main".to_string(),
24671 summary: "helper builds graph navigation handles for traversal.".to_string(),
24672 entities: Some(vec![
24673 summarize::Entity {
24674 name: "helper".to_string(),
24675 kind: "function".to_string(),
24676 description: "Builds graph navigation handles.".to_string(),
24677 },
24678 summarize::Entity {
24679 name: "TraversalGraph".to_string(),
24680 kind: "type".to_string(),
24681 description: "Carries GraphStore-backed traversal rows.".to_string(),
24682 },
24683 ]),
24684 relationships: Some(vec![summarize::Relationship {
24685 from: "helper".to_string(),
24686 to: "TraversalGraph".to_string(),
24687 kind: "uses".to_string(),
24688 }]),
24689 concept_labels: Some(vec![
24690 "graph navigation".to_string(),
24691 "semantic extraction".to_string(),
24692 ]),
24693 extracted_at: "1700000000".to_string(),
24694 model: "test-model".to_string(),
24695 tokens_input: Some(10),
24696 tokens_output: Some(5),
24697 })
24698 .unwrap();
24699 }
24700
/// In the graph fixture, `helper` has exactly one caller: `main`.
#[test]
fn graph_callers_query() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let index_db = index::IndexDb::open(&index_path).unwrap();

    let callers = index_db.callers_of("helper").unwrap();

    assert_eq!(callers.len(), 1);
    let only_caller = &callers[0];
    assert_eq!(only_caller.caller_name, "main");
}
24709
/// In the graph fixture, the callees of `main` include both `helper` and `new`.
#[test]
fn graph_callees_query() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let index_db = index::IndexDb::open(&index_path).unwrap();

    let callees = index_db.callees_of("main").unwrap();

    let calls = |name: &str| callees.iter().any(|edge| edge.callee_name.as_str() == name);
    assert!(calls("helper"));
    assert!(calls("new"));
}
24719
/// Asking for callers of an unknown symbol succeeds with an empty result.
#[test]
fn graph_no_callers_returns_empty() {
    let project = setup_graph_index();
    let index_path = project.path().join(".tsift/index.db");
    let index_db = index::IndexDb::open(&index_path).unwrap();

    let callers = index_db.callers_of("nonexistent").unwrap();

    assert!(callers.is_empty());
}
24727
/// Running `cmd_graph` against a project with no index succeeds and leaves behind an
/// index that reports no new, modified or deleted files.
#[test]
fn graph_cmd_autoindexes_missing_index_by_default() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    std::fs::write(
        root.join("main.rs"),
        "fn helper() {}\nfn main() { helper(); }\n",
    )
    .unwrap();

    // No index exists yet at this point.
    let outcome = cmd_graph(
        "helper",
        root,
        true,
        false,
        None,
        20,
        false,
        true,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());

    // The index must now exist and already be in sync with the sources.
    let index_path = root.join(".tsift/index.db");
    let index_db = index::IndexDb::open_read_only(&index_path).unwrap();
    let pending = index_db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
24758
/// Node handles carry a kind-specific prefix (`gfil-`, `gsym-`, `gbak-`, `gses-`) and
/// are identical across two builds of the same project.
#[test]
fn traversal_graph_has_stable_typed_handles() {
    let project = setup_traversal_project();
    let root = project.path();
    let first = build_traversal_graph(root, root, None).unwrap();
    let second = build_traversal_graph(root, root, None).unwrap();

    let file = resolve_traversal_node(&first, "main.rs").unwrap();
    let symbol = resolve_traversal_node(&first, "helper").unwrap();
    let backlog = resolve_traversal_node(&first, "#kgnv").unwrap();
    let session = resolve_traversal_node(&first, "tsift-v0.1").unwrap();

    // Each node kind has its own handle prefix.
    assert!(file.handle.starts_with("gfil-"));
    assert!(symbol.handle.starts_with("gsym-"));
    assert!(backlog.handle.starts_with("gbak-"));
    assert!(session.handle.starts_with("gses-"));

    // Rebuilding the graph must yield the same handles.
    let symbol_again = resolve_traversal_node(&second, "helper").unwrap();
    assert_eq!(symbol.handle, symbol_again.handle);
    let backlog_again = resolve_traversal_node(&second, "#kgnv").unwrap();
    assert_eq!(backlog.handle, backlog_again.handle);
}
24788
/// The `#kgnv` backlog item has a `mentions` edge to the `helper` symbol.
#[test]
fn traversal_graph_links_backlog_items_to_code_tokens() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let helper = resolve_traversal_node(&graph, "helper").unwrap();

    let mentions_helper = graph.edges.iter().any(|edge| {
        edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
    });
    assert!(mentions_helper);
}
24800
/// Building from a session file keeps the backlog→symbol `mentions` edge but has no
/// `calls` edges; building from the project root does include `calls` edges.
#[test]
fn session_hinted_traversal_skips_global_call_edges() {
    let project = setup_traversal_project();
    let root = project.path();
    let session = root.join("tasks/software/tsift.md");

    let bounded = build_traversal_graph_source(root, &session, None).unwrap();
    let backlog = resolve_traversal_node(&bounded, "#kgnv").unwrap();
    let helper = resolve_traversal_node(&bounded, "helper").unwrap();

    let mentions_helper = bounded.edges.iter().any(|edge| {
        edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
    });
    assert!(mentions_helper);

    let bounded_has_calls = bounded.edges.iter().any(|edge| edge.relation == "calls");
    assert!(
        !bounded_has_calls,
        "session-hinted graph-db projections should not materialize unrelated global call edges"
    );

    let full = build_traversal_graph_source(root, root, None).unwrap();
    let full_has_calls = full.edges.iter().any(|edge| edge.relation == "calls");
    assert!(
        full_has_calls,
        "root/full projections still carry the complete indexed call graph"
    );
}
24823
/// A task document named `tsift.md` resolves to the `tsift` scope rooted at the
/// `src/tsift` submodule, with its index under `.tsift/indexes/tsift/`.
#[test]
fn agent_doc_task_path_infers_matching_workspace_scope() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    std::fs::create_dir_all(root.join("src/tsift")).unwrap();
    std::fs::create_dir_all(root.join("tasks/software")).unwrap();
    std::fs::write(
        root.join(".gitmodules"),
        "[submodule \"src/tsift\"]\n\tpath = src/tsift\n\turl = https://example.invalid/tsift.git\n",
    )
    .unwrap();
    let task = root.join("tasks/software/tsift.md");
    std::fs::write(&task, "# tsift\n").unwrap();

    let targets = resolve_search_index_targets(root, &task, None, false).unwrap();
    let query_db_path = resolve_query_db_path(root, &task, None).unwrap();
    let cfg = config::Config::load(root).unwrap();

    assert_eq!(targets.len(), 1);
    let target = &targets[0];
    assert_eq!(target.scope_name.as_deref(), Some("tsift"));
    assert_eq!(target.source_root, root.join("src/tsift"));
    assert!(target.db_path.ends_with(".tsift/indexes/tsift/index.db"));
    assert_eq!(query_db_path, cfg.db_path_for(root, "tsift"));
}
24851
/// The queued `do #kgnv` entry becomes a `job_packet` node (`gjob-` handle) with a
/// `targets` edge to the backlog item, and is persisted in `.tsift/graph.db`.
#[test]
fn traversal_graph_links_agent_doc_queue_job_packets_to_backlog() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let job = resolve_traversal_node(&graph, "do #kgnv").unwrap();
    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();

    assert_eq!(job.kind, "job_packet");
    assert!(job.handle.starts_with("gjob-"));
    let targets_backlog = graph.edges.iter().any(|edge| {
        edge.from == job.handle && edge.to == backlog.handle && edge.relation == "targets"
    });
    assert!(targets_backlog);

    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();
    let jobs = store.nodes_by_kind("job_packet").unwrap();
    let stored = jobs
        .iter()
        .any(|node| node.properties.get("ref_id").map(String::as_str) == Some("kgnv"));
    assert!(
        stored,
        "expected queued job packet in graph store, got {jobs:?}"
    );
}
24873
/// A decorated Python handler produces a `route` node for `/items` with a
/// `handled_by` edge to the `list_items` symbol.
#[test]
fn traversal_graph_includes_routes_and_handler_edges() {
    let dir = tempfile::tempdir().unwrap();
    // Minimal Python source: one route decorator on one handler function.
    std::fs::write(
        dir.path().join("api.py"),
        r#"@router.get("/items")
def list_items():
    return []
"#,
    )
    .unwrap();
    let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
    db.apply_changes(dir.path()).unwrap();

    let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
    let route = resolve_traversal_node(&graph, "/items").unwrap();
    let handler = resolve_traversal_node(&graph, "list_items").unwrap();

    assert_eq!(route.kind, "route");
    // The route must point at its handler symbol.
    assert!(graph.edges.iter().any(|edge| {
        edge.from == route.handle && edge.to == handler.handle && edge.relation == "handled_by"
    }));
}
24897
/// With a limit of 2 handles, the neighbor reached via `mentions` is kept and the one
/// reached via `unknown` is dropped, even though the latter sorts first by name.
#[test]
fn traversal_neighborhood_handles_prioritizes_high_signal_edges_when_limited() {
    let edge_from_origin = |to: &str, relation: &str| TraversalEdge {
        from: "origin".to_string(),
        to: to.to_string(),
        relation: relation.to_string(),
        label: None,
        weight: 1,
    };
    let edges = vec![
        edge_from_origin("aaa_low", "unknown"),
        edge_from_origin("zzz_high", "mentions"),
    ];

    let handles = traversal_neighborhood_handles(&edges, "origin", 1, 2);

    assert!(handles.contains("origin"));
    assert!(handles.contains("zzz_high"), "{handles:?}");
    assert!(!handles.contains("aaa_low"), "{handles:?}");
}
24923
/// Building the traversal graph persists rows into `.tsift/graph.db`: the backlog
/// node, a projection-metadata node, and `source_handle`, `worker_context` and
/// `worker_result` rows derived from the session fixture.
#[test]
fn traversal_materializes_provider_neutral_sqlite_graph() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();

    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let backlog_nodes = store.nodes_by_kind("backlog").unwrap();
    let has_backlog = backlog_nodes.iter().any(|node| {
        node.id == backlog.handle
            && node.properties.get("ref_id").map(String::as_str) == Some("kgnv")
    });
    assert!(
        has_backlog,
        "expected materialized backlog node, got {backlog_nodes:?}"
    );

    let expected_version = GRAPH_PROJECTION_VERSION.to_string();
    let has_projection_meta = store.all_nodes().unwrap().iter().any(|node| {
        node.kind == GRAPH_PROJECTION_META_KIND
            && node.properties.get("projection_version") == Some(&expected_version)
    });
    assert!(has_projection_meta, "expected projection metadata node");

    let source_handles = store.nodes_by_kind("source_handle").unwrap();
    let has_source_handle = source_handles
        .iter()
        .any(|node| node.properties.get("file").map(String::as_str) == Some("main.rs"));
    assert!(
        has_source_handle,
        "expected bounded source_handle rows, got {source_handles:?}"
    );

    let worker_context = store.nodes_by_kind("worker_context").unwrap();
    let has_worker_context = worker_context.iter().any(|node| {
        node.properties.get("target").map(String::as_str) == Some("tasks/software/tsift.md")
    });
    assert!(
        has_worker_context,
        "expected bounded worker_context rows, got {worker_context:?}"
    );

    let worker_results = store.nodes_by_kind("worker_result").unwrap();
    let has_worker_result = worker_results.iter().any(|node| {
        let prop = |key: &str| node.properties.get(key).map(String::as_str);
        prop("ref_id") == Some("kgnv")
            && prop("status") == Some("completed")
            && prop("touched_files") == Some("main.rs")
            && prop("follow_up_ids") == Some("gfix")
    });
    assert!(
        has_worker_result,
        "expected worker_result rows, got {worker_results:?}"
    );
}
24973
24974 #[test]
24975 fn traversal_projection_materializes_cached_semantic_rows() {
24976 let dir = setup_traversal_project();
24977 seed_traversal_semantic_summaries(dir.path());
24978 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24979 let helper = resolve_traversal_node(&graph, "helper").unwrap();
24980 let concept = resolve_traversal_node(&graph, "graph navigation").unwrap();
24981 let entity = resolve_traversal_node(&graph, "TraversalGraph").unwrap();
24982
24983 assert_eq!(concept.kind, "semantic_concept");
24984 assert_eq!(entity.kind, "semantic_entity");
24985 assert!(concept.handle.starts_with("gcon-"));
24986 assert!(entity.handle.starts_with("gent-"));
24987
24988 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
24989 assert!(
24990 store
24991 .nodes_by_kind("semantic_concept")
24992 .unwrap()
24993 .iter()
24994 .any(|node| node.label == "semantic extraction"
24995 && node.properties.contains_key("embedding")),
24996 "expected persisted concept embeddings"
24997 );
24998 assert!(
24999 store
25000 .outgoing_edges(&helper.handle, Some("mentions_concept"))
25001 .unwrap()
25002 .iter()
25003 .any(|edge| edge.to_id == concept.handle),
25004 "expected helper symbol to link to cached summary concept"
25005 );
25006 assert!(
25007 store
25008 .outgoing_edges(
25009 &semantic_entity_handle("helper", "function"),
25010 Some("semantic_relation")
25011 )
25012 .unwrap()
25013 .iter()
25014 .any(|edge| edge.to_id == entity.handle
25015 && edge.properties.get("relationship_kind") == Some(&"uses".to_string())),
25016 "expected LLM relationship rows projected into GraphStore"
25017 );
25018 }
25019
25020 #[test]
25021 fn semantic_related_query_uses_persisted_graph_embeddings() {
25022 let dir = setup_traversal_project();
25023 seed_traversal_semantic_summaries(dir.path());
25024 refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25025 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25026
25027 let report = semantic_related_report_from_store(
25028 dir.path(),
25029 None,
25030 "graph navigation",
25031 5,
25032 SemanticRelatedKind::Concept,
25033 &store,
25034 )
25035 .unwrap();
25036
25037 assert_eq!(report.embedding_model, SEMANTIC_EMBEDDING_MODEL);
25038 assert!(
25039 report
25040 .items
25041 .iter()
25042 .any(|item| item.label == "graph navigation"
25043 && item.kind == "semantic_concept"
25044 && item.score > 0.9),
25045 "expected nearest concept match from graph embeddings, got {:?}",
25046 report.items
25047 );
25048 }
25049
25050 #[test]
25051 fn graph_db_related_query_uses_semantic_seeds_and_incident_neighborhoods() {
25052 let dir = setup_traversal_project();
25053 seed_traversal_semantic_summaries(dir.path());
25054 refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25055 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25056
25057 let report = graph_db_report_from_store(
25058 dir.path(),
25059 None,
25060 "sqlite",
25061 GraphDbQuery::Related {
25062 query: "graph navigation".to_string(),
25063 kind: SemanticRelatedKind::All,
25064 depth: 1,
25065 seed_limit: 2,
25066 limit: 20,
25067 },
25068 &store,
25069 sqlite_graph_freshness(&store, "root").unwrap(),
25070 Vec::new(),
25071 )
25072 .unwrap();
25073
25074 let knowledge = report.knowledge_retrieval.as_ref().unwrap();
25075 assert_eq!(knowledge.mode, "semantic_seeded_neighborhood");
25076 assert_eq!(knowledge.seed_kind, "all");
25077 assert_eq!(knowledge.depth, 1);
25078 assert!(
25079 knowledge
25080 .diagnostics
25081 .iter()
25082 .any(|diagnostic| diagnostic.contains("incident"))
25083 );
25084 assert!(
25085 report
25086 .semantic_related
25087 .iter()
25088 .any(|item| item.label == "graph navigation"
25089 && item.kind == "semantic_concept"
25090 && item.score > 0.9),
25091 "expected natural-language query to seed the graph navigation concept, got {:?}",
25092 report.semantic_related
25093 );
25094 assert!(
25095 report
25096 .nodes
25097 .iter()
25098 .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation")
25099 );
25100 assert!(
25101 report
25102 .nodes
25103 .iter()
25104 .any(|node| node.kind == "symbol" && node.label == "helper"),
25105 "incident expansion from semantic seed should recover source symbols, got {:?}",
25106 report
25107 .nodes
25108 .iter()
25109 .map(|node| (&node.kind, &node.label))
25110 .collect::<Vec<_>>()
25111 );
25112 assert!(
25113 report
25114 .edges
25115 .iter()
25116 .any(|edge| edge.kind == "mentions_concept")
25117 );
25118 }
25119
25120 #[test]
25121 fn graph_db_semantic_seeded_neighborhood_scores_before_caps() {
25122 let mut nodes = vec![
25123 SubstrateGraphNode::new("seed", "semantic_concept", "graph budget"),
25124 SubstrateGraphNode::new("zzz_high", "symbol", "high_signal"),
25125 ];
25126 let mut edges = vec![SubstrateGraphEdge::new(
25127 "zzz_high",
25128 "seed",
25129 "mentions_concept",
25130 )];
25131 for idx in 0..24 {
25132 let id = format!("aaa_low_{idx:02}");
25133 nodes.push(SubstrateGraphNode::new(
25134 id.clone(),
25135 "note",
25136 format!("low {idx}"),
25137 ));
25138 edges.push(SubstrateGraphEdge::new(id, "seed", "weak_link"));
25139 }
25140 let mut store = SqliteGraphStore::in_memory().unwrap();
25141 store
25142 .replace_projection(&GraphProjection { nodes, edges })
25143 .unwrap();
25144
25145 let subgraph =
25146 graph_db_semantic_seeded_neighborhood(&store, &["seed".to_string()], 1, 3).unwrap();
25147
25148 assert_eq!(subgraph.nodes.len(), 3);
25149 assert_eq!(subgraph.nodes[0].id, "seed");
25150 assert_eq!(
25151 subgraph.nodes[1].id, "zzz_high",
25152 "expected semantic mention edge to survive caps before lexicographic low-signal nodes: {:?}",
25153 subgraph.nodes
25154 );
25155 assert!(subgraph.truncated);
25156 assert!(
25157 subgraph
25158 .diagnostics
25159 .iter()
25160 .any(|diagnostic| diagnostic.contains("per-node edge scan cap")),
25161 "{:?}",
25162 subgraph.diagnostics
25163 );
25164 assert!(
25165 subgraph
25166 .diagnostics
25167 .iter()
25168 .any(|diagnostic| diagnostic.contains("skipped")),
25169 "{:?}",
25170 subgraph.diagnostics
25171 );
25172 }
25173
25174 #[test]
25175 fn conflict_matrix_uses_semantic_rows_as_dispatch_ranking_signal() {
25176 let dir = setup_traversal_project();
25177 seed_traversal_semantic_summaries(dir.path());
25178 init_git_repo(dir.path());
25179 let session = dir.path().join("tasks/software/tsift.md");
25180 refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
25181 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25182 let freshness = sqlite_graph_freshness(&store, "root").unwrap();
25183 let evidence = graph_db_evidence_report_from_store(GraphDbEvidenceInput {
25184 root: dir.path(),
25185 scope: None,
25186 backend: "sqlite",
25187 target: "kgnv",
25188 depth: 4,
25189 limit: 8,
25190 store: &store,
25191 freshness,
25192 warnings: Vec::new(),
25193 })
25194 .unwrap();
25195 assert!(
25196 evidence
25197 .semantic_related
25198 .iter()
25199 .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation"),
25200 "expected semantic evidence rows, got {:?}",
25201 evidence
25202 .semantic_related
25203 .iter()
25204 .map(|node| (&node.kind, &node.label))
25205 .collect::<Vec<_>>()
25206 );
25207
25208 let cached_diff = diff_digest::compute(
25209 dir.path(),
25210 diff_digest::DiffDigestOptions {
25211 cached: true,
25212 revision: None,
25213 max_parsed_files: None,
25214 },
25215 )
25216 .unwrap();
25217 let impact_report = impact::compute(
25218 dir.path(),
25219 impact::ImpactOptions {
25220 cached: true,
25221 revision: None,
25222 scope: None,
25223 limit: 10,
25224 },
25225 )
25226 .unwrap();
25227 let graph_nodes = store.all_nodes().unwrap();
25228 let graph_index = conflict_matrix_graph_index(&graph_nodes);
25229 let semantic_candidate = conflict_matrix_candidate_from_evidence(
25230 dir.path(),
25231 &evidence,
25232 &graph_index,
25233 &cached_diff,
25234 &impact_report,
25235 );
25236 assert!(semantic_candidate.semantic_dispatch_score > 0);
25237 assert!(
25238 semantic_candidate
25239 .semantic_dispatch_reasons
25240 .iter()
25241 .any(|reason| reason.contains("semantic_concept") && reason.contains("owned file")),
25242 "expected semantic ranking explanations, got {:?}",
25243 semantic_candidate.semantic_dispatch_reasons
25244 );
25245 assert!(
25246 semantic_candidate
25247 .semantic_related
25248 .iter()
25249 .any(|item| item.label == "graph navigation")
25250 );
25251
25252 let mut plain_candidate = semantic_candidate.clone();
25253 plain_candidate.target = "plain".to_string();
25254 plain_candidate.semantic_related.clear();
25255 plain_candidate.semantic_dispatch_score = 0;
25256 plain_candidate.semantic_dispatch_reasons.clear();
25257 let mut ranked = [plain_candidate, semantic_candidate];
25258 ranked.sort_by(|left, right| {
25259 left.risk
25260 .cmp(&right.risk)
25261 .then_with(|| left.risk_score.cmp(&right.risk_score))
25262 .then_with(|| {
25263 right
25264 .semantic_dispatch_score
25265 .cmp(&left.semantic_dispatch_score)
25266 })
25267 .then_with(|| left.target.cmp(&right.target))
25268 });
25269 assert_eq!(ranked[0].target, "kgnv");
25270 }
25271
25272 #[test]
25273 fn dependency_dag_extracts_explicit_overlap_and_follow_up_edges() {
25274 let dir = setup_dependency_dag_project();
25275 let session = dir.path().join("tasks/software/tsift.md");
25276 let report = build_dependency_dag_report(dir.path(), None, &[], 4, 12).unwrap();
25277
25278 assert_eq!(report.contract_version, "dependency-dag-v1");
25279 assert_eq!(
25280 report.targets,
25281 vec![
25282 "prep".to_string(),
25283 "alpha".to_string(),
25284 "beta".to_string(),
25285 "gamma".to_string()
25286 ]
25287 );
25288 assert!(report.edges.iter().any(|edge| {
25289 edge.from == "prep" && edge.to == "alpha" && edge.kind == "explicit_depends_on"
25290 }));
25291 assert!(report.edges.iter().any(|edge| {
25292 edge.from == "alpha" && edge.to == "gamma" && edge.kind == "worker_result_follow_up"
25293 }));
25294 assert!(report.edges.iter().any(|edge| {
25295 edge.from == "alpha"
25296 && edge.to == "beta"
25297 && edge.kind == "shared_resource"
25298 && edge.shared_files.contains(&"main.rs".to_string())
25299 && edge.shared_symbols.contains(&"shared_helper".to_string())
25300 }));
25301 assert!(
25302 !report.cycle_diagnostics.has_cycles,
25303 "{:?}",
25304 report.cycle_diagnostics
25305 );
25306 assert_eq!(report.topo_batches[0].targets, vec!["prep".to_string()]);
25307 assert_eq!(report.topo_batches[1].targets, vec!["alpha".to_string()]);
25308 assert!(
25309 report.replay_commands[0].contains("dependency-dag"),
25310 "{:?}",
25311 report.replay_commands
25312 );
25313
25314 cmd_dependency_dag(
25315 &session,
25316 None,
25317 &["alpha".to_string(), "beta".to_string()],
25318 4,
25319 12,
25320 OutputFormat {
25321 json_output: true,
25322 compact: false,
25323 pretty: false,
25324 terse: false,
25325 schema: false,
25326 envelope: false,
25327 },
25328 )
25329 .unwrap();
25330 }
25331
25332 #[test]
25333 fn dependency_dag_reports_cycles_from_explicit_depends_on_text() {
25334 let dir = setup_dependency_dag_cycle_project();
25335 let report = build_dependency_dag_report(dir.path(), None, &[], 4, 12).unwrap();
25336
25337 assert!(report.cycle_diagnostics.has_cycles);
25338 assert_eq!(
25339 report.cycle_diagnostics.blocked_nodes,
25340 vec!["left".to_string(), "right".to_string()]
25341 );
25342 assert!(report.cycle_diagnostics.cycle_edges.iter().any(|edge| {
25343 edge.from == "left" && edge.to == "right" && edge.kind == "explicit_depends_on"
25344 }));
25345 assert!(report.cycle_diagnostics.cycle_edges.iter().any(|edge| {
25346 edge.from == "right" && edge.to == "left" && edge.kind == "explicit_depends_on"
25347 }));
25348 }
25349
25350 #[test]
25351 fn traversal_projection_queries_match_sqlite_and_convex_stores() {
25352 let dir = setup_traversal_project();
25353 let source_graph = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
25354 let projection = traversal_projection_from_graph(dir.path(), None, &source_graph).unwrap();
25355
25356 let mut sqlite = SqliteGraphStore::in_memory().unwrap();
25357 sqlite.replace_projection(&projection).unwrap();
25358 let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
25359 projection.upsert_into(&convex).unwrap();
25360
25361 let sqlite_graph = traversal_graph_from_store(dir.path(), &sqlite).unwrap();
25362 let convex_graph = traversal_graph_from_store(dir.path(), &convex).unwrap();
25363 assert_eq!(sqlite_graph.nodes.len(), convex_graph.nodes.len());
25364 assert_eq!(sqlite_graph.edges.len(), convex_graph.edges.len());
25365
25366 let sqlite_backlog = resolve_traversal_node(&sqlite_graph, "#kgnv").unwrap();
25367 let convex_helper = resolve_traversal_node(&convex_graph, "helper").unwrap();
25368 assert!(convex_graph.edges.iter().any(|edge| {
25369 edge.from == sqlite_backlog.handle
25370 && edge.to == convex_helper.handle
25371 && edge.relation == "mentions"
25372 }));
25373 }
25374
25375 #[test]
25376 fn graph_db_api_queries_sqlite_neighborhood_and_schema() {
25377 let dir = setup_traversal_project();
25378 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25379 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25380 let freshness = sqlite_graph_freshness(&store, "root").unwrap();
25381 assert_eq!(freshness.status, "current");
25382
25383 let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
25384 let report = graph_db_report_from_store(
25385 dir.path(),
25386 None,
25387 "sqlite",
25388 GraphDbQuery::Neighborhood {
25389 id: backlog.handle.clone(),
25390 depth: 1,
25391 edge_kind: Some("mentions".to_string()),
25392 cursor: None,
25393 limit: None,
25394 property_filters: Vec::new(),
25395 },
25396 &store,
25397 freshness,
25398 Vec::new(),
25399 )
25400 .unwrap();
25401 assert!(
25402 report
25403 .edges
25404 .iter()
25405 .any(|edge| edge.from_id == backlog.handle && edge.kind == "mentions"),
25406 "expected backlog mention edge, got {:?}",
25407 report.edges
25408 );
25409 assert!(
25410 report.ranked_neighbors.iter().any(|neighbor| {
25411 neighbor.depth == Some(1)
25412 && neighbor.edge_kinds.iter().any(|kind| kind == "mentions")
25413 && neighbor.node_id != backlog.handle
25414 && neighbor.handle_coverage_pct >= 95.0
25415 && neighbor.duplicate_name_precision >= 0.99
25416 }),
25417 "expected ranked neighborhood neighbors with quality scores, got {:?}",
25418 report.ranked_neighbors
25419 );
25420 assert!(report.ranked_neighbors.len() <= GRAPH_DB_RANKED_NEIGHBOR_CAP);
25421 let ranking_gate = report.neighborhood_ranking_gate.as_ref().unwrap();
25422 assert!(!ranking_gate.ranked_output_default);
25423 assert_eq!(ranking_gate.default_order, "stable_node_id");
25424 assert!(
25425 ranking_gate
25426 .diagnostics
25427 .iter()
25428 .any(|diagnostic| diagnostic.contains("score-capped")),
25429 "{ranking_gate:?}"
25430 );
25431 assert!(
25432 ranking_gate
25433 .required_metrics
25434 .iter()
25435 .any(|metric| metric == "handle_coverage_pct")
25436 );
25437 assert!(
25438 ranking_gate
25439 .required_metrics
25440 .iter()
25441 .any(|metric| metric == "duplicate_name_precision")
25442 );
25443 assert!(
25444 report
25445 .page
25446 .as_ref()
25447 .unwrap()
25448 .diagnostics
25449 .iter()
25450 .any(|diagnostic| diagnostic.contains("idx_graph_edges_from_kind")),
25451 "expected SQLite neighborhood query plan diagnostics, got {:?}",
25452 report.page.as_ref().unwrap().diagnostics
25453 );
25454 let edge_id = graph_db_edge_key(
25455 report
25456 .edges
25457 .iter()
25458 .find(|edge| edge.from_id == backlog.handle && edge.kind == "mentions")
25459 .unwrap(),
25460 );
25461
25462 let edge_report = graph_db_report_from_store(
25463 dir.path(),
25464 None,
25465 "sqlite",
25466 GraphDbQuery::Edge {
25467 id: edge_id.clone(),
25468 },
25469 &store,
25470 sqlite_graph_freshness(&store, "root").unwrap(),
25471 Vec::new(),
25472 )
25473 .unwrap();
25474 assert_eq!(
25475 edge_report.edge.as_ref().map(graph_db_edge_key),
25476 Some(edge_id.clone())
25477 );
25478
25479 let edges_report = graph_db_report_from_store(
25480 dir.path(),
25481 None,
25482 "sqlite",
25483 GraphDbQuery::Edges {
25484 edge_kind: Some("mentions".to_string()),
25485 cursor: None,
25486 limit: Some(2),
25487 property_filters: Vec::new(),
25488 },
25489 &store,
25490 sqlite_graph_freshness(&store, "root").unwrap(),
25491 Vec::new(),
25492 )
25493 .unwrap();
25494 assert!(edges_report.edges.iter().any(|edge| edge.id == edge_id));
25495 assert_eq!(
25496 edges_report.page.as_ref().unwrap().returned_edges,
25497 edges_report.edges.len()
25498 );
25499
25500 let incident_report = graph_db_report_from_store(
25501 dir.path(),
25502 None,
25503 "sqlite",
25504 GraphDbQuery::Incident {
25505 id: backlog.handle.clone(),
25506 edge_kind: Some("mentions".to_string()),
25507 cursor: None,
25508 limit: Some(1),
25509 property_filters: Vec::new(),
25510 },
25511 &store,
25512 sqlite_graph_freshness(&store, "root").unwrap(),
25513 Vec::new(),
25514 )
25515 .unwrap();
25516 assert_eq!(incident_report.page.as_ref().unwrap().returned_edges, 1);
25517 assert!(
25518 incident_report
25519 .edges
25520 .iter()
25521 .all(|edge| edge.from_id == backlog.handle || edge.to_id == backlog.handle),
25522 "{:?}",
25523 incident_report.edges
25524 );
25525
25526 let schema_report = graph_db_report_from_store(
25527 dir.path(),
25528 None,
25529 "sqlite",
25530 GraphDbQuery::Schema,
25531 &store,
25532 sqlite_graph_freshness(&store, "root").unwrap(),
25533 Vec::new(),
25534 )
25535 .unwrap();
25536 assert!(
25537 schema_report
25538 .schema
25539 .unwrap()
25540 .operations
25541 .iter()
25542 .any(|operation| operation.command.starts_with("neighborhood"))
25543 );
25544 }
25545
25546 #[test]
25547 fn graph_db_status_uses_snapshot_fallback_when_rollback_journal_is_locked() {
25548 let dir = setup_traversal_project();
25549 refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25550 let graph_db = dir.path().join(".tsift/graph.db");
25551 let _lock = hold_rollback_journal_lock(&graph_db);
25552
25553 let report =
25554 graph_db_operator_report_from_disk(dir.path(), None, &graph_db, "status", None, vec![])
25555 .unwrap();
25556
25557 assert_eq!(report.status, "current");
25558 assert_eq!(
25559 report.recovery,
25560 Some(index::ReadOnlyRecovery::SnapshotFallback)
25561 );
25562 assert!(
25563 report
25564 .warnings
25565 .iter()
25566 .any(|warning| warning.contains("rollback-journal lock")),
25567 "expected rollback-journal recovery warning, got {:?}",
25568 report.warnings
25569 );
25570 }
25571
25572 #[test]
25573 fn graph_db_status_copies_wal_sidecars_when_locked() {
25574 let dir = setup_traversal_project();
25575 refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25576 let graph_db = dir.path().join(".tsift/graph.db");
25577 let _lock = hold_wal_database_lock(&graph_db);
25578
25579 let report =
25580 graph_db_operator_report_from_disk(dir.path(), None, &graph_db, "status", None, vec![])
25581 .unwrap();
25582
25583 assert_eq!(report.status, "current");
25584 assert_eq!(
25585 report.recovery,
25586 Some(index::ReadOnlyRecovery::SnapshotFallbackWal)
25587 );
25588 assert!(
25589 report
25590 .warnings
25591 .iter()
25592 .any(|warning| warning.contains("WAL-aware snapshot fallback")),
25593 "expected WAL recovery warning, got {:?}",
25594 report.warnings
25595 );
25596 }
25597
25598 #[test]
25599 fn graph_db_evidence_uses_snapshot_fallback_when_graph_db_is_locked() {
25600 let dir = setup_traversal_project();
25601 let session = dir.path().join("tasks/software/tsift.md");
25602 refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
25603 let graph_db = dir.path().join(".tsift/graph.db");
25604 let _lock = hold_rollback_journal_lock(&graph_db);
25605
25606 let result = cmd_graph_db(
25607 &session,
25608 None,
25609 GraphDbBackend::Sqlite,
25610 None,
25611 GraphDbQuery::Evidence {
25612 target: "kgnv".to_string(),
25613 depth: 3,
25614 limit: 8,
25615 },
25616 OutputFormat {
25617 json_output: false,
25618 compact: true,
25619 pretty: false,
25620 terse: false,
25621 schema: false,
25622 envelope: false,
25623 },
25624 );
25625
25626 assert!(result.is_ok());
25627 }
25628
25629 fn current_graph_db_freshness() -> GraphDbFreshnessReport {
25630 GraphDbFreshnessReport {
25631 status: "current".to_string(),
25632 fail_closed: false,
25633 projection_version: Some(GRAPH_PROJECTION_VERSION.to_string()),
25634 content_hash: Some("fixture".to_string()),
25635 source_watermark: None,
25636 diagnostics: Vec::new(),
25637 }
25638 }
25639
25640 #[test]
25641 fn graph_db_evidence_fails_closed_with_repair_command_for_stale_freshness() {
25642 let dir = setup_traversal_project();
25643 refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25644 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25645 let stale = GraphDbFreshnessReport {
25646 status: "stale".to_string(),
25647 fail_closed: true,
25648 projection_version: Some("old-v0".to_string()),
25649 content_hash: None,
25650 source_watermark: None,
25651 diagnostics: vec!["projection content hash is missing".to_string()],
25652 };
25653
25654 let err = match graph_db_evidence_report_from_store(GraphDbEvidenceInput {
25655 root: dir.path(),
25656 scope: None,
25657 backend: "sqlite",
25658 target: "kgnv",
25659 depth: 3,
25660 limit: 8,
25661 store: &store,
25662 freshness: stale,
25663 warnings: Vec::new(),
25664 }) {
25665 Ok(_) => panic!("stale graph freshness should fail closed"),
25666 Err(err) => err,
25667 };
25668 let message = err.to_string();
25669 assert!(message.contains("failed closed"), "{message}");
25670 assert!(message.contains("graph-db --path"), "{message}");
25671 assert!(message.contains("refresh --json"), "{message}");
25672 }
25673
25674 fn paged_graph_ids(
25675 store: &impl GraphStore,
25676 cursor: Option<&str>,
25677 ) -> (Vec<String>, GraphDbPageReport) {
25678 let report = graph_db_report_from_store(
25679 Path::new("."),
25680 None,
25681 "fixture",
25682 GraphDbQuery::Kind {
25683 kind: "backlog".to_string(),
25684 cursor: cursor.map(str::to_string),
25685 limit: Some(2),
25686 property_filters: vec!["phase=open".to_string()],
25687 },
25688 store,
25689 current_graph_db_freshness(),
25690 Vec::new(),
25691 )
25692 .unwrap();
25693 (
25694 report.nodes.iter().map(|node| node.id.clone()).collect(),
25695 report.page.unwrap(),
25696 )
25697 }
25698
25699 #[test]
25700 fn graph_db_query_pagination_and_filters_match_sqlite_and_convex() {
25701 let nodes = (0..5)
25702 .map(|idx| {
25703 let phase = if idx == 1 { "closed" } else { "open" };
25704 SubstrateGraphNode::new(format!("gbak-{idx:02}"), "backlog", format!("#{idx:02}"))
25705 .with_property("phase", phase)
25706 })
25707 .collect::<Vec<_>>();
25708 let projection = GraphProjection {
25709 nodes,
25710 edges: Vec::new(),
25711 };
25712 let sqlite = SqliteGraphStore::in_memory().unwrap();
25713 projection.upsert_into(&sqlite).unwrap();
25714 let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
25715 projection.upsert_into(&convex).unwrap();
25716
25717 let (sqlite_first_ids, sqlite_first_page) = paged_graph_ids(&sqlite, None);
25718 let (convex_first_ids, convex_first_page) = paged_graph_ids(&convex, None);
25719 assert_eq!(sqlite_first_ids, vec!["gbak-00", "gbak-02"]);
25720 assert_eq!(sqlite_first_ids, convex_first_ids);
25721 assert_eq!(sqlite_first_page.next_cursor.as_deref(), Some("gbak-02"));
25722 assert!(sqlite_first_page.truncated);
25723 assert_eq!(
25724 sqlite_first_page.returned_nodes,
25725 convex_first_page.returned_nodes
25726 );
25727 assert_eq!(
25728 sqlite_first_page.property_filters,
25729 convex_first_page.property_filters
25730 );
25731 assert!(
25732 sqlite_first_page
25733 .diagnostics
25734 .iter()
25735 .any(|diagnostic| diagnostic.contains("idx_graph_nodes_kind")),
25736 "expected SQLite kind query plan diagnostics, got {:?}",
25737 sqlite_first_page.diagnostics
25738 );
25739
25740 let cursor = sqlite_first_page.next_cursor.as_deref();
25741 let (sqlite_next_ids, sqlite_next_page) = paged_graph_ids(&sqlite, cursor);
25742 let (convex_next_ids, convex_next_page) = paged_graph_ids(&convex, cursor);
25743 assert_eq!(sqlite_next_ids, vec!["gbak-03", "gbak-04"]);
25744 assert_eq!(sqlite_next_ids, convex_next_ids);
25745 assert_eq!(sqlite_next_page.next_cursor, None);
25746 assert!(!sqlite_next_page.truncated);
25747 assert_eq!(
25748 sqlite_next_page.returned_nodes,
25749 convex_next_page.returned_nodes
25750 );
25751 assert_eq!(
25752 sqlite_next_page.property_filters,
25753 convex_next_page.property_filters
25754 );
25755 }
25756
25757 #[test]
25758 fn traversal_shortest_path_crosses_artifacts_and_symbols() {
25759 let dir = setup_traversal_project();
25760 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25761 let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
25762 let main = resolve_traversal_node(&graph, "main").unwrap();
25763
25764 let path = traversal_shortest_handles(&graph.edges, &backlog.handle, &main.handle).unwrap();
25765 assert_eq!(path.first(), Some(&backlog.handle));
25766 assert_eq!(path.last(), Some(&main.handle));
25767 assert!(
25768 path.len() >= 3,
25769 "expected backlog -> symbol -> main, got {path:?}"
25770 );
25771 }
25772
25773 #[test]
25774 fn traversal_report_recommends_next_bugfix_nodes() {
25775 let dir = setup_traversal_project();
25776 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25777 let report = traversal_report(dir.path(), None, graph, Some("#kgnv"), None, 1, 50).unwrap();
25778
25779 assert_eq!(report.mode, "neighborhood");
25780 assert!(
25781 report
25782 .recommendations
25783 .iter()
25784 .any(|rec| rec.label == "helper" && rec.reason.contains("matched")),
25785 "expected helper recommendation, got {:?}",
25786 report.recommendations
25787 );
25788 assert!(
25789 !report.exploration.source_windows.is_empty(),
25790 "expected exploration source windows"
25791 );
25792 assert!(
25793 report
25794 .exploration
25795 .no_reread_guidance
25796 .contains("avoid whole-file reads")
25797 );
25798 }
25799
25800 #[test]
25801 fn traversal_graph_refreshes_stale_index_before_loading_symbols() {
25802 let dir = setup_traversal_project();
25803 std::thread::sleep(std::time::Duration::from_millis(50));
25804 std::fs::write(
25805 dir.path().join("main.rs"),
25806 "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n",
25807 )
25808 .unwrap();
25809
25810 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25811
25812 assert!(
25813 graph
25814 .warnings
25815 .iter()
25816 .any(|warning| warning.contains("index refreshed")
25817 && warning.contains("graph traversal packet")),
25818 "expected refresh diagnostic, got {:?}",
25819 graph.warnings
25820 );
25821 assert!(resolve_traversal_node(&graph, "fresh_helper").is_some());
25822
25823 let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
25824 let summary = db.compute_changes(dir.path()).unwrap();
25825 assert_eq!(summary.new + summary.modified + summary.deleted, 0);
25826 }
25827
25828 #[test]
25829 fn traversal_graph_falls_back_to_raw_source_when_stale_refresh_is_blocked() {
25830 let dir = setup_traversal_project();
25831 let db_path = dir.path().join(".tsift/index.db");
25832 let _writer = hold_writer_lock(&index::writer_lock_path(&db_path));
25833 std::thread::sleep(std::time::Duration::from_millis(50));
25834 std::fs::write(
25835 dir.path().join("main.rs"),
25836 "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n",
25837 )
25838 .unwrap();
25839
25840 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25841 let file = resolve_traversal_node(&graph, "main.rs").unwrap();
25842
25843 assert!(
25844 graph
25845 .warnings
25846 .iter()
25847 .any(|warning| warning.contains("falling back to raw source file nodes")),
25848 "expected raw-source fallback diagnostic, got {:?}",
25849 graph.warnings
25850 );
25851 assert!(
25852 file.detail
25853 .as_deref()
25854 .is_some_and(|detail| detail.contains("raw source fallback")),
25855 "expected raw-source detail, got {:?}",
25856 file.detail
25857 );
25858 assert!(
25859 file.expand.contains("source-read"),
25860 "expected source-read fallback command, got {}",
25861 file.expand
25862 );
25863 assert!(
25864 resolve_traversal_node(&graph, "helper").is_none(),
25865 "stale symbol evidence should be skipped when refresh is blocked"
25866 );
25867 }
25868
25869 #[test]
25870 fn traversal_cmd_supports_json_and_html_outputs() {
25871 let dir = setup_traversal_project();
25872 cmd_traverse(
25873 Some("#kgnv"),
25874 Some("main"),
25875 dir.path(),
25876 None,
25877 1,
25878 50,
25879 TraverseFormat::Json,
25880 false,
25881 false,
25882 false,
25883 None,
25884 )
25885 .unwrap();
25886 cmd_traverse(
25887 None,
25888 None,
25889 dir.path(),
25890 None,
25891 1,
25892 50,
25893 TraverseFormat::Html,
25894 false,
25895 false,
25896 false,
25897 None,
25898 )
25899 .unwrap();
25900 }
25901
25902 #[test]
25903 fn traversal_html_renders_inline_graph_visualization() {
25904 let dir = setup_traversal_project();
25905 seed_traversal_semantic_summaries(dir.path());
25906 let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25907 let report = traversal_report(dir.path(), None, graph, None, None, 1, 50).unwrap();
25908 let html = traversal_report_html(&report).unwrap();
25909
25910 assert!(html.contains("id=\"graph-canvas\""));
25911 assert!(html.contains("semantic_concept"));
25912 assert!(html.contains("graph navigation"));
25913 assert!(html.contains("JSON.parse"));
25914 }
25915
25916 #[test]
25917 fn compact_helpers_trim_scores_and_snippets() {
25918 assert_eq!(format_score(0.12345, true), "0.12");
25919 assert_eq!(format_score(0.12345, false), "0.1235");
25920 let snippet = compact_snippet(" first line with useful context\nsecond");
25921 assert_eq!(snippet.as_deref(), Some("first line with useful context"));
25922 }
25923
25924 #[test]
25925 fn compact_members_caps_list() {
25926 let members: Vec<graph::CommunityMember> = ["a", "b", "c", "d", "e", "f"]
25927 .iter()
25928 .map(|n| graph::CommunityMember::new(*n))
25929 .collect();
25930 assert_eq!(compact_members(&members, 5), "a, b, c, d, e (+1 more)");
25931 }
25932
/// Table-driven check of `abbreviate_kind`: each `(kind, expected)` pair lists
/// a symbol kind and the abbreviation it must map to. The last rows cover
/// kinds that are expected to come back unchanged.
#[test]
fn abbreviate_kind_maps_common_kinds() {
    let cases = [
        ("function", "fn"),
        ("method", "meth"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
        ("struct", "struct"),
        ("trait", "trait"),
        ("enum", "enum"),
        ("const", "const"),
        ("unknown_kind", "unknown_kind"),
    ];
    for (kind, expected) in cases {
        assert_eq!(abbreviate_kind(kind), expected);
    }
}
25954
/// Table-driven check of `abbreviate_match_type`: two match types are
/// shortened, the other two are expected to come back unchanged.
#[test]
fn abbreviate_match_type_maps_search_types() {
    let cases = [
        ("exact_name", "exact"),
        ("partial_tags", "partial"),
        ("all_tags", "all_tags"),
        ("other_type", "other_type"),
    ];
    for (match_type, expected) in cases {
        assert_eq!(abbreviate_match_type(match_type), expected);
    }
}
25962
/// Feeds `format_edge_groups` two edges that share the caller `main` in
/// `src/main.rs` and expects a single grouped line naming both callees.
#[test]
fn explain_compact_groups_edges_by_file() {
    // Both edges differ only in callee name and call-site line.
    let edge_to = |callee: &str, call_site_line| index::StoredEdge {
        caller_file: "src/main.rs".to_string(),
        caller_name: "main".to_string(),
        caller_line: 1,
        callee_name: callee.to_string(),
        call_site_line,
        tagpath_handle: None,
    };
    let edges = vec![edge_to("helper", 2), edge_to("render", 3)];

    let rendered = format_edge_groups(&edges, false);
    assert_eq!(rendered, vec![" src/main.rs (2): helper, render"]);
}
25986
/// Builds three search hits that all point at the same on-disk file and checks
/// that `group_search_hits` folds them into one group that reports the
/// root-relative path, a hit count of three, and two sample lines. Also
/// expects `should_collapse_search_hits` to return `true` for the same hits.
#[test]
fn search_hit_groups_preserve_file_counts_and_samples() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let source_file = root.join("src/main.rs");
    fs::create_dir_all(source_file.parent().unwrap()).unwrap();
    fs::write(&source_file, "claudescore-3 anchor\nclaudescore-3 follow-up\n").unwrap();
    let freshness = exact_search_file_timestamp(&source_file);
    // Every hit carries the same absolute path string.
    let hit_path = source_file.display().to_string();
    let hits = vec![
        sift::SearchHit {
            artifact_id: "a".to_string(),
            artifact_kind: sift::ContextArtifactKind::File,
            path: hit_path.clone(),
            rank: 1,
            score: 10.0,
            confidence: sift::ScoreConfidence::High,
            location: Some("line 3".to_string()),
            snippet: "claudescore-3 anchor".to_string(),
            provenance: sift::ArtifactProvenance {
                adapter: sift::AcquisitionAdapterKind::FileSystem,
                source: "ripgrep -F".to_string(),
                synthetic: false,
            },
            freshness: freshness.clone(),
            budget: sift::ArtifactBudget::from_text("claudescore-3 anchor", 1),
        },
        sift::SearchHit {
            artifact_id: "b".to_string(),
            artifact_kind: sift::ContextArtifactKind::File,
            path: hit_path.clone(),
            rank: 2,
            score: 9.0,
            confidence: sift::ScoreConfidence::High,
            location: Some("line 7".to_string()),
            snippet: "claudescore-3 follow-up".to_string(),
            provenance: sift::ArtifactProvenance {
                adapter: sift::AcquisitionAdapterKind::FileSystem,
                source: "ripgrep -F".to_string(),
                synthetic: false,
            },
            freshness: freshness.clone(),
            budget: sift::ArtifactBudget::from_text("claudescore-3 follow-up", 1),
        },
        sift::SearchHit {
            artifact_id: "c".to_string(),
            artifact_kind: sift::ContextArtifactKind::File,
            path: hit_path,
            rank: 3,
            score: 8.0,
            confidence: sift::ScoreConfidence::High,
            location: Some("line 9".to_string()),
            snippet: "claudescore-3 tail".to_string(),
            provenance: sift::ArtifactProvenance {
                adapter: sift::AcquisitionAdapterKind::FileSystem,
                source: "ripgrep -F".to_string(),
                synthetic: false,
            },
            freshness,
            budget: sift::ArtifactBudget::from_text("claudescore-3 tail", 1),
        },
    ];

    let groups = group_search_hits(&hits, root, false);
    assert_eq!(groups.len(), 1);

    let only_group = &groups[0];
    assert_eq!(only_group.path, "src/main.rs");
    assert_eq!(only_group.hits, 3);
    // Only the first two hits are expected to show up as samples.
    let expected_samples = vec![
        "line 3: claudescore-3 anchor".to_string(),
        "line 7: claudescore-3 follow-up".to_string(),
    ];
    assert_eq!(only_group.samples, expected_samples);
    assert!(should_collapse_search_hits(&hits, root, false));
}
26062
/// Three edges from different callers in `src/main.rs` to the same callee
/// `helper` must make `should_collapse_edge_groups` return `true`.
#[test]
fn dense_edge_groups_trigger_collapse() {
    // All edges share the file and the callee; only the caller varies.
    let call_to_helper = |caller: &str, caller_line, call_site_line| index::StoredEdge {
        caller_file: "src/main.rs".to_string(),
        caller_name: caller.to_string(),
        caller_line,
        callee_name: "helper".to_string(),
        call_site_line,
        tagpath_handle: None,
    };
    let edges = vec![
        call_to_helper("main", 1, 2),
        call_to_helper("beta", 5, 6),
        call_to_helper("gamma", 9, 10),
    ];
    assert!(should_collapse_edge_groups(&edges));
}
26093
/// Creates a temporary workspace whose `.gitmodules` declares two submodules,
/// `src/alpha` and `src/beta`.
///
/// `src/alpha/lib.rs` defines `alpha_helper` and `alpha_main` (which calls
/// `alpha_helper`); `src/beta/lib.rs` defines `beta_func`. The returned
/// `TempDir` must be kept alive by the caller for as long as the workspace is
/// used.
fn setup_workspace() -> tempfile::TempDir {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    // Only the `.gitmodules` manifest is written; this function does not run git.
    std::fs::write(
        root.join(".gitmodules"),
        r#"[submodule "src/alpha"]
 path = src/alpha
 url = https://example.com/alpha
[submodule "src/beta"]
 path = src/beta
 url = https://example.com/beta
"#,
    )
    .unwrap();
    let alpha = root.join("src/alpha");
    let beta = root.join("src/beta");
    std::fs::create_dir_all(&alpha).unwrap();
    std::fs::create_dir_all(&beta).unwrap();
    // `alpha_main` calls `alpha_helper`, giving the alpha scope one call edge.
    std::fs::write(
        alpha.join("lib.rs"),
        "fn alpha_helper() {}\nfn alpha_main() { alpha_helper(); }",
    )
    .unwrap();
    std::fs::write(beta.join("lib.rs"), "fn beta_func() {}").unwrap();
    dir
}
26122
/// Creates a temporary workspace whose `.gitmodules` declares two submodules
/// that share the leaf directory name `foo`: `pkg/app/foo` and `vendor/foo`.
///
/// Each submodule's `lib.rs` defines one unique function (`pkg_only` /
/// `vendor_only`) plus a `shared_name` function that calls it, so both the
/// directory leaf and one symbol name collide across the two scopes.
fn setup_workspace_with_duplicate_leaf_names() -> tempfile::TempDir {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    // Only the `.gitmodules` manifest is written; this function does not run git.
    std::fs::write(
        root.join(".gitmodules"),
        r#"[submodule "pkg/app/foo"]
 path = pkg/app/foo
 url = https://example.com/pkg-app-foo
[submodule "vendor/foo"]
 path = vendor/foo
 url = https://example.com/vendor-foo
"#,
    )
    .unwrap();
    let pkg_foo = root.join("pkg/app/foo");
    let vendor_foo = root.join("vendor/foo");
    std::fs::create_dir_all(&pkg_foo).unwrap();
    std::fs::create_dir_all(&vendor_foo).unwrap();
    std::fs::write(
        pkg_foo.join("lib.rs"),
        "fn pkg_only() {}\nfn shared_name() { pkg_only(); }\n",
    )
    .unwrap();
    std::fs::write(
        vendor_foo.join("lib.rs"),
        "fn vendor_only() {}\nfn shared_name() { vendor_only(); }\n",
    )
    .unwrap();
    dir
}
26153
/// Indexes the two-submodule fixture with no scope filter and checks that an
/// index database is written for both `alpha` and `beta` under
/// `.tsift/indexes/`.
#[test]
fn workspace_index_creates_per_submodule_dbs() {
    let workspace = setup_workspace();
    let root = workspace.path();
    // NOTE(review): the lone `true` presumably selects workspace-wide indexing;
    // confirm against the `cmd_index` signature.
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    for db in [
        ".tsift/indexes/alpha/index.db",
        ".tsift/indexes/beta/index.db",
    ] {
        assert!(root.join(db).exists());
    }
}
26177
/// Indexes the fixture with `Some("alpha")` as the scope argument and checks
/// that only the `alpha` index database is created; `beta` must stay absent.
#[test]
fn workspace_index_single_submodule() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        false,
        Some("alpha"),
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let alpha_db = root.join(".tsift/indexes/alpha/index.db");
    let beta_db = root.join(".tsift/indexes/beta/index.db");
    assert!(alpha_db.exists());
    assert!(!beta_db.exists());
}
26201
/// Indexing with the scope `missing` must fail with an error that names the
/// unknown scope and lists the available ones, and must not create an index
/// database for that scope.
#[test]
fn workspace_index_single_submodule_errors_on_unknown_scope() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let failure = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        false,
        Some("missing"),
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let rendered = failure.to_string();
    for expected in ["unknown scope `missing`", "Available scopes: alpha, beta"] {
        assert!(rendered.contains(expected));
    }
    assert!(!root.join(".tsift/indexes/missing/index.db").exists());
}
26229
/// With two submodules whose leaf directory is `foo`, indexing must place
/// each index database under the submodule's full path (`pkg/app/foo` and
/// `vendor/foo`) inside `.tsift/indexes/`.
#[test]
fn workspace_index_uses_unique_scope_ids_when_leaf_names_collide() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let pkg_db = root.join(".tsift/indexes/pkg/app/foo/index.db");
    assert!(pkg_db.exists());
    let vendor_db = root.join(".tsift/indexes/vendor/foo/index.db");
    assert!(vendor_db.exists());
}
26262
/// After indexing the whole workspace, `federated_symbol_search` for
/// `alpha_helper` (with tagpath annotation switched off) must return at least
/// one hit.
#[test]
fn federated_search_across_submodules() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diag) = federated_symbol_search(root, "alpha_helper", 10, &opts).unwrap();
    assert!(
        !hits.is_empty(),
        "should find alpha_helper via federated search"
    );
}
26298
/// Marks the `alpha` scope as `isolated` in `.tsift/config.toml`, indexes the
/// workspace, and expects `federated_symbol_search` for `alpha_helper` to
/// return no hits.
#[test]
fn federated_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let isolated_alpha = r#"
[overrides.alpha]
tier = "isolated"
"#;
    std::fs::write(tsift_dir.join("config.toml"), isolated_alpha).unwrap();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diag) = federated_symbol_search(root, "alpha_helper", 10, &opts).unwrap();
    assert!(
        hits.is_empty(),
        "isolated submodule should not appear in federated search"
    );
}
26344
/// With `alpha` marked `isolated`, a federated lexical search for `fn` must
/// still return hits, and every hit path must end in `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let isolated_alpha = r#"
[overrides.alpha]
tier = "isolated"
"#;
    std::fs::write(tsift_dir.join("config.toml"), isolated_alpha).unwrap();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();

    assert!(
        !response.hits.is_empty(),
        "shared scopes should still contribute lexical hits"
    );
    let only_beta = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta,
        "isolated scope should not leak lexical hits: {:?}",
        response.hits
    );
}
26399
/// With `alpha` marked `private`, a federated lexical search for `fn` must
/// still return hits, and every hit path must end in `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_private_tier() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let private_alpha = r#"
[overrides.alpha]
tier = "private"
"#;
    std::fs::write(tsift_dir.join("config.toml"), private_alpha).unwrap();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();

    assert!(
        !response.hits.is_empty(),
        "shared scopes should still contribute lexical hits"
    );
    let only_beta = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta,
        "private scope should not leak lexical hits: {:?}",
        response.hits
    );
}
26454
/// Opens the `alpha` scope's index database directly (path resolved through
/// `config::Config::db_path_for`) and checks that a symbol search for
/// `alpha_main` returns that symbol first.
#[test]
fn scoped_search_finds_submodule_symbols() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let alpha_db_path = config::Config::load(root)
        .unwrap()
        .db_path_for(root, "alpha");
    let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();
    let matches = alpha_db.symbol_search("alpha_main", 10).unwrap();
    assert!(!matches.is_empty());
    assert_eq!(matches[0].name, "alpha_main");
}
26482
/// `cmd_search` with the scope `missing` must fail with an error that names
/// the unknown scope and lists the available ones.
#[test]
fn scoped_search_cmd_errors_on_unknown_scope() {
    let workspace = setup_workspace();

    let failure = cmd_search(
        "alpha_main".to_string(),
        Some(workspace.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("missing".to_string()),
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let rendered = failure.to_string();
    for expected in ["unknown scope `missing`", "Available scopes: alpha, beta"] {
        assert!(rendered.contains(expected));
    }
}
26510
/// When two submodules share the leaf name `foo`, `cmd_search` with the bare
/// scope `foo` must fail with an "ambiguous scope" error that mentions both
/// full submodule paths.
#[test]
fn scoped_search_cmd_errors_on_ambiguous_legacy_scope_name() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let failure = cmd_search(
        "vendor_only".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("foo".to_string()),
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let rendered = failure.to_string();
    for expected in ["ambiguous scope `foo`", "pkg/app/foo", "vendor/foo"] {
        assert!(rendered.contains(expected));
    }
}
26556
/// Opens the `alpha` scope's index database directly and checks that the
/// callees recorded for `alpha_main` include `alpha_helper`.
#[test]
fn scoped_graph_query() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let alpha_db_path = config::Config::load(root)
        .unwrap()
        .db_path_for(root, "alpha");
    let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();
    let outgoing = alpha_db.callees_of("alpha_main").unwrap();
    let callee_names: Vec<&str> = outgoing
        .iter()
        .map(|edge| edge.callee_name.as_str())
        .collect();
    assert!(callee_names.contains(&"alpha_helper"));
}
26584
26585 fn assert_workspace_query_requires_scope(err: anyhow::Error) {
26586 let msg = err.to_string();
26587 assert!(msg.contains("require `--scope <scope>`"), "{msg}");
26588 assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
26589 assert!(msg.contains("Indexed scopes: alpha, beta"), "{msg}");
26590 assert!(
26591 !msg.contains("no index found at"),
26592 "workspace query should fail with scope guidance, got: {msg}"
26593 );
26594 }
26595
26596 fn assert_workspace_search_requires_explicit_target(err: anyhow::Error) {
26597 let msg = err.to_string();
26598 assert!(
26599 msg.contains("requires `--scope <scope>` or `--federated`"),
26600 "{msg}"
26601 );
26602 assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
26603 assert!(msg.contains("Indexed scopes: alpha, beta"), "{msg}");
26604 assert!(
26605 !msg.contains("autoindexing index"),
26606 "workspace search should fail before creating a shared root index: {msg}"
26607 );
26608 }
26609
/// After a workspace-wide index, running `cmd_graph` at the workspace root
/// must fail with the scope-guidance error checked by
/// `assert_workspace_query_requires_scope`.
#[test]
fn graph_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let failure = cmd_graph(
        "alpha_main",
        root,
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .unwrap_err();

    assert_workspace_query_requires_scope(failure);
}
26651
/// After a workspace-wide index, running `cmd_graph` from a directory nested
/// inside the `src/alpha` submodule must succeed. As the test name states, the
/// scope is expected to be inferred from that path.
#[test]
fn graph_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_graph(
        "alpha_main",
        &nested,
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .expect("cmd_graph should succeed from a path nested inside src/alpha");
}
26694
/// After a workspace-wide index, running `cmd_communities` at the workspace
/// root must fail with the scope-guidance error checked by
/// `assert_workspace_query_requires_scope`.
#[test]
fn communities_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let failure = cmd_communities(
        root,
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .unwrap_err();

    assert_workspace_query_requires_scope(failure);
}
26733
/// After a workspace-wide index, running `cmd_communities` from a directory
/// nested inside the `src/alpha` submodule must succeed. As the test name
/// states, the scope is expected to be inferred from that path.
#[test]
fn communities_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_communities(
        &nested,
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .expect("cmd_communities should succeed from a path nested inside src/alpha");
}
26773
/// After a workspace-wide index, running `cmd_path` at the workspace root
/// must fail with the scope-guidance error checked by
/// `assert_workspace_query_requires_scope`.
#[test]
fn path_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let failure = cmd_path(
        "alpha_main",
        "alpha_helper",
        root,
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .unwrap_err();

    assert_workspace_query_requires_scope(failure);
}
26811
/// After a workspace-wide index, running `cmd_path` from a directory nested
/// inside the `src/alpha` submodule must succeed. As the test name states, the
/// scope is expected to be inferred from that path.
#[test]
fn path_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_path(
        "alpha_main",
        "alpha_helper",
        &nested,
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .expect("cmd_path should succeed from a path nested inside src/alpha");
}
26850
/// Holds the lock created by `hold_rollback_journal_lock` on the root index
/// database and expects `cmd_path` to succeed anyway. The test name attributes
/// this to a snapshot fallback; the test itself only checks for success.
#[test]
fn path_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let dir = setup_graph_index();
    let db_path = dir.path().join(".tsift/index.db");
    // Keep the guard bound for the whole test so the lock is held during the call.
    let _lock = hold_rollback_journal_lock(&db_path);

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_path(
        "main",
        "helper",
        dir.path(),
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .expect("cmd_path should succeed while the rollback-journal lock is held");
}
26872
/// After a workspace-wide index, running `cmd_explain` at the workspace root
/// must fail with the scope-guidance error checked by
/// `assert_workspace_query_requires_scope`.
#[test]
fn explain_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let failure = cmd_explain(
        "alpha_main",
        root,
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    assert_workspace_query_requires_scope(failure);
}
26911
/// After a workspace-wide index, running `cmd_explain` from a directory nested
/// inside the `src/alpha` submodule must succeed. As the test name states, the
/// scope is expected to be inferred from that path.
#[test]
fn explain_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_explain(
        "alpha_main",
        &nested,
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_explain should succeed from a path nested inside src/alpha");
}
26951
/// Holds the lock created by `hold_rollback_journal_lock` on the root index
/// database and expects `cmd_explain` to succeed anyway. The test name
/// attributes this to a snapshot fallback; the test itself only checks for
/// success.
#[test]
fn explain_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let dir = setup_graph_index();
    let db_path = dir.path().join(".tsift/index.db");
    // Keep the guard bound for the whole test so the lock is held during the call.
    let _lock = hold_rollback_journal_lock(&db_path);

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_explain(
        "main",
        dir.path(),
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_explain should succeed while the rollback-journal lock is held");
}
26974
/// Runs `graph::detect_communities` over every edge in the graph fixture's
/// index and expects a non-zero node count and at least one community.
#[test]
fn community_detection_groups_related() {
    let fixture = setup_graph_index();
    let db_path = fixture.path().join(".tsift/index.db");
    let all_edges = index::IndexDb::open(&db_path)
        .unwrap()
        .all_edges()
        .unwrap();

    let detected = graph::detect_communities(&all_edges);
    assert!(detected.node_count > 0);
    assert!(!detected.communities.is_empty());
}
26986
/// Running `cmd_communities` in an empty temporary directory must succeed and
/// leave a `.tsift/index.db` behind, i.e. the missing index gets created.
#[test]
fn community_cmd_autoindexes_missing_index_by_default() {
    let dir = tempfile::tempdir().unwrap();

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_communities(
        dir.path(),
        None,
        2,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .expect("cmd_communities should succeed when no index exists yet");

    assert!(dir.path().join(".tsift/index.db").exists());
}
27007
/// `graph::shortest_path` over the graph fixture's edges must find a one-hop
/// path from `main` to `helper`.
#[test]
fn path_finds_connected_symbols() {
    let dir = setup_graph_index();
    let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
    let edges = db.all_edges().unwrap();

    // A single `expect` replaces the `is_some()` check followed by `unwrap()`.
    let path = graph::shortest_path(&edges, "main", "helper")
        .expect("expected a path from `main` to `helper`");
    assert_eq!(path.hops, 1);
}
27020
/// `graph::shortest_path` must return `None` when the target symbol
/// (`nonexistent`) does not appear in the graph fixture's edges.
#[test]
fn path_returns_none_for_unknown() {
    let fixture = setup_graph_index();
    let db_path = fixture.path().join(".tsift/index.db");
    let all_edges = index::IndexDb::open(&db_path)
        .unwrap()
        .all_edges()
        .unwrap();

    let route = graph::shortest_path(&all_edges, "main", "nonexistent");
    assert!(route.is_none());
}
27028
/// Running `cmd_path` in an empty temporary directory must succeed and leave a
/// `.tsift/index.db` behind, i.e. the missing index gets created.
#[test]
fn path_cmd_autoindexes_missing_index_by_default() {
    let dir = tempfile::tempdir().unwrap();

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_path(
        "a",
        "b",
        dir.path(),
        None,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    )
    .expect("cmd_path should succeed when no index exists yet");

    assert!(dir.path().join(".tsift/index.db").exists());
}
27048
/// `IndexDb::symbol_info("main")` on the graph fixture must return at least
/// one symbol, the first of which is named `main` with kind `function`.
#[test]
fn explain_shows_symbol_info() {
    let fixture = setup_graph_index();
    let db_path = fixture.path().join(".tsift/index.db");
    let found = index::IndexDb::open(&db_path)
        .unwrap()
        .symbol_info("main")
        .unwrap();

    assert!(!found.is_empty());
    let first = &found[0];
    assert_eq!(first.name, "main");
    assert_eq!(first.kind, "function");
}
27060
/// Running `cmd_explain` in an empty temporary directory must succeed and
/// leave a `.tsift/index.db` behind, i.e. the missing index gets created.
#[test]
fn explain_cmd_autoindexes_missing_index_by_default() {
    let dir = tempfile::tempdir().unwrap();

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_explain(
        "main",
        dir.path(),
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_explain should succeed when no index exists yet");

    assert!(dir.path().join(".tsift/index.db").exists());
}
27081
27082 fn hold_write_lock(db_path: &std::path::Path) -> Connection {
27083 let conn = Connection::open(db_path).unwrap();
27084 conn.execute_batch("BEGIN IMMEDIATE").unwrap();
27085 conn
27086 }
27087
27088 fn hold_writer_lock(lock_path: &std::path::Path) -> std::fs::File {
27089 use fs4::fs_std::FileExt;
27090 use std::io::Write;
27091
27092 let mut file = std::fs::OpenOptions::new()
27093 .read(true)
27094 .write(true)
27095 .create(true)
27096 .truncate(false)
27097 .open(lock_path)
27098 .unwrap();
27099 assert!(file.try_lock_exclusive().unwrap());
27100 writeln!(file, "{}", std::process::id()).unwrap();
27101 file
27102 }
27103
27104 fn hold_rollback_journal_lock(db_path: &std::path::Path) -> Connection {
27105 let conn = Connection::open(db_path).unwrap();
27106 conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
27107 .unwrap();
27108 std::fs::write(substrate::rollback_journal_path(db_path), "locked").unwrap();
27109 conn
27110 }
27111
/// Opens `db_path` and runs one SQL batch that switches the database to WAL
/// journaling, disables automatic checkpoints, inserts a row into a probe
/// table, sets exclusive locking mode, and begins an exclusive transaction.
///
/// Asserts that the file at `substrate::wal_sidecar_path(db_path)` exists
/// afterwards and returns the connection with the transaction still open.
fn hold_wal_database_lock(db_path: &std::path::Path) -> Connection {
    let conn = Connection::open(db_path).unwrap();
    // NOTE(review): the probe insert presumably exists to force a WAL write
    // before the lock is taken; statement order matters, so keep the batch as is.
    conn.execute_batch(
        "PRAGMA journal_mode=WAL;
 PRAGMA wal_autocheckpoint=0;
 CREATE TABLE IF NOT EXISTS wal_lock_probe (id INTEGER PRIMARY KEY);
 INSERT INTO wal_lock_probe DEFAULT VALUES;
 PRAGMA locking_mode=EXCLUSIVE;
 BEGIN EXCLUSIVE;",
    )
    .unwrap();
    // The callers rely on a live WAL sidecar being present.
    assert!(substrate::wal_sidecar_path(db_path).exists());
    conn
}
27126
/// With the database locked through `hold_wal_database_lock` (and no tsift
/// writer lock file taken by the test), `cmd_index` must fail with an error
/// that carries lock diagnostics: lock file absent, a WAL or SHM sidecar
/// present, and the "wedged writer" / "snapshot fallback" hints.
#[test]
fn index_cmd_reports_wal_sidecar_diagnostics_without_tsift_writer_lock() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    let _lock = hold_wal_database_lock(&root.join(".tsift/index.db"));

    let failure = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        false,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    for expected in ["indexing", "lock diagnostics:", "lock: absent"] {
        assert!(msg.contains(expected));
    }
    // Either sidecar is enough to show that WAL state was detected.
    assert!(msg.contains("wal: present") || msg.contains("shm: present"));
    for expected in [
        "wedged writer holding live WAL sidecars",
        "snapshot fallback",
    ] {
        assert!(msg.contains(expected));
    }
}
27159
/// While another connection holds the `BEGIN IMMEDIATE` transaction opened by
/// `hold_write_lock`, a lexical `cmd_search` for `main` must still succeed.
#[test]
fn search_cmd_succeeds_while_writer_lock_is_held() {
    let dir = setup_graph_index();
    let db_path = dir.path().join(".tsift/index.db");
    // Keep the guard bound for the whole test so the lock is held during the call.
    let _lock = hold_write_lock(&db_path);

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_search(
        "main".to_string(),
        Some(dir.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        true,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_search should succeed while the write lock is held");
}
27186
/// Installs the `install_search_post_precheck_lock` hook for the root index
/// database and expects a lexical `cmd_search` for `main` to succeed. The test
/// name attributes this to a snapshot fallback; the test itself only checks
/// for success.
#[test]
fn search_cmd_uses_snapshot_fallback_when_rollback_journal_lock_appears_after_precheck() {
    let dir = setup_graph_index();
    // Keep the hook guard bound so it stays installed for the whole call.
    let _hook = install_search_post_precheck_lock(dir.path().join(".tsift/index.db"));

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_search(
        "main".to_string(),
        Some(dir.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        true,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_search should succeed when the rollback-journal lock appears after the precheck");
}
27212
/// Installs the `install_search_post_precheck_wal_lock` hook for the root
/// index database and expects a lexical `cmd_search` for `main` to succeed.
/// The test name attributes this to a WAL snapshot fallback; the test itself
/// only checks for success.
#[test]
fn search_cmd_uses_wal_snapshot_fallback_when_lock_appears_after_precheck() {
    let dir = setup_graph_index();
    // Keep the hook guard bound so it stays installed for the whole call.
    let _hook = install_search_post_precheck_wal_lock(dir.path().join(".tsift/index.db"));

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_search(
        "main".to_string(),
        Some(dir.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        true,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_search should succeed when the WAL lock appears after the precheck");
}
27238
/// Rewrites `main.rs` after the fixture was indexed and expects `cmd_search`
/// to fail with an error that mentions "search aborted", "index is stale",
/// and `--no-autoindex`.
#[test]
fn search_cmd_fails_fast_when_autoindex_disabled_and_index_is_stale() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // NOTE(review): the pause presumably ensures the rewrite is seen as newer
    // than the indexed file; confirm against the change detection.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();

    let failure = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    for expected in ["search aborted", "index is stale", "--no-autoindex"] {
        assert!(msg.contains(expected));
    }
}
27272
/// Rewrites `main.rs` after indexing, then takes the rollback-journal lock.
/// `cmd_search` must report the stale index ("search aborted", "index is
/// stale") and must not surface "database is locked".
#[test]
fn search_cmd_reports_stale_when_root_index_is_locked_by_rollback_journal() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // NOTE(review): the pause presumably ensures the rewrite is seen as newer
    // than the indexed file; confirm against the change detection.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let _lock = hold_rollback_journal_lock(&root.join(".tsift/index.db"));

    let failure = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let msg = failure.to_string();
    for expected in ["search aborted", "index is stale"] {
        assert!(msg.contains(expected));
    }
    assert!(!msg.contains("database is locked"));
}
27307
/// Rewrites `main.rs` after indexing and runs `cmd_search`. The search must
/// succeed, and afterwards `compute_changes` must report no new, modified, or
/// deleted files, i.e. the index was brought up to date.
#[test]
fn search_cmd_autoindexes_stale_index_by_default() {
    let dir = setup_graph_index();
    // NOTE(review): the pause presumably ensures the rewrite is seen as newer
    // than the indexed file; confirm against the change detection.
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        dir.path().join("main.rs"),
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
    )
    .unwrap();

    // `expect` instead of `assert!(result.is_ok())`: a failure now prints the
    // underlying error rather than a bare "assertion failed".
    cmd_search(
        "helper".to_string(),
        Some(dir.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false,
        false,
        true,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_search should succeed and refresh the stale index");

    let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
    let summary = db.compute_changes(dir.path()).unwrap();
    assert_eq!(summary.new + summary.modified + summary.deleted, 0);
}
27342
/// Search still succeeds while a writer-lock guard is held, and the index stays stale.
///
/// The guard from `hold_writer_lock` on `.tsift/index.lock` is kept for the whole test; after
/// the search returns `Ok`, `compute_changes` must still see exactly one modified file.
#[test]
fn search_cmd_keeps_read_only_results_when_active_writer_blocks_autoindex() {
    let dir = setup_graph_index();
    let root = dir.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten = "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();
    let _writer_guard = hold_writer_lock(&root.join(".tsift/index.lock"));

    let query = "helper".to_string();
    let search_root = Some(root.to_path_buf());
    let strategy = Some("lexical".to_string());
    let outcome = cmd_search(
        query, search_root, 5, strategy, None, false, false, true, 0,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());

    let index_db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = index_db.compute_changes(root).unwrap();
    assert_eq!(pending.modified, 1);
}
27378
/// With a rollback-journal guard held on the index database, the search error must carry the
/// lock-diagnostics text asserted below (journal presence plus the suggested next step).
#[test]
fn search_cmd_autoindex_reports_lock_diagnostics_when_rollback_journal_blocks_writer() {
    let dir = setup_graph_index();
    let root = dir.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten = "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();
    let _journal_guard = hold_rollback_journal_lock(&root.join(".tsift/index.db"));

    let query = "helper".to_string();
    let search_root = Some(root.to_path_buf());
    let strategy = Some("lexical".to_string());
    let diagnostics = cmd_search(
        query, search_root, 5, strategy, None, false, false, true, 0,
        false, false, false, false, false, false,
    )
    .unwrap_err()
    .to_string();

    assert!(diagnostics.contains("autoindexing index"));
    assert!(diagnostics.contains("lock diagnostics:"));
    assert!(diagnostics.contains("journal: present"));
    assert!(
        diagnostics.contains("next: inspect the host for a wedged rollback-journal writer")
    );
}
27415
/// Searching from a nested directory succeeds and must not create `.tsift/index.db` inside
/// that nested directory.
#[test]
fn search_cmd_uses_ancestor_project_root_for_nested_paths() {
    let dir = setup_graph_index();
    let nested = dir.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let query = "helper".to_string();
    let strategy = Some("lexical".to_string());
    let outcome = cmd_search(
        query, Some(nested.clone()), 5, strategy, None, false, false, true, 0,
        false, false, false, false, false, false,
    );

    assert!(outcome.is_ok());
    let nested_index = nested.join(".tsift/index.db");
    assert!(!nested_index.exists());
}
27443
/// An exact search for a literal on the second line of `notes.txt` yields exactly one hit
/// with strategy `exact`, location `line 2`, and a snippet containing the literal.
#[test]
fn exact_search_returns_literal_matches() {
    let needle = "claudescore-3";
    let dir = tempfile::tempdir().unwrap();
    std::fs::write(dir.path().join("notes.txt"), "alpha\nclaudescore-3\nbeta\n").unwrap();

    let response = run_exact_search_with_timeout(dir.path(), needle, 5, 0).unwrap();

    assert_eq!(response.strategy, "exact");
    assert_eq!(response.hits.len(), 1);
    let hit = &response.hits[0];
    assert!(hit.path.ends_with("notes.txt"));
    assert_eq!(hit.location.as_deref(), Some("line 2"));
    assert!(hit.snippet.contains(needle));
}
27457
/// An `exact` strategy search succeeds even though `main.rs` was rewritten after indexing.
#[test]
fn exact_search_skips_stale_index_precheck() {
    let dir = setup_graph_index();
    let root = dir.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten = "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();

    let query = "println!(\"updated\")".to_string();
    let search_root = Some(root.to_path_buf());
    let strategy = Some("exact".to_string());
    let outcome = cmd_search(
        query, search_root, 5, strategy, None, false, false, false, 0,
        false, false, false, false, false, false,
    );

    assert!(outcome.is_ok());
}
27488
/// Exact search over an indexed workspace succeeds and leaves no `.tsift/index.db` at the
/// workspace root.
#[test]
fn workspace_exact_search_does_not_require_shared_root_index() {
    let dir = setup_workspace();
    let root = dir.path();
    cmd_index(
        root, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    let query = "alpha_helper".to_string();
    let search_root = Some(root.to_path_buf());
    let strategy = Some("exact".to_string());
    let outcome = cmd_search(
        query, search_root, 5, strategy, None, false, false, false, 0,
        false, false, false, false, false, false,
    );

    assert!(outcome.is_ok());
    assert!(!root.join(".tsift/index.db").exists());
}
27531
/// Table of queries that `query_prefers_exact_search` must accept and reject.
#[test]
fn identifier_like_query_prefers_exact_search() {
    let prefers_exact = ["claudescore-3", "alpha_helper", "src/main.rs", "crate::module"];
    for query in prefers_exact {
        assert!(
            query_prefers_exact_search(query),
            "expected {query:?} to prefer exact search"
        );
    }

    let keeps_default = ["authenticate", "fn main", "."];
    for query in keeps_default {
        assert!(
            !query_prefers_exact_search(query),
            "expected {query:?} not to prefer exact search"
        );
    }
}
27542
/// Without an explicit strategy, `claudescore-3` resolves to `exact` and `authenticate` to
/// `lexical`; an explicit `hybrid` strategy is returned unchanged.
#[test]
fn resolve_search_strategy_auto_promotes_identifier_like_queries() {
    let identifier_default = resolve_search_strategy("claudescore-3", None);
    assert_eq!(identifier_default, "exact");

    let word_default = resolve_search_strategy("authenticate", None);
    assert_eq!(word_default, "lexical");

    let explicit = resolve_search_strategy("claudescore-3", Some("hybrid".to_string()));
    assert_eq!(explicit, "hybrid");
}
27552
/// With no strategy given, searching an indexed workspace for `alpha_helper` succeeds and
/// leaves no `.tsift/index.db` at the workspace root.
#[test]
fn workspace_identifier_like_search_auto_uses_exact_backend() {
    let dir = setup_workspace();
    let root = dir.path();
    cmd_index(
        root, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    let query = "alpha_helper".to_string();
    let search_root = Some(root.to_path_buf());
    let outcome = cmd_search(
        query, search_root, 5, None, None, false, false, false, 0,
        false, false, false, false, false, false,
    );

    assert!(outcome.is_ok());
    assert!(!root.join(".tsift/index.db").exists());
}
27595
/// Indexing from a nested directory writes `.tsift/index.db` at the fixture root and not
/// inside the nested directory.
#[test]
fn index_cmd_uses_ancestor_project_root_for_nested_paths() {
    let dir = setup_graph_index();
    let nested = dir.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();
    std::fs::write(nested.join("extra.rs"), "fn nested_helper() {}\n").unwrap();

    let outcome = cmd_index(
        &nested, false, false, false, false, false, false, None,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());

    let root_index = dir.path().join(".tsift/index.db");
    let nested_index = nested.join(".tsift/index.db");
    assert!(root_index.exists());
    assert!(!nested_index.exists());
}
27612
/// Indexing from `docs/nested` inside the workspace fixture produces the `alpha` and `beta`
/// databases at the paths reported by the workspace config.
#[test]
fn workspace_index_cmd_uses_ancestor_project_root_for_nested_paths() {
    let dir = setup_workspace();
    let workspace = dir.path();
    let nested = workspace.join("docs/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let outcome = cmd_index(
        &nested, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    );
    let cfg = config::Config::load(workspace).unwrap();

    assert!(outcome.is_ok());
    for scope in ["alpha", "beta"] {
        assert!(cfg.db_path_for(workspace, scope).exists());
    }
}
27630
/// Only `alpha` is indexed up front; after `cmd_status` runs, the `beta` database must exist
/// and `check_status` must report a fresh index.
#[test]
fn status_cmd_autoindexes_missing_workspace_scopes() {
    let dir = setup_workspace();
    let workspace = dir.path();
    let cfg = config::Config::load(workspace).unwrap();

    let alpha_scope = config::Config::resolve_submodule(workspace, "alpha").unwrap();
    let alpha_index_path = cfg.db_path_for(workspace, &alpha_scope.id);
    // Kept as a named binding so the handle is not dropped before `cmd_status` runs.
    let alpha_index = index::IndexDb::open(&alpha_index_path).unwrap();
    alpha_index.apply_changes(&alpha_scope.source_root).unwrap();

    let beta_index_path = cfg.db_path_for(workspace, "beta");
    assert!(!beta_index_path.exists());

    cmd_status(workspace, false, true, false, false, false, false).unwrap();

    assert!(beta_index_path.exists());
    let report = status::check_status(workspace).unwrap();
    let is_fresh = matches!(report.index, status::IndexStatus::Fresh { .. });
    assert!(is_fresh);
}
27649
/// With no scope indexed beforehand, `cmd_status` must leave both the `alpha` and `beta`
/// databases in place and `check_status` must report a fresh index.
#[test]
fn status_cmd_autoindexes_workspace_when_all_scopes_are_missing() {
    let dir = setup_workspace();
    let workspace = dir.path();
    let cfg = config::Config::load(workspace).unwrap();

    cmd_status(workspace, false, true, false, false, false, false).unwrap();

    let alpha_index_path = cfg.db_path_for(workspace, "alpha");
    assert!(alpha_index_path.exists());
    let beta_index_path = cfg.db_path_for(workspace, "beta");
    assert!(beta_index_path.exists());

    let report = status::check_status(workspace).unwrap();
    let is_fresh = matches!(report.index, status::IndexStatus::Fresh { .. });
    assert!(is_fresh);
}
27662
/// `check_status` reports `Stale` after `main.rs` is rewritten and `Fresh` again once
/// `cmd_status` has run with its first flag set to `true`.
#[test]
fn status_cmd_fix_refreshes_stale_index() {
    let dir = setup_graph_index();
    let root = dir.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();

    let before = status::check_status(root).unwrap();
    assert!(matches!(before.index, status::IndexStatus::Stale { .. }));

    cmd_status(root, true, true, false, false, false, false).unwrap();

    let after = status::check_status(root).unwrap();
    assert!(matches!(after.index, status::IndexStatus::Fresh { .. }));
}
27681
/// While the guard from `hold_wal_database_lock` is held, status must be `Fresh` with
/// `SnapshotFallbackWal` recovery, and the lock report must show an absent writer lock, at
/// least one WAL-related sidecar present, and the "wedged writer" recommendation.
#[test]
fn status_cmd_reports_wal_snapshot_recovery_without_tsift_writer_lock() {
    let dir = setup_graph_index();
    let root = dir.path();
    let index_path = root.join(".tsift/index.db");
    let _wal_guard = hold_wal_database_lock(&index_path);

    cmd_status(root, false, true, false, false, false, false).unwrap();

    let report = status::check_status(root).unwrap();
    let recovered_via_wal_snapshot = matches!(
        report.index,
        status::IndexStatus::Fresh {
            recovery: Some(index::ReadOnlyRecovery::SnapshotFallbackWal),
            ..
        }
    );
    assert!(recovered_via_wal_snapshot);

    let locks = status::check_locks(root, None, None).unwrap();
    let writer_lock_absent = matches!(
        locks.writer_lock,
        status::WriterLockStatus::Absent { .. }
    );
    assert!(writer_lock_absent);
    assert!(locks.wal_sidecar.present || locks.shared_memory_sidecar.present);
    let advice = &locks.recommended_action;
    assert!(advice.contains("wedged writer holding live WAL sidecars"));
}
27710
/// A lock report requested for a nested directory must point at the fixture root and at the
/// root's `.tsift/index.db`.
#[test]
fn locks_report_uses_ancestor_project_root_for_nested_paths() {
    let dir = setup_graph_index();
    let project_dir = dir.path();
    let nested = project_dir.join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let resolved_root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
    let report = status::check_locks(&resolved_root, Some(&nested), None).unwrap();

    let expected_db = project_dir.join(".tsift/index.db");
    assert_eq!(report.source_root, project_dir);
    assert_eq!(report.db_path, expected_db);
}
27723
/// A lock report for a directory under `src/alpha` must describe the `alpha` submodule:
/// its label, source root, database path, and reindex command.
#[test]
fn workspace_locks_report_infers_scope_from_nested_path() {
    let dir = setup_workspace();
    let workspace = dir.path();
    cmd_index(
        workspace, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();
    let nested = workspace.join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let resolved_root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
    let report = status::check_locks(&resolved_root, Some(&nested), None).unwrap();
    let cfg = config::Config::load(workspace).unwrap();

    assert_eq!(report.label, "submodule `alpha` index");
    assert_eq!(report.source_root, workspace.join("src/alpha"));
    assert_eq!(report.db_path, cfg.db_path_for(workspace, "alpha"));
    let expected_reindex = format!("tsift index --submodule alpha {}", workspace.display());
    assert_eq!(report.reindex_command, expected_reindex);
}
27759
/// A search scoped to `alpha` over a stale submodule index succeeds, after which the `alpha`
/// database reports no pending new, modified, or deleted files.
#[test]
fn scoped_search_cmd_autoindexes_stale_submodule_index_by_default() {
    let dir = setup_workspace();
    let workspace = dir.path();
    cmd_index(
        workspace, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    let alpha_lib = workspace.join("src/alpha/lib.rs");
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten =
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(&alpha_lib, rewritten).unwrap();

    let query = "alpha_helper".to_string();
    let search_root = Some(workspace.to_path_buf());
    let strategy = Some("lexical".to_string());
    let scope = Some("alpha".to_string());
    let outcome = cmd_search(
        query, search_root, 5, strategy, scope, false, false, true, 0,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());

    let cfg = config::Config::load(workspace).unwrap();
    let alpha_index =
        index::IndexDb::open_read_only(&cfg.db_path_for(workspace, "alpha")).unwrap();
    let pending = alpha_index.compute_changes(&workspace.join("src/alpha")).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
27814
/// With a rollback-journal guard held on the `alpha` database, a search scoped to `alpha`
/// must fail naming the submodule index and "search aborted", not "database is locked".
#[test]
fn scoped_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
    let dir = setup_workspace();
    let workspace = dir.path();
    cmd_index(
        workspace, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    let alpha_lib = workspace.join("src/alpha/lib.rs");
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten =
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(&alpha_lib, rewritten).unwrap();

    let cfg = config::Config::load(workspace).unwrap();
    let _journal_guard = hold_rollback_journal_lock(&cfg.db_path_for(workspace, "alpha"));

    let query = "alpha_helper".to_string();
    let search_root = Some(workspace.to_path_buf());
    let strategy = Some("lexical".to_string());
    let scope = Some("alpha".to_string());
    let message = cmd_search(
        query, search_root, 5, strategy, scope, false, false, false, 0,
        false, false, false, false, false, false,
    )
    .unwrap_err()
    .to_string();

    assert!(message.contains("search aborted"));
    assert!(message.contains("submodule `alpha` index"));
    assert!(!message.contains("database is locked"));
}
27870
/// Same stale-`alpha` scenario as the scoped variant, but with no scope and the sixth
/// `cmd_search` argument set to `true`; the `alpha` database must end with no pending changes.
// NOTE(review): the sixth argument is presumably the federated switch — confirm against
// `cmd_search`.
#[test]
fn federated_search_cmd_autoindexes_stale_indexes_by_default() {
    let dir = setup_workspace();
    let workspace = dir.path();
    cmd_index(
        workspace, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    let alpha_lib = workspace.join("src/alpha/lib.rs");
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten =
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(&alpha_lib, rewritten).unwrap();

    let query = "alpha_helper".to_string();
    let search_root = Some(workspace.to_path_buf());
    let strategy = Some("lexical".to_string());
    let outcome = cmd_search(
        query, search_root, 5, strategy, None, true, false, true, 0,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());

    let cfg = config::Config::load(workspace).unwrap();
    let alpha_index =
        index::IndexDb::open_read_only(&cfg.db_path_for(workspace, "alpha")).unwrap();
    let pending = alpha_index.compute_changes(&workspace.join("src/alpha")).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
27925
/// Unscoped search (sixth argument `true`, ninth argument `30`) with a rollback-journal guard
/// on the `alpha` database must fail mentioning "stale" and the submodule index, and must not
/// mention "database is locked".
#[test]
fn federated_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
    let dir = setup_workspace();
    let workspace = dir.path();
    cmd_index(
        workspace, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    let alpha_lib = workspace.join("src/alpha/lib.rs");
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten =
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(&alpha_lib, rewritten).unwrap();

    let cfg = config::Config::load(workspace).unwrap();
    let _journal_guard = hold_rollback_journal_lock(&cfg.db_path_for(workspace, "alpha"));

    let query = "alpha_helper".to_string();
    let search_root = Some(workspace.to_path_buf());
    let strategy = Some("lexical".to_string());
    let message = cmd_search(
        query, search_root, 5, strategy, None, true, false, false, 30,
        false, false, false, false, false, false,
    )
    .unwrap_err()
    .to_string();

    assert!(message.contains("stale"));
    assert!(message.contains("submodule `alpha` index"));
    assert!(!message.contains("database is locked"));
}
27981
/// A lexical search at the workspace root with no scope must fail with the error checked by
/// `assert_workspace_search_requires_explicit_target`, and must not create a root index.
#[test]
fn workspace_search_cmd_requires_explicit_target_without_shared_root_index() {
    let dir = setup_workspace();
    let workspace = dir.path();
    cmd_index(
        workspace, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    let query = "alpha_helper".to_string();
    let search_root = Some(workspace.to_path_buf());
    let strategy = Some("lexical".to_string());
    let failure = cmd_search(
        query, search_root, 5, strategy, None, false, false, true, 0,
        false, false, false, false, false, false,
    )
    .unwrap_err();

    assert_workspace_search_requires_explicit_target(failure);
    assert!(!workspace.join(".tsift/index.db").exists());
}
28025
/// A lexical search started from `src/alpha/nested` succeeds without an explicit scope.
#[test]
fn workspace_search_cmd_infers_scope_from_nested_path() {
    let dir = setup_workspace();
    let workspace = dir.path();
    cmd_index(
        workspace, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();
    let nested = workspace.join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let query = "alpha_helper".to_string();
    let strategy = Some("lexical".to_string());
    let outcome = cmd_search(
        query, Some(nested), 5, strategy, None, false, false, false, 0,
        false, false, false, false, false, false,
    );

    assert!(outcome.is_ok());
}
28069
/// In the duplicate-leaf-name workspace fixture, a path under `vendor/foo` must resolve to
/// the database the config reports for the `vendor/foo` scope.
#[test]
fn resolve_query_db_path_infers_matching_duplicate_leaf_scope_from_nested_path() {
    let dir = setup_workspace_with_duplicate_leaf_names();
    let workspace = dir.path();
    cmd_index(
        workspace, false, false, false, false, false, true, None,
        false, false, false, false, false, false,
    )
    .unwrap();
    let nested = workspace.join("vendor/foo/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let resolved_root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
    let resolved_db = resolve_query_db_path(&resolved_root, &nested, None).unwrap();
    let cfg = config::Config::load(workspace).unwrap();

    let expected_db = cfg.db_path_for(workspace, "vendor/foo");
    assert_eq!(resolved_db, expected_db);
}
28099
/// `cmd_graph` for `main` returns `Ok` while the guard from `hold_write_lock` on the index
/// database is alive.
#[test]
fn graph_cmd_succeeds_while_writer_lock_is_held() {
    let dir = setup_graph_index();
    let root = dir.path();
    let index_path = root.join(".tsift/index.db");
    let _write_guard = hold_write_lock(&index_path);

    let tagpath_opts = TagpathSearchOpts::default();
    let outcome = cmd_graph(
        "main", root, false, false, None, 20, false, true,
        false, false, false, false, false, tagpath_opts,
    );

    assert!(outcome.is_ok());
}
28125
/// `cmd_graph` over a stale index succeeds, after which `compute_changes` on the reopened
/// index reports no new, modified, or deleted files.
#[test]
fn graph_cmd_autoindexes_stale_index_by_default() {
    let dir = setup_graph_index();
    let root = dir.path();
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();

    let tagpath_opts = TagpathSearchOpts::default();
    let outcome = cmd_graph(
        "helper", root, true, false, None, 20, false, true,
        false, false, false, false, false, tagpath_opts,
    );
    assert!(outcome.is_ok());

    let index_db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = index_db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
28158
/// `cmd_graph` for `main` returns `Ok` while a rollback-journal guard is held on the index
/// database.
#[test]
fn graph_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let dir = setup_graph_index();
    let root = dir.path();
    let index_path = root.join(".tsift/index.db");
    let _journal_guard = hold_rollback_journal_lock(&index_path);

    let tagpath_opts = TagpathSearchOpts::default();
    let outcome = cmd_graph(
        "main", root, false, false, None, 20, false, true,
        false, false, false, false, false, tagpath_opts,
    );

    assert!(outcome.is_ok());
}
28184
/// `cmd_graph` for `helper` returns `Ok` when given a nested directory of the indexed fixture.
#[test]
fn graph_cmd_uses_ancestor_project_root_for_nested_paths() {
    let dir = setup_graph_index();
    let nested = dir.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let tagpath_opts = TagpathSearchOpts::default();
    let outcome = cmd_graph(
        "helper", &nested, true, false, None, 20, false, false,
        false, false, false, false, false, tagpath_opts,
    );

    assert!(outcome.is_ok());
}
28210
/// `cmd_communities` returns `Ok` while the guard from `hold_writer_lock` on
/// `.tsift/index.lock` is alive.
#[test]
fn communities_cmd_succeeds_while_writer_lock_is_held() {
    let dir = setup_graph_index();
    let root = dir.path();
    let _writer_guard = hold_writer_lock(&root.join(".tsift/index.lock"));

    let tagpath_opts = TagpathSearchOpts::default();
    let outcome = cmd_communities(
        root, None, 1, 10,
        false, false, false, false, false, false,
        tagpath_opts,
    );

    assert!(outcome.is_ok());
}
28232
/// `cmd_communities` returns `Ok` while a rollback-journal guard is held on the index
/// database.
#[test]
fn communities_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let dir = setup_graph_index();
    let root = dir.path();
    let index_path = root.join(".tsift/index.db");
    let _journal_guard = hold_rollback_journal_lock(&index_path);

    let tagpath_opts = TagpathSearchOpts::default();
    let outcome = cmd_communities(
        root, None, 1, 10,
        false, false, false, false, false, false,
        tagpath_opts,
    );

    assert!(outcome.is_ok());
}
28255
/// After indexing a directory containing `fn alpha_helper`, linting `README.md` with the
/// entities collected from the project root must yield an annotation whose text is
/// `alpha_helper`.
#[test]
fn lint_finds_entities_from_project_root_index_db() {
    let dir = tempfile::tempdir().unwrap();
    let project = dir.path();
    let readme = project.join("README.md");
    std::fs::write(project.join("main.rs"), "fn alpha_helper() {}\n").unwrap();
    std::fs::write(&readme, "alpha_helper should be backticked.\n").unwrap();
    cmd_index(
        project, false, false, false, false, false, false, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    let project_root = lint::find_project_root_for_path(&readme).unwrap().unwrap();
    let entities = lint::collect_entities_from_index_path(&project_root).unwrap();
    let lint_result = lint::lint_markdown(&readme, &entities).unwrap();

    let flags_alpha_helper = lint_result
        .annotations
        .iter()
        .any(|annotation| annotation.text == "alpha_helper");
    assert!(flags_alpha_helper);
}
28296
/// `run_sift_search` over a one-file directory returns `Ok` and leaves the cache directory
/// it was given on disk.
#[test]
fn search_direct_runs_ok() {
    let dir = tempfile::tempdir().unwrap();
    let search_dir = dir.path().to_path_buf();
    let cache_dir = search_dir.join(".tsift/search-cache");
    let sample_file = search_dir.join("test.rs");
    std::fs::write(sample_file, "fn main() {}").unwrap();

    let outcome = run_sift_search(&search_dir, &cache_dir, "main", 1, "lexical");

    assert!(outcome.is_ok(), "direct search should succeed");
    let cache_created = cache_dir.exists();
    assert!(
        cache_created,
        "search should create the configured cache dir"
    );
}
28312
/// `run_search_with_timeout` called with a timeout argument of `0` returns `Ok` and leaves
/// the cache directory it was given on disk.
#[test]
fn search_timeout_zero_disables_timeout() {
    let dir = tempfile::tempdir().unwrap();
    let search_dir = dir.path().to_path_buf();
    let cache_dir = search_dir.join(".tsift/search-cache");
    let sample_file = search_dir.join("test.rs");
    std::fs::write(sample_file, "fn main() {}").unwrap();

    let outcome =
        run_search_with_timeout(&search_dir, &cache_dir, "main", 1, 0, "lexical", &[]);

    assert!(outcome.is_ok(), "timeout=0 should still work (no timeout)");
    let cache_created = cache_dir.exists();
    assert!(
        cache_created,
        "timeout=0 should keep using the stable search cache dir"
    );
}
28326
/// After the index database is deleted, the timeout message built for that target must say
/// the index is missing and suggest `tsift index`, without claiming the search root is fresh.
#[test]
fn search_timeout_message_reports_missing_index_as_rebuild_needed() {
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("main.rs"), "fn main() {}\n").unwrap();
    cmd_index(
        root, false, false, false, false, false, false, None,
        false, false, false, false, false, false,
    )
    .unwrap();

    // Delete the freshly built database so the target below refers to a missing file.
    let db_path = root.join(".tsift/index.db");
    std::fs::remove_file(&db_path).unwrap();
    let missing_target = SearchIndexTarget {
        label: "index".to_string(),
        db_path,
        source_root: root.to_path_buf(),
        scope_name: None,
        reindex_cmd: format!("tsift index {}", root.display()),
    };

    let text = search_timeout_message(1, "lexical", &[missing_target]).unwrap();

    assert!(text.contains("timed out after 1s"));
    assert!(text.contains("index is missing"));
    assert!(text.contains("Run `tsift index"));
    assert!(!text.contains("search root looks fresh"));
}
28365
/// The path from `next_search_worker_output_path` must have the extension `json`.
#[test]
fn search_worker_output_path_uses_json_suffix() {
    let output_path = next_search_worker_output_path();
    let has_json_extension = matches!(output_path.extension(), Some(ext) if ext == "json");
    assert!(has_json_extension);
}
28371
/// Smoke test: `cmd_index` returns `Ok` for the flag combination below. Only the result is
/// asserted, not the printed output.
// NOTE(review): which positional flags mean "quiet" cannot be read off this call — confirm
// against the `cmd_index` signature.
#[test]
fn index_quiet_suppresses_file_list() {
    let dir = setup_graph_index();
    let outcome = cmd_index(
        dir.path(), false, true, false, false, true, false, None,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
28395
/// Smoke test: `cmd_index` returns `Ok` with only the third positional argument set to
/// `true`. Only the result is asserted, not the printed output.
#[test]
fn index_exit_code_implies_quiet() {
    let dir = setup_graph_index();
    let outcome = cmd_index(
        dir.path(), false, true, false, false, false, false, None,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
28417
/// Smoke test: `cmd_index` returns `Ok` for the flag combination below, which additionally
/// sets the first flag after the `None` argument. Only the result is asserted.
#[test]
fn index_quiet_json_omits_changes() {
    let dir = setup_graph_index();
    let outcome = cmd_index(
        dir.path(), false, true, false, false, true, false, None,
        true, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
28439
/// Parsing `tsift workflow` with no further arguments yields the `Workflow` command with
/// topic `search` and `json` unset.
#[test]
fn cli_workflow_defaults_to_search_topic() {
    let cli = parse_cli(["tsift", "workflow"]);
    let Some(Commands::Workflow { topic, json }) = cli.command else {
        panic!("expected Workflow command");
    };
    assert_eq!(topic, "search");
    assert!(!json);
}
28451
/// The search workflow recipe must list its five steps in the expected order, and the handle
/// contract and selected steps must mention the handle kinds checked below.
#[test]
fn search_workflow_recipe_preserves_handles_across_expansions() {
    let recipe = search_workflow_recipe();

    let expected_order = [
        "exact-anchor",
        "semantic-search",
        "explain-symbol",
        "summarize-selection",
        "digest-expansion",
    ];
    let step_names: Vec<&str> = recipe.steps.iter().map(|step| step.name).collect();
    assert_eq!(step_names, expected_order);

    let contract_mentions_origin = recipe
        .handle_contract
        .iter()
        .any(|item| item.contains("originating command"));
    assert!(contract_mentions_origin);

    // True when any `preserves` entry of the step at `index` contains `needle`.
    let step_preserves = |index: usize, needle: &str| {
        recipe.steps[index]
            .preserves
            .iter()
            .any(|item| item.contains(needle))
    };
    assert!(step_preserves(1, "sfam-*"));
    assert!(step_preserves(2, "ecall-*"));
    assert!(step_preserves(4, "artifact handles"));
}
28491
/// With both flags `false`, `to_json` output must be single-line and contain the `"a"` key.
#[test]
fn to_json_compact_default() {
    let value = serde_json::json!({"a": 1, "b": [2, 3]});
    let rendered = to_json(&value, false, false).unwrap();
    assert!(!rendered.contains('\n'));
    // NOTE(review): the last needle is a prefix of the other two, so this check effectively
    // only requires `"a":` to appear somewhere in the output.
    let accepted_forms = ["\"a\":1", "\"a\": 1", "\"a\":"];
    let has_key = accepted_forms
        .iter()
        .any(|needle| rendered.contains(*needle));
    assert!(has_key);
}
28505
/// With the first flag `true`, `to_json` output must contain a newline and a space.
#[test]
fn to_json_pretty_indents() {
    let value = serde_json::json!({"a": 1, "b": [2, 3]});
    let rendered = to_json(&value, true, false).unwrap();
    let has_newline = rendered.contains('\n');
    assert!(has_newline);
    let has_space = rendered.contains(" ");
    assert!(has_space);
}
28513
/// For the same value, output with the first flag `false` must be strictly shorter than
/// output with it `true`.
#[test]
fn to_json_compact_is_shorter() {
    let value = serde_json::json!({
        "name": "test",
        "items": [1, 2, 3],
        "nested": {"key": "value"}
    });
    let compact_len = to_json(&value, false, false).unwrap().len();
    let pretty_len = to_json(&value, true, false).unwrap().len();
    assert!(compact_len < pretty_len);
}
28522
/// With the third flag `true`, output has an object under `_s` and the payload under `d`,
/// where `caller_file`, `caller_name`, and `call_site_line` appear as `cf`, `cn`, and `csl`.
#[test]
fn terse_renames_keys() {
    let value = serde_json::json!({
        "caller_file": "a.rs",
        "caller_name": "main",
        "call_site_line": 10
    });
    let encoded = to_json(&value, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    assert!(decoded["_s"].is_object());
    let payload = &decoded["d"];
    assert_eq!(payload["cf"], "a.rs");
    assert_eq!(payload["cn"], "main");
    assert_eq!(payload["csl"], 10);
}
28535
/// The `_s` object maps `n` to `name` and `sc` to `score` for this input, and has no `cf`
/// entry because `caller_file` is not used.
#[test]
fn terse_schema_only_includes_used_keys() {
    let value = serde_json::json!({"name": "test", "score": 0.5});
    let encoded = to_json(&value, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    let key_map = decoded["_s"].as_object().unwrap();
    assert_eq!(key_map["n"], "name");
    assert_eq!(key_map["sc"], "score");
    let lists_unused_key = key_map.contains_key("cf");
    assert!(!lists_unused_key);
}
28546
/// Key shortening also applies inside array elements: `callers` becomes `crs`, and its
/// element keys `caller_name` and `caller_file` become `cn` and `cf`.
#[test]
fn terse_nested_arrays() {
    let value = serde_json::json!({
        "callers": [{
            "caller_name": "a",
            "caller_file": "b.rs",
            "caller_line": 1,
            "callee_name": "c",
            "call_site_line": 2
        }]
    });
    let encoded = to_json(&value, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    let first_caller = &decoded["d"]["crs"][0];
    assert_eq!(first_caller["cn"], "a");
    assert_eq!(first_caller["cf"], "b.rs");
}
28556
/// `custom_field` keeps its name under `d`, while `name` is shortened to `n`.
#[test]
fn terse_preserves_unknown_keys() {
    let value = serde_json::json!({"custom_field": "value", "name": "test"});
    let encoded = to_json(&value, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    let payload = &decoded["d"];
    assert_eq!(payload["custom_field"], "value");
    assert_eq!(payload["n"], "test");
}
28566
/// An array of same-shaped objects becomes a `_c` column list plus `_r` row arrays.
#[test]
fn schema_converts_homogeneous_arrays() {
    let input = serde_json::json!({"symbols": [
        {"name": "foo", "kind": "fn", "line": 10},
        {"name": "bar", "kind": "fn", "line": 20}
    ]});
    let encoded = to_json_schema(&input, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    let table = &decoded["symbols"];
    assert_eq!(table["_c"], serde_json::json!(["kind", "line", "name"]));

    // Row values follow the column order asserted above.
    let rows = &table["_r"];
    assert_eq!(rows[0], serde_json::json!(["fn", 10, "foo"]));
    assert_eq!(rows[1], serde_json::json!(["fn", 20, "bar"]));
}
28583
/// A single-element object array stays a plain array.
#[test]
fn schema_skips_short_arrays() {
    let input = serde_json::json!({"items": [{"name": "only"}]});
    let encoded = to_json_schema(&input, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    let items = &decoded["items"];
    assert!(items.is_array());
    assert_eq!(items[0]["name"], "only");
}
28592
/// Object arrays whose elements have different keys stay plain arrays.
#[test]
fn schema_skips_heterogeneous_arrays() {
    let input = serde_json::json!({"items": [{"a": 1}, {"b": 2}]});
    let encoded = to_json_schema(&input, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    let items = &decoded["items"];
    assert!(items.is_array());
    assert_eq!(items[0]["a"], 1);
}
28601
/// With both flags set, the output has an `_s` legend and a `_c`/`_r` table under the abbreviated key.
#[test]
fn schema_with_terse_combines() {
    let input = serde_json::json!({"callers": [
        {"caller_name": "a", "caller_file": "x.rs"},
        {"caller_name": "b", "caller_file": "y.rs"}
    ]});
    let encoded = to_json_schema(&input, false, true, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    assert!(decoded["_s"].is_object());

    let table = &decoded["d"]["crs"];
    assert!(table["_c"].is_array());
    assert!(table["_r"].is_array());
    assert_eq!(table["_r"][0], serde_json::json!(["x.rs", "a"]));
}
28618
/// Arrays of scalars come back unchanged.
#[test]
fn schema_preserves_non_object_arrays() {
    let input = serde_json::json!({"tags": ["a", "b", "c"]});
    let encoded = to_json_schema(&input, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();

    let expected_tags = serde_json::json!(["a", "b", "c"]);
    assert_eq!(decoded["tags"], expected_tags);
}
28626
/// `--schema` placed before the subcommand sets `cli.schema` and still selects `Search`.
#[test]
fn cli_accepts_global_schema_flag() {
    let argv = ["tsift", "--schema", "search", "test"];
    let parsed = parse_cli(argv);

    assert!(parsed.schema);
    assert!(matches!(parsed.command, Some(Commands::Search { .. })));
}
28633
/// `--envelope` placed before the subcommand sets `cli.envelope` and still selects `ContextPack`.
#[test]
fn cli_accepts_global_envelope_flag() {
    let argv = [
        "tsift",
        "--envelope",
        "context-pack",
        "tasks/software/tsift.md",
    ];
    let parsed = parse_cli(argv);

    assert!(parsed.envelope);
    assert!(matches!(parsed.command, Some(Commands::ContextPack { .. })));
}
28645
/// A bare `locks` subcommand parses to `Commands::Locks`.
#[test]
fn cli_accepts_locks_command() {
    let parsed = parse_cli(["tsift", "locks"]);
    let is_locks = matches!(parsed.command, Some(Commands::Locks { .. }));
    assert!(is_locks);
}
28651
/// `locks --scope alpha` exposes the scope value on the parsed command.
#[test]
fn cli_locks_accepts_scope_flag() {
    let parsed = parse_cli(["tsift", "locks", "--scope", "alpha"]);
    let Some(Commands::Locks { scope, .. }) = parsed.command else {
        panic!("expected Locks command");
    };
    assert_eq!(scope.as_deref(), Some("alpha"));
}
28662
/// `search --autoindex` sets `autoindex` and leaves `no_autoindex` unset.
#[test]
fn cli_search_accepts_autoindex_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--autoindex"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert!(autoindex);
    assert!(!no_autoindex);
}
28678
/// `search --exact` sets `exact` and leaves `strategy` as `None`.
#[test]
fn cli_search_accepts_exact_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--exact"]);
    let Some(Commands::Search {
        exact, strategy, ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert!(exact);
    assert!(strategy.is_none());
}
28692
/// `diff-digest --json .` parses with the path set and neither `cached` nor `revision` selected.
#[test]
fn cli_parses_diff_digest_command() {
    let parsed = parse_cli(["tsift", "diff-digest", "--json", "."]);
    // The pattern lists every field without `..`, so it stays exhaustive over the variant.
    let Some(Commands::DiffDigest {
        json,
        path,
        cached,
        revision,
    }) = parsed.command
    else {
        panic!("expected DiffDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert!(!cached);
    assert!(revision.is_none());
}
28711
/// Combining `--cached` with `--revision` is a parse error whose message names both flags.
#[test]
fn cli_rejects_conflicting_diff_digest_modes() {
    let argv = [
        "tsift",
        "diff-digest",
        "--cached",
        "--revision",
        "HEAD",
        ".",
    ];
    let Err(err) = try_parse_cli(argv) else {
        panic!("expected conflicting diff-digest modes to fail");
    };
    let rendered = err.to_string();
    assert!(rendered.contains("--cached"));
    assert!(rendered.contains("--revision"));
}
28729
/// `test-digest` accepts `--path`, `--input`, `--runner` and `--json` together.
#[test]
fn cli_parses_test_digest_command() {
    let argv = [
        "tsift",
        "test-digest",
        "--path",
        ".",
        "--input",
        "target/test.log",
        "--runner",
        "cargo",
        "--json",
    ];
    let Some(Commands::TestDigest {
        json,
        path,
        input,
        runner,
    }) = parse_cli(argv).command
    else {
        panic!("expected TestDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/test.log")));
    assert_eq!(runner.as_deref(), Some("cargo"));
}
28758
/// `log-digest` accepts `--path`, `--input` and `--json` together.
#[test]
fn cli_parses_log_digest_command() {
    let argv = [
        "tsift",
        "log-digest",
        "--path",
        ".",
        "--input",
        "target/build.log",
        "--json",
    ];
    let Some(Commands::LogDigest { json, path, input }) = parse_cli(argv).command else {
        panic!("expected LogDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/build.log")));
}
28779
/// `metric-digest` parses input, baseline, metric lists, history, top and `--json`.
#[test]
fn cli_parses_metric_digest_command() {
    let argv = [
        "tsift",
        "metric-digest",
        "--input",
        "target/runs.json",
        "--baseline",
        "target/prior.json",
        "--metric",
        "session_mae",
        "--lower-is-better",
        "session_mae",
        "--history",
        "4",
        "--top",
        "2",
        "--json",
    ];
    let Some(Commands::MetricDigest {
        input,
        baseline,
        metrics,
        lower_is_better,
        history,
        top,
        json,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected MetricDigest command");
    };
    assert!(json);
    assert_eq!(input, Some(PathBuf::from("target/runs.json")));
    assert_eq!(baseline, Some(PathBuf::from("target/prior.json")));
    assert_eq!(metrics, vec!["session_mae"]);
    assert_eq!(lower_is_better, vec!["session_mae"]);
    assert_eq!(history, 4);
    assert_eq!(top, 2);
}
28821
/// `dci-benchmark` parses its `--fixture` path and the `--json` flag.
#[test]
fn cli_parses_dci_benchmark_command() {
    let argv = [
        "tsift",
        "dci-benchmark",
        "--fixture",
        "fixtures/dci-search-benchmark.json",
        "--json",
    ];
    let Some(Commands::DciBenchmark { fixture, json }) = parse_cli(argv).command else {
        panic!("expected DciBenchmark command");
    };
    assert!(json);
    assert_eq!(fixture, PathBuf::from("fixtures/dci-search-benchmark.json"));
}
28839
/// `session-digest` accepts `--path`, `--input`, `--source` and `--json` together.
#[test]
fn cli_parses_session_digest_command() {
    let argv = [
        "tsift",
        "session-digest",
        "--path",
        ".",
        "--input",
        "target/session.md",
        "--source",
        "markdown",
        "--json",
    ];
    let Some(Commands::SessionDigest {
        json,
        path,
        input,
        source,
    }) = parse_cli(argv).command
    else {
        panic!("expected SessionDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/session.md")));
    assert_eq!(source.as_deref(), Some("markdown"));
}
28868
/// `session-cost` accepts `--input`, `--source` and `--json` together.
#[test]
fn cli_parses_session_cost_command() {
    let argv = [
        "tsift",
        "session-cost",
        "--input",
        "target/session.jsonl",
        "--source",
        "codex-jsonl",
        "--json",
    ];
    let Some(Commands::SessionCost {
        json,
        input,
        source,
    }) = parse_cli(argv).command
    else {
        panic!("expected SessionCost command");
    };
    assert!(json);
    assert_eq!(input, Some(PathBuf::from("target/session.jsonl")));
    assert_eq!(source.as_deref(), Some("codex-jsonl"));
}
28893
/// `session-review <path> --next-context --json` parses the path and both flags.
#[test]
fn cli_parses_session_review_command() {
    let argv = [
        "tsift",
        "session-review",
        "tasks/software/tsift.md",
        "--next-context",
        "--json",
    ];
    let Some(Commands::SessionReview {
        json,
        next_context,
        path,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected SessionReview command");
    };
    assert!(json);
    assert!(next_context);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
}
28917
/// `search` parses `--max-items` and `--max-bytes` into `Some` values.
#[test]
fn cli_search_accepts_budget_flags() {
    let argv = [
        "tsift",
        "search",
        "alpha_helper",
        "--max-items",
        "3",
        "--max-bytes",
        "96",
    ];
    let Some(Commands::Search {
        max_items,
        max_bytes,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected Search command");
    };
    assert_eq!(max_items, Some(3));
    assert_eq!(max_bytes, Some(96));
}
28941
/// `search --budget small` parses to `ResponseBudgetPreset::Small`.
#[test]
fn cli_search_accepts_budget_preset() {
    let parsed = parse_cli(["tsift", "search", "alpha_helper", "--budget", "small"]);
    let Some(Commands::Search { budget, .. }) = parsed.command else {
        panic!("expected Search command");
    };
    assert_eq!(budget, Some(ResponseBudgetPreset::Small));
}
28952
/// Checks `ResponseBudget::from_cli` for three inputs: the `Small` preset alone, the preset
/// combined with `Some(7)` as the first argument, and no caps with the final flag set to `true`.
#[test]
fn response_budget_presets_fill_defaults_and_preserve_explicit_caps() {
    let preset = Some(ResponseBudgetPreset::Small);

    let preset_only = ResponseBudget::from_cli(None, None, preset, false);
    assert_eq!(preset_only.preview_items(), 3);
    assert_eq!(preset_only.preview_bytes(), 120);
    assert_eq!(preset_only.follow_up_items(), 4);

    // With `Some(7)` first, both item counts become 7 while the byte cap stays at 120.
    let with_explicit_items =
        ResponseBudget::from_cli(Some(7), None, Some(ResponseBudgetPreset::Small), false);
    assert_eq!(with_explicit_items.preview_items(), 7);
    assert_eq!(with_explicit_items.preview_bytes(), 120);
    assert_eq!(with_explicit_items.follow_up_items(), 7);

    let flag_only = ResponseBudget::from_cli(None, None, None, true);
    assert!(flag_only.is_active());
}
28969
/// `explain` parses `--max-items` and `--max-bytes` into `Some` values.
#[test]
fn cli_explain_accepts_budget_flags() {
    let argv = [
        "tsift",
        "explain",
        "alpha_helper",
        "--max-items",
        "2",
        "--max-bytes",
        "80",
    ];
    let Some(Commands::Explain {
        max_items,
        max_bytes,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected Explain command");
    };
    assert_eq!(max_items, Some(2));
    assert_eq!(max_bytes, Some(80));
}
28993
/// `session-review` parses `--max-items` and `--max-bytes` into `Some` values.
#[test]
fn cli_session_review_accepts_budget_flags() {
    let argv = [
        "tsift",
        "session-review",
        "tasks/software/tsift.md",
        "--max-items",
        "4",
        "--max-bytes",
        "120",
    ];
    let Some(Commands::SessionReview {
        max_items,
        max_bytes,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected SessionReview command");
    };
    assert_eq!(max_items, Some(4));
    assert_eq!(max_bytes, Some(120));
}
29017
/// `context-pack` parses its path, digest inputs, runner and caps; `budget` and
/// `convex_snapshot` stay `None` when not supplied.
#[test]
fn cli_parses_context_pack_command() {
    let argv = [
        "tsift",
        "context-pack",
        "tasks/software/tsift.md",
        "--test-input",
        "target/test.log",
        "--runner",
        "cargo",
        "--log-input",
        "target/build.log",
        "--max-items",
        "3",
        "--max-bytes",
        "96",
        "--json",
    ];
    // The pattern lists every field without `..`, so it stays exhaustive over the variant.
    let Some(Commands::ContextPack {
        path,
        test_input,
        runner,
        log_input,
        json,
        max_items,
        max_bytes,
        budget,
        convex_snapshot,
    }) = parse_cli(argv).command
    else {
        panic!("expected ContextPack command");
    };
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(test_input, Some(PathBuf::from("target/test.log")));
    assert_eq!(runner.as_deref(), Some("cargo"));
    assert_eq!(log_input, Some(PathBuf::from("target/build.log")));
    assert!(json);
    assert_eq!(max_items, Some(3));
    assert_eq!(max_bytes, Some(96));
    assert!(budget.is_none());
    assert!(convex_snapshot.is_none());
}
29061
/// `token-savings` parses `--fixture`, `--fail-under` and `--json`.
#[test]
fn cli_parses_token_savings_command() {
    let argv = [
        "tsift",
        "token-savings",
        "--fixture",
        "fixtures/tsift-token-savings.json",
        "--fail-under",
        "--json",
    ];
    let Some(Commands::TokenSavings {
        fixture,
        fail_under,
        json,
    }) = parse_cli(argv).command
    else {
        panic!("expected TokenSavings command");
    };
    assert_eq!(fixture, PathBuf::from("fixtures/tsift-token-savings.json"));
    assert!(fail_under);
    assert!(json);
}
29085
/// A twelve-symbol, two-family search fixture must produce a passing report at the 40% threshold.
#[test]
fn token_savings_report_records_fixture_thresholds() {
    let identifiers = [
        "validate_user",
        "validateUser",
        "ValidateUser",
        "validate-user",
        "VALIDATE_USER",
        "Validate_User",
        "raw_symbol",
        "rawSymbol",
        "RawSymbol",
        "raw-symbol",
        "RAW_SYMBOL",
        "Raw_Symbol",
    ];
    // Each identifier gets its own file and a 1-based line number.
    let mut raw_symbols = Vec::new();
    for (idx, identifier) in identifiers.iter().enumerate() {
        raw_symbols.push(TokenSavingsRawSymbol {
            identifier: (*identifier).to_string(),
            file: format!("src/example_{idx}.rs"),
            line: (idx + 1) as u64,
            context: "function".to_string(),
        });
    }

    // Both families cover six spellings and carry no aliases.
    let family = |canonical: &str| TokenSavingsFamily {
        canonical: canonical.to_string(),
        count: 6,
        aliases: BTreeMap::new(),
    };
    let search_case = TokenSavingsFixtureCase {
        name: "search-preview".to_string(),
        surface: "search".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols,
        tagpath_families: vec![family("validate_user"), family("raw_symbol")],
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: None,
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![search_case],
    };

    let report = build_token_savings_report(&fixture).unwrap();

    assert!(report.pass);
    let outcome = &report.cases[0];
    assert_eq!(outcome.raw_symbol_count, 12);
    assert_eq!(outcome.family_count, 2);
    assert_eq!(outcome.status, "pass");
    assert!(outcome.byte_delta > 0);
    assert!(outcome.raw_estimated_tokens > outcome.envelope_estimated_tokens);
    assert!(outcome.savings_percent >= 40.0);
}
29148
/// A source-read fixture whose required anchors (40, 120, 160) fall inside the envelope
/// window (start 40, 121 lines) must pass at the 40% threshold.
#[test]
fn token_savings_source_read_inputs_preserve_required_anchors() {
    let read = TokenSavingsSourceReadInput {
        command: "sed -n '40,160p' src/main.rs".to_string(),
        file: "src/main.rs".to_string(),
        raw_start: 40,
        raw_lines: 121,
        raw_excerpt: "line 40\n".repeat(121),
        envelope_start: 40,
        envelope_lines: 121,
        required_line_anchors: vec![40, 120, 160],
    };
    let source_read_case = TokenSavingsFixtureCase {
        name: "source-read".to_string(),
        surface: "source-read".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols: Vec::new(),
        tagpath_families: Vec::new(),
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: Some(TokenSavingsSourceReadInputs { reads: vec![read] }),
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![source_read_case],
    };

    let report = build_token_savings_report(&fixture).unwrap();

    assert!(report.pass);
    let outcome = &report.cases[0];
    assert_eq!(outcome.surface, "source-read");
    assert!(outcome.savings_percent >= 40.0);
}
29184
/// A required anchor (line 120) outside the envelope window (start 1, 80 lines) must make
/// report construction fail with a message naming that anchor.
#[test]
fn token_savings_source_read_inputs_fail_when_anchor_is_hidden() {
    let read = TokenSavingsSourceReadInput {
        command: "cat src/main.rs".to_string(),
        file: "src/main.rs".to_string(),
        raw_start: 1,
        raw_lines: 200,
        raw_excerpt: "line\n".repeat(200),
        envelope_start: 1,
        envelope_lines: 80,
        required_line_anchors: vec![120],
    };
    let source_read_case = TokenSavingsFixtureCase {
        name: "source-read".to_string(),
        surface: "source-read".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols: Vec::new(),
        tagpath_families: Vec::new(),
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: Some(TokenSavingsSourceReadInputs { reads: vec![read] }),
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![source_read_case],
    };

    let Err(err) = build_token_savings_report(&fixture) else {
        panic!("hidden anchor should fail the source-read fixture");
    };

    assert!(err.to_string().contains("hides required line anchor 120"));
}
29221
/// With a 1-item / 12-byte budget, a long symbol name is shortened, the file is shown
/// relative to the root, and the row carries an `sfam-` handle plus an expand command.
#[test]
fn search_budget_report_truncates_symbol_preview_and_emits_stable_handle() {
    let root = Path::new("/repo");
    let response = empty_search_response(root, "lexical");
    let long_named_hit = index::SymbolHit {
        name: "alpha_helper_with_a_long_name".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        file: "/repo/src/lib.rs".to_string(),
        line: 12,
        end_line: None,
        tags: None,
        score: 0.98,
        match_type: "exact_name".to_string(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![long_named_hit];

    let report = build_search_budget_report(
        "alpha_helper_with_a_long_name",
        "lexical",
        Path::new("/repo"),
        &response,
        &symbol_hits,
        false,
        ResponseBudget::new(Some(1), Some(12)),
    );

    assert_eq!(report.symbols.len(), 1);
    let row = &report.symbols[0];
    assert!(row.handle.starts_with("sfam-"));
    assert_eq!(row.tag_alias.as_deref(), Some("alpha/hel..."));
    assert_eq!(row.name, "alpha_hel...");
    assert_eq!(row.file, "src/lib.rs");
    assert!(row.expand.contains("tsift search"));
}
29255
/// Three hits sharing the tags `alpha,helper` collapse into one report row that records
/// match, surface and file counts.
#[test]
fn search_budget_report_groups_repeated_symbols_by_canonical_tag_family() {
    let response = empty_search_response(Path::new("/repo"), "lexical");

    // All hits share language and tags; only the listed fields vary.
    let hit = |name: &str, kind: &str, file: &str, line, score, match_type: &str| {
        index::SymbolHit {
            name: name.to_string(),
            kind: kind.to_string(),
            language: "rust".to_string(),
            file: file.to_string(),
            line,
            end_line: None,
            tags: Some("alpha,helper".to_string()),
            score,
            match_type: match_type.to_string(),
            tagpath_handle: None,
        }
    };
    let symbol_hits = vec![
        hit(
            "alpha_helper",
            "function",
            "/repo/src/lib.rs",
            12,
            0.98,
            "exact_name",
        ),
        hit(
            "alphaHelper",
            "method",
            "/repo/src/main.rs",
            34,
            0.93,
            "tag_overlap",
        ),
        hit(
            "alpha_helper",
            "function",
            "/repo/src/worker.rs",
            56,
            0.91,
            "tag_overlap",
        ),
    ];

    let report = build_search_budget_report(
        "alpha helper",
        "lexical",
        Path::new("/repo"),
        &response,
        &symbol_hits,
        false,
        ResponseBudget::new(Some(5), Some(48)),
    );

    assert_eq!(report.symbol_total, 1);
    assert_eq!(report.raw_symbol_total, 3);
    assert_eq!(report.symbols.len(), 1);

    let family = &report.symbols[0];
    assert_eq!(family.tag_alias.as_deref(), Some("alpha/helper"));
    assert_eq!(family.match_count, 3);
    assert_eq!(family.surface_count, 2);
    assert_eq!(family.file_count, 3);
    assert_eq!(
        family.surface_examples,
        vec!["alpha_helper".to_string(), "alphaHelper".to_string()]
    );
    assert!(family.name.contains("(+1 variant)"));
    assert!(family.file.contains("(+2 files)"));
    assert!(family.expand.contains("tsift search"));
    assert!(family.expand.contains("alpha helper"));
}
29324
/// With 450 indexed artifacts and two hits under a 1-item budget, the report carries a
/// `high-hit` scale guard whose narrowing commands end with a `workflow search` entry.
#[test]
fn search_budget_report_warns_on_broad_preview_and_lists_narrowing_commands() {
    let mut response = empty_search_response(Path::new("/repo"), "lexical");
    response.indexed_artifacts = 450;

    let hit = |name: &str, file: &str, line, tags: &str, score, match_type: &str| {
        index::SymbolHit {
            name: name.to_string(),
            kind: "function".to_string(),
            language: "rust".to_string(),
            file: file.to_string(),
            line,
            end_line: None,
            tags: Some(tags.to_string()),
            score,
            match_type: match_type.to_string(),
            tagpath_handle: None,
        }
    };
    let symbol_hits = vec![
        hit(
            "alpha_helper",
            "/repo/src/lib.rs",
            12,
            "alpha,helper",
            0.98,
            "exact_name",
        ),
        hit(
            "beta_helper",
            "/repo/src/beta.rs",
            21,
            "beta,helper",
            0.92,
            "tag_overlap",
        ),
    ];

    let report = build_search_budget_report(
        "helper",
        "lexical",
        Path::new("/repo"),
        &response,
        &symbol_hits,
        false,
        ResponseBudget::new(Some(1), Some(64)),
    );

    let guard = report
        .scale_guard
        .as_ref()
        .expect("broad previews should emit a scale guard");
    assert_eq!(guard.level, "high-hit");
    assert_eq!(guard.signals.indexed_artifacts, 450);
    assert_eq!(guard.signals.raw_symbol_matches, 2);

    let suggests = |needle: &str| {
        guard
            .narrow_commands
            .iter()
            .any(|command| command.contains(needle))
    };
    assert!(suggests("--exact"));
    assert!(suggests("alpha helper"));

    let final_command = guard.narrow_commands.last().unwrap();
    assert!(final_command.contains("workflow search"));
}
29393
/// Under a 1-item / 24-byte budget, the explain report keeps one definition, one caller and
/// one community member, and flags itself as truncated.
#[test]
fn explain_budget_report_limits_edges_and_members() {
    let definition = index::StoredSymbol {
        name: "alpha_helper".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        signature: None,
        file: "src/lib.rs".to_string(),
        line: 10,
        end_line: None,
        parent_module: None,
        visibility: None,
        tags: None,
        tagpath_handle: None,
    };
    let symbols = vec![definition];

    // Both edges call `alpha_helper`; only the caller side differs.
    let caller_edge = |caller_file: &str, caller_name: &str, caller_line, call_site_line| {
        index::StoredEdge {
            caller_file: caller_file.to_string(),
            caller_name: caller_name.to_string(),
            caller_line,
            callee_name: "alpha_helper".to_string(),
            call_site_line,
            tagpath_handle: None,
        }
    };
    let callers = vec![
        caller_edge("src/main.rs", "main", 1, 3),
        caller_edge("src/worker.rs", "worker", 5, 8),
    ];

    let community = graph::Community {
        id: 1,
        members: vec![
            graph::CommunityMember::new("alpha_helper"),
            graph::CommunityMember::new("main"),
            graph::CommunityMember::new("worker"),
        ],
        modularity_contribution: 0.5,
    };

    let report = build_explain_budget_report(
        "alpha_helper",
        Path::new("/repo"),
        &symbols,
        &callers,
        2,
        false,
        &[],
        0,
        false,
        Some(&community),
        ResponseBudget::new(Some(1), Some(24)),
    );

    assert_eq!(report.definitions.len(), 1);
    assert_eq!(report.callers.len(), 1);
    assert!(report.truncated);
    assert_eq!(report.community.as_ref().unwrap().members.len(), 1);
    assert_eq!(
        report.definitions[0].tag_alias.as_deref(),
        Some("alpha/helper")
    );

    let first_caller = &report.callers[0];
    assert!(first_caller.handle.starts_with("ecall-"));
    assert_eq!(first_caller.tag_alias.as_deref(), Some("main"));
}
29462
/// Builds a session-review report with four guardrails, then checks the next-context budget
/// report twice: under a 1-item / 12-byte budget and under a 4-item / 120-byte budget.
#[test]
fn session_review_next_context_budget_limits_lists() {
    // The two cost summaries differ only in scope label and cached-input ratio.
    let cost_summary = |scope: &str, cached_input_ratio| session_review::SessionReviewCostSummary {
        scope: scope.to_string(),
        sessions: 1,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio,
        largest_turn_total_tokens: 240,
    };
    // Every guardrail in this fixture has severity "warn".
    let guardrail = |kind: &str, message: &str, guidance: &str| session_cost::SessionCostGuardrail {
        kind: kind.to_string(),
        severity: "warn".to_string(),
        message: message.to_string(),
        guidance: guidance.to_string(),
    };
    let prompt_target = |text: &str| session_review::SessionReviewPromptTarget {
        text: text.to_string(),
        occurrences: 1,
    };

    let matched_session = session_review::SessionReviewSession {
        source: "claude_jsonl".to_string(),
        path: "/tmp/session.jsonl".to_string(),
        matched_by: vec!["path".to_string()],
        modified_unix_secs: None,
        prompt_target_count: 2,
        command_groups: 0,
        file_groups: 2,
        symbol_groups: 1,
        failure_groups: 1,
        runtime_event_groups: 0,
        restart_churn_groups: 0,
        closeout_groups: 0,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        largest_turn_total_tokens: 240,
    };

    let next_context = session_review::SessionReviewNextContext {
        target: "tasks/software/tsift.md".to_string(),
        active_prompt_targets: vec!["do one".to_string(), "do two".to_string()],
        last_verification: session_review::SessionReviewVerificationState {
            status: "green".to_string(),
            detail: "cargo test".to_string(),
        },
        touched_files: vec!["src/lib.rs".to_string(), "src/main.rs".to_string()],
        touched_symbols: vec!["alpha_helper".to_string(), "main".to_string()],
        unresolved_failures: vec![session_review::SessionReviewFailure {
            kind: "timeout".to_string(),
            message: "search timed out".to_string(),
            occurrences: 1,
            command: None,
            session_path: None,
        }],
        next_digest_commands: vec![
            "tsift session-review --next-context tasks/software/tsift.md".to_string(),
            "tsift diff-digest .".to_string(),
            "tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log".to_string(),
            "tsift log-digest --path . < target/very-long-build-output-file-name-that-must-remain-executable.log".to_string(),
        ],
    };

    let report = session_review::SessionReviewReport {
        root: "/repo".to_string(),
        target: "tasks/software/tsift.md".to_string(),
        target_kind: "file".to_string(),
        sessions_considered: 1,
        sessions_matched: 1,
        claude_sessions: 1,
        codex_sessions: 0,
        agent_doc_logs: 0,
        prompt_target_count: 2,
        command_groups: 0,
        file_groups: 2,
        symbol_groups: 1,
        failure_groups: 1,
        runtime_event_groups: 0,
        restart_churn_groups: 0,
        closeout_groups: 0,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(40.0),
        largest_turn_total_tokens: 240,
        aggregate_cost: cost_summary("bounded_matched_sessions", Some(40.0)),
        latest_session_cost: Some(cost_summary("latest_matched_session", Some(66.67))),
        guardrails: vec![
            guardrail(
                "cache_resend",
                "cached input ratio was high",
                "compact or restart the session",
            ),
            guardrail(
                "prompt_budget",
                "largest prompt turn reached 999999 tokens",
                "compact the session before another large turn",
            ),
            guardrail("restart_loop", "restart churn detected", "restart cleanly"),
            guardrail(
                "noop_closeout",
                "commit_already_current appeared 8 times",
                "avoid reopening without new edits",
            ),
        ],
        loop_clusters: Vec::new(),
        file_read_diagnostics: Vec::new(),
        prompt_targets: vec![prompt_target("do one"), prompt_target("do two")],
        commands: Vec::new(),
        touched_files: Vec::new(),
        touched_symbols: Vec::new(),
        failures: Vec::new(),
        runtime_events: Vec::new(),
        restart_churn: Vec::new(),
        closeout: Vec::new(),
        largest_turns: Vec::new(),
        sessions: vec![matched_session],
        next_context,
        warnings: Vec::new(),
    };

    // Tight budget: list fields shrink to one entry, digest commands stay whole.
    let budget_report = build_session_review_next_context_budget_report(
        &report,
        ResponseBudget::new(Some(1), Some(12)),
        None,
    );

    assert!(budget_report.truncated);
    assert_eq!(budget_report.prompt_targets, vec!["do one"]);
    assert_eq!(budget_report.touched_files, vec!["src/lib.rs"]);

    let first_symbol_ref = &budget_report.touched_symbol_refs[0];
    assert!(first_symbol_ref.handle.starts_with("ncsym-"));
    assert_eq!(first_symbol_ref.tag_alias.as_deref(), Some("alpha/helper"));

    assert!(
        budget_report.unresolved_failures[0]
            .handle
            .starts_with("snf-")
    );
    assert_eq!(budget_report.next_digest_commands.len(), 4);
    assert_eq!(
        budget_report.next_digest_commands[2],
        "tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log"
    );
    assert_eq!(budget_report.next_token_actions.len(), 1);
    assert_eq!(budget_report.next_token_actions[0].kind, "prompt_budget");

    // Wider budget: all four guardrail kinds surface as actions, in the order asserted below.
    let full_action_report = build_session_review_next_context_budget_report(
        &report,
        ResponseBudget::new(Some(4), Some(120)),
        None,
    );
    let action_kinds: Vec<_> = full_action_report
        .next_token_actions
        .iter()
        .map(|action| action.kind.as_str())
        .collect();
    assert_eq!(
        action_kinds,
        vec![
            "prompt_budget",
            "cache_resend",
            "restart_loop",
            "noop_closeout"
        ]
    );

    let leading_action = &full_action_report.next_token_actions[0];
    assert_eq!(
        leading_action.compact_command.as_deref(),
        Some("agent-doc compact \"tasks/software/tsift.md\" --commit")
    );
    assert_eq!(
        leading_action.restart_command.as_deref(),
        Some("agent-doc start \"tasks/software/tsift.md\"")
    );
    assert!(
        leading_action
            .digest_commands
            .iter()
            .any(|command| command
                == "tsift --envelope context-pack \"tasks/software/tsift.md\" --budget normal")
    );
}
29680
29681 #[test]
29682 fn context_pack_diff_preview_limits_files_and_symbols() {
29683 let report = diff_digest::DiffDigestReport {
29684 root: "/repo".to_string(),
29685 mode: diff_digest::DiffDigestMode::WorkingTree,
29686 revision: None,
29687 files_changed: 2,
29688 files_with_current_summaries: 1,
29689 symbols_touched: 3,
29690 call_edges_added: 1,
29691 call_edges_removed: 0,
29692 files: vec![
29693 diff_digest::DiffDigestFile {
29694 path: "src/lib.rs".to_string(),
29695 status: diff_digest::DiffDigestFileStatus::Modified,
29696 touched_symbols: vec!["alpha_helper".to_string(), "beta_helper".to_string()],
29697 summary_state: diff_digest::DiffDigestSummaryState::Current,
29698 current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
29699 symbol: "alpha_helper".to_string(),
29700 summary: "alpha helper handles the main alpha workflow".to_string(),
29701 }],
29702 added_call_edges: vec!["alpha->beta".to_string()],
29703 removed_call_edges: vec![],
29704 warnings: vec!["stale parse".to_string()],
29705 },
29706 diff_digest::DiffDigestFile {
29707 path: "src/main.rs".to_string(),
29708 status: diff_digest::DiffDigestFileStatus::Added,
29709 touched_symbols: vec!["main".to_string()],
29710 summary_state: diff_digest::DiffDigestSummaryState::Missing,
29711 current_summaries: vec![],
29712 added_call_edges: vec![],
29713 removed_call_edges: vec![],
29714 warnings: vec![],
29715 },
29716 ],
29717 };
29718
29719 let preview =
29720 build_context_pack_diff_preview(&report, ResponseBudget::new(Some(1), Some(11)), None);
29721
29722 assert!(preview.truncated);
29723 assert_eq!(preview.files.len(), 1);
29724 assert_eq!(preview.files[0].path, "src/lib.rs");
29725 assert_eq!(preview.files[0].touched_symbols, vec!["alpha_he..."]);
29726 assert!(
29727 preview.files[0].touched_symbol_refs[0]
29728 .handle
29729 .starts_with("cdsym-")
29730 );
29731 assert_eq!(
29732 preview.files[0].touched_symbol_refs[0].tag_alias.as_deref(),
29733 Some("alpha/he...")
29734 );
29735 assert!(
29736 preview.files[0].summary_refs[0]
29737 .handle
29738 .starts_with("cdsum-")
29739 );
29740 assert_eq!(
29741 preview.files[0].summary_refs[0].tag_alias.as_deref(),
29742 Some("alpha/he...")
29743 );
29744 assert_eq!(preview.files[0].summary_refs[0].summary, "alpha he...");
29745 assert_eq!(
29746 preview.files[0].summary_refs[0].expand,
29747 "tsift summarize --file \"src/lib.rs\""
29748 );
29749 assert_eq!(preview.files[0].warnings, vec!["stale parse"]);
29750 }
29751
29752 #[test]
29753 fn context_pack_status_reminders_include_stale_index_state() {
29754 let dir = setup_graph_index();
29755 std::thread::sleep(std::time::Duration::from_millis(50));
29756 std::fs::write(
29757 dir.path().join("main.rs"),
29758 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
29759 )
29760 .unwrap();
29761
29762 let reminders = context_pack_status_reminders(dir.path());
29763
29764 assert_eq!(reminders.len(), 1);
29765 assert!(reminders[0].contains("index stale"));
29766 assert!(reminders[0].contains("tsift index ."));
29767 }
29768
29769 #[test]
29776 fn build_context_pack_reuses_inspect_within_scope() {
29777 let dir = setup_graph_index();
29778 init_git_repo(dir.path());
29779 let _guard = index::InspectScopeGuard::new();
29780 let _ = build_context_pack_report(
29781 dir.path(),
29782 None,
29783 None,
29784 None,
29785 ResponseBudget::new(Some(2), Some(96)),
29786 )
29787 .unwrap();
29788 let (hits, misses) = index::inspect_scope_stats();
29789 assert!(
29790 hits >= 1,
29791 "expected at least one cached inspect within scope (hits={hits}, misses={misses})"
29792 );
29793 assert!(
29794 misses >= 1,
29795 "expected at least one initial inspect miss (hits={hits}, misses={misses})"
29796 );
29797 }
29798
29799 #[test]
29804 fn inspect_read_only_outside_scope_does_not_cache() {
29805 let dir = setup_graph_index();
29806 let db_path = dir.path().join(".tsift/index.db");
29807 let _first = index::IndexDb::inspect_read_only(&db_path, dir.path(), false).unwrap();
29808 let (hits, misses) = index::inspect_scope_stats();
29809 assert_eq!(
29810 (hits, misses),
29811 (0, 0),
29812 "no scope guard => no hits/misses recorded"
29813 );
29814 let _second = index::IndexDb::inspect_read_only(&db_path, dir.path(), false).unwrap();
29815 let (hits, _) = index::inspect_scope_stats();
29816 assert_eq!(hits, 0, "must not reuse inspection outside of any scope");
29817 }
29818
29819 #[test]
29820 fn context_pack_refreshes_stale_index_before_handoff() {
29821 let dir = setup_graph_index();
29822 init_git_repo(dir.path());
29823 std::thread::sleep(std::time::Duration::from_millis(50));
29824 std::fs::write(
29825 dir.path().join("main.rs"),
29826 "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n",
29827 )
29828 .unwrap();
29829
29830 let report = build_context_pack_report(
29831 dir.path(),
29832 None,
29833 None,
29834 None,
29835 ResponseBudget::new(Some(2), Some(96)),
29836 )
29837 .unwrap();
29838
29839 assert!(
29840 report
29841 .status_reminders
29842 .iter()
29843 .any(|reminder| reminder.contains("index refreshed")
29844 && reminder.contains("context-pack handoff")),
29845 "expected context-pack refresh diagnostic, got {:?}",
29846 report.status_reminders
29847 );
29848 assert!(
29849 !report
29850 .status_reminders
29851 .iter()
29852 .any(|reminder| reminder.contains("index stale")),
29853 "stale reminder should be gone after refresh: {:?}",
29854 report.status_reminders
29855 );
29856
29857 let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
29858 let summary = db.compute_changes(dir.path()).unwrap();
29859 assert_eq!(summary.new + summary.modified + summary.deleted, 0);
29860 }
29861
29862 #[test]
29863 fn context_pack_materializes_source_handles_into_graph_store() {
29864 let dir = tempfile::tempdir().unwrap();
29865 let packet = ExplorationPacket {
29866 budget: exploration_budget_for_counts(2, 1),
29867 relationship_map: vec![ExplorationRelation {
29868 from: "file:main.rs".to_string(),
29869 relation: "touches_symbol".to_string(),
29870 to: "symbol:helper".to_string(),
29871 label: Some("modified diff".to_string()),
29872 }],
29873 source_windows: vec![ExplorationSourceWindow {
29874 handle: "xwin-test".to_string(),
29875 file: "main.rs".to_string(),
29876 start: 1,
29877 end: 32,
29878 reason: "changed file".to_string(),
29879 expand: "tsift source-read main.rs --path . --start 1 --lines 32".to_string(),
29880 }],
29881 worker_context: vec![ExplorationWorkerContext {
29882 handle: "xwrk-test".to_string(),
29883 target: "tasks/software/tsift.md".to_string(),
29884 summary: "do #kgnv".to_string(),
29885 expand: "tsift --envelope context-pack tasks/software/tsift.md --budget normal"
29886 .to_string(),
29887 }],
29888 no_reread_guidance: "use windows".to_string(),
29889 };
29890
29891 let packet = materialize_context_pack_exploration_packet(dir.path(), packet).unwrap();
29892 assert_eq!(packet.source_windows[0].handle, "xwin-test");
29893
29894 let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
29895 let source_handles = store.nodes_by_kind("source_handle").unwrap();
29896 assert_eq!(source_handles.len(), 1);
29897 assert_eq!(
29898 source_handles[0].properties.get("file"),
29899 Some(&"main.rs".to_string())
29900 );
29901 assert_eq!(
29902 store
29903 .outgoing_edges(&exploration_ref_id("file:main.rs"), Some("touches_symbol"))
29904 .unwrap()
29905 .len(),
29906 1
29907 );
29908 let worker_context = store.nodes_by_kind("worker_context").unwrap();
29909 assert_eq!(worker_context.len(), 1);
29910 assert_eq!(
29911 store
29912 .outgoing_edges("xwrk-test", Some("scopes_source"))
29913 .unwrap()
29914 .len(),
29915 1
29916 );
29917 }
29918
29919 #[test]
29920 fn context_pack_records_graph_orchestration_observability() {
29921 let dir = setup_traversal_project();
29922 init_git_repo(dir.path());
29923 let session = dir.path().join("tasks/software/tsift.md");
29924 refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
29925
29926 let report = build_context_pack_report(
29927 &session,
29928 None,
29929 None,
29930 None,
29931 ResponseBudget::new(Some(4), Some(160)),
29932 )
29933 .unwrap();
29934
29935 assert_eq!(
29936 report.graph_orchestration.contract_version,
29937 CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION
29938 );
29939 assert_eq!(
29940 report
29941 .graph_orchestration
29942 .projection_freshness
29943 .status
29944 .as_str(),
29945 "current"
29946 );
29947 assert!(!report.graph_orchestration.projection_hashes.is_empty());
29948 assert!(
29949 report
29950 .graph_orchestration
29951 .evidence_packet_ids
29952 .iter()
29953 .any(|id| id.starts_with("gevd-")),
29954 "{:?}",
29955 report.graph_orchestration.evidence_packet_ids
29956 );
29957 assert!(
29958 report
29959 .graph_orchestration
29960 .conflict_matrix_decisions
29961 .iter()
29962 .any(|decision| decision.contains("run conflict-matrix")),
29963 "{:?}",
29964 report.graph_orchestration.conflict_matrix_decisions
29965 );
29966 assert!(
29967 report
29968 .graph_orchestration
29969 .follow_up_commands
29970 .iter()
29971 .any(|command| command.contains("conflict-matrix")),
29972 "{:?}",
29973 report.graph_orchestration.follow_up_commands
29974 );
29975 assert!(
29976 !report
29977 .graph_orchestration
29978 .worker_ownership_blocks
29979 .is_empty()
29980 );
29981 }
29982
29983 #[test]
29984 fn convex_sync_report_chunks_upserts_and_tombstones() {
29985 let dir = setup_traversal_project();
29986 let source_graph = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
29987 let projection = traversal_projection_from_graph(dir.path(), None, &source_graph).unwrap();
29988 let mut snapshot = projection.to_convex_rows();
29989 snapshot.nodes.push(ConvexNodeRow {
29990 external_id: "stale-node".to_string(),
29991 kind: "backlog".to_string(),
29992 label: "stale".to_string(),
29993 properties: BTreeMap::new(),
29994 provenance: Vec::new(),
29995 freshness: None,
29996 });
29997 snapshot.edges.clear();
29998 snapshot.edges.push(ConvexEdgeRow {
29999 edge_key: "stale-edge".to_string(),
30000 from_external_id: "stale-node".to_string(),
30001 to_external_id: "stale-node".to_string(),
30002 kind: "mentions".to_string(),
30003 properties: BTreeMap::new(),
30004 provenance: Vec::new(),
30005 freshness: None,
30006 });
30007 let snapshot_path = dir.path().join("convex-snapshot.json");
30008 fs::write(&snapshot_path, serde_json::to_string(&snapshot).unwrap()).unwrap();
30009
30010 let report = build_convex_sync_report(dir.path(), None, Some(&snapshot_path), 2).unwrap();
30011
30012 assert_eq!(report.freshness.status, "stale");
30013 assert!(report.freshness.fail_closed);
30014 assert_eq!(report.node_tombstones, vec!["stale-node".to_string()]);
30015 assert!(
30016 report.edge_upserts.len() > 1,
30017 "snapshot without edges should upsert local edges"
30018 );
30019 assert_eq!(report.edge_tombstones, vec!["stale-edge".to_string()]);
30020 assert_eq!(
30021 report.chunks.first().map(|chunk| chunk.operation.as_str()),
30022 Some("delete_edges"),
30023 "edge tombstones should be planned before node tombstones"
30024 );
30025 assert!(
30026 report
30027 .chunks
30028 .iter()
30029 .any(|chunk| chunk.operation == "upsert_edges" && chunk.count <= 2),
30030 "expected chunked edge upserts, got {:?}",
30031 report.chunks
30032 );
30033 }
30034
30035 #[test]
30036 fn convex_snapshot_validation_fails_closed_when_stale() {
30037 let dir = setup_traversal_project();
30038 build_traversal_graph(dir.path(), dir.path(), None).unwrap();
30039 let snapshot = ConvexProjectionRows::default();
30040 let snapshot_path = dir.path().join("empty-convex-snapshot.json");
30041 fs::write(&snapshot_path, serde_json::to_string(&snapshot).unwrap()).unwrap();
30042
30043 let err = verify_convex_projection_snapshot(dir.path(), None, &snapshot_path).unwrap_err();
30044 assert!(
30045 err.to_string()
30046 .contains("Convex graph projection is not current"),
30047 "{err}"
30048 );
30049 }
30050
30051 #[test]
30052 fn convex_sync_report_marks_live_apply_mode_without_network() {
30053 let dir = setup_traversal_project();
30054 let report =
30055 build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
30056
30057 assert!(!report.dry_run);
30058 assert!(
30059 !report
30060 .diagnostics
30061 .iter()
30062 .any(|diagnostic| diagnostic.contains("dry-run only")),
30063 "apply-mode report should not claim dry-run diagnostics"
30064 );
30065 assert!(
30066 report
30067 .chunks
30068 .iter()
30069 .any(|chunk| chunk.operation == "upsert_nodes"),
30070 "live apply mode should still expose chunked idempotent operations"
30071 );
30072 }
30073
30074 #[test]
30075 fn convex_sync_apply_round_trips_with_http_backend() {
30076 use std::net::TcpListener;
30077 use std::sync::{Arc, Mutex};
30078
30079 let dir = setup_traversal_project();
30080 let report =
30081 build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
30082 let expected_chunks = report.chunks.len();
30083 assert!(expected_chunks > 0);
30084
30085 let listener = TcpListener::bind("127.0.0.1:0").unwrap();
30086 let endpoint = format!("http://{}", listener.local_addr().unwrap());
30087 let operations = Arc::new(Mutex::new(Vec::<String>::new()));
30088 let server_operations = Arc::clone(&operations);
30089 let server = std::thread::spawn(move || {
30090 for _ in 0..expected_chunks {
30091 let (mut stream, _) = listener.accept().unwrap();
30092 let mut reader = BufReader::new(stream.try_clone().unwrap());
30093 let mut request_line = String::new();
30094 reader.read_line(&mut request_line).unwrap();
30095 assert!(request_line.starts_with("POST "));
30096
30097 let mut content_length = 0usize;
30098 loop {
30099 let mut line = String::new();
30100 reader.read_line(&mut line).unwrap();
30101 if line == "\r\n" {
30102 break;
30103 }
30104 if let Some(value) = line.to_ascii_lowercase().strip_prefix("content-length:") {
30105 content_length = value.trim().parse().unwrap();
30106 }
30107 }
30108
30109 let mut body = vec![0u8; content_length];
30110 reader.read_exact(&mut body).unwrap();
30111 let request: serde_json::Value = serde_json::from_slice(&body).unwrap();
30112 server_operations
30113 .lock()
30114 .unwrap()
30115 .push(request["operation"].as_str().unwrap().to_string());
30116
30117 let response = br#"{"status":"ok","message":"accepted"}"#;
30118 write!(
30119 stream,
30120 "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
30121 response.len()
30122 )
30123 .unwrap();
30124 stream.write_all(response).unwrap();
30125 }
30126 });
30127
30128 cmd_convex_sync(
30129 ConvexSyncOptions {
30130 path: dir.path(),
30131 scope: None,
30132 snapshot: None,
30133 chunk_size: 100,
30134 remote_snapshot: false,
30135 apply: true,
30136 endpoint: Some(&endpoint),
30137 auth_token_env: "TSIFT_TEST_CONVEX_AUTH_TOKEN",
30138 },
30139 OutputFormat {
30140 json_output: false,
30141 compact: true,
30142 pretty: false,
30143 terse: false,
30144 schema: false,
30145 envelope: false,
30146 },
30147 )
30148 .unwrap();
30149 server.join().unwrap();
30150
30151 let operations = operations.lock().unwrap().clone();
30152 assert!(operations.contains(&"upsert_nodes".to_string()));
30153 assert!(operations.contains(&"upsert_edges".to_string()));
30154 }
30155
30156 #[test]
30157 fn context_pack_diff_preview_attaches_tag_ontology_refs() {
30158 let root = tempfile::tempdir().unwrap();
30159 fs::create_dir_all(root.path().join(".naming/tags")).unwrap();
30160 fs::write(
30161 root.path().join(".naming/tags/alpha.md"),
30162 "+++\ntag = \"alpha\"\ntitle = \"Alpha Domain\"\ndomain = \"fixture\"\n+++\n\nAlpha definition.\n",
30163 )
30164 .unwrap();
30165 let ontology = load_tag_ontology_preview_context(root.path()).unwrap();
30166 let report = diff_digest::DiffDigestReport {
30167 root: root.path().display().to_string(),
30168 mode: diff_digest::DiffDigestMode::WorkingTree,
30169 revision: None,
30170 files_changed: 1,
30171 files_with_current_summaries: 1,
30172 symbols_touched: 1,
30173 call_edges_added: 0,
30174 call_edges_removed: 0,
30175 files: vec![diff_digest::DiffDigestFile {
30176 path: "src/lib.rs".to_string(),
30177 status: diff_digest::DiffDigestFileStatus::Modified,
30178 touched_symbols: vec!["alpha_helper".to_string()],
30179 summary_state: diff_digest::DiffDigestSummaryState::Current,
30180 current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
30181 symbol: "alpha_helper".to_string(),
30182 summary: "alpha helper summary".to_string(),
30183 }],
30184 added_call_edges: vec![],
30185 removed_call_edges: vec![],
30186 warnings: vec![],
30187 }],
30188 };
30189
30190 let preview = build_context_pack_diff_preview(
30191 &report,
30192 ResponseBudget::new(Some(1), Some(80)),
30193 Some(&ontology),
30194 );
30195
30196 let symbol_ref = &preview.files[0].touched_symbol_refs[0].ontology_refs[0];
30197 assert!(symbol_ref.handle.starts_with("tont-"));
30198 assert_eq!(symbol_ref.tag, "alpha");
30199 assert_eq!(symbol_ref.path, ".naming/tags/alpha.md");
30200 assert_eq!(symbol_ref.title.as_deref(), Some("Alpha Domain"));
30201 assert_eq!(symbol_ref.domain.as_deref(), Some("fixture"));
30202 assert_eq!(
30203 preview.files[0].summary_refs[0].ontology_refs[0].path,
30204 ".naming/tags/alpha.md"
30205 );
30206 }
30207
30208 #[test]
30209 fn context_pack_test_preview_limits_failure_groups() {
30210 let report = test_digest::TestDigestReport {
30211 root: "/repo".to_string(),
30212 runner: "cargo".to_string(),
30213 failures: 2,
30214 grouped_failures: 2,
30215 counts: test_digest::TestDigestCounts {
30216 passed: Some(8),
30217 failed: Some(2),
30218 skipped: Some(1),
30219 },
30220 failure_groups: vec![
30221 test_digest::TestDigestFailure {
30222 tests: vec!["suite::alpha_failure".to_string()],
30223 message: "assertion failed".to_string(),
30224 path: Some("src/lib.rs".to_string()),
30225 line: Some(42),
30226 column: None,
30227 occurrences: 1,
30228 summary_state: test_digest::TestDigestSummaryState::Current,
30229 current_summaries: vec![test_digest::TestDigestSummarySnippet {
30230 symbol: "alpha_failure".to_string(),
30231 summary: "failure summary for alpha test".to_string(),
30232 }],
30233 },
30234 test_digest::TestDigestFailure {
30235 tests: vec!["suite::beta_failure".to_string()],
30236 message: "panic".to_string(),
30237 path: Some("src/main.rs".to_string()),
30238 line: Some(7),
30239 column: None,
30240 occurrences: 1,
30241 summary_state: test_digest::TestDigestSummaryState::Missing,
30242 current_summaries: vec![],
30243 },
30244 ],
30245 warnings: vec!["warning text".to_string()],
30246 };
30247
30248 let preview =
30249 build_context_pack_test_preview(&report, ResponseBudget::new(Some(1), Some(14)), None);
30250
30251 assert!(preview.truncated);
30252 assert_eq!(preview.failure_groups.len(), 1);
30253 assert_eq!(preview.failure_groups[0].tests, vec!["suite::alph..."]);
30254 assert_eq!(preview.failure_groups[0].message, "assertion f...");
30255 assert!(
30256 preview.failure_groups[0].summary_refs[0]
30257 .handle
30258 .starts_with("ctsum-")
30259 );
30260 assert_eq!(
30261 preview.failure_groups[0].summary_refs[0].expand,
30262 "tsift summarize --file \"src/lib.rs\""
30263 );
30264 assert_eq!(preview.warnings, vec!["warning text"]);
30265 }
30266
30267 #[test]
30268 fn context_pack_log_preview_limits_signals_and_refs() {
30269 let report = log_digest::LogDigestReport {
30270 root: "/repo".to_string(),
30271 total_lines: 12,
30272 non_empty_lines: 10,
30273 signal_groups: 2,
30274 repeated_line_groups: 2,
30275 repeated_line_occurrences: 3,
30276 file_ref_groups: 2,
30277 symbol_ref_groups: 2,
30278 stack_groups: 1,
30279 signals: vec![
30280 log_digest::LogDigestSignal {
30281 severity: "error".to_string(),
30282 message: "src/lib.rs:42 boom".to_string(),
30283 path: Some("src/lib.rs".to_string()),
30284 line: Some(42),
30285 column: None,
30286 occurrences: 2,
30287 summary_state: log_digest::LogDigestSummaryState::Current,
30288 current_summaries: vec![log_digest::LogDigestSummarySnippet {
30289 symbol: "alpha_helper".to_string(),
30290 summary: "alpha helper cached log summary".to_string(),
30291 }],
30292 },
30293 log_digest::LogDigestSignal {
30294 severity: "warn".to_string(),
30295 message: "slow path".to_string(),
30296 path: None,
30297 line: None,
30298 column: None,
30299 occurrences: 1,
30300 summary_state: log_digest::LogDigestSummaryState::Unavailable,
30301 current_summaries: vec![],
30302 },
30303 ],
30304 repeated_lines: vec![
30305 log_digest::LogDigestRepeatedLine {
30306 line: "retrying work item alpha".to_string(),
30307 occurrences: 3,
30308 },
30309 log_digest::LogDigestRepeatedLine {
30310 line: "retrying work item beta".to_string(),
30311 occurrences: 2,
30312 },
30313 ],
30314 file_refs: vec![
30315 log_digest::LogDigestFileRef {
30316 path: "src/lib.rs".to_string(),
30317 line: Some(42),
30318 column: None,
30319 occurrences: 2,
30320 summary_state: log_digest::LogDigestSummaryState::Current,
30321 current_summaries: vec![log_digest::LogDigestSummarySnippet {
30322 symbol: "alpha_helper".to_string(),
30323 summary: "alpha helper cached file summary".to_string(),
30324 }],
30325 },
30326 log_digest::LogDigestFileRef {
30327 path: "src/main.rs".to_string(),
30328 line: Some(7),
30329 column: None,
30330 occurrences: 1,
30331 summary_state: log_digest::LogDigestSummaryState::Missing,
30332 current_summaries: vec![],
30333 },
30334 ],
30335 symbol_refs: vec![
30336 log_digest::LogDigestSymbolRef {
30337 symbol: "alpha_helper".to_string(),
30338 occurrences: 2,
30339 summary_state: log_digest::LogDigestSummaryState::Current,
30340 current_summaries: vec![log_digest::LogDigestSummarySnippet {
30341 symbol: "alpha_helper".to_string(),
30342 summary: "alpha helper cached symbol summary".to_string(),
30343 }],
30344 },
30345 log_digest::LogDigestSymbolRef {
30346 symbol: "beta_helper".to_string(),
30347 occurrences: 1,
30348 summary_state: log_digest::LogDigestSummaryState::Missing,
30349 current_summaries: vec![],
30350 },
30351 ],
30352 stack_traces: vec![log_digest::LogDigestStackGroup {
30353 frames: vec!["frame one".to_string()],
30354 occurrences: 1,
30355 }],
30356 warnings: vec!["warning text".to_string()],
30357 };
30358
30359 let preview =
30360 build_context_pack_log_preview(&report, ResponseBudget::new(Some(1), Some(14)), None);
30361
30362 assert!(preview.truncated);
30363 assert_eq!(preview.signals.len(), 1);
30364 assert_eq!(preview.signals[0].message, "src/lib.rs:...");
30365 assert_eq!(preview.repeated_lines[0].line, "retrying wo...");
30366 assert_eq!(preview.file_refs.len(), 1);
30367 assert_eq!(preview.symbol_refs[0].symbol, "alpha_helper");
30368 assert!(
30369 preview.signals[0].summary_refs[0]
30370 .handle
30371 .starts_with("clsum-")
30372 );
30373 assert!(
30374 preview.file_refs[0].summary_refs[0]
30375 .handle
30376 .starts_with("clfsum-")
30377 );
30378 assert!(
30379 preview.symbol_refs[0].summary_refs[0]
30380 .handle
30381 .starts_with("clssum-")
30382 );
30383 assert_eq!(
30384 preview.symbol_refs[0].summary_refs[0].tag_alias.as_deref(),
30385 Some("alpha/helper")
30386 );
30387 assert_eq!(
30388 preview.symbol_refs[0].summary_refs[0].expand,
30389 "tsift summarize \"alpha_helper\""
30390 );
30391 assert_eq!(preview.warnings, vec!["warning text"]);
30392 }
30393
30394 #[test]
30395 fn cli_search_rejects_exact_with_strategy_flag() {
30396 let cli = try_parse_cli([
30397 "tsift",
30398 "search",
30399 "test",
30400 "--exact",
30401 "--strategy",
30402 "lexical",
30403 ]);
30404 assert!(cli.is_err());
30405 }
30406
30407 #[test]
30408 fn cli_search_autoindexes_by_default() {
30409 let cli = parse_cli(["tsift", "search", "test"]);
30410 match cli.command {
30411 Some(Commands::Search {
30412 autoindex,
30413 no_autoindex,
30414 ..
30415 }) => {
30416 assert!(!autoindex);
30417 assert!(!no_autoindex);
30418 assert!(autoindex || !no_autoindex);
30419 }
30420 _ => panic!("expected Search command"),
30421 }
30422 }
30423
30424 #[test]
30425 fn cli_search_accepts_no_autoindex_flag() {
30426 let cli = parse_cli(["tsift", "search", "test", "--no-autoindex"]);
30427 match cli.command {
30428 Some(Commands::Search {
30429 autoindex,
30430 no_autoindex,
30431 ..
30432 }) => {
30433 assert!(!autoindex);
30434 assert!(no_autoindex);
30435 }
30436 _ => panic!("expected Search command"),
30437 }
30438 }
30439
30440 #[test]
30441 fn cli_search_rejects_conflicting_autoindex_flags() {
30442 let cli = try_parse_cli(["tsift", "search", "test", "--autoindex", "--no-autoindex"]);
30443 assert!(cli.is_err());
30444 }
30445
30446 #[test]
30449 fn cli_accepts_global_absolute_flag() {
30450 let cli = parse_cli(["tsift", "--absolute", "status"]);
30451 assert!(cli.absolute);
30452 assert!(matches!(cli.command, Some(Commands::Status { .. })));
30453 }
30454
30455 #[test]
30456 fn cli_accepts_global_tabular_flag() {
30457 let cli = parse_cli(["tsift", "--tabular", "search", "test"]);
30458 assert!(cli.tabular);
30459 assert!(matches!(cli.command, Some(Commands::Search { .. })));
30460 }
30461
30462 #[test]
30463 fn cli_tabular_with_graph() {
30464 let cli = parse_cli(["tsift", "--tabular", "graph", "main"]);
30465 assert!(cli.tabular);
30466 assert!(matches!(cli.command, Some(Commands::Graph { .. })));
30467 }
30468
30469 #[test]
30470 fn cli_tabular_with_communities() {
30471 let cli = parse_cli(["tsift", "--tabular", "communities"]);
30472 assert!(cli.tabular);
30473 assert!(matches!(cli.command, Some(Commands::Communities { .. })));
30474 }
30475
30476 #[test]
30477 fn cli_tabular_with_explain() {
30478 let cli = parse_cli(["tsift", "--tabular", "explain", "main"]);
30479 assert!(cli.tabular);
30480 assert!(matches!(cli.command, Some(Commands::Explain { .. })));
30481 }
30482
30483 #[test]
30484 fn cli_traverse_accepts_path_target_and_html_format() {
30485 let cli = parse_cli([
30486 "tsift", "traverse", "#kgnv", "--to", "main", "--path", ".", "--format", "html",
30487 ]);
30488 match cli.command {
30489 Some(Commands::Traverse {
30490 node,
30491 to,
30492 path,
30493 format,
30494 ..
30495 }) => {
30496 assert_eq!(node.as_deref(), Some("#kgnv"));
30497 assert_eq!(to.as_deref(), Some("main"));
30498 assert_eq!(path, PathBuf::from("."));
30499 assert_eq!(format, TraverseFormat::Html);
30500 }
30501 _ => panic!("expected Traverse command"),
30502 }
30503 }
30504
30505 #[test]
30506 fn cli_parses_semantic_related_command() {
30507 let cli = parse_cli([
30508 "tsift",
30509 "semantic",
30510 "graph navigation",
30511 "--path",
30512 ".",
30513 "--kind",
30514 "all",
30515 "--limit",
30516 "3",
30517 "--json",
30518 ]);
30519 match cli.command {
30520 Some(Commands::Semantic {
30521 query,
30522 path,
30523 kind,
30524 limit,
30525 json,
30526 ..
30527 }) => {
30528 assert_eq!(query, "graph navigation");
30529 assert_eq!(path, PathBuf::from("."));
30530 assert_eq!(kind, SemanticRelatedKind::All);
30531 assert_eq!(limit, 3);
30532 assert!(json);
30533 }
30534 _ => panic!("expected Semantic command"),
30535 }
30536 }
30537
30538 #[test]
30539 fn cli_parses_convex_sync_command() {
30540 let cli = parse_cli([
30541 "tsift",
30542 "convex-sync",
30543 ".",
30544 "--snapshot",
30545 "rows.json",
30546 "--chunk-size",
30547 "25",
30548 "--json",
30549 ]);
30550 match cli.command {
30551 Some(Commands::ConvexSync {
30552 path,
30553 snapshot,
30554 chunk_size,
30555 json,
30556 ..
30557 }) => {
30558 assert_eq!(path, PathBuf::from("."));
30559 assert_eq!(snapshot, Some(PathBuf::from("rows.json")));
30560 assert_eq!(chunk_size, 25);
30561 assert!(json);
30562 }
30563 _ => panic!("expected ConvexSync command"),
30564 }
30565 }
30566
30567 #[test]
30568 fn cli_parses_convex_sync_live_flags() {
30569 let cli = parse_cli([
30570 "tsift",
30571 "convex-sync",
30572 ".",
30573 "--remote-snapshot",
30574 "--apply",
30575 "--endpoint",
30576 "https://example.test/convex-graph",
30577 "--auth-token-env",
30578 "TSIFT_TEST_TOKEN",
30579 ]);
30580 match cli.command {
30581 Some(Commands::ConvexSync {
30582 remote_snapshot,
30583 apply,
30584 endpoint,
30585 auth_token_env,
30586 ..
30587 }) => {
30588 assert!(remote_snapshot);
30589 assert!(apply);
30590 assert_eq!(
30591 endpoint.as_deref(),
30592 Some("https://example.test/convex-graph")
30593 );
30594 assert_eq!(auth_token_env, "TSIFT_TEST_TOKEN");
30595 }
30596 _ => panic!("expected ConvexSync command"),
30597 }
30598 }
30599
30600 #[test]
30601 fn cli_parses_graph_db_query() {
30602 let cli = parse_cli([
30603 "tsift",
30604 "graph-db",
30605 "--backend",
30606 "convex-snapshot",
30607 "--convex-snapshot",
30608 "rows.json",
30609 "--json",
30610 "neighborhood",
30611 "gbak-kgnv",
30612 "--depth",
30613 "2",
30614 "--edge-kind",
30615 "mentions",
30616 "--property",
30617 "path=tasks/software/tsift.md",
30618 "--cursor",
30619 "gbak-old",
30620 "--limit",
30621 "10",
30622 ]);
30623 match cli.command {
30624 Some(Commands::GraphDb {
30625 backend,
30626 convex_snapshot,
30627 json,
30628 query,
30629 ..
30630 }) => {
30631 assert_eq!(backend, GraphDbBackend::ConvexSnapshot);
30632 assert_eq!(convex_snapshot, Some(PathBuf::from("rows.json")));
30633 assert!(json);
30634 match query {
30635 GraphDbQuery::Neighborhood {
30636 id,
30637 depth,
30638 edge_kind,
30639 cursor,
30640 limit,
30641 property_filters,
30642 } => {
30643 assert_eq!(id, "gbak-kgnv");
30644 assert_eq!(depth, 2);
30645 assert_eq!(edge_kind.as_deref(), Some("mentions"));
30646 assert_eq!(cursor.as_deref(), Some("gbak-old"));
30647 assert_eq!(limit, Some(10));
30648 assert_eq!(
30649 property_filters,
30650 vec!["path=tasks/software/tsift.md".to_string()]
30651 );
30652 }
30653 _ => panic!("expected graph-db neighborhood query"),
30654 }
30655 }
30656 _ => panic!("expected GraphDb command"),
30657 }
30658 }
30659
30660 #[test]
30661 fn cli_parses_graph_db_tokensave_backend() {
30662 let cli = parse_cli([
30663 "tsift",
30664 "graph-db",
30665 "--backend",
30666 "tokensave",
30667 "--json",
30668 "node",
30669 "fn:main",
30670 ]);
30671 match cli.command {
30672 Some(Commands::GraphDb {
30673 backend,
30674 json,
30675 query,
30676 ..
30677 }) => {
30678 assert_eq!(backend, GraphDbBackend::Tokensave);
30679 assert!(json);
30680 match query {
30681 GraphDbQuery::Node { id } => assert_eq!(id, "fn:main"),
30682 _ => panic!("expected graph-db node query"),
30683 }
30684 }
30685 _ => panic!("expected GraphDb command"),
30686 }
30687 }
30688
30689 #[test]
30690 fn cli_parses_analyze_command() {
30691 let cli = parse_cli([
30692 "tsift", "analyze", ".", "--scope", "core", "--entry", "main", "--entry", "run",
30693 "--limit", "7", "--json",
30694 ]);
30695 match cli.command {
30696 Some(Commands::Analyze {
30697 path,
30698 scope,
30699 entry_points,
30700 limit,
30701 json,
30702 }) => {
30703 assert_eq!(path, PathBuf::from("."));
30704 assert_eq!(scope.as_deref(), Some("core"));
30705 assert_eq!(entry_points, vec!["main".to_string(), "run".to_string()]);
30706 assert_eq!(limit, 7);
30707 assert!(json);
30708 }
30709 _ => panic!("expected Analyze command"),
30710 }
30711 }
30712
30713 #[test]
30714 fn cli_parses_graph_db_related_query() {
30715 let cli = parse_cli([
30716 "tsift",
30717 "graph-db",
30718 "--json",
30719 "related",
30720 "voice avatar memory retrieval",
30721 "--kind",
30722 "all",
30723 "--depth",
30724 "3",
30725 "--seed-limit",
30726 "4",
30727 "--limit",
30728 "12",
30729 ]);
30730 match cli.command {
30731 Some(Commands::GraphDb { json, query, .. }) => {
30732 assert!(json);
30733 match query {
30734 GraphDbQuery::Related {
30735 query,
30736 kind,
30737 depth,
30738 seed_limit,
30739 limit,
30740 } => {
30741 assert_eq!(query, "voice avatar memory retrieval");
30742 assert_eq!(kind, SemanticRelatedKind::All);
30743 assert_eq!(depth, 3);
30744 assert_eq!(seed_limit, 4);
30745 assert_eq!(limit, 12);
30746 }
30747 _ => panic!("expected graph-db related query"),
30748 }
30749 }
30750 _ => panic!("expected GraphDb command"),
30751 }
30752 }
30753
30754 #[test]
30755 fn cli_parses_graph_db_compact_query() {
30756 let cli = parse_cli([
30757 "tsift",
30758 "graph-db",
30759 "--path",
30760 ".",
30761 "compact",
30762 "--apply",
30763 "--prune-tombstones",
30764 "--confirmed-convex-reconciled",
30765 ]);
30766 match cli.command {
30767 Some(Commands::GraphDb { query, .. }) => match query {
30768 GraphDbQuery::Compact {
30769 apply,
30770 prune_tombstones,
30771 confirmed_convex_reconciled,
30772 } => {
30773 assert!(apply);
30774 assert!(prune_tombstones);
30775 assert!(confirmed_convex_reconciled);
30776 }
30777 _ => panic!("expected graph-db compact query"),
30778 },
30779 _ => panic!("expected GraphDb command"),
30780 }
30781 }
30782
30783 #[test]
30784 fn cli_parses_impact_command() {
30785 let cli = parse_cli(["tsift", "impact", ".", "--cached", "--limit", "5"]);
30786 match cli.command {
30787 Some(Commands::Impact {
30788 path,
30789 cached,
30790 limit,
30791 ..
30792 }) => {
30793 assert_eq!(path, PathBuf::from("."));
30794 assert!(cached);
30795 assert_eq!(limit, 5);
30796 }
30797 _ => panic!("expected Impact command"),
30798 }
30799 }
30800
30801 #[test]
30802 fn cli_parses_conflict_matrix_command() {
30803 let cli = parse_cli([
30804 "tsift",
30805 "conflict-matrix",
30806 "--path",
30807 "tasks/software/tsift.md",
30808 "--depth",
30809 "4",
30810 "--limit",
30811 "12",
30812 "--impact-limit",
30813 "6",
30814 "--json",
30815 "pwcm",
30816 "#g6kf",
30817 ]);
30818 match cli.command {
30819 Some(Commands::ConflictMatrix {
30820 targets,
30821 path,
30822 depth,
30823 limit,
30824 impact_limit,
30825 json,
30826 ..
30827 }) => {
30828 assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
30829 assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
30830 assert_eq!(depth, 4);
30831 assert_eq!(limit, 12);
30832 assert_eq!(impact_limit, 6);
30833 assert!(json);
30834 }
30835 _ => panic!("expected ConflictMatrix command"),
30836 }
30837 }
30838
30839 #[test]
30840 fn cli_parses_dispatch_trace_command() {
30841 let cli = parse_cli([
30842 "tsift",
30843 "dispatch-trace",
30844 "--path",
30845 "tasks/software/tsift.md",
30846 "--format",
30847 "html",
30848 "--depth",
30849 "4",
30850 "pwcm",
30851 "#g6kf",
30852 ]);
30853 match cli.command {
30854 Some(Commands::DispatchTrace {
30855 targets,
30856 path,
30857 format,
30858 depth,
30859 ..
30860 }) => {
30861 assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
30862 assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
30863 assert_eq!(format, DispatchTraceFormat::Html);
30864 assert_eq!(depth, 4);
30865 }
30866 _ => panic!("expected DispatchTrace command"),
30867 }
30868 }
30869
30870 #[test]
30871 fn cli_parses_dependency_dag_command() {
30872 let cli = parse_cli([
30873 "tsift",
30874 "dependency-dag",
30875 "--path",
30876 "tasks/software/tsift.md",
30877 "--depth",
30878 "5",
30879 "--limit",
30880 "20",
30881 "--json",
30882 "alpha",
30883 "#beta",
30884 ]);
30885 match cli.command {
30886 Some(Commands::DependencyDag {
30887 targets,
30888 path,
30889 depth,
30890 limit,
30891 json,
30892 ..
30893 }) => {
30894 assert_eq!(targets, vec!["alpha".to_string(), "#beta".to_string()]);
30895 assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
30896 assert_eq!(depth, 5);
30897 assert_eq!(limit, 20);
30898 assert!(json);
30899 }
30900 _ => panic!("expected DependencyDag command"),
30901 }
30902 }
30903
30904 #[test]
30905 fn relativize_strips_root_prefix() {
30906 let root = std::path::Path::new("/home/user/project");
30907 assert_eq!(
30908 relativize("/home/user/project/src/main.rs", root),
30909 "src/main.rs"
30910 );
30911 }
30912
30913 #[test]
30914 fn relativize_leaves_non_matching_path() {
30915 let root = std::path::Path::new("/home/user/project");
30916 assert_eq!(
30917 relativize("/other/path/file.rs", root),
30918 "/other/path/file.rs"
30919 );
30920 }
30921
30922 #[test]
30923 fn relativize_leaves_already_relative() {
30924 let root = std::path::Path::new("/home/user/project");
30925 assert_eq!(relativize("src/main.rs", root), "src/main.rs");
30926 }
30927
30928 #[test]
30929 fn relativize_pathbuf_strips_prefix() {
30930 let root = std::path::Path::new("/home/user/project");
30931 let path = std::path::Path::new("/home/user/project/src/lib.rs");
30932 assert_eq!(relativize_pathbuf(path, root), PathBuf::from("src/lib.rs"));
30933 }
30934
30935 #[test]
30936 fn relativize_edges_strips_caller_file() {
30937 let root = std::path::Path::new("/tmp/proj");
30938 let mut edges = vec![index::StoredEdge {
30939 caller_file: "/tmp/proj/src/main.rs".to_string(),
30940 caller_name: "main".to_string(),
30941 caller_line: 1,
30942 callee_name: "helper".to_string(),
30943 call_site_line: 5,
30944 tagpath_handle: None,
30945 }];
30946 relativize_edges(&mut edges, root);
30947 assert_eq!(edges[0].caller_file, "src/main.rs");
30948 }
30949
30950 #[test]
30951 fn relativize_json_paths_strips_known_keys() {
30952 let root = std::path::Path::new("/tmp/proj");
30953 let mut val = serde_json::json!({
30954 "file": "/tmp/proj/src/main.rs",
30955 "path": "/tmp/proj/test.rs",
30956 "name": "/tmp/proj/not-a-path",
30957 "hits": [{"path": "/tmp/proj/nested.rs", "score": 1.0}]
30958 });
30959 relativize_json_paths(&mut val, root);
30960 assert_eq!(val["file"], "src/main.rs");
30961 assert_eq!(val["path"], "test.rs");
30962 assert_eq!(val["name"], "/tmp/proj/not-a-path");
30963 assert_eq!(val["hits"][0]["path"], "nested.rs");
30964 }
30965
30966 #[test]
30969 fn cli_graph_accepts_limit_flag() {
30970 let cli = parse_cli(["tsift", "graph", "main", "--limit", "5"]);
30971 match cli.command {
30972 Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 5),
30973 _ => panic!("expected Graph command"),
30974 }
30975 }
30976
30977 #[test]
30978 fn cli_graph_default_limit_is_20() {
30979 let cli = parse_cli(["tsift", "graph", "main"]);
30980 match cli.command {
30981 Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 20),
30982 _ => panic!("expected Graph command"),
30983 }
30984 }
30985
30986 #[test]
30987 fn cli_communities_accepts_limit_flag() {
30988 let cli = parse_cli(["tsift", "communities", "--limit", "3"]);
30989 match cli.command {
30990 Some(Commands::Communities { limit, .. }) => assert_eq!(limit, 3),
30991 _ => panic!("expected Communities command"),
30992 }
30993 }
30994
30995 #[test]
30996 fn cli_communities_default_limit_is_10() {
30997 let cli = parse_cli(["tsift", "communities"]);
30998 match cli.command {
30999 Some(Commands::Communities { limit, .. }) => assert_eq!(limit, 10),
31000 _ => panic!("expected Communities command"),
31001 }
31002 }
31003
31004 #[test]
31005 fn cli_explain_accepts_limit_flag() {
31006 let cli = parse_cli(["tsift", "explain", "main", "--limit", "7"]);
31007 match cli.command {
31008 Some(Commands::Explain { limit, .. }) => assert_eq!(limit, 7),
31009 _ => panic!("expected Explain command"),
31010 }
31011 }
31012
31013 #[test]
31014 fn cli_explain_default_limit_is_15() {
31015 let cli = parse_cli(["tsift", "explain", "main"]);
31016 match cli.command {
31017 Some(Commands::Explain { limit, .. }) => assert_eq!(limit, 15),
31018 _ => panic!("expected Explain command"),
31019 }
31020 }
31021
31022 #[test]
31023 fn cli_limit_zero_means_unlimited() {
31024 let cli = parse_cli(["tsift", "graph", "main", "--limit", "0"]);
31025 match cli.command {
31026 Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 0),
31027 _ => panic!("expected Graph command"),
31028 }
31029 }
31030
31031 #[test]
31032 fn graph_cmd_limit_runs_ok() {
31033 let dir = setup_graph_index();
31034 let result = cmd_graph(
31035 "main",
31036 dir.path(),
31037 false,
31038 false,
31039 None,
31040 1,
31041 false,
31042 false,
31043 false,
31044 false,
31045 false,
31046 false,
31047 false,
31048 TagpathSearchOpts::default(),
31049 );
31050 assert!(result.is_ok());
31051 }
31052
31053 #[test]
31054 fn graph_cmd_unlimited_runs_ok() {
31055 let dir = setup_graph_index();
31056 let result = cmd_graph(
31057 "main",
31058 dir.path(),
31059 false,
31060 false,
31061 None,
31062 0,
31063 false,
31064 false,
31065 false,
31066 false,
31067 false,
31068 false,
31069 false,
31070 TagpathSearchOpts::default(),
31071 );
31072 assert!(result.is_ok());
31073 }
31074
31075 #[test]
31076 fn graph_cmd_tabular_runs_ok() {
31077 let dir = setup_graph_index();
31078 let result = cmd_graph(
31079 "main",
31080 dir.path(),
31081 false,
31082 false,
31083 None,
31084 20,
31085 false,
31086 false,
31087 false,
31088 false,
31089 false,
31090 true,
31091 false,
31092 TagpathSearchOpts::default(),
31093 );
31094 assert!(result.is_ok());
31095 }
31096
31097 #[test]
31098 fn communities_cmd_tabular_runs_ok() {
31099 let dir = setup_graph_index();
31100 let result = cmd_communities(
31101 dir.path(),
31102 None,
31103 1,
31104 10,
31105 false,
31106 false,
31107 false,
31108 false,
31109 true,
31110 false,
31111 TagpathSearchOpts::default(),
31112 );
31113 assert!(result.is_ok());
31114 }
31115
31116 #[test]
31117 fn explain_cmd_tabular_runs_ok() {
31118 let dir = setup_graph_index();
31119 let result = cmd_explain(
31120 "main",
31121 dir.path(),
31122 None,
31123 15,
31124 false,
31125 false,
31126 false,
31127 false,
31128 false,
31129 true,
31130 false,
31131 );
31132 assert!(result.is_ok());
31133 }
31134
31135 #[test]
31136 fn traversal_excludes_agent_doc_runtime_paths_from_source_watermark() {
31137 let cases = [
31142 ".agent-doc",
31143 ".agent-doc/snapshots/abc.md",
31144 ".agent-doc/baselines/abc.md",
31145 ".agent-doc/archives/2026.md",
31146 ".agent-doc/runtime/run.jsonl",
31147 "src/foo/.agent-doc",
31148 "src/foo/.agent-doc/snapshots/x.md",
31149 "./.agent-doc/snapshots/x.md",
31150 ];
31151 for path in cases {
31152 assert!(
31153 traversal_relative_path_is_generated_artifact(path),
31154 "expected `{path}` to be excluded from source watermark"
31155 );
31156 }
31157 for path in [
31159 "src/main.rs",
31160 "tests/perf_gate.rs",
31161 "fixtures/x.json",
31162 "agent-doc/src/lib.rs", "src/.agent-doc-helper.rs",
31164 ] {
31165 assert!(
31166 !traversal_relative_path_is_generated_artifact(path),
31167 "expected `{path}` to be included in source watermark"
31168 );
31169 }
31170 }
31171
31172 #[test]
31173 fn traversal_excludes_tsift_and_target_runtime_paths_from_source_watermark() {
31174 let cases = [
31182 ".tsift",
31183 ".tsift/index.db",
31184 ".tsift/indexes/foo/index.db",
31185 ".tsift/conflict-matrix-cache/inputs/abc.json",
31186 ".tsift/summaries.db",
31187 "src/foo/.tsift",
31188 "src/foo/.tsift/graph.db",
31189 "./.tsift/index.db",
31190 "target",
31191 "target/debug/build/x",
31192 "target/release/tsift",
31193 "src/foo/target/debug/x",
31194 "./target/release/x",
31195 ];
31196 for path in cases {
31197 assert!(
31198 traversal_relative_path_is_generated_artifact(path),
31199 "expected `{path}` to be excluded from source watermark"
31200 );
31201 }
31202 for path in [
31204 "src/ctx-core-dev/lib/a__target/CHANGELOG.md",
31205 "src/ctx-core-dev/lib/a__target/A__Target/index.d.ts",
31206 "src/tsift-extras/lib.rs",
31207 "tsift/README.md",
31208 "src/targeting.rs",
31209 "src/.tsiftrc",
31210 "src/agent-doc-helper.rs",
31211 ] {
31212 assert!(
31213 !traversal_relative_path_is_generated_artifact(path),
31214 "expected `{path}` to be included in source watermark"
31215 );
31216 }
31217 }
31218
31219 #[test]
31220 fn traversal_source_watermark_is_stable_across_invocations_on_quiescent_root() {
31221 let dir = tempfile::tempdir().unwrap();
31230 let root = dir.path();
31231 std::fs::create_dir_all(root.join("src")).unwrap();
31232 std::fs::write(root.join("src/main.rs"), "fn main() {}\n").unwrap();
31233 let hint = root.join("README.md");
31234 std::fs::write(&hint, "# stable\n").unwrap();
31235 std::fs::create_dir_all(root.join(".tsift")).unwrap();
31237 std::fs::write(root.join(".tsift/index.db"), b"placeholder").unwrap();
31238 std::fs::create_dir_all(root.join("target/debug")).unwrap();
31239 std::fs::write(root.join("target/debug/marker"), b"placeholder").unwrap();
31240
31241 let first = traversal_source_watermark(root, &hint, None, true)
31242 .expect("first watermark call must succeed")
31243 .expect("first watermark must produce a hash for hinted markdown");
31244 let second = traversal_source_watermark(root, &hint, None, true)
31245 .expect("second watermark call must succeed")
31246 .expect("second watermark must produce a hash for hinted markdown");
31247 assert_eq!(
31248 first, second,
31249 "watermark must be identical across back-to-back invocations on a quiescent root"
31250 );
31251
31252 std::fs::write(root.join(".tsift/index.db"), b"changed").unwrap();
31254 std::fs::write(root.join("target/debug/marker"), b"changed").unwrap();
31255 let third = traversal_source_watermark(root, &hint, None, true)
31256 .expect("third watermark call must succeed")
31257 .expect("third watermark must produce a hash for hinted markdown");
31258 assert_eq!(
31259 first, third,
31260 "watermark must ignore mutations under .tsift/ and target/"
31261 );
31262
31263 std::thread::sleep(std::time::Duration::from_millis(20));
31268 std::fs::write(&hint, "# stable edited with longer content\n").unwrap();
31269 let fourth = traversal_source_watermark(root, &hint, None, true)
31270 .expect("fourth watermark call must succeed")
31271 .expect("fourth watermark must produce a hash for hinted markdown");
31272 assert_ne!(
31273 first, fourth,
31274 "watermark must invalidate when the hinted markdown file changes"
31275 );
31276 }
31277
31278 #[test]
31279 fn traversal_source_watermark_uses_summary_rows_not_summaries_db_metadata() {
31280 let dir = tempfile::tempdir().unwrap();
31284 let root = dir.path();
31285 std::fs::write(root.join("README.md"), "# stable\n").unwrap();
31286 let summaries_db_path = root.join(".tsift/summaries.db");
31287 let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
31288 let mut summary = summarize::Summary {
31289 id: 0,
31290 symbol_name: "main".to_string(),
31291 file_path: "src/main.rs".to_string(),
31292 content_hash: "hash-main".to_string(),
31293 summary: "main wires the CLI".to_string(),
31294 entities: Some(vec![summarize::Entity {
31295 name: "Cli".to_string(),
31296 kind: "type".to_string(),
31297 description: "Command-line interface".to_string(),
31298 }]),
31299 relationships: None,
31300 concept_labels: Some(vec!["cli".to_string()]),
31301 extracted_at: "1700000000".to_string(),
31302 model: "test-model".to_string(),
31303 tokens_input: Some(10),
31304 tokens_output: Some(5),
31305 };
31306 summary_db.insert(&summary).unwrap();
31307 drop(summary_db);
31308
31309 let hint = root.join("README.md");
31310 let first = traversal_source_watermark(root, &hint, None, true)
31311 .expect("first watermark call must succeed")
31312 .expect("first watermark must produce a hash");
31313
31314 std::thread::sleep(std::time::Duration::from_millis(20));
31315 let conn = Connection::open(&summaries_db_path).unwrap();
31316 conn.pragma_update(None, "user_version", 1).unwrap();
31317 conn.pragma_update(None, "user_version", 0).unwrap();
31318 drop(conn);
31319
31320 let second = traversal_source_watermark(root, &hint, None, true)
31321 .expect("second watermark call must succeed")
31322 .expect("second watermark must produce a hash");
31323 assert_eq!(
31324 first, second,
31325 "metadata-only summaries.db churn must not invalidate the source watermark"
31326 );
31327
31328 summary.entities = Some(vec![summarize::Entity {
31329 name: "GraphCache".to_string(),
31330 kind: "type".to_string(),
31331 description: "Stable full-projection cache input".to_string(),
31332 }]);
31333 let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
31334 summary_db.delete_by_file("src/main.rs").unwrap();
31335 summary_db.insert(&summary).unwrap();
31336 drop(summary_db);
31337
31338 let third = traversal_source_watermark(root, &hint, None, true)
31339 .expect("third watermark call must succeed")
31340 .expect("third watermark must produce a hash");
31341 assert_ne!(
31342 first, third,
31343 "semantic summary row changes must invalidate the source watermark"
31344 );
31345 }
31346
31347 #[test]
31348 fn full_projection_source_watermark_ignores_source_mtime_when_index_rows_unchanged() {
31349 let dir = tempfile::tempdir().unwrap();
31353 let root = dir.path();
31354 std::fs::create_dir_all(root.join("src")).unwrap();
31355 std::fs::create_dir_all(root.join(".tsift")).unwrap();
31356 let source = root.join("src/lib.rs");
31357 let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
31358 std::fs::write(&source, source_body).unwrap();
31359 let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31360 db.rebuild(root).unwrap();
31361 drop(db);
31362
31363 let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
31364 .unwrap()
31365 .value;
31366 std::thread::sleep(std::time::Duration::from_millis(20));
31367 std::fs::write(&source, source_body).unwrap();
31368 let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31369 db.apply_changes(root).unwrap();
31370 drop(db);
31371
31372 let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
31373 .unwrap()
31374 .value;
31375 assert_eq!(
31376 first, second,
31377 "mtime-only source index churn must not invalidate the full-projection cache"
31378 );
31379 }
31380
31381 #[test]
31382 fn full_projection_source_watermark_ignores_session_markdown_churn() {
31383 let dir = tempfile::tempdir().unwrap();
31388 let root = dir.path();
31389 std::fs::create_dir_all(root.join("src")).unwrap();
31390 std::fs::create_dir_all(root.join("tasks/software")).unwrap();
31391 std::fs::create_dir_all(root.join(".tsift")).unwrap();
31392 std::fs::write(root.join("src/lib.rs"), "pub fn alpha() {}\n").unwrap();
31393 let task_doc = root.join("tasks/software/tsift.md");
31394 std::fs::write(
31395 &task_doc,
31396 "---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Initial item\n",
31397 )
31398 .unwrap();
31399 let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31400 db.rebuild(root).unwrap();
31401 drop(db);
31402
31403 let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
31404 .unwrap()
31405 .value;
31406 std::fs::write(
31407 &task_doc,
31408 "---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Edited item\n",
31409 )
31410 .unwrap();
31411 let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
31412 .unwrap()
31413 .value;
31414 assert_eq!(
31415 first, second,
31416 "session markdown churn must not invalidate the full-projection code/summary cache"
31417 );
31418 }
31419
31420 #[test]
31421 fn full_projection_cache_hit_skips_provider_neutral_rebuild_after_mtime_churn() {
31422 let dir = tempfile::tempdir().unwrap();
31426 let root = dir.path();
31427 std::fs::create_dir_all(root.join("src")).unwrap();
31428 std::fs::create_dir_all(root.join(".tsift")).unwrap();
31429 let source = root.join("src/lib.rs");
31430 let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
31431 std::fs::write(&source, source_body).unwrap();
31432 let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31433 db.rebuild(root).unwrap();
31434 drop(db);
31435
31436 let (_projection, _warnings, _phases, first_stats) =
31437 graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
31438 assert!(
31439 !first_stats.hit,
31440 "the first full-projection run should populate the cache"
31441 );
31442
31443 std::thread::sleep(std::time::Duration::from_millis(20));
31444 std::fs::write(&source, source_body).unwrap();
31445 let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31446 db.apply_changes(root).unwrap();
31447 drop(db);
31448
31449 let (_projection, _warnings, phases, second_stats) =
31450 graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
31451 assert!(second_stats.hit, "mtime-only churn should still cache-hit");
31452 let source_graph_build = phases
31453 .iter()
31454 .find(|phase| phase.name == "full_projection.source_graph_build")
31455 .expect("cache hit must report source_graph_build");
31456 let projection_rows = phases
31457 .iter()
31458 .find(|phase| phase.name == "full_projection.projection_rows")
31459 .expect("cache hit must report projection_rows");
31460 assert_eq!(source_graph_build.duration_micros, 0);
31461 assert_eq!(projection_rows.duration_micros, 0);
31462 }
31463}
31464
/// Serializable overview of one database table, as produced by
/// `schema_overview`.
#[derive(Serialize)]
struct TableInfo {
    /// Table name as listed in `sqlite_master`.
    name: String,
    /// Column descriptions for this table.
    columns: Vec<ColumnInfo>,
    /// Result of `SELECT COUNT(*)` on the table.
    row_count: i64,
}
31473
/// Serializable description of one table column, filled from a
/// `PRAGMA table_info` row by `table_columns`.
#[derive(Serialize)]
struct ColumnInfo {
    /// Column name.
    name: String,
    /// Declared column type; serialized under the key `type`.
    #[serde(rename = "type")]
    col_type: String,
    /// Whether the column is reported as `NOT NULL`.
    notnull: bool,
    /// Whether the column is reported as part of the primary key.
    pk: bool,
    /// Default value expression, if any; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    default_value: Option<String>,
}
31484
31485pub(crate) fn open_db(path: &std::path::Path) -> Result<Connection> {
31487 let conn = Connection::open_with_flags(
31488 path,
31489 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
31490 )
31491 .with_context(|| format!("opening database: {}", path.display()))?;
31492 Ok(conn)
31493}
31494
31495pub(crate) fn schema_overview(conn: &Connection) -> Result<Vec<TableInfo>> {
31497 let mut stmt = conn.prepare(
31498 "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name",
31499 )?;
31500 let table_names: Vec<String> = stmt
31501 .query_map([], |row| row.get(0))?
31502 .collect::<std::result::Result<Vec<_>, _>>()?;
31503
31504 let mut tables = Vec::new();
31505 for tbl in table_names {
31506 let columns = table_columns(conn, &tbl)?;
31507 let row_count: i64 =
31508 conn.query_row(&format!("SELECT COUNT(*) FROM \"{}\"", tbl), [], |row| {
31509 row.get(0)
31510 })?;
31511 tables.push(TableInfo {
31512 name: tbl,
31513 columns,
31514 row_count,
31515 });
31516 }
31517 Ok(tables)
31518}
31519
31520pub(crate) fn table_columns(conn: &Connection, table: &str) -> Result<Vec<ColumnInfo>> {
31522 let mut stmt = conn.prepare(&format!("PRAGMA table_info(\"{}\")", table))?;
31523 let cols = stmt
31524 .query_map([], |row| {
31525 Ok(ColumnInfo {
31526 name: row.get(1)?,
31527 col_type: row.get::<_, String>(2).unwrap_or_default(),
31528 notnull: row.get::<_, bool>(3).unwrap_or(false),
31529 pk: row.get::<_, i32>(5).unwrap_or(0) > 0,
31530 default_value: row.get(4)?,
31531 })
31532 })?
31533 .collect::<std::result::Result<Vec<_>, _>>()?;
31534 Ok(cols)
31535}
31536
31537pub(crate) fn execute_query(
31539 conn: &Connection,
31540 sql: &str,
31541) -> Result<(Vec<String>, Vec<Vec<serde_json::Value>>)> {
31542 let mut stmt = conn.prepare(sql).context("preparing SQL query")?;
31543 let col_names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
31544 let col_count = col_names.len();
31545
31546 let mut rows = Vec::new();
31547 let mut query_rows = stmt.query([])?;
31548 while let Some(row) = query_rows.next()? {
31549 let mut vals = Vec::with_capacity(col_count);
31550 for i in 0..col_count {
31551 let val = match row.get_ref(i)? {
31552 rusqlite::types::ValueRef::Null => serde_json::Value::Null,
31553 rusqlite::types::ValueRef::Integer(n) => serde_json::json!(n),
31554 rusqlite::types::ValueRef::Real(f) => serde_json::json!(f),
31555 rusqlite::types::ValueRef::Text(s) => {
31556 serde_json::Value::String(String::from_utf8_lossy(s).into_owned())
31557 }
31558 rusqlite::types::ValueRef::Blob(b) => {
31559 serde_json::Value::String(format!("<blob {} bytes>", b.len()))
31560 }
31561 };
31562 vals.push(val);
31563 }
31564 rows.push(vals);
31565 }
31566 Ok((col_names, rows))
31567}
31568
/// Limits applied to the captured stdout of a rewritten command (see
/// `apply_output_cap`).
#[derive(Clone, Copy)]
struct OutputCap {
    /// Maximum number of output lines kept before a truncation notice is appended.
    max_lines: usize,
    /// When set, lines starting with this prefix are dropped from the output.
    strip_prefix: Option<&'static str>,
}
31576
31577pub(crate) fn execute_rewritten_command(command: &str) -> Result<i32> {
31578 let effective_command = effective_rewrite_run_command(command);
31579 let parts = shell_split(&effective_command);
31580 let Some(program) = parts.first().map(|part| strip_shell_quotes(part)) else {
31581 bail!("rewritten command was empty");
31582 };
31583 let args: Vec<String> = parts[1..]
31584 .iter()
31585 .map(|part| strip_shell_quotes(part).to_string())
31586 .collect();
31587 let mut command = if program == "tsift" {
31588 Command::new(std::env::current_exe().context("resolving current tsift executable")?)
31589 } else {
31590 Command::new(program)
31591 };
31592 let output = command
31593 .args(&args)
31594 .output()
31595 .with_context(|| format!("executing rewritten command `{effective_command}`"))?;
31596
31597 let stdout = if let Some(cap) = rewrite_output_cap(&effective_command) {
31598 apply_output_cap(&output.stdout, cap)
31599 } else {
31600 String::from_utf8_lossy(&output.stdout).into_owned()
31601 };
31602 if !stdout.is_empty() {
31603 print!("{stdout}");
31604 }
31605 if !output.stderr.is_empty() {
31606 eprint!("{}", String::from_utf8_lossy(&output.stderr));
31607 }
31608
31609 Ok(output
31610 .status
31611 .code()
31612 .unwrap_or_else(|| if output.status.success() { 0 } else { 1 }))
31613}
31614
31615fn effective_rewrite_run_command(command: &str) -> String {
31616 let parts = shell_split(command);
31617 if parts.first().map(|part| strip_shell_quotes(part)) != Some("tsift") {
31618 return command.to_string();
31619 }
31620 let structured = parts
31621 .iter()
31622 .skip(1)
31623 .any(|part| strip_shell_quotes(part) == "--timeout");
31624 let subcommand = parts
31625 .iter()
31626 .skip(1)
31627 .map(|part| strip_shell_quotes(part))
31628 .find(|part| !part.starts_with('-'));
31629 if matches!(subcommand, Some("search")) && !structured {
31630 format!("{command} --timeout 0")
31631 } else {
31632 command.to_string()
31633 }
31634}
31635
31636pub(crate) fn apply_rewrite_output_format(command: &str, format: OutputFormat) -> String {
31637 let trimmed = command.trim_start();
31638 let Some(rest) = trimmed.strip_prefix("tsift") else {
31639 return command.to_string();
31640 };
31641 let existing_parts = shell_split(rest);
31642
31643 let mut flags = Vec::new();
31644 if format.compact && !rewrite_has_global_flag(&existing_parts, "--compact") {
31645 flags.push("--compact");
31646 }
31647 if format.pretty && !rewrite_has_global_flag(&existing_parts, "--pretty") {
31648 flags.push("--pretty");
31649 }
31650 if format.terse && !rewrite_has_global_flag(&existing_parts, "--terse") {
31651 flags.push("--terse");
31652 }
31653 if format.schema && !rewrite_has_global_flag(&existing_parts, "--schema") {
31654 flags.push("--schema");
31655 }
31656 if format.envelope {
31657 if !rewrite_has_global_flag(&existing_parts, "--envelope") {
31658 flags.push("--envelope");
31659 }
31660 } else if format.json_output
31661 && !rewrite_has_global_flag(&existing_parts, "--json")
31662 && !rewrite_has_global_flag(&existing_parts, "--envelope")
31663 {
31664 flags.push("--json");
31665 }
31666
31667 if flags.is_empty() {
31668 return command.to_string();
31669 }
31670
31671 let forwarded = flags.join(" ");
31672 if rest.trim().is_empty() {
31673 format!("tsift {forwarded}")
31674 } else {
31675 format!("tsift {forwarded}{rest}")
31676 }
31677}
31678
31679fn rewrite_has_global_flag(parts: &[&str], flag: &str) -> bool {
31680 parts
31681 .iter()
31682 .take_while(|part| {
31683 let value = strip_shell_quotes(part);
31684 value.starts_with('-') || value == "tsift"
31685 })
31686 .any(|part| strip_shell_quotes(part) == flag)
31687}
31688
31689fn rewrite_output_cap(command: &str) -> Option<OutputCap> {
31690 let parts = shell_split(command);
31691 if strip_shell_quotes(parts.first()?) != "tsift" {
31692 return None;
31693 }
31694 let structured = parts.iter().skip(1).any(|part| {
31695 matches!(
31696 strip_shell_quotes(part),
31697 "--json" | "--terse" | "--schema" | "--tabular" | "--envelope"
31698 )
31699 });
31700 if structured {
31701 return None;
31702 }
31703
31704 let subcommand = parts
31705 .iter()
31706 .skip(1)
31707 .map(|part| strip_shell_quotes(part))
31708 .find(|part| !part.starts_with('-'))?;
31709 match subcommand {
31710 "communities" => Some(OutputCap {
31711 max_lines: 80,
31712 strip_prefix: None,
31713 }),
31714 "explain" => Some(OutputCap {
31715 max_lines: 40,
31716 strip_prefix: None,
31717 }),
31718 "graph" => Some(OutputCap {
31719 max_lines: 50,
31720 strip_prefix: None,
31721 }),
31722 "index" => Some(OutputCap {
31723 max_lines: 30,
31724 strip_prefix: None,
31725 }),
31726 "search" => Some(OutputCap {
31727 max_lines: 50,
31728 strip_prefix: Some("Strategy:"),
31729 }),
31730 _ => None,
31731 }
31732}
31733
31734fn apply_output_cap(stdout: &[u8], cap: OutputCap) -> String {
31735 let cleaned = strip_ansi_codes(&String::from_utf8_lossy(stdout));
31736 let mut lines: Vec<String> = cleaned
31737 .lines()
31738 .map(str::trim_end)
31739 .filter(|line| !line.trim().is_empty())
31740 .filter(|line| {
31741 cap.strip_prefix
31742 .map(|prefix| !line.starts_with(prefix))
31743 .unwrap_or(true)
31744 })
31745 .map(ToOwned::to_owned)
31746 .collect();
31747 if lines.len() > cap.max_lines {
31748 let hidden = lines.len() - cap.max_lines;
31749 lines.truncate(cap.max_lines);
31750 lines.push(format!(
31751 "... (+{hidden} more lines; rerun the underlying tsift command directly for the full output)"
31752 ));
31753 }
31754 if lines.is_empty() {
31755 String::new()
31756 } else {
31757 format!("{}\n", lines.join("\n"))
31758 }
31759}
31760
/// Removes `ESC [ ... <final>` escape sequences from `input`.
///
/// After `ESC [`, characters are discarded up to and including the first one
/// in the range `'@'..='~'`; an unterminated sequence swallows the rest of the
/// input. An `ESC` not followed by `[` is kept as-is.
fn strip_ansi_codes(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    let mut rest = input.chars().peekable();
    loop {
        match rest.next() {
            None => break,
            Some('\u{1b}') if rest.peek() == Some(&'[') => {
                // Drop the '[' and then everything through the terminating byte.
                rest.next();
                let _ = rest.find(|c| matches!(c, '@'..='~'));
            }
            Some(other) => cleaned.push(other),
        }
    }
    cleaned
}
31778
31779pub fn rewrite_command(command: &str) -> Option<String> {
31785 let trimmed = command.trim();
31786
31787 if trimmed.starts_with("tsift ") || trimmed == "tsift" {
31789 return Some(command.to_string());
31790 }
31791
31792 if let Some(rewritten) = rewrite_rg(trimmed) {
31794 return Some(rewritten);
31795 }
31796
31797 if let Some(rewritten) = rewrite_grep(trimmed) {
31799 return Some(rewritten);
31800 }
31801
31802 if let Some(rewritten) = rewrite_git_diff(trimmed) {
31804 return Some(rewritten);
31805 }
31806 if let Some(rewritten) = rewrite_git_show(trimmed) {
31807 return Some(rewritten);
31808 }
31809 if let Some(rewritten) = rewrite_git_patch_history(trimmed) {
31810 return Some(rewritten);
31811 }
31812
31813 if let Some(rewritten) = rewrite_session_read_command(trimmed) {
31815 return Some(rewritten);
31816 }
31817
31818 if let Some(rewritten) = rewrite_source_read_command(trimmed) {
31820 return Some(rewritten);
31821 }
31822
31823 if let Some(rewritten) = rewrite_test_command(trimmed) {
31825 return Some(rewritten);
31826 }
31827
31828 if let Some(rewritten) = rewrite_log_command(trimmed) {
31830 return Some(rewritten);
31831 }
31832
31833 None
31834}
31835
31836pub(crate) fn no_rewrite_message(command: &str, run: bool) -> String {
31837 let trimmed = command.trim();
31838 let parts = shell_split(trimmed);
31839 let reason = if trimmed.is_empty() {
31840 "empty command"
31841 } else if has_shell_metacharacters(trimmed) {
31842 "shell metacharacters such as pipes, redirection, or background operators are not rewritten"
31843 } else if is_file_listing_command(&parts) {
31844 "file-listing commands keep original shell/find/rg semantics"
31845 } else {
31846 "no supported tsift rewrite matched this command"
31847 };
31848 let action = if run {
31849 "`--run` executes only rewritten commands; run the original command directly if intended"
31850 } else {
31851 "run the original command unchanged"
31852 };
31853 format!("tsift rewrite: no rewrite: {reason}; {action}")
31854}
31855
/// Reports whether the tokenized command lists files rather than searching contents.
///
/// `find` always counts; `rg` counts only when `--files` or `--type-list`
/// appears among its arguments.
fn is_file_listing_command(parts: &[&str]) -> bool {
    let Some((&program, args)) = parts.split_first() else {
        return false;
    };
    match program {
        "find" => true,
        "rg" => args
            .iter()
            .any(|arg| *arg == "--files" || *arg == "--type-list"),
        _ => false,
    }
}
31866
31867fn rewrite_rg(cmd: &str) -> Option<String> {
31869 let parts: Vec<&str> = shell_split(cmd);
31870 if parts.is_empty() || parts[0] != "rg" {
31871 return None;
31872 }
31873
31874 if is_file_listing_command(&parts) {
31877 return None;
31878 }
31879
31880 if cmd.contains('|')
31883 || cmd.contains('>')
31884 || cmd.contains("--replace")
31885 || cmd.contains("--count")
31886 || cmd.contains("-c")
31887 || cmd.contains("--files-with-matches")
31888 || cmd.contains("--files-without-match")
31889 || cmd.contains("-l")
31890 {
31891 return None;
31892 }
31893
31894 let mut pattern = None;
31896 let mut path = None;
31897 let mut skip_next = false;
31898
31899 for part in &parts[1..] {
31900 if skip_next {
31901 skip_next = false;
31902 continue;
31903 }
31904 if matches!(
31906 *part,
31907 "-t" | "--type"
31908 | "-g"
31909 | "--glob"
31910 | "-A"
31911 | "-B"
31912 | "-C"
31913 | "--max-count"
31914 | "--max-depth"
31915 | "-m"
31916 | "-e"
31917 ) {
31918 skip_next = true;
31919 continue;
31920 }
31921 if part.starts_with('-') {
31923 continue;
31924 }
31925 if pattern.is_none() {
31927 pattern = Some(*part);
31928 } else if path.is_none() {
31929 path = Some(*part);
31930 }
31931 }
31932
31933 Some(build_agent_search_preview_command(pattern?, path))
31934}
31935
31936fn rewrite_grep(cmd: &str) -> Option<String> {
31938 let parts: Vec<&str> = shell_split(cmd);
31939 if parts.is_empty() || parts[0] != "grep" {
31940 return None;
31941 }
31942
31943 let has_recursive = parts.iter().any(|p| {
31945 *p == "-r"
31946 || *p == "-R"
31947 || *p == "--recursive"
31948 || p.contains('r') && p.starts_with('-') && !p.starts_with("--")
31949 });
31950 if !has_recursive {
31951 return None;
31952 }
31953
31954 if cmd.contains('|') || cmd.contains('>') {
31956 return None;
31957 }
31958
31959 let mut pattern = None;
31960 let mut path = None;
31961 let mut skip_next = false;
31962
31963 for part in &parts[1..] {
31964 if skip_next {
31965 skip_next = false;
31966 continue;
31967 }
31968 if matches!(*part, "--include" | "--exclude" | "--exclude-dir" | "-e") {
31969 skip_next = true;
31970 continue;
31971 }
31972 if part.starts_with('-') {
31973 continue;
31974 }
31975 if pattern.is_none() {
31976 pattern = Some(*part);
31977 } else if path.is_none() {
31978 path = Some(*part);
31979 }
31980 }
31981
31982 Some(build_agent_search_preview_command(pattern?, path))
31983}
31984
31985fn build_agent_search_preview_command(pattern: &str, path: Option<&str>) -> String {
31986 let mut result = format!(
31987 "tsift --envelope search {} --exact --budget normal",
31988 shell_quote(pattern)
31989 );
31990 if let Some(p) = path {
31991 result.push_str(&format!(" --path {}", shell_quote(p)));
31992 }
31993 result
31994}
31995
31996fn rewrite_git_diff(cmd: &str) -> Option<String> {
31997 if has_shell_metacharacters(cmd) {
31998 return None;
31999 }
32000
32001 let parts: Vec<&str> = shell_split(cmd);
32002 if parts.len() < 2 || parts[0] != "git" || parts[1] != "diff" {
32003 return None;
32004 }
32005 let mut cached = false;
32006 let mut path = None;
32007 let mut after_double_dash = false;
32008
32009 for part in &parts[2..] {
32010 if after_double_dash {
32011 if path.is_none() && !part.starts_with('-') {
32012 path = Some(*part);
32013 continue;
32014 }
32015 return None;
32016 }
32017 match *part {
32018 "--cached" | "--staged" => cached = true,
32019 "--" => after_double_dash = true,
32020 raw if looks_like_path_selector(raw) => {
32021 if path.replace(raw).is_some() {
32022 return None;
32023 }
32024 }
32025 _ => return None,
32026 }
32027 }
32028
32029 Some(build_diff_digest_command(path.unwrap_or("."), cached, None))
32030}
32031
32032fn rewrite_git_show(cmd: &str) -> Option<String> {
32033 if has_shell_metacharacters(cmd) {
32034 return None;
32035 }
32036
32037 let parts: Vec<&str> = shell_split(cmd);
32038 if parts.len() < 2 || parts[0] != "git" || parts[1] != "show" {
32039 return None;
32040 }
32041
32042 let mut revision = "HEAD";
32043 let mut path = None;
32044 let mut after_double_dash = false;
32045
32046 for part in &parts[2..] {
32047 if after_double_dash {
32048 if path.is_none() && !part.starts_with('-') {
32049 path = Some(*part);
32050 continue;
32051 }
32052 return None;
32053 }
32054 match *part {
32055 "--" => after_double_dash = true,
32056 "-p" | "--patch" | "--stat" => {}
32057 raw if raw.starts_with("--format=") => {}
32058 raw if !raw.starts_with('-') => {
32059 if revision != "HEAD" {
32060 return None;
32061 }
32062 revision = raw;
32063 }
32064 _ => return None,
32065 }
32066 }
32067
32068 Some(build_diff_digest_command(
32069 path.unwrap_or("."),
32070 false,
32071 Some(revision),
32072 ))
32073}
32074
32075fn rewrite_git_patch_history(cmd: &str) -> Option<String> {
32076 if has_shell_metacharacters(cmd) {
32077 return None;
32078 }
32079
32080 let parts: Vec<&str> = shell_split(cmd);
32081 if parts.len() < 2 || parts[0] != "git" || parts[1] != "log" {
32082 return None;
32083 }
32084
32085 let mut saw_patch = false;
32086 let mut saw_single_commit = false;
32087 let mut revision = "HEAD";
32088 let mut path = None;
32089 let mut after_double_dash = false;
32090 let mut skip_next = false;
32091
32092 for part in &parts[2..] {
32093 if skip_next {
32094 skip_next = false;
32095 if *part == "1" {
32096 saw_single_commit = true;
32097 continue;
32098 }
32099 return None;
32100 }
32101 if after_double_dash {
32102 if path.is_none() && !part.starts_with('-') {
32103 path = Some(*part);
32104 continue;
32105 }
32106 return None;
32107 }
32108 match *part {
32109 "--" => after_double_dash = true,
32110 "-p" | "--patch" => saw_patch = true,
32111 "-1" | "-n1" | "--max-count=1" => saw_single_commit = true,
32112 "-n" | "--max-count" => skip_next = true,
32113 raw if !raw.starts_with('-') => {
32114 if revision != "HEAD" {
32115 return None;
32116 }
32117 revision = raw;
32118 }
32119 _ => return None,
32120 }
32121 }
32122
32123 if !saw_patch || !saw_single_commit {
32124 return None;
32125 }
32126
32127 Some(build_diff_digest_command(
32128 path.unwrap_or("."),
32129 false,
32130 Some(revision),
32131 ))
32132}
32133
32134fn build_diff_digest_command(path: &str, cached: bool, revision: Option<&str>) -> String {
32135 let mut result = "tsift diff-digest".to_string();
32136 if cached {
32137 result.push_str(" --cached");
32138 }
32139 if let Some(revision) = revision {
32140 result.push_str(&format!(" --revision {}", shell_quote(revision)));
32141 }
32142 if path == "." {
32143 result.push_str(" .");
32144 } else {
32145 result.push_str(&format!(" {}", shell_quote(path)));
32146 }
32147 result
32148}
32149
/// Minimum line count (explicitly requested, or present in the file) before a
/// session-transcript read is rewritten to `tsift session-digest`. Also the
/// count reported for `+N` line specifications.
const SESSION_READ_LINE_THRESHOLD: usize = 80;
/// Minimum line count (explicitly requested, or present in the file) before a
/// source-file read is rewritten to `tsift source-read`. Also the number of
/// lines previewed for a whole-file read.
const SOURCE_READ_LINE_THRESHOLD: usize = 80;
32152
/// Which part of a file a read command (`cat`, `head`, `tail`, `sed -n`) asks for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileReadWindow {
    /// The whole file (cat-like commands).
    FullFile,
    /// The first `lines` lines (`head`).
    FromStart { lines: usize },
    /// The last `lines` lines (`tail`).
    FromEnd { lines: usize },
    /// `lines` lines beginning at 1-based line `start` (`sed -n 'A,Bp'`).
    Range { start: usize, lines: usize },
}
32160
/// A parsed file-read command: which file is read and how much of it.
struct FileReadTarget {
    /// File operand as written on the command line, with surrounding quotes removed.
    input: String,
    /// Explicit line count from the command; `None` for cat-like whole-file reads.
    requested_lines: Option<usize>,
    /// The portion of the file the command selects.
    window: FileReadWindow,
}
32166
32167fn rewrite_session_read_command(cmd: &str) -> Option<String> {
32168 if has_shell_metacharacters(cmd) {
32169 return None;
32170 }
32171
32172 let target = parse_file_read_target(cmd)?;
32173 let input_path = Path::new(&target.input);
32174 let source = detect_session_digest_source(input_path)?;
32175
32176 if let Some(requested_lines) = target.requested_lines {
32177 if requested_lines < SESSION_READ_LINE_THRESHOLD {
32178 return None;
32179 }
32180 } else if !file_has_at_least_lines(input_path, SESSION_READ_LINE_THRESHOLD) {
32181 return None;
32182 }
32183
32184 let digest_path = resolve_digest_context_path(input_path);
32185 Some(build_session_digest_command(
32186 &digest_path,
32187 &target.input,
32188 source,
32189 ))
32190}
32191
32192fn rewrite_source_read_command(cmd: &str) -> Option<String> {
32193 if has_shell_metacharacters(cmd) {
32194 return None;
32195 }
32196
32197 let target = parse_file_read_target(cmd)?;
32198 let input_path = Path::new(&target.input);
32199 if !file_is_supported_source(input_path) {
32200 return None;
32201 }
32202
32203 if let Some(requested_lines) = target.requested_lines {
32204 if requested_lines < SOURCE_READ_LINE_THRESHOLD {
32205 return None;
32206 }
32207 } else if !file_has_at_least_lines(input_path, SOURCE_READ_LINE_THRESHOLD) {
32208 return None;
32209 }
32210
32211 let root = lint::find_project_root_for_path(input_path).ok()??;
32212 if !project_has_index(&root) {
32213 return None;
32214 }
32215 let file_abs = input_path.canonicalize().ok()?;
32216 let file_display = relativize_pathbuf(&file_abs, &root)
32217 .to_string_lossy()
32218 .to_string();
32219 let total_lines = count_file_lines(&file_abs)?;
32220 let (start, lines) = source_window_for_read(target.window, total_lines)?;
32221 Some(build_source_read_rewrite_command(
32222 &root,
32223 &file_display,
32224 start,
32225 lines,
32226 ))
32227}
32228
32229fn parse_file_read_target(cmd: &str) -> Option<FileReadTarget> {
32230 let parts: Vec<&str> = shell_split(cmd);
32231 let head = parts.first().copied()?;
32232 match head {
32233 "cat" | "bat" | "batcat" => parse_cat_like_read_target(&parts),
32234 "head" | "tail" => parse_head_tail_read_target(&parts),
32235 "sed" => parse_sed_read_target(&parts),
32236 _ => None,
32237 }
32238}
32239
32240fn parse_cat_like_read_target(parts: &[&str]) -> Option<FileReadTarget> {
32241 let mut input = None;
32242 for part in &parts[1..] {
32243 if part.starts_with('-') {
32244 continue;
32245 }
32246 if input.replace(strip_shell_quotes(part)).is_some() {
32247 return None;
32248 }
32249 }
32250 Some(FileReadTarget {
32251 input: input?.to_string(),
32252 requested_lines: None,
32253 window: FileReadWindow::FullFile,
32254 })
32255}
32256
32257fn parse_head_tail_read_target(parts: &[&str]) -> Option<FileReadTarget> {
32258 let mut requested_lines = 10;
32259 let mut input = None;
32260 let mut index = 1;
32261
32262 while index < parts.len() {
32263 let part = parts[index];
32264 if part == "-n" || part == "--lines" {
32265 index += 1;
32266 requested_lines = parse_requested_line_count(parts.get(index).copied()?)?;
32267 index += 1;
32268 continue;
32269 }
32270 if let Some(raw) = part.strip_prefix("-n")
32271 && !raw.is_empty()
32272 {
32273 requested_lines = parse_requested_line_count(raw)?;
32274 index += 1;
32275 continue;
32276 }
32277 if let Some(raw) = part.strip_prefix("--lines=") {
32278 requested_lines = parse_requested_line_count(raw)?;
32279 index += 1;
32280 continue;
32281 }
32282 if part.starts_with('-') && part[1..].chars().all(|ch| ch.is_ascii_digit()) {
32283 requested_lines = parse_requested_line_count(&part[1..])?;
32284 index += 1;
32285 continue;
32286 }
32287 if input.replace(strip_shell_quotes(part)).is_some() {
32288 return None;
32289 }
32290 index += 1;
32291 }
32292
32293 let window = match parts[0] {
32294 "head" => FileReadWindow::FromStart {
32295 lines: requested_lines,
32296 },
32297 "tail" => FileReadWindow::FromEnd {
32298 lines: requested_lines,
32299 },
32300 _ => return None,
32301 };
32302
32303 Some(FileReadTarget {
32304 input: input?.to_string(),
32305 requested_lines: Some(requested_lines),
32306 window,
32307 })
32308}
32309
32310fn parse_sed_read_target(parts: &[&str]) -> Option<FileReadTarget> {
32311 if parts.len() != 4 || parts[1] != "-n" {
32312 return None;
32313 }
32314
32315 let (start, lines) = parse_sed_print_window(parts[2])?;
32316 Some(FileReadTarget {
32317 input: strip_shell_quotes(parts[3]).to_string(),
32318 requested_lines: Some(lines),
32319 window: FileReadWindow::Range { start, lines },
32320 })
32321}
32322
32323fn parse_requested_line_count(raw: &str) -> Option<usize> {
32324 let trimmed = strip_shell_quotes(raw);
32325 if let Some(number) = trimmed.strip_prefix('+') {
32326 number.parse::<usize>().ok()?;
32327 return Some(SESSION_READ_LINE_THRESHOLD);
32328 }
32329 trimmed.parse::<usize>().ok()
32330}
32331
32332fn parse_sed_print_window(raw: &str) -> Option<(usize, usize)> {
32333 let trimmed = strip_shell_quotes(raw);
32334 let range = trimmed.strip_suffix('p')?;
32335 let (start, end) = range.split_once(',')?;
32336 let start = start.parse::<usize>().ok()?;
32337 let end = end.parse::<usize>().ok()?;
32338 (end >= start).then_some((start, end - start + 1))
32339}
32340
32341fn file_is_supported_source(path: &Path) -> bool {
32342 path.extension()
32343 .and_then(|ext| ext.to_str())
32344 .and_then(graph::lang::Lang::from_extension)
32345 .is_some()
32346}
32347
/// Counts the lines in the file at `path`.
///
/// Lines are delimited by `\n`; a final line without a trailing newline is
/// counted. Counting works on raw bytes, so lines that are not valid UTF-8 are
/// included. Returns `None` if the file cannot be opened or a read fails (for
/// example when `path` is a directory), rather than retrying a failing read
/// forever.
fn count_file_lines(path: &Path) -> Option<usize> {
    let mut reader = BufReader::new(fs::File::open(path).ok()?);
    let mut line = Vec::new();
    let mut total = 0usize;
    loop {
        line.clear();
        match reader.read_until(b'\n', &mut line) {
            Ok(0) => return Some(total),
            Ok(_) => total += 1,
            Err(_) => return None,
        }
    }
}
32357
32358fn source_window_for_read(window: FileReadWindow, total_lines: usize) -> Option<(usize, usize)> {
32359 if total_lines == 0 {
32360 return None;
32361 }
32362 match window {
32363 FileReadWindow::FullFile => Some((1, SOURCE_READ_LINE_THRESHOLD.min(total_lines))),
32364 FileReadWindow::FromStart { lines } => Some((1, lines.min(total_lines))),
32365 FileReadWindow::FromEnd { lines } => {
32366 let bounded = lines.min(total_lines);
32367 Some((total_lines - bounded + 1, bounded))
32368 }
32369 FileReadWindow::Range { start, lines } => {
32370 if start == 0 || start > total_lines {
32371 return None;
32372 }
32373 Some((start, lines.min(total_lines - start + 1)))
32374 }
32375 }
32376}
32377
32378fn build_source_read_rewrite_command(
32379 root: &Path,
32380 file: &str,
32381 start: usize,
32382 lines: usize,
32383) -> String {
32384 format!(
32385 "tsift --envelope source-read {} --path {} --start {} --lines {} --budget normal",
32386 shell_quote(file),
32387 shell_quote(&root.to_string_lossy()),
32388 start,
32389 lines
32390 )
32391}
32392
32393fn project_has_index(root: &Path) -> bool {
32394 let tsift_dir = root.join(".tsift");
32395 tsift_dir.join("index.db").is_file() || directory_contains_index_db(&tsift_dir.join("indexes"))
32396}
32397
/// Recursively searches `path` for a regular file named `index.db`.
///
/// Unreadable directories and entries are treated as containing nothing.
fn directory_contains_index_db(path: &Path) -> bool {
    match fs::read_dir(path) {
        Err(_) => false,
        Ok(entries) => entries.flatten().map(|entry| entry.path()).any(|child| {
            let is_index_file =
                child.file_name().is_some_and(|name| name == "index.db") && child.is_file();
            is_index_file || (child.is_dir() && directory_contains_index_db(&child))
        }),
    }
}
32413
32414fn detect_session_digest_source(path: &Path) -> Option<session_digest::SessionDigestSource> {
32415 match path.extension().and_then(|ext| ext.to_str()) {
32416 Some("md") if file_looks_like_agent_doc_session(path) => {
32417 Some(session_digest::SessionDigestSource::Markdown)
32418 }
32419 Some("jsonl") if file_looks_like_claude_jsonl(path) => {
32420 Some(session_digest::SessionDigestSource::ClaudeJsonl)
32421 }
32422 Some("jsonl") if file_looks_like_codex_jsonl(path) => {
32423 Some(session_digest::SessionDigestSource::CodexJsonl)
32424 }
32425 Some("log") if file_looks_like_agent_doc_log(path) => {
32426 Some(session_digest::SessionDigestSource::AgentDocLog)
32427 }
32428 _ => None,
32429 }
32430}
32431
32432fn file_looks_like_agent_doc_session(path: &Path) -> bool {
32433 let prefix = match read_file_prefix(path, 16 * 1024) {
32434 Some(prefix) => prefix,
32435 None => return false,
32436 };
32437 prefix.contains("agent_doc_session:")
32438 || prefix.contains("<!-- agent:exchange")
32439 || prefix.contains("\n## Exchange")
32440}
32441
32442fn file_looks_like_claude_jsonl(path: &Path) -> bool {
32443 let prefix = match read_file_prefix(path, 16 * 1024) {
32444 Some(prefix) => prefix,
32445 None => return false,
32446 };
32447
32448 prefix
32449 .lines()
32450 .map(str::trim)
32451 .filter(|line| !line.is_empty())
32452 .take(3)
32453 .any(|line| {
32454 let value = match serde_json::from_str::<serde_json::Value>(line) {
32455 Ok(value) => value,
32456 Err(_) => return false,
32457 };
32458 value.get("message").is_some()
32459 || value.get("role").is_some()
32460 || value.get("content").is_some()
32461 })
32462}
32463
32464fn file_looks_like_codex_jsonl(path: &Path) -> bool {
32465 let prefix = match read_file_prefix(path, 16 * 1024) {
32466 Some(prefix) => prefix,
32467 None => return false,
32468 };
32469
32470 prefix
32471 .lines()
32472 .map(str::trim)
32473 .filter(|line| !line.is_empty())
32474 .take(8)
32475 .any(|line| {
32476 let value = match serde_json::from_str::<serde_json::Value>(line) {
32477 Ok(value) => value,
32478 Err(_) => return false,
32479 };
32480 matches!(
32481 value.get("type").and_then(serde_json::Value::as_str),
32482 Some("session_meta" | "response_item" | "event_msg")
32483 )
32484 })
32485}
32486
32487fn file_looks_like_agent_doc_log(path: &Path) -> bool {
32488 let prefix = match read_file_prefix(path, 16 * 1024) {
32489 Some(prefix) => prefix,
32490 None => return false,
32491 };
32492 prefix
32493 .lines()
32494 .map(str::trim)
32495 .filter(|line| !line.is_empty())
32496 .take(8)
32497 .all(|line| line.starts_with('[') && line.contains("] "))
32498}
32499
/// Reads at most `max_bytes` bytes from the start of `path` and returns them as
/// text, replacing invalid UTF-8 sequences lossily.
///
/// Returns `None` if the file cannot be opened or read.
fn read_file_prefix(path: &Path, max_bytes: usize) -> Option<String> {
    let reader = BufReader::new(fs::File::open(path).ok()?);
    let mut bytes = Vec::new();
    let mut limited = reader.take(max_bytes as u64);
    limited.read_to_end(&mut bytes).ok()?;
    Some(String::from_utf8_lossy(&bytes).into_owned())
}
32511
/// Reports whether the file at `path` has at least `min_lines` readable lines.
///
/// Only the first `min_lines` lines are examined, so large files are not read
/// in full. Returns `false` if the file cannot be opened.
fn file_has_at_least_lines(path: &Path, min_lines: usize) -> bool {
    let Ok(file) = fs::File::open(path) else {
        return false;
    };
    let readable = BufReader::new(file)
        .lines()
        .take(min_lines)
        .filter(Result::is_ok)
        .count();
    readable >= min_lines
}
32525
32526fn build_session_digest_command(
32527 path: &str,
32528 input: &str,
32529 source: session_digest::SessionDigestSource,
32530) -> String {
32531 format!(
32532 "tsift session-digest --path {} --input {} --source {}",
32533 shell_quote(path),
32534 shell_quote(input),
32535 source.cli_arg()
32536 )
32537}
32538
32539fn resolve_digest_context_path(path: &Path) -> String {
32540 lint::resolve_harness_root_or_canonical_path(path)
32541 .map(|root| root.display().to_string())
32542 .unwrap_or_else(|_| ".".to_string())
32543}
32544
32545fn rewrite_test_command(cmd: &str) -> Option<String> {
32546 if has_shell_metacharacters(cmd) {
32547 return None;
32548 }
32549
32550 let parts: Vec<&str> = shell_split(cmd);
32551 if parts.len() >= 2 && parts[0] == "cargo" && parts[1] == "test" {
32552 return Some(build_digest_runner_command("test", ".", Some("cargo"), cmd));
32553 }
32554 if !parts.is_empty() && parts[0] == "pytest" {
32555 return Some(build_digest_runner_command(
32556 "test",
32557 ".",
32558 Some("pytest"),
32559 cmd,
32560 ));
32561 }
32562 if parts.len() >= 3 && parts[0] == "python" && parts[1] == "-m" && parts[2] == "pytest" {
32563 return Some(build_digest_runner_command(
32564 "test",
32565 ".",
32566 Some("pytest"),
32567 cmd,
32568 ));
32569 }
32570 None
32571}
32572
32573fn rewrite_log_command(cmd: &str) -> Option<String> {
32574 if has_shell_metacharacters(cmd) {
32575 return None;
32576 }
32577
32578 let parts: Vec<&str> = shell_split(cmd);
32579 if parts.len() >= 2
32580 && parts[0] == "cargo"
32581 && matches!(parts[1], "build" | "check" | "clippy" | "install")
32582 {
32583 return Some(build_digest_runner_command("log", ".", None, cmd));
32584 }
32585 None
32586}
32587
32588fn build_digest_runner_command(
32589 kind: &str,
32590 path: &str,
32591 runner: Option<&str>,
32592 shell_command: &str,
32593) -> String {
32594 let mut result = format!(
32595 "tsift --envelope __digest-runner --kind {} --path {} --shell-command {}",
32596 shell_quote(kind),
32597 shell_quote(path),
32598 shell_quote(shell_command)
32599 );
32600 if let Some(runner) = runner {
32601 result.push_str(&format!(" --runner {}", shell_quote(runner)));
32602 }
32603 result
32604}
32605
/// Reports whether `cmd` contains any of `|`, `>`, `<`, or `&` anywhere,
/// including inside quotes.
fn has_shell_metacharacters(cmd: &str) -> bool {
    cmd.chars().any(|ch| matches!(ch, '|' | '>' | '<' | '&'))
}
32609
/// Removes one pair of matching surrounding quotes (`"..."` or `'...'`) from `s`.
///
/// Strings that are not wrapped in a matching pair, including a lone quote
/// character, are returned unchanged.
fn strip_shell_quotes(s: &str) -> &str {
    for quote in ['"', '\''] {
        let inner = s
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    s
}
32619
/// Heuristic: a token looks like a path when it contains `/` or `.`.
///
/// This covers trailing-slash directories, `./` and `../` prefixes, nested
/// paths, and file names with an extension.
fn looks_like_path_selector(raw: &str) -> bool {
    raw.chars().any(|ch| matches!(ch, '/' | '.'))
}
32627
32628#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32629enum DigestRunnerKind {
32630 Test,
32631 Log,
32632}
32633
32634impl DigestRunnerKind {
32635 fn parse(raw: &str) -> Result<Self> {
32636 match raw.trim().to_ascii_lowercase().as_str() {
32637 "test" => Ok(Self::Test),
32638 "log" => Ok(Self::Log),
32639 other => bail!("unsupported digest runner kind `{other}`; expected test or log"),
32640 }
32641 }
32642
32643 fn as_str(self) -> &'static str {
32644 match self {
32645 Self::Test => "test",
32646 Self::Log => "log",
32647 }
32648 }
32649}
32650
/// Splits `s` into whitespace-separated tokens with minimal quote awareness.
///
/// A token that begins with `"` or `'` extends through the matching closing
/// quote (or to the end of input if unterminated) and keeps its quotes. Any
/// other token runs to the next ASCII whitespace, so quotes that appear in the
/// middle of a token do not protect embedded spaces. Tokens borrow from `s`.
fn shell_split(s: &str) -> Vec<&str> {
    let bytes = s.as_bytes();
    let len = bytes.len();
    let mut tokens = Vec::new();
    let mut cursor = 0;

    loop {
        // Skip separators before the next token.
        cursor += bytes[cursor..]
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count();
        if cursor == len {
            break;
        }

        let begin = cursor;
        let opener = bytes[cursor];
        if matches!(opener, b'"' | b'\'') {
            cursor += 1;
            cursor = match bytes[cursor..].iter().position(|&byte| byte == opener) {
                // Step past the closing quote.
                Some(offset) => cursor + offset + 1,
                None => len,
            };
        } else {
            cursor += bytes[cursor..]
                .iter()
                .take_while(|byte| !byte.is_ascii_whitespace())
                .count();
        }
        tokens.push(&s[begin..cursor]);
    }

    tokens
}
32683
/// Wraps `s` in double quotes for inclusion in a rewritten command line.
///
/// One pair of matching surrounding quotes (`"..."` or `'...'`) is removed
/// first so already-quoted tokens are not double-wrapped. Backslashes and
/// double quotes in the remaining text are backslash-escaped.
///
/// The surrounding-quote check requires at least two characters, so a lone `"`
/// or `'` is treated as content instead of slicing out of bounds.
// NOTE(review): `$` and backticks are left unescaped, so they remain active
// inside the double quotes when the result is run by a shell.
pub(crate) fn shell_quote(s: &str) -> String {
    let wrapped_in = |quote: char| s.len() >= 2 && s.starts_with(quote) && s.ends_with(quote);
    let unquoted = if wrapped_in('"') || wrapped_in('\'') {
        &s[1..s.len() - 1]
    } else {
        s
    };

    // Escaping is a no-op for text without backslashes or double quotes.
    format!(
        "\"{}\"",
        unquoted.replace('\\', "\\\\").replace('"', "\\\"")
    )
}
32706
32707fn empty_search_coverage() -> sift::SearchCoverageSnapshot {
32708 sift::SearchCoverageSnapshot {
32709 mode: sift::SearchCoverageMode::Sealed,
32710 total_sector_count: 0,
32711 mounted_sector_count: 0,
32712 reused_sector_count: 0,
32713 dirty_sector_count: 0,
32714 completed_dirty_sector_count: 0,
32715 rebuilding_sector_count: 0,
32716 resumed_sector_count: 0,
32717 active_rebuild: None,
32718 }
32719}
32720
32721fn aggregate_search_coverage(responses: &[sift::SearchResponse]) -> sift::SearchCoverageSnapshot {
32722 let total_sector_count = responses
32723 .iter()
32724 .map(|response| response.coverage.total_sector_count)
32725 .sum();
32726 let mounted_sector_count = responses
32727 .iter()
32728 .map(|response| response.coverage.mounted_sector_count)
32729 .sum();
32730 let reused_sector_count = responses
32731 .iter()
32732 .map(|response| response.coverage.reused_sector_count)
32733 .sum();
32734 let dirty_sector_count = responses
32735 .iter()
32736 .map(|response| response.coverage.dirty_sector_count)
32737 .sum();
32738 let completed_dirty_sector_count = responses
32739 .iter()
32740 .map(|response| response.coverage.completed_dirty_sector_count)
32741 .sum();
32742 let rebuilding_sector_count = responses
32743 .iter()
32744 .map(|response| response.coverage.rebuilding_sector_count)
32745 .sum();
32746 let resumed_sector_count = responses
32747 .iter()
32748 .map(|response| response.coverage.resumed_sector_count)
32749 .sum();
32750
32751 let mode = if dirty_sector_count == 0 && rebuilding_sector_count == 0 {
32752 sift::SearchCoverageMode::Sealed
32753 } else if completed_dirty_sector_count > 0
32754 || rebuilding_sector_count > 0
32755 || resumed_sector_count > 0
32756 {
32757 sift::SearchCoverageMode::Converging
32758 } else {
32759 sift::SearchCoverageMode::Frontier
32760 };
32761
32762 sift::SearchCoverageSnapshot {
32763 mode,
32764 total_sector_count,
32765 mounted_sector_count,
32766 reused_sector_count,
32767 dirty_sector_count,
32768 completed_dirty_sector_count,
32769 rebuilding_sector_count,
32770 resumed_sector_count,
32771 active_rebuild: responses
32772 .iter()
32773 .find_map(|response| response.coverage.active_rebuild.clone()),
32774 }
32775}
32776
32777fn empty_search_response(root: &Path, strategy: &str) -> sift::SearchResponse {
32778 sift::SearchResponse {
32779 strategy: strategy.to_string(),
32780 root: root.display().to_string(),
32781 indexed_artifacts: 0,
32782 skipped_artifacts: 0,
32783 coverage: empty_search_coverage(),
32784 hits: Vec::new(),
32785 }
32786}
32787
32788fn absolutize_search_hit_paths(response: &mut sift::SearchResponse, search_root: &Path) {
32789 for hit in &mut response.hits {
32790 let path = Path::new(&hit.path);
32791 if path.is_relative() {
32792 hit.path = search_root.join(path).display().to_string();
32793 }
32794 }
32795}
32796
32797fn merge_search_responses(
32798 root: &Path,
32799 strategy: &str,
32800 limit: usize,
32801 responses: Vec<sift::SearchResponse>,
32802) -> sift::SearchResponse {
32803 let indexed_artifacts = responses
32804 .iter()
32805 .map(|response| response.indexed_artifacts)
32806 .sum();
32807 let skipped_artifacts = responses
32808 .iter()
32809 .map(|response| response.skipped_artifacts)
32810 .sum();
32811 let coverage = if responses.is_empty() {
32812 empty_search_coverage()
32813 } else {
32814 aggregate_search_coverage(&responses)
32815 };
32816 let mut hits: Vec<sift::SearchHit> = responses
32817 .into_iter()
32818 .flat_map(|response| response.hits)
32819 .collect();
32820 hits.sort_by(|left, right| {
32821 right
32822 .score
32823 .partial_cmp(&left.score)
32824 .unwrap_or(Ordering::Equal)
32825 .then_with(|| left.path.cmp(&right.path))
32826 .then_with(|| left.location.cmp(&right.location))
32827 });
32828 hits.truncate(limit);
32829 for (rank, hit) in hits.iter_mut().enumerate() {
32830 hit.rank = rank + 1;
32831 }
32832
32833 sift::SearchResponse {
32834 strategy: strategy.to_string(),
32835 root: root.display().to_string(),
32836 indexed_artifacts,
32837 skipped_artifacts,
32838 coverage,
32839 hits,
32840 }
32841}
32842
32843pub(crate) fn federated_sift_search(
32844 root: &Path,
32845 cache_dir: &Path,
32846 query: &str,
32847 limit: usize,
32848 timeout_secs: u64,
32849 strategy: &str,
32850) -> Result<sift::SearchResponse> {
32851 let targets = resolve_search_index_targets(root, root, None, true)?;
32852 if targets.is_empty() {
32853 if config::Config::submodule_dirs(root)?.is_empty() {
32854 return run_search_with_timeout(
32855 root,
32856 cache_dir,
32857 query,
32858 limit,
32859 timeout_secs,
32860 strategy,
32861 &[],
32862 );
32863 }
32864 return Ok(empty_search_response(root, strategy));
32865 }
32866
32867 let mut responses = Vec::with_capacity(targets.len());
32868 for target in &targets {
32869 let mut response = run_search_with_timeout(
32870 &target.source_root,
32871 cache_dir,
32872 query,
32873 limit,
32874 timeout_secs,
32875 strategy,
32876 std::slice::from_ref(target),
32877 )?;
32878 absolutize_search_hit_paths(&mut response, &target.source_root);
32879 response.root = root.display().to_string();
32880 responses.push(response);
32881 }
32882
32883 Ok(merge_search_responses(root, strategy, limit, responses))
32884}
32885
32886pub(crate) fn federated_symbol_search(
32894 root: &std::path::Path,
32895 query: &str,
32896 limit: usize,
32897 tagpath_opts: &TagpathSearchOpts,
32898) -> Result<(Vec<index::SymbolHit>, TagpathAnnotationDiagnostic)> {
32899 let cfg = config::Config::load(root)?;
32900 let submodules = config::Config::submodule_dirs(root)?;
32901 let mut all_hits: Vec<index::SymbolHit> = Vec::new();
32902 let mut combined = TagpathAnnotationDiagnostic::default();
32903 for scope in &submodules {
32904 if !cfg.federation_for_scope(scope) {
32905 continue;
32906 }
32907 let db_path = cfg.db_path_for(root, &scope.id);
32908 if !db_path.exists() {
32909 continue;
32910 }
32911 let db = index::IndexDb::open_read_only(&db_path)?;
32912 let mut hits = db.symbol_search(query, limit)?;
32913 let diag = annotate_hits_with_tagpath(&mut hits, &scope.source_root, tagpath_opts)?;
32914 combined.loaded |= diag.loaded;
32915 if diag.stale && !combined.stale {
32916 combined.stale = true;
32917 combined.reason = diag.reason;
32918 }
32919 all_hits.append(&mut hits);
32920 }
32921 all_hits.sort_by(|a, b| {
32922 b.score
32923 .partial_cmp(&a.score)
32924 .unwrap_or(std::cmp::Ordering::Equal)
32925 });
32926 all_hits.truncate(limit);
32927 Ok((all_hits, combined))
32928}
32929
/// One event line of ripgrep's `--json` output, discriminated by its `type` field
/// (matched in lowercase).
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum RipgrepJsonEvent {
    /// A `"type": "match"` event; `data` carries the matched line.
    Match {
        data: RipgrepMatchData,
    },
    /// Any other event type; deserialized without its payload.
    #[serde(other)]
    Other,
}
32939
/// Payload of a ripgrep `match` event.
#[derive(Debug, Deserialize)]
struct RipgrepMatchData {
    /// The `path` field of the event.
    path: RipgrepTextField,
    /// The `lines` field of the event.
    lines: RipgrepTextField,
    /// The `line_number` field of the event, when present.
    line_number: Option<usize>,
}
32946
/// A ripgrep JSON value that may carry a `text` member.
#[derive(Debug, Deserialize)]
struct RipgrepTextField {
    /// The `text` member; `None` when it is absent or null.
    text: Option<String>,
}
32951
32952pub(crate) fn federated_exact_search(
32953 root: &Path,
32954 query: &str,
32955 limit: usize,
32956 timeout_secs: u64,
32957) -> Result<sift::SearchResponse> {
32958 let cfg = config::Config::load(root)?;
32959 let mut responses = Vec::new();
32960 for scope in config::Config::submodule_dirs(root)? {
32961 if !cfg.federation_for_scope(&scope) {
32962 continue;
32963 }
32964 let mut response =
32965 run_exact_search_with_timeout(&scope.source_root, query, limit, timeout_secs)?;
32966 absolutize_search_hit_paths(&mut response, &scope.source_root);
32967 response.root = root.display().to_string();
32968 responses.push(response);
32969 }
32970
32971 Ok(merge_search_responses(root, "exact", limit, responses))
32972}
32973
32974pub(crate) fn run_sift_search(
32975 search_path: &Path,
32976 cache_dir: &Path,
32977 query: &str,
32978 limit: usize,
32979 strategy: &str,
32980) -> Result<sift::SearchResponse> {
32981 let engine = Sift::builder().with_cache_dir(cache_dir).build();
32982 let options = SearchOptions::default()
32983 .with_limit(limit)
32984 .with_strategy(strategy.to_string());
32985 let input = SearchInput::new(search_path, query).with_options(options);
32986 engine.search(input).context("sift search failed")
32987}
32988
/// Formats the error shown when an exact search exceeds `timeout_secs` seconds.
fn exact_search_timeout_message(timeout_secs: u64) -> String {
    let headline = format!("tsift search timed out after {timeout_secs}s (strategy: exact).");
    let advice =
        "Re-run with `--timeout 0` to disable the timeout or narrow `--path` / `--scope`.";
    format!("{headline} {advice}")
}
32996
/// Builds (without spawning) the `rg` invocation used for exact search.
///
/// The query is matched as a fixed string, hidden files are included, and
/// results are requested as JSON events. The `--` separator ensures a query
/// that begins with `-` is treated as the pattern rather than as a flag.
fn exact_search_command(search_path: &Path, query: &str) -> Command {
    let mut rg = Command::new("rg");
    rg.args([
        "--json",
        "--fixed-strings",
        "--line-number",
        "--hidden",
        "--",
    ]);
    rg.arg(query);
    rg.arg(search_path);
    rg
}
33009
33010fn exact_search_file_timestamp(path: &Path) -> sift::ArtifactFreshness {
33011 let observed_unix_secs = SystemTime::now()
33012 .duration_since(UNIX_EPOCH)
33013 .unwrap_or_default()
33014 .as_secs() as i64;
33015 let modified_unix_secs = fs::metadata(path)
33016 .ok()
33017 .and_then(|metadata| metadata.modified().ok())
33018 .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
33019 .map(|duration| duration.as_secs() as i64);
33020 sift::ArtifactFreshness {
33021 observed_unix_secs,
33022 modified_unix_secs,
33023 }
33024}
33025
33026fn parse_exact_search_output(
33027 search_path: &Path,
33028 limit: usize,
33029 raw: &str,
33030) -> Result<sift::SearchResponse> {
33031 if limit == 0 {
33032 return Ok(sift::SearchResponse {
33033 strategy: "exact".to_string(),
33034 root: search_path.display().to_string(),
33035 indexed_artifacts: 0,
33036 skipped_artifacts: 0,
33037 coverage: empty_search_coverage(),
33038 hits: Vec::new(),
33039 });
33040 }
33041
33042 let mut hits = Vec::new();
33043 for line in raw.lines() {
33044 let event: RipgrepJsonEvent =
33045 serde_json::from_str(line).context("parsing ripgrep exact-search output")?;
33046 let RipgrepJsonEvent::Match { data } = event else {
33047 continue;
33048 };
33049 let Some(path_text) = data.path.text else {
33050 continue;
33051 };
33052 let Some(lines_text) = data.lines.text else {
33053 continue;
33054 };
33055 let path = PathBuf::from(path_text);
33056 let snippet = lines_text.trim_end_matches(['\r', '\n']).to_string();
33057 let rank = hits.len() + 1;
33058 hits.push(sift::SearchHit {
33059 artifact_id: format!(
33060 "exact:{}:{}:{}",
33061 path.display(),
33062 data.line_number.unwrap_or(0),
33063 rank
33064 ),
33065 artifact_kind: sift::ContextArtifactKind::File,
33066 path: path.display().to_string(),
33067 rank,
33068 score: (limit.saturating_sub(rank).saturating_add(1)) as f64,
33069 confidence: sift::ScoreConfidence::High,
33070 location: data.line_number.map(|line| format!("line {}", line)),
33071 snippet: snippet.clone(),
33072 provenance: sift::ArtifactProvenance {
33073 adapter: sift::AcquisitionAdapterKind::FileSystem,
33074 source: "ripgrep -F".to_string(),
33075 synthetic: false,
33076 },
33077 freshness: exact_search_file_timestamp(&path),
33078 budget: sift::ArtifactBudget::from_text(&snippet, 1),
33079 });
33080 if hits.len() >= limit {
33081 break;
33082 }
33083 }
33084
33085 Ok(sift::SearchResponse {
33086 strategy: "exact".to_string(),
33087 root: search_path.display().to_string(),
33088 indexed_artifacts: hits.len(),
33089 skipped_artifacts: 0,
33090 coverage: empty_search_coverage(),
33091 hits,
33092 })
33093}
33094
33095fn exact_search_response_from_process(
33096 search_path: &Path,
33097 limit: usize,
33098 status: std::process::ExitStatus,
33099 stdout: &[u8],
33100 stderr: &[u8],
33101) -> Result<sift::SearchResponse> {
33102 if !status.success() && status.code() != Some(1) {
33103 let message = String::from_utf8_lossy(stderr);
33104 let trimmed = message.trim();
33105 if trimmed.is_empty() {
33106 bail!("ripgrep exact search exited with status {}", status);
33107 }
33108 bail!("{}", trimmed);
33109 }
33110
33111 let raw = String::from_utf8(stdout.to_vec()).context("decoding ripgrep exact-search output")?;
33112 parse_exact_search_output(search_path, limit, &raw)
33113}
33114
33115fn run_exact_search(search_path: &Path, query: &str, limit: usize) -> Result<sift::SearchResponse> {
33116 let output = exact_search_command(search_path, query)
33117 .output()
33118 .context("running exact search with ripgrep")?;
33119 exact_search_response_from_process(
33120 search_path,
33121 limit,
33122 output.status,
33123 &output.stdout,
33124 &output.stderr,
33125 )
33126}
33127
33128pub(crate) fn run_exact_search_with_timeout(
33129 search_path: &Path,
33130 query: &str,
33131 limit: usize,
33132 timeout_secs: u64,
33133) -> Result<sift::SearchResponse> {
33134 if timeout_secs == 0 {
33135 return run_exact_search(search_path, query, limit);
33136 }
33137
33138 let mut child = exact_search_command(search_path, query)
33139 .stdin(Stdio::null())
33140 .stdout(Stdio::piped())
33141 .stderr(Stdio::piped())
33142 .spawn()
33143 .context("spawning timed exact search worker")?;
33144
33145 let timeout = Duration::from_secs(timeout_secs);
33146 let status = wait_for_child_exit(&mut child, timeout)
33147 .context("waiting for timed exact search worker")?;
33148 if status.is_none() {
33149 let _ = child.kill();
33150 let _ = child.wait();
33151 bail!("{}", exact_search_timeout_message(timeout_secs));
33152 }
33153
33154 let status = status.unwrap();
33155 let stdout = read_child_stdout(&mut child)?;
33156 let stderr = read_child_stderr(&mut child)?;
33157 exact_search_response_from_process(
33158 search_path,
33159 limit,
33160 status,
33161 stdout.as_bytes(),
33162 stderr.as_bytes(),
33163 )
33164}
33165
33166pub(crate) fn run_search_with_timeout(
33167 search_path: &Path,
33168 cache_dir: &Path,
33169 query: &str,
33170 limit: usize,
33171 timeout_secs: u64,
33172 strategy: &str,
33173 search_targets: &[SearchIndexTarget],
33174) -> Result<sift::SearchResponse> {
33175 if timeout_secs == 0 {
33176 return run_sift_search(search_path, cache_dir, query, limit, strategy);
33177 }
33178
33179 let output_path = next_search_worker_output_path();
33180 let mut child = Command::new(
33181 std::env::current_exe().context("resolving tsift executable for timed search")?,
33182 )
33183 .arg("__search-worker")
33184 .arg("--path")
33185 .arg(search_path)
33186 .arg("--cache-dir")
33187 .arg(cache_dir)
33188 .arg("--query")
33189 .arg(query)
33190 .arg("--limit")
33191 .arg(limit.to_string())
33192 .arg("--strategy")
33193 .arg(strategy)
33194 .arg("--output")
33195 .arg(&output_path)
33196 .stdin(Stdio::null())
33197 .stdout(Stdio::null())
33198 .stderr(Stdio::piped())
33199 .spawn()
33200 .context("spawning timed sift search worker")?;
33201
33202 let timeout = Duration::from_secs(timeout_secs);
33203 let status =
33204 wait_for_child_exit(&mut child, timeout).context("waiting for timed sift search worker")?;
33205 if status.is_none() {
33206 let _ = child.kill();
33207 let _ = child.wait();
33208 let _ = fs::remove_file(&output_path);
33209 bail!(
33210 "{}",
33211 search_timeout_message(timeout_secs, strategy, search_targets)?
33212 );
33213 }
33214
33215 let status = status.unwrap();
33216 let stderr = read_child_stderr(&mut child)?;
33217 if !status.success() {
33218 let _ = fs::remove_file(&output_path);
33219 let message = stderr.trim();
33220 if message.is_empty() {
33221 bail!("sift search worker exited with status {}", status);
33222 }
33223 bail!("{}", message);
33224 }
33225
33226 let raw = fs::read_to_string(&output_path)
33227 .with_context(|| format!("reading search worker output: {}", output_path.display()))?;
33228 let _ = fs::remove_file(&output_path);
33229 serde_json::from_str(&raw).context("parsing search worker output")
33230}
33231
/// Returns a temp-directory path for a search worker's JSON output.
///
/// The file name combines this process's id with the current time in
/// nanoseconds since the Unix epoch (0 if the clock reads earlier than the
/// epoch). The file itself is not created here.
fn next_search_worker_output_path() -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();

    let mut path = std::env::temp_dir();
    path.push(format!("tsift-search-{pid}-{nanos}.json"));
    path
}
33243
33244fn wait_for_child_exit(
33245 child: &mut std::process::Child,
33246 timeout: Duration,
33247) -> Result<Option<std::process::ExitStatus>> {
33248 let started = Instant::now();
33249 loop {
33250 if let Some(status) = child.try_wait()? {
33251 return Ok(Some(status));
33252 }
33253 if started.elapsed() >= timeout {
33254 return Ok(None);
33255 }
33256 let remaining = timeout.saturating_sub(started.elapsed());
33257 std::thread::sleep(remaining.min(Duration::from_millis(10)));
33258 }
33259}
33260
33261fn read_child_stderr(child: &mut std::process::Child) -> Result<String> {
33262 let mut stderr = String::new();
33263 if let Some(mut pipe) = child.stderr.take() {
33264 pipe.read_to_string(&mut stderr)
33265 .context("reading search worker stderr")?;
33266 }
33267 Ok(stderr)
33268}
33269
33270fn read_child_stdout(child: &mut std::process::Child) -> Result<String> {
33271 let mut stdout = String::new();
33272 if let Some(mut pipe) = child.stdout.take() {
33273 pipe.read_to_string(&mut stdout)
33274 .context("reading search worker stdout")?;
33275 }
33276 Ok(stdout)
33277}
33278
33279pub(crate) fn maybe_apply_search_worker_test_hooks() -> Result<()> {
33280 if let Ok(path) = std::env::var("TSIFT_TEST_SEARCH_WORKER_PID_FILE") {
33281 fs::write(&path, std::process::id().to_string())
33282 .with_context(|| format!("writing search worker pid file: {path}"))?;
33283 }
33284 if let Ok(ms) = std::env::var("TSIFT_TEST_SEARCH_WORKER_SLEEP_MS") {
33285 let delay_ms = ms
33286 .parse::<u64>()
33287 .with_context(|| format!("parsing TSIFT_TEST_SEARCH_WORKER_SLEEP_MS={ms}"))?;
33288 std::thread::sleep(Duration::from_millis(delay_ms));
33289 }
33290 Ok(())
33291}
33292
// Test-only, per-thread slot for a pending post-precheck lock hook. Because
// the slot is thread-local, a hook is only visible to code that runs on the
// thread which installed it.
#[cfg(test)]
thread_local! {
    static SEARCH_POST_PRECHECK_LOCK_HOOK: RefCell<Option<SearchPostPrecheckLockHook>> =
        const { RefCell::new(None) };
}
33297
/// Which SQLite journaling mode a test lock hook uses when it takes its
/// exclusive lock on the database.
#[cfg(test)]
enum SearchPostPrecheckLockMode {
    /// Lock under `journal_mode=DELETE` (rollback journal).
    RollbackJournal,
    /// Lock under `journal_mode=WAL`.
    Wal,
}
33303
/// A pending test hook: the database to lock and the journaling mode to lock
/// it under.
#[cfg(test)]
struct SearchPostPrecheckLockHook {
    /// Path of the SQLite database the hook opens and locks.
    db_path: PathBuf,
    /// Journaling mode used while the lock is held.
    mode: SearchPostPrecheckLockMode,
}
33309
/// Guard returned when a lock hook is installed; dropping it clears any hook
/// still pending on the current thread.
#[cfg(test)]
struct SearchPostPrecheckLockGuard;

#[cfg(test)]
impl Drop for SearchPostPrecheckLockGuard {
    /// Empties this thread's hook slot so an unconsumed hook cannot leak
    /// into later code on the same thread.
    fn drop(&mut self) {
        SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
            drop(slot.replace(None));
        });
    }
}
33321
/// Installs a lock hook for `db_path` that locks in rollback-journal mode.
///
/// The hook stays pending until consumed or until the returned guard drops.
#[cfg(test)]
fn install_search_post_precheck_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::RollbackJournal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
33326
/// Installs a lock hook for `db_path` that locks in WAL mode.
///
/// The hook stays pending until consumed or until the returned guard drops.
#[cfg(test)]
fn install_search_post_precheck_wal_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::Wal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
33331
/// Stores a lock hook in the current thread's slot and returns the guard
/// that clears it again on drop.
///
/// # Panics
///
/// Panics if a hook is already installed on this thread.
#[cfg(test)]
fn install_search_post_precheck_lock_hook(
    db_path: PathBuf,
    mode: SearchPostPrecheckLockMode,
) -> SearchPostPrecheckLockGuard {
    SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
        let mut pending = slot.borrow_mut();
        assert!(
            pending.is_none(),
            "search post-precheck lock hook already installed"
        );
        *pending = Some(SearchPostPrecheckLockHook { db_path, mode });
    });
    SearchPostPrecheckLockGuard
}
33346
/// Test build: consumes the hook pending on this thread (if any) and has a
/// background thread hold an exclusive SQLite lock on the hook's database.
///
/// The background thread opens the database, takes the lock in the hook's
/// journaling mode, signals readiness, keeps the connection open for 200 ms,
/// then drops it and removes the rollback-journal marker (best effort). This
/// function returns once the lock is reported as held. It is a no-op when no
/// hook is installed.
///
/// # Errors
///
/// Fails if the background thread does not signal readiness within 1 second.
#[cfg(test)]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
    let pending = SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| slot.borrow_mut().take());
    let Some(SearchPostPrecheckLockHook { db_path, mode }) = pending else {
        return Ok(());
    };

    let (ready_tx, ready_rx) = std::sync::mpsc::sync_channel(1);
    std::thread::spawn(move || {
        let conn = Connection::open(&db_path).expect("opening db for search lock hook");
        match mode {
            SearchPostPrecheckLockMode::RollbackJournal => {
                conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
                    .expect("acquiring rollback-journal hook lock");
                // Place a marker file at the rollback-journal path.
                fs::write(substrate::rollback_journal_path(&db_path), "locked")
                    .expect("writing rollback journal marker");
            }
            SearchPostPrecheckLockMode::Wal => {
                // The insert runs before the exclusive lock is taken; the
                // assertion below then checks that the WAL sidecar exists.
                conn.execute_batch(
                    "PRAGMA journal_mode=WAL;
                     PRAGMA wal_autocheckpoint=0;
                     CREATE TABLE IF NOT EXISTS search_wal_lock_probe (id INTEGER PRIMARY KEY);
                     INSERT INTO search_wal_lock_probe DEFAULT VALUES;
                     PRAGMA locking_mode=EXCLUSIVE;
                     BEGIN EXCLUSIVE;",
                )
                .expect("acquiring WAL hook lock");
                assert!(substrate::wal_sidecar_path(&db_path).exists());
            }
        }
        // Tell the caller the lock is held, then keep holding it for a while.
        ready_tx.send(()).expect("signaling search lock hook");
        std::thread::sleep(Duration::from_millis(200));
        drop(conn);
        let _ = fs::remove_file(substrate::rollback_journal_path(&db_path));
    });

    ready_rx
        .recv_timeout(Duration::from_secs(1))
        .context("waiting for search post-precheck lock hook")?;
    Ok(())
}
33385
33386#[cfg(not(test))]
33387pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
33388 Ok(())
33389}