// tsift_cli/lib.rs

1mod cli;
2mod commands;
3mod output;
4
5use anyhow::{Context, Result, bail};
6use clap::Parser;
7use cli::{Cli, Commands, DispatchTraceFormat, GraphDbQuery, SemanticRelatedKind};
8#[cfg(test)]
9use cli::{GraphDbBackend, TraverseFormat};
10use commands::digests::{
11    cmd_context_pack, cmd_diff_digest, cmd_log_digest, cmd_metric_digest, cmd_session_cost,
12    cmd_session_digest, cmd_session_review_with_budget, cmd_test_digest,
13};
14#[cfg(test)]
15use commands::graph::cmd_explain;
16use commands::graph::{
17    cmd_analyze, cmd_communities, cmd_explain_with_budget, cmd_graph, cmd_path, cmd_traverse,
18};
19#[cfg(test)]
20use commands::index_search::cmd_search;
21use commands::index_search::{cmd_index, cmd_search_with_budget, cmd_search_worker};
22use commands::infra::{
23    cmd_convex_sync, cmd_edit, cmd_graph_db, cmd_init, cmd_locks, cmd_rewrite, cmd_route, cmd_sql,
24    cmd_status,
25};
26use commands::quality::{cmd_audit, cmd_audit_tagpath, cmd_lint};
27use commands::summarize::cmd_summarize;
28use flate2::{Compression, read::GzDecoder, write::GzEncoder};
29use output::tagpath::{
30    CommunityMemberAmbiguityDiagnostic, TagpathAnnotationDiagnostic, TagpathSearchOpts,
31    annotate_communities_with_tagpath, annotate_hits_with_tagpath,
32    annotate_path_nodes_with_tagpath, annotate_stored_edges_with_tagpath,
33    annotate_stored_symbols_with_tagpath,
34};
35use output::{
36    OutputFormat, ResponseBudget, ResponseBudgetPreset, ToolEnvelope, ToolEnvelopeMetric,
37    ToolEnvelopeSummary, TranscriptArtifactRef,
38};
39use rusqlite::{Connection, OptionalExtension};
40use serde::{Deserialize, Serialize};
41use sift::{SearchInput, SearchOptions, Sift};
42#[cfg(test)]
43use std::cell::RefCell;
44use std::cmp::Ordering;
45use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
46use std::env;
47use std::fs;
48use std::io::{BufRead as _, BufReader, Read as _, Write as _};
49use std::path::{Path, PathBuf};
50use std::process::{Command, Stdio};
51use std::sync::{Mutex, OnceLock};
52use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
53use substrate::{
54    ConvexEdgeRow, ConvexNodeRow, ConvexProjectionRows, GraphEdge as SubstrateGraphEdge,
55    GraphFreshness, GraphNode as SubstrateGraphNode, GraphProjection, GraphPropertyFilter,
56    GraphProvenance, GraphQueryOptions, GraphQueryPage, GraphStore, SQLITE_GRAPH_SCHEMA_VERSION,
57    SqliteGraphStore, SqliteProjectionRefresh,
58};
59use tagpath::{family as tagpath_family, ontology as tagpath_ontology};
60use tempfile::NamedTempFile;
61#[cfg(test)]
62use tsift_agent_doc::session_cost;
63use tsift_agent_doc::{session_digest, session_review};
64use tsift_digest::{diff_digest, log_digest, metric_digest, test_digest};
65use tsift_graph as graph;
66use tsift_index::{config, index, init, walk};
67use tsift_quality::{dci_benchmark, lint, perf_gate};
68use tsift_resolution as resolution;
69use tsift_search::{impact, sift, tagpath_adapter};
70use tsift_sqlite as substrate;
71use tsift_status::status;
72use tsift_summarize::summarize;
73use tsift_tokensave::TokensaveDb;
74
75#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize)]
76pub(crate) enum GraphDbExperimentalBackend {
77    DuckdbDuckpgq,
78    Falkordb,
79    Ladybug,
80    Kuzu,
81}
82
83#[derive(Serialize)]
84struct GraphDbBackendPromotionGate {
85    status: String,
86    native_adapter_required: bool,
87    required_checks: Vec<String>,
88}
89
90impl GraphDbExperimentalBackend {
91    fn name(self) -> &'static str {
92        match self {
93            Self::DuckdbDuckpgq => "duckdb-duckpgq",
94            Self::Falkordb => "falkordb",
95            Self::Ladybug => "ladybug",
96            Self::Kuzu => "kuzu",
97        }
98    }
99
100    fn adapter_label(self) -> &'static str {
101        match self {
102            Self::DuckdbDuckpgq => "DuckDB/DuckPGQ read-only prototype",
103            Self::Falkordb => "FalkorDB read-only prototype",
104            Self::Ladybug => "Ladybug read-only prototype",
105            Self::Kuzu => "Kuzu (Vela-Engineering/kuzu) read-only prototype",
106        }
107    }
108
109    fn projection_load(self) -> &'static str {
110        match self {
111            Self::Falkordb => {
112                "provider-neutral rows loaded into a FalkorDB-shaped read snapshot for parity and timing only; production FalkorDB storage remains behind backend-eval until a real adapter passes the full-projection gate"
113            }
114            Self::Kuzu => {
115                "provider-neutral rows loaded into a Kuzu-compatible in-process read snapshot for parity and performance gates; production Vela-Engineering/kuzu storage remains behind a future optional adapter"
116            }
117            _ => {
118                "provider-neutral rows loaded into a dependency-free in-process read snapshot for parity and performance gates"
119            }
120        }
121    }
122
123    fn lock_behavior(self) -> &'static str {
124        match self {
125            Self::Falkordb => {
126                "read-only FalkorDB prototype snapshot; production promotion must prove multi-process writer behavior and local fallback semantics before replacing SQLite"
127            }
128            Self::Kuzu => {
129                "read-only Kuzu prototype snapshot; no SQLite writer lock is taken during benchmarks, and production Vela-Engineering/kuzu promotion must prove concurrent writer semantics before replacing SQLite"
130            }
131            _ => "read-only snapshot/row adapter; no writer lock is taken during query benchmarks",
132        }
133    }
134
135    fn install_portability(self) -> &'static str {
136        match self {
137            Self::Falkordb => {
138                "prototype is dependency-free in this binary; production FalkorDB promotion must keep install optional and preserve cargo build/install without a service"
139            }
140            Self::Kuzu => {
141                "prototype is dependency-free in this binary; production Vela-Engineering/kuzu integration must stay optional so cargo build/install works without a native Kuzu toolchain"
142            }
143            _ => {
144                "prototype is dependency-free in this binary; a production engine adapter must remain optional before promotion"
145            }
146        }
147    }
148
149    fn prototype_hold_reason(self) -> Option<&'static str> {
150        match self {
151            Self::DuckdbDuckpgq => Some(
152                "DuckDB/DuckPGQ remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior",
153            ),
154            Self::Falkordb => Some(
155                "FalkorDB remains behind backend-eval until a production adapter beats SQLite on full_projection conflict-matrix, evidence, dispatch-trace, path tiers, install portability, and lock behavior",
156            ),
157            Self::Ladybug => Some(
158                "Ladybug remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior",
159            ),
160            Self::Kuzu => Some(
161                "Kuzu remains behind backend-eval until a native optional adapter proves projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior",
162            ),
163        }
164    }
165
166    fn promotion_gate(self) -> GraphDbBackendPromotionGate {
167        match self {
168            Self::DuckdbDuckpgq => GraphDbBackendPromotionGate {
169                status: "hold_native_adapter_required".to_string(),
170                native_adapter_required: true,
171                required_checks: vec![
172                    "native_duckdb_duckpgq_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
173                        .to_string(),
174                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
175                        .to_string(),
176                    "embedded_or_service_lock_behavior_match_or_beat_sqlite".to_string(),
177                    "operator_install_cost_keeps_cargo_build_install_duckdb_extension_free_by_default"
178                        .to_string(),
179                ],
180            },
181            Self::Falkordb => GraphDbBackendPromotionGate {
182                status: "hold_native_adapter_required".to_string(),
183                native_adapter_required: true,
184                required_checks: vec![
185                    "native_falkordb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
186                        .to_string(),
187                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
188                        .to_string(),
189                    "multi_process_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
190                        .to_string(),
191                    "operator_install_cost_keeps_cargo_build_install_service_free_by_default"
192                        .to_string(),
193                ],
194            },
195            Self::Ladybug => GraphDbBackendPromotionGate {
196                status: "hold_native_adapter_required".to_string(),
197                native_adapter_required: true,
198                required_checks: vec![
199                    "native_ladybug_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
200                        .to_string(),
201                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
202                        .to_string(),
203                    "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
204                        .to_string(),
205                    "operator_install_cost_keeps_cargo_build_install_ladybug_free_by_default"
206                        .to_string(),
207                ],
208            },
209            Self::Kuzu => GraphDbBackendPromotionGate {
210                status: "hold_native_adapter_required".to_string(),
211                native_adapter_required: true,
212                required_checks: vec![
213                    "native_kuzu_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
214                        .to_string(),
215                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
216                        .to_string(),
217                    "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
218                        .to_string(),
219                    "operator_install_cost_keeps_cargo_build_install_native_kuzu_free_by_default"
220                        .to_string(),
221                ],
222            },
223        }
224    }
225
226    fn parse(raw: &str) -> Result<Self> {
227        match raw {
228            "duckdb-duckpgq" | "duckdb" | "duckpgq" => Ok(Self::DuckdbDuckpgq),
229            "falkordb" | "falkor" => Ok(Self::Falkordb),
230            "ladybug" => Ok(Self::Ladybug),
231            "kuzu" | "vela-kuzu" => Ok(Self::Kuzu),
232            _ => {
233                bail!(
234                    "unknown backend-eval candidate {raw:?}; expected duckdb-duckpgq, falkordb, ladybug, or kuzu"
235                )
236            }
237        }
238    }
239}
240
241#[derive(Deserialize)]
242pub(crate) struct EditBatch {
243    edits: Vec<EditOp>,
244}
245
246#[derive(Deserialize)]
247struct EditOp {
248    /// File path to edit
249    file: PathBuf,
250    /// Text to find and replace
251    old: String,
252    /// Replacement text
253    new: String,
254    /// Replace all occurrences (default: false — fails if not unique)
255    #[serde(default)]
256    replace_all: bool,
257}
258
/// Borrowed arguments for the metric-digest command.
///
/// `run` fills this from the fields of `Commands::MetricDigest` and passes it to
/// `cmd_metric_digest`; all data is borrowed for `'a`, nothing is owned here.
pub(crate) struct MetricDigestOptions<'a> {
    /// Optional input path (`input` on the CLI variant).
    input_path: Option<&'a Path>,
    /// Optional baseline path (`baseline` on the CLI variant).
    baseline_path: Option<&'a Path>,
    /// Metric names supplied on the command line.
    metrics: &'a [String],
    /// Names passed as `lower_is_better` — presumably metrics where a decrease is an
    /// improvement; confirm against `cmd_metric_digest`.
    lower_is_better: &'a [String],
    /// Names passed as `higher_is_better` — presumably metrics where an increase is an
    /// improvement; confirm against `cmd_metric_digest`.
    higher_is_better: &'a [String],
    /// `history` value from the CLI (exact meaning is defined by `cmd_metric_digest`).
    history: usize,
    /// `top` value from the CLI (exact meaning is defined by `cmd_metric_digest`).
    top: usize,
}
268
269#[derive(Serialize)]
270pub(crate) struct EditResult {
271    file: PathBuf,
272    status: EditStatus,
273    #[serde(skip_serializing_if = "Option::is_none")]
274    error: Option<String>,
275    #[serde(skip_serializing_if = "Option::is_none")]
276    replacements: Option<usize>,
277}
278
279#[derive(Serialize)]
280#[serde(rename_all = "lowercase")]
281pub(crate) enum EditStatus {
282    Ok,
283    Skipped,
284}
285
/// An edit whose resulting file content has been computed but not yet written.
///
/// NOTE(review): the planned → staged → applied progression is inferred from the
/// sibling `StagedEdit`/`AppliedEdit` types; confirm against `cmd_edit`.
struct PlannedEdit {
    /// Presumably the position of the originating operation within the batch — confirm.
    index: usize,
    /// File the edit targets.
    file: PathBuf,
    /// Content computed for `file`.
    new_content: String,
    /// Number of replacements counted for this edit.
    replacements: usize,
}
292
293struct StagedEdit {
294    index: usize,
295    file: PathBuf,
296    replacements: usize,
297    staged_file: NamedTempFile,
298}
299
/// An edit paired with the path of a backup (`backup_path`).
///
/// NOTE(review): presumably the backup is the pre-edit copy used for rollback; confirm
/// against `cmd_edit`.
struct AppliedEdit {
    /// Presumably the position of the originating operation within the batch — confirm.
    index: usize,
    /// File the edit targets.
    file: PathBuf,
    /// Number of replacements counted for this edit.
    replacements: usize,
    /// Path of the backup associated with this edit.
    backup_path: PathBuf,
}
306
307pub fn run() -> Result<()> {
308    let cli = Cli::parse();
309    let compact = cli.compact;
310    let pretty = cli.pretty;
311    let terse = cli.terse;
312    let absolute = cli.absolute;
313    let tabular = cli.tabular;
314    let schema = cli.schema;
315    let envelope = cli.envelope;
316    match cli.command {
317        Some(Commands::Search {
318            query,
319            path,
320            limit,
321            strategy,
322            exact,
323            scope,
324            federated,
325            json,
326            autoindex,
327            no_autoindex,
328            timeout,
329            max_items,
330            max_bytes,
331            budget,
332            no_tagpath,
333            tagpath_strict,
334        }) => cmd_search_with_budget(
335            query,
336            path,
337            limit,
338            if exact {
339                Some("exact".to_string())
340            } else {
341                strategy
342            },
343            scope,
344            federated,
345            json || terse || schema || envelope,
346            autoindex || !no_autoindex,
347            timeout,
348            compact,
349            pretty,
350            terse,
351            absolute,
352            tabular,
353            schema,
354            envelope,
355            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
356            TagpathSearchOpts {
357                no_tagpath,
358                strict: tagpath_strict,
359            },
360        ),
361        Some(Commands::SearchWorker {
362            path,
363            cache_dir,
364            query,
365            limit,
366            strategy,
367            output,
368        }) => cmd_search_worker(&path, &cache_dir, &query, limit, &strategy, &output),
369        Some(Commands::DigestRunner {
370            kind,
371            path,
372            runner,
373            shell_command,
374            json,
375        }) => cmd_digest_runner(
376            &kind,
377            &path,
378            runner.as_deref(),
379            &shell_command,
380            OutputFormat {
381                json_output: json || terse || schema || envelope,
382                compact,
383                pretty,
384                terse,
385                schema,
386                envelope,
387            },
388        ),
389        Some(Commands::Edit { dry_run, file }) => {
390            cmd_edit(dry_run, file, compact, pretty, terse, schema)
391        }
392        Some(Commands::Index {
393            path,
394            rebuild,
395            check,
396            exit_code,
397            prune,
398            quiet,
399            workspace,
400            submodule,
401            json,
402        }) => cmd_index(
403            &path,
404            rebuild,
405            check,
406            exit_code,
407            prune,
408            quiet,
409            workspace,
410            submodule.as_deref(),
411            json || terse || schema || envelope,
412            compact,
413            pretty,
414            terse,
415            absolute,
416            schema,
417        ),
418        Some(Commands::Rewrite { command, run }) => cmd_rewrite(
419            &command,
420            run,
421            OutputFormat {
422                json_output: terse || schema || envelope,
423                compact,
424                pretty,
425                terse,
426                schema,
427                envelope,
428            },
429        ),
430        Some(Commands::Route { task, id }) => cmd_route(&task, id),
431        Some(Commands::Graph {
432            symbol,
433            path,
434            callers,
435            callees,
436            scope,
437            limit,
438            json,
439            no_tagpath,
440            tagpath_strict,
441        }) => cmd_graph(
442            &symbol,
443            &path,
444            callers,
445            callees,
446            scope.as_deref(),
447            limit,
448            json || terse || schema || envelope,
449            compact,
450            pretty,
451            terse,
452            absolute,
453            tabular,
454            schema,
455            TagpathSearchOpts {
456                no_tagpath,
457                strict: tagpath_strict,
458            },
459        ),
460        Some(Commands::Sql {
461            db,
462            query,
463            table,
464            json,
465        }) => cmd_sql(
466            &db,
467            query,
468            table,
469            json || terse || schema || envelope,
470            compact,
471            pretty,
472            terse,
473            schema,
474        ),
475        Some(Commands::Communities {
476            path,
477            scope,
478            min_size,
479            limit,
480            json,
481            no_tagpath,
482            tagpath_strict,
483        }) => cmd_communities(
484            &path,
485            scope.as_deref(),
486            min_size,
487            limit,
488            json || terse || schema || envelope,
489            compact,
490            pretty,
491            terse,
492            tabular,
493            schema,
494            TagpathSearchOpts {
495                no_tagpath,
496                strict: tagpath_strict,
497            },
498        ),
499        Some(Commands::Analyze {
500            path,
501            scope,
502            entry_points,
503            limit,
504            json,
505        }) => cmd_analyze(
506            &path,
507            scope.as_deref(),
508            &entry_points,
509            limit,
510            OutputFormat {
511                json_output: json || terse || schema || envelope,
512                compact,
513                pretty,
514                terse,
515                schema,
516                envelope,
517            },
518        ),
519        Some(Commands::Path {
520            from,
521            to,
522            path,
523            scope,
524            json,
525            no_tagpath,
526            tagpath_strict,
527        }) => cmd_path(
528            &from,
529            &to,
530            &path,
531            scope.as_deref(),
532            json || terse || schema || envelope,
533            compact,
534            pretty,
535            terse,
536            schema,
537            TagpathSearchOpts {
538                no_tagpath,
539                strict: tagpath_strict,
540            },
541        ),
542        Some(Commands::Explain {
543            symbol,
544            path,
545            scope,
546            limit,
547            json,
548            max_items,
549            max_bytes,
550            budget,
551            no_tagpath,
552            tagpath_strict,
553        }) => cmd_explain_with_budget(
554            &symbol,
555            &path,
556            scope.as_deref(),
557            limit,
558            json || terse || schema || envelope,
559            compact,
560            pretty,
561            terse,
562            absolute,
563            tabular,
564            schema,
565            envelope,
566            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
567            TagpathSearchOpts {
568                no_tagpath,
569                strict: tagpath_strict,
570            },
571        ),
572        Some(Commands::Traverse {
573            node,
574            to,
575            path,
576            scope,
577            depth,
578            limit,
579            format,
580            convex_snapshot,
581        }) => cmd_traverse(
582            node.as_deref(),
583            to.as_deref(),
584            &path,
585            scope.as_deref(),
586            depth,
587            limit,
588            format,
589            pretty,
590            terse,
591            schema,
592            convex_snapshot.as_deref(),
593        ),
594        Some(Commands::ConvexSync {
595            path,
596            scope,
597            snapshot,
598            chunk_size,
599            remote_snapshot,
600            apply,
601            endpoint,
602            auth_token_env,
603            json,
604        }) => cmd_convex_sync(
605            ConvexSyncOptions {
606                path: &path,
607                scope: scope.as_deref(),
608                snapshot: snapshot.as_deref(),
609                chunk_size,
610                remote_snapshot,
611                apply,
612                endpoint: endpoint.as_deref(),
613                auth_token_env: &auth_token_env,
614            },
615            OutputFormat {
616                json_output: json || terse || schema || envelope,
617                compact,
618                pretty,
619                terse,
620                schema,
621                envelope,
622            },
623        ),
624        Some(Commands::GraphDb {
625            path,
626            scope,
627            backend,
628            convex_snapshot,
629            json,
630            query,
631        }) => cmd_graph_db(
632            &path,
633            scope.as_deref(),
634            backend,
635            convex_snapshot.as_deref(),
636            query,
637            OutputFormat {
638                json_output: json || terse || schema || envelope,
639                compact,
640                pretty,
641                terse,
642                schema,
643                envelope,
644            },
645        ),
646        Some(Commands::SourceRead {
647            file,
648            path,
649            start,
650            lines,
651            end,
652            scope,
653            json,
654            max_items,
655            max_bytes,
656            budget,
657        }) => cmd_source_read(
658            &file,
659            &path,
660            start,
661            lines,
662            end,
663            scope.as_deref(),
664            OutputFormat {
665                json_output: json || terse || schema || envelope,
666                compact,
667                pretty,
668                terse,
669                schema,
670                envelope,
671            },
672            absolute,
673            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
674        ),
675        Some(Commands::Audit {
676            skills_dir,
677            manifest,
678            usage,
679            cleanup,
680            report,
681            json,
682        }) => cmd_audit(
683            &skills_dir,
684            manifest,
685            usage,
686            cleanup,
687            report,
688            json || terse || schema || envelope,
689            compact,
690            pretty,
691            terse,
692            schema,
693        ),
694        Some(Commands::AuditTagpath { path, scope, json }) => cmd_audit_tagpath(
695            &path,
696            scope.as_deref(),
697            json || terse || schema || envelope,
698            pretty,
699            terse,
700            schema,
701        ),
702        Some(Commands::Init {
703            path,
704            codex,
705            opencode,
706            workspace,
707        }) => cmd_init(&path, codex, opencode, workspace),
708        Some(Commands::Lint {
709            file,
710            index,
711            entities_from,
712            json,
713        }) => cmd_lint(
714            &file,
715            index,
716            entities_from,
717            json || terse || schema || envelope,
718            compact,
719            pretty,
720            terse,
721            schema,
722        ),
723        Some(Commands::Summarize {
724            symbol,
725            file,
726            extract,
727            diff,
728            stats,
729            path,
730            json,
731        }) => cmd_summarize(
732            symbol,
733            file,
734            extract,
735            diff,
736            stats,
737            &path,
738            json || terse || schema || envelope,
739            compact,
740            pretty,
741            terse,
742            schema,
743        ),
744        Some(Commands::Semantic {
745            query,
746            path,
747            scope,
748            limit,
749            kind,
750            json,
751        }) => cmd_semantic_related(
752            &query,
753            &path,
754            scope.as_deref(),
755            limit,
756            kind,
757            json || terse || schema || envelope,
758            compact,
759            pretty,
760            terse,
761            schema,
762        ),
763        Some(Commands::DiffDigest {
764            path,
765            cached,
766            revision,
767            json,
768        }) => cmd_diff_digest(
769            &path,
770            cached,
771            revision.as_deref(),
772            OutputFormat {
773                json_output: json || terse || schema || envelope,
774                compact,
775                pretty,
776                terse,
777                schema,
778                envelope,
779            },
780        ),
781        Some(Commands::Impact {
782            path,
783            cached,
784            revision,
785            scope,
786            limit,
787            json,
788        }) => cmd_impact(
789            &path,
790            cached,
791            revision.as_deref(),
792            scope.as_deref(),
793            limit,
794            OutputFormat {
795                json_output: json || terse || schema || envelope,
796                compact,
797                pretty,
798                terse,
799                schema,
800                envelope,
801            },
802        ),
803        Some(Commands::TestDigest {
804            path,
805            input,
806            runner,
807            json,
808        }) => cmd_test_digest(
809            &path,
810            input.as_deref(),
811            runner.as_deref(),
812            OutputFormat {
813                json_output: json || terse || schema || envelope,
814                compact,
815                pretty,
816                terse,
817                schema,
818                envelope,
819            },
820        ),
821        Some(Commands::LogDigest { path, input, json }) => cmd_log_digest(
822            &path,
823            input.as_deref(),
824            OutputFormat {
825                json_output: json || terse || schema || envelope,
826                compact,
827                pretty,
828                terse,
829                schema,
830                envelope,
831            },
832        ),
833        Some(Commands::ContextPack {
834            path,
835            test_input,
836            runner,
837            log_input,
838            json,
839            max_items,
840            max_bytes,
841            budget,
842            convex_snapshot,
843        }) => cmd_context_pack(
844            &path,
845            test_input.as_deref(),
846            runner.as_deref(),
847            log_input.as_deref(),
848            OutputFormat {
849                json_output: json || terse || schema || envelope,
850                compact,
851                pretty,
852                terse,
853                schema,
854                envelope,
855            },
856            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
857            convex_snapshot.as_deref(),
858        ),
859        Some(Commands::ConflictMatrix {
860            targets,
861            path,
862            scope,
863            depth,
864            limit,
865            impact_limit,
866            json,
867        }) => cmd_conflict_matrix(
868            &path,
869            scope.as_deref(),
870            &targets,
871            depth,
872            limit,
873            impact_limit,
874            OutputFormat {
875                json_output: json || terse || schema || envelope,
876                compact,
877                pretty,
878                terse,
879                schema,
880                envelope,
881            },
882        ),
883        Some(Commands::DispatchTrace {
884            targets,
885            path,
886            scope,
887            depth,
888            limit,
889            impact_limit,
890            format,
891            json,
892        }) => cmd_dispatch_trace(
893            DispatchTraceOptions {
894                path: &path,
895                scope: scope.as_deref(),
896                raw_targets: &targets,
897                depth,
898                limit,
899                impact_limit,
900                trace_format: if json {
901                    DispatchTraceFormat::Json
902                } else {
903                    format
904                },
905            },
906            OutputFormat {
907                json_output: json || terse || schema || envelope,
908                compact,
909                pretty,
910                terse,
911                schema,
912                envelope,
913            },
914        ),
915        Some(Commands::DependencyDag {
916            targets,
917            path,
918            scope,
919            depth,
920            limit,
921            json,
922        }) => cmd_dependency_dag(
923            &path,
924            scope.as_deref(),
925            &targets,
926            depth,
927            limit,
928            OutputFormat {
929                json_output: json || terse || schema || envelope,
930                compact,
931                pretty,
932                terse,
933                schema,
934                envelope,
935            },
936        ),
937        Some(Commands::TokenSavings {
938            fixture,
939            fail_under,
940            json,
941        }) => cmd_token_savings(
942            &fixture,
943            fail_under,
944            OutputFormat {
945                json_output: json || terse || schema || envelope,
946                compact,
947                pretty,
948                terse,
949                schema,
950                envelope,
951            },
952        ),
953        Some(Commands::MetricDigest {
954            input,
955            baseline,
956            metrics,
957            lower_is_better,
958            higher_is_better,
959            history,
960            top,
961            json,
962        }) => cmd_metric_digest(
963            MetricDigestOptions {
964                input_path: input.as_deref(),
965                baseline_path: baseline.as_deref(),
966                metrics: &metrics,
967                lower_is_better: &lower_is_better,
968                higher_is_better: &higher_is_better,
969                history,
970                top,
971            },
972            OutputFormat {
973                json_output: json || terse || schema || envelope,
974                compact,
975                pretty,
976                terse,
977                schema,
978                envelope,
979            },
980        ),
981        Some(Commands::DciBenchmark { fixture, json }) => cmd_dci_benchmark(
982            &fixture,
983            OutputFormat {
984                json_output: json || terse || schema || envelope,
985                compact,
986                pretty,
987                terse,
988                schema,
989                envelope,
990            },
991        ),
992        Some(Commands::Workflow { topic, json }) => cmd_workflow(
993            &topic,
994            OutputFormat {
995                json_output: json || terse || schema || envelope,
996                compact,
997                pretty,
998                terse,
999                schema,
1000                envelope,
1001            },
1002        ),
1003        Some(Commands::SessionDigest {
1004            path,
1005            input,
1006            source,
1007            json,
1008        }) => cmd_session_digest(
1009            &path,
1010            input.as_deref(),
1011            source.as_deref(),
1012            OutputFormat {
1013                json_output: json || terse || schema || envelope,
1014                compact,
1015                pretty,
1016                terse,
1017                schema,
1018                envelope,
1019            },
1020        ),
1021        Some(Commands::SessionCost {
1022            input,
1023            source,
1024            json,
1025        }) => cmd_session_cost(
1026            input.as_deref(),
1027            source.as_deref(),
1028            OutputFormat {
1029                json_output: json || terse || schema || envelope,
1030                compact,
1031                pretty,
1032                terse,
1033                schema,
1034                envelope,
1035            },
1036        ),
1037        Some(Commands::SessionReview {
1038            path,
1039            next_context,
1040            json,
1041            max_items,
1042            max_bytes,
1043            budget,
1044        }) => cmd_session_review_with_budget(
1045            &path,
1046            next_context,
1047            OutputFormat {
1048                json_output: json || terse || schema || envelope,
1049                compact,
1050                pretty,
1051                terse,
1052                schema,
1053                envelope,
1054            },
1055            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
1056        ),
1057        Some(Commands::Status { path, fix, json }) => cmd_status(
1058            &path,
1059            fix,
1060            json || terse || schema || envelope,
1061            compact,
1062            pretty,
1063            terse,
1064            schema,
1065        ),
1066        Some(Commands::Locks { path, scope, json }) => cmd_locks(
1067            &path,
1068            scope.as_deref(),
1069            json || terse || schema || envelope,
1070            compact,
1071            pretty,
1072            terse,
1073            schema,
1074        ),
1075        None => {
1076            println!("tsift v{}", env!("CARGO_PKG_VERSION"));
1077            println!("Run `tsift --help` for usage.");
1078            Ok(())
1079        }
1080    }
1081}
1082
/// Classify a task description into a model tier.
///
/// Returns `(tier_name, model_id)`. Matching is case-insensitive and mostly
/// substring based, checked in priority order:
///
/// 1. architecture/design signals → `opus`
/// 2. edit/write signals → `sonnet`
/// 3. anything else (search/lookup) → `haiku`
///
/// The word "add" is additionally matched as a whole word so that it is
/// recognised at the end of the task or before punctuation/newlines, where the
/// plain `"add "` substring (kept for backward compatibility) does not fire.
pub fn classify_task(task: &str) -> (&'static str, &'static str) {
    // "architect" also covers "architecture", so no separate entry is needed.
    const OPUS_SIGNALS: &[&str] = &[
        "architect",
        "design",
        "plan",
        "strateg",
        "analy",
        "review",
        "evaluate",
        "assess",
    ];
    const SONNET_SIGNALS: &[&str] = &[
        "edit",
        "write",
        "fix",
        "change",
        "update",
        "create",
        "add ",
        "remove",
        "delete",
        "modify",
        "refactor",
        "implement",
        "build",
    ];

    let lower = task.to_lowercase();

    // Architecture/design signals → opus
    if OPUS_SIGNALS.iter().any(|signal| lower.contains(*signal)) {
        return ("opus", "claude-opus-4-6");
    }

    // Whole-word "add": identifiers such as `add_item` or words such as
    // "address" stay a single token and therefore do not match.
    let mentions_add = lower
        .split(|c: char| !(c.is_alphanumeric() || c == '_'))
        .any(|word| word == "add");

    // Edit/write signals → sonnet
    if mentions_add || SONNET_SIGNALS.iter().any(|signal| lower.contains(*signal)) {
        return ("sonnet", "claude-sonnet-4-6");
    }

    // Default: search/lookup → haiku
    ("haiku", "claude-haiku-4-5-20251001")
}
1126
/// Test-only convenience wrapper around `to_json_schema` with the schema
/// transform disabled.
#[cfg(test)]
fn to_json<T: serde::Serialize>(val: &T, pretty: bool, terse: bool) -> anyhow::Result<String> {
    let schema = false;
    to_json_schema(val, pretty, terse, schema)
}
1131
1132/// Add top-level `tagpath_index_stale: true` + `tagpath_stale_reason: <reason>`
1133/// fields to a JSON response when the tagpath adapter reported any helper
1134/// going stale. JSON consumers (`tsift --envelope` / `--json` callers) can
1135/// then act on the same condition the stderr `tagpath_index_stale: …` log
1136/// already surfaces without parsing logs. No-op when `stale=false` or when
1137/// `value` is not a JSON object.
1138pub(crate) fn inject_tagpath_stale_into_json(
1139    value: &mut serde_json::Value,
1140    stale: bool,
1141    reason: Option<&str>,
1142) {
1143    if !stale {
1144        return;
1145    }
1146    if let Some(obj) = value.as_object_mut() {
1147        obj.insert(
1148            "tagpath_index_stale".to_string(),
1149            serde_json::Value::Bool(true),
1150        );
1151        if let Some(reason) = reason {
1152            obj.insert(
1153                "tagpath_stale_reason".to_string(),
1154                serde_json::Value::String(reason.to_string()),
1155            );
1156        }
1157    }
1158}
1159
1160pub(crate) fn to_json_schema<T: serde::Serialize>(
1161    val: &T,
1162    pretty: bool,
1163    terse: bool,
1164    schema: bool,
1165) -> anyhow::Result<String> {
1166    if terse || schema {
1167        let value = serde_json::to_value(val)?;
1168        let mut transformed = if terse { terse_transform(value) } else { value };
1169        if schema {
1170            transformed = schema_transform(transformed);
1171        }
1172        if terse {
1173            let terse_schema = terse_schema_for(&transformed);
1174            let wrapped = serde_json::json!({"_s": terse_schema, "d": transformed});
1175            if pretty {
1176                Ok(serde_json::to_string_pretty(&wrapped)?)
1177            } else {
1178                Ok(serde_json::to_string(&wrapped)?)
1179            }
1180        } else if pretty {
1181            Ok(serde_json::to_string_pretty(&transformed)?)
1182        } else {
1183            Ok(serde_json::to_string(&transformed)?)
1184        }
1185    } else if pretty {
1186        Ok(serde_json::to_string_pretty(val)?)
1187    } else {
1188        Ok(serde_json::to_string(val)?)
1189    }
1190}
1191
1192pub(crate) fn envelope_metric(label: &str, value: impl ToString) -> ToolEnvelopeMetric {
1193    ToolEnvelopeMetric {
1194        label: label.to_string(),
1195        value: value.to_string(),
1196    }
1197}
1198
/// Remove duplicate strings, keeping the first occurrence of each value and
/// the original relative order.
pub(crate) fn dedupe_preserve_order(values: Vec<String>) -> Vec<String> {
    let mut already_seen = HashSet::new();
    values
        .into_iter()
        // `insert` returns false for a value that was already present.
        .filter(|candidate| already_seen.insert(candidate.clone()))
        .collect()
}
1209
1210pub(crate) fn print_json_or_envelope<T: Serialize>(
1211    report: &T,
1212    format: &OutputFormat,
1213    tool: &str,
1214    view: &str,
1215    summary: ToolEnvelopeSummary,
1216    truncated: bool,
1217    follow_up: Vec<String>,
1218) -> Result<()> {
1219    if format.envelope {
1220        let envelope = ToolEnvelope {
1221            tool,
1222            view,
1223            summary,
1224            truncated,
1225            follow_up: dedupe_preserve_order(follow_up),
1226            report,
1227        };
1228        println!(
1229            "{}",
1230            to_json_schema(&envelope, format.pretty, format.terse, format.schema)?
1231        );
1232    } else {
1233        println!(
1234            "{}",
1235            to_json_schema(report, format.pretty, format.terse, format.schema)?
1236        );
1237    }
1238    Ok(())
1239}
1240
/// One step of a workflow recipe, serialized as part of [`WorkflowRecipe`].
#[derive(Serialize)]
struct WorkflowStep {
    /// Short step identifier (for example `"exact-anchor"`).
    name: &'static str,
    /// What the step is meant to achieve.
    goal: &'static str,
    /// Command-line template for the step; also reused as an envelope
    /// follow-up command by `cmd_workflow`.
    command: &'static str,
    /// Handles or fields the caller should carry forward from this step.
    preserves: Vec<&'static str>,
    /// Guidance on what to run after this step.
    next: Vec<&'static str>,
}
1249
/// A named, ordered list of workflow steps that can be printed for humans or
/// serialized as JSON.
#[derive(Serialize)]
struct WorkflowRecipe {
    /// Canonical topic name (also used as the envelope view by `cmd_workflow`).
    topic: &'static str,
    /// One-sentence description of the workflow.
    summary: &'static str,
    /// Rules for keeping handles intact across the steps.
    handle_contract: Vec<&'static str>,
    /// Steps in the order they should be run.
    steps: Vec<WorkflowStep>,
}
1257
1258fn search_workflow_recipe() -> WorkflowRecipe {
1259    WorkflowRecipe {
1260        topic: "search",
1261        summary: "Chain exact search, semantic search, explain, summarize, and digest commands without dropping the stable handles emitted by each envelope.",
1262        handle_contract: vec![
1263            "Keep every handle with its originating command, query, path, and strategy.",
1264            "Use each step's expand command for deeper context, but cite the parent handle in notes and follow-up prompts.",
1265            "Prefer --envelope plus --budget normal when handing results to an agent so handles, follow_up commands, and truncation state stay machine-readable.",
1266        ],
1267        steps: vec![
1268            WorkflowStep {
1269                name: "exact-anchor",
1270                goal: "Start from a literal identifier, file path, error text, or prior handle label.",
1271                command: "tsift --envelope search \"<literal>\" --exact --path . --budget normal",
1272                preserves: vec![
1273                    "summary.handle",
1274                    "report.symbols[].handle",
1275                    "report.hits[].handle",
1276                ],
1277                next: vec![
1278                    "Run the matching report.symbols[].expand or report.hits[].expand command before broadening the query.",
1279                ],
1280            },
1281            WorkflowStep {
1282                name: "semantic-search",
1283                goal: "Broaden from the exact anchor to lexical, vector, or hybrid retrieval while keeping search-family handles.",
1284                command: "tsift --envelope search \"<concept>\" --path . --strategy hybrid --budget normal",
1285                preserves: vec![
1286                    "sfam-* symbol-family handles",
1287                    "shit-* content-hit handles",
1288                    "follow_up[]",
1289                ],
1290                next: vec![
1291                    "Use a symbol-family expand command for more search results, or pass the selected symbol name to explain.",
1292                ],
1293            },
1294            WorkflowStep {
1295                name: "explain-symbol",
1296                goal: "Expand a selected symbol into definitions, callers, callees, and community context.",
1297                command: "tsift --envelope explain \"<symbol>\" --path . --budget normal",
1298                preserves: vec![
1299                    "edef-* definition handles",
1300                    "ecall-* caller handles",
1301                    "eces-* callee handles",
1302                ],
1303                next: vec![
1304                    "Run edge expand commands for neighboring symbols, or summarize the selected symbol/file when the cache is available.",
1305                ],
1306            },
1307            WorkflowStep {
1308                name: "summarize-selection",
1309                goal: "Read cached summaries for the selected symbol or file without mutating the summary cache.",
1310                command: "tsift summarize \"<symbol>\" --path . --json",
1311                preserves: vec![
1312                    "summary refs emitted by search, explain, test-digest, log-digest, diff-digest, and context-pack",
1313                ],
1314                next: vec![
1315                    "If summaries are missing, run the status-recommended summarize --extract command outside the read-only query path.",
1316                ],
1317            },
1318            WorkflowStep {
1319                name: "digest-expansion",
1320                goal: "Expand from code navigation into changed files, tests, logs, or session context while retaining digest artifact handles.",
1321                command: "tsift --envelope context-pack <path> --test-input test.log --log-input build.log --budget normal",
1322                preserves: vec![
1323                    "artifact handles",
1324                    "touched symbol handles",
1325                    "digest summary handles",
1326                    "resume_commands[]",
1327                ],
1328                next: vec![
1329                    "Use resume_commands[] or each digest entry's expand command, and carry forward the original search/explain handle that motivated the digest.",
1330                ],
1331            },
1332        ],
1333    }
1334}
1335
1336fn workflow_recipe(topic: &str) -> Result<WorkflowRecipe> {
1337    match topic {
1338        "search" | "search-handles" | "search-workflow" => Ok(search_workflow_recipe()),
1339        other => bail!("unknown workflow `{other}`; available workflows: search"),
1340    }
1341}
1342
1343fn print_workflow_human(recipe: &WorkflowRecipe, compact: bool) {
1344    if compact {
1345        println!("workflow:{} steps:{}", recipe.topic, recipe.steps.len());
1346        for step in &recipe.steps {
1347            println!("  {} cmd:{}", step.name, step.command);
1348        }
1349        return;
1350    }
1351
1352    println!("Workflow: {}", recipe.topic);
1353    println!("{}", recipe.summary);
1354    println!();
1355    println!("Handle contract:");
1356    for item in &recipe.handle_contract {
1357        println!("  - {item}");
1358    }
1359    println!();
1360    println!("Steps:");
1361    for (index, step) in recipe.steps.iter().enumerate() {
1362        println!("  {}. {} - {}", index + 1, step.name, step.goal);
1363        println!("     cmd: {}", step.command);
1364        println!("     preserves: {}", step.preserves.join(", "));
1365        println!("     next: {}", step.next.join(" "));
1366    }
1367}
1368
1369fn cmd_workflow(topic: &str, format: OutputFormat) -> Result<()> {
1370    let recipe = workflow_recipe(topic)?;
1371    if format.json_output {
1372        print_json_or_envelope(
1373            &recipe,
1374            &format,
1375            "workflow",
1376            recipe.topic,
1377            ToolEnvelopeSummary {
1378                text: recipe.summary.to_string(),
1379                metrics: vec![envelope_metric("steps", recipe.steps.len())],
1380            },
1381            false,
1382            recipe
1383                .steps
1384                .iter()
1385                .map(|step| step.command.to_string())
1386                .collect(),
1387        )
1388    } else {
1389        print_workflow_human(&recipe, format.compact);
1390        Ok(())
1391    }
1392}
1393
/// Top-level token-savings fixture, parsed from the JSON fixture file.
#[derive(Deserialize, Serialize)]
struct TokenSavingsFixture {
    /// Fixture schema version; copied unchanged into the report.
    schema_version: u64,
    /// Free-form description; defaults to an empty string when absent.
    #[serde(default)]
    description: String,
    /// Label for the token-estimation method; copied unchanged into the report.
    token_estimate: String,
    /// Cases to measure, in fixture order.
    cases: Vec<TokenSavingsFixtureCase>,
}
1402
/// One measured case: the raw payload and the compact envelope it is compared to.
#[derive(Deserialize, Serialize)]
struct TokenSavingsFixtureCase {
    /// Case name; part of every handle key generated for this case.
    name: String,
    /// Command surface the case models (e.g. `explain`, `session-review`,
    /// `context-pack`); selects the expand command for family rows.
    surface: String,
    /// Minimum savings percentage the case must reach to pass.
    minimum_savings_percent: f64,
    /// Raw symbol rows; their JSON size is the baseline raw byte count.
    raw_symbols: Vec<TokenSavingsRawSymbol>,
    /// Grouped families that become the compact envelope rows.
    tagpath_families: Vec<TokenSavingsFamily>,
    /// Optional session-review payload added to both raw and envelope sizes.
    #[serde(default)]
    session_review_inputs: Option<TokenSavingsSessionReviewInputs>,
    /// Optional context-pack payload added to both raw and envelope sizes.
    #[serde(default)]
    context_pack_inputs: Option<TokenSavingsContextPackInputs>,
    /// Optional source-read payload added to both raw and envelope sizes.
    #[serde(default)]
    source_read_inputs: Option<TokenSavingsSourceReadInputs>,
}
1417
/// One raw symbol row of a fixture case. The report builder only serializes
/// these rows to measure their size; it does not read individual fields.
#[derive(Deserialize, Serialize)]
struct TokenSavingsRawSymbol {
    /// Symbol identifier.
    identifier: String,
    /// File the symbol was found in.
    file: String,
    /// Line number of the symbol (presumably 1-based — confirm against fixtures).
    line: u64,
    /// Surrounding context text for the symbol.
    context: String,
}
1425
/// A family of related symbols that the envelope reports as a single row.
#[derive(Deserialize, Serialize)]
struct TokenSavingsFamily {
    /// Canonical family name; feeds the handle key, the tag alias
    /// (underscores replaced by `/`), and the expand command.
    canonical: String,
    /// Number of members in the family; copied into the envelope row.
    count: usize,
    /// Optional alias map; defaults to empty when absent.
    #[serde(default)]
    aliases: BTreeMap<String, String>,
}
1433
/// Raw session-review payload for a fixture case.
///
/// The whole struct is serialized to measure raw bytes; each list's length
/// becomes the `count` of the matching envelope section.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSessionReviewInputs {
    /// Counted under the `prompt_targets` section.
    prompt_targets: Vec<serde_json::Value>,
    /// Counted under the `sessions` section.
    sessions: Vec<serde_json::Value>,
    /// Counted under the `commands` section.
    commands: Vec<serde_json::Value>,
    /// Counted under the `files` section.
    touched_files: Vec<serde_json::Value>,
    /// Counted under the `symbols` section.
    touched_symbols: Vec<serde_json::Value>,
    /// Counted under the `failures` section.
    failures: Vec<serde_json::Value>,
    /// Counted under the `guardrails` section.
    guardrails: Vec<serde_json::Value>,
    /// Counted under the `largest_turns` section.
    largest_turns: Vec<serde_json::Value>,
}
1445
/// Raw context-pack payload for a fixture case.
///
/// The whole struct is serialized to measure raw bytes; each list's length
/// becomes the `count` of the envelope section with the same name.
#[derive(Deserialize, Serialize)]
struct TokenSavingsContextPackInputs {
    /// Entries for the `next_context` section.
    next_context: Vec<serde_json::Value>,
    /// Entries for the `diff` section.
    diff: Vec<serde_json::Value>,
    /// Entries for the `test` section.
    test: Vec<serde_json::Value>,
    /// Entries for the `log` section.
    log: Vec<serde_json::Value>,
}
1453
/// Raw source-read payload for a fixture case.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSourceReadInputs {
    /// Individual reads; this list alone is serialized for the raw byte count.
    reads: Vec<TokenSavingsSourceReadInput>,
}
1458
/// One source read: the raw excerpt and the narrower envelope window that is
/// meant to replace it.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSourceReadInput {
    /// Command that produced the read; part of the handle key and of
    /// validation error messages.
    command: String,
    /// File that was read.
    file: String,
    /// First line of the raw read.
    raw_start: u64,
    /// Number of lines in the raw read.
    raw_lines: u64,
    /// Raw excerpt text; contributes to the raw byte count.
    raw_excerpt: String,
    /// First line of the envelope window.
    envelope_start: u64,
    /// Number of lines in the envelope window; zero is rejected.
    envelope_lines: u64,
    /// Lines that must fall inside the envelope window.
    required_line_anchors: Vec<u64>,
}
1470
/// Compact envelope row describing one tagpath family.
#[derive(Serialize)]
struct TokenSavingsEnvelopeFamily {
    /// Stable handle derived from surface, case name, and canonical name.
    handle: String,
    /// Canonical family name with `_` replaced by `/`.
    tag_alias: String,
    /// Member count copied from the fixture family.
    count: usize,
    /// Command that expands this family into full results.
    expand: String,
}
1478
/// Compact envelope row summarizing one session-review section.
#[derive(Serialize)]
struct TokenSavingsSessionReviewEnvelope<'a> {
    /// Section name (e.g. `sessions`, `files`).
    section: &'a str,
    /// Stable handle derived from the case name and section.
    handle: String,
    /// Number of raw entries the section stands for.
    count: usize,
    /// Command that expands the section.
    expand: String,
}
1486
/// Compact envelope row summarizing one context-pack section.
#[derive(Serialize)]
struct TokenSavingsContextPackEnvelope<'a> {
    /// Section name (`next_context`, `diff`, `test`, or `log`).
    section: &'a str,
    /// Stable handle derived from the case name and section.
    handle: String,
    /// Number of raw entries the section stands for.
    count: usize,
    /// Command that expands the section.
    expand: String,
}
1494
/// Compact envelope row describing one bounded source read.
#[derive(Serialize)]
struct TokenSavingsSourceReadEnvelope {
    /// Stable handle derived from the case name and read command.
    handle: String,
    /// File the window belongs to.
    file: String,
    /// First line of the envelope window.
    start: u64,
    /// Number of lines in the envelope window.
    lines: u64,
    /// Anchors that were validated to fall inside the window.
    required_line_anchors: Vec<u64>,
    /// `source-read` command that reproduces the window.
    expand: String,
}
1504
/// Per-case result of the token-savings comparison.
#[derive(Serialize)]
struct TokenSavingsCaseReport {
    /// Case name from the fixture.
    name: String,
    /// Surface from the fixture.
    surface: String,
    /// Number of raw symbol rows in the case.
    raw_symbol_count: usize,
    /// Number of tagpath families in the case.
    family_count: usize,
    /// Serialized size of the raw payload.
    raw_bytes: usize,
    /// Serialized size of the compact envelope.
    envelope_bytes: usize,
    /// `raw_bytes - envelope_bytes`, saturating at zero.
    byte_delta: usize,
    /// Token estimate for the raw payload.
    raw_estimated_tokens: usize,
    /// Token estimate for the envelope.
    envelope_estimated_tokens: usize,
    /// Raw minus envelope token estimate, saturating at zero.
    estimated_token_delta: usize,
    /// Percentage of raw bytes saved by the envelope.
    savings_percent: f64,
    /// Threshold the case had to reach.
    minimum_savings_percent: f64,
    /// `"pass"` when the threshold was met, otherwise `"fail"`.
    status: String,
}
1521
/// Aggregate figures across every case in a token-savings report.
#[derive(Serialize)]
struct TokenSavingsTotals {
    /// Number of cases in the report.
    cases: usize,
    /// Sum of raw bytes over all cases.
    raw_bytes: usize,
    /// Sum of envelope bytes over all cases.
    envelope_bytes: usize,
    /// Total raw minus total envelope bytes, saturating at zero.
    byte_delta: usize,
    /// Token estimate computed from the total raw bytes.
    raw_estimated_tokens: usize,
    /// Token estimate computed from the total envelope bytes.
    envelope_estimated_tokens: usize,
    /// Total raw minus total envelope token estimate, saturating at zero.
    estimated_token_delta: usize,
    /// Savings percentage computed from the byte totals.
    savings_percent: f64,
}
1533
/// Full token-savings report produced from a fixture.
#[derive(Serialize)]
struct TokenSavingsReport {
    /// Schema version copied from the fixture.
    schema_version: u64,
    /// Token-estimate label copied from the fixture.
    token_estimate: String,
    /// True only when every case has status `"pass"`.
    pass: bool,
    /// Aggregate figures across all cases.
    totals: TokenSavingsTotals,
    /// Per-case results, in fixture order.
    cases: Vec<TokenSavingsCaseReport>,
}
1542
/// Estimate a token count from a byte count at four bytes per token,
/// rounding up.
fn estimated_tokens_from_bytes(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 4;
    bytes.div_ceil(BYTES_PER_TOKEN)
}
1546
/// Percentage of `raw_bytes` saved by an envelope of `envelope_bytes`.
///
/// Returns `0.0` when there is no raw payload or when the envelope is not
/// smaller than the raw payload, so the result is never negative.
fn savings_percent(raw_bytes: usize, envelope_bytes: usize) -> f64 {
    let nothing_saved = raw_bytes == 0 || envelope_bytes >= raw_bytes;
    if nothing_saved {
        return 0.0;
    }
    let saved_bytes = raw_bytes - envelope_bytes;
    (saved_bytes as f64 / raw_bytes as f64) * 100.0
}
1554
1555fn token_savings_expand_command(surface: &str, canonical: &str) -> String {
1556    let query = canonical.replace('_', " ");
1557    match surface {
1558        "explain" => format!(
1559            "tsift --envelope explain {} --budget normal",
1560            shell_quote(canonical)
1561        ),
1562        "session-review" => format!("tsift summarize {}", shell_quote(canonical)),
1563        "context-pack" => {
1564            "tsift --envelope context-pack <target> --test-input <test.log> --log-input <build.log> --budget normal"
1565                .to_string()
1566        }
1567        _ => format!(
1568            "tsift --envelope search {} --budget normal",
1569            shell_quote(&query)
1570        ),
1571    }
1572}
1573
1574fn token_savings_envelope_families(
1575    case: &TokenSavingsFixtureCase,
1576) -> Vec<TokenSavingsEnvelopeFamily> {
1577    case.tagpath_families
1578        .iter()
1579        .map(|family| {
1580            let key = format!("{}:{}:{}", case.surface, case.name, family.canonical);
1581            TokenSavingsEnvelopeFamily {
1582                handle: stable_handle("tfam", &key),
1583                tag_alias: family.canonical.replace('_', "/"),
1584                count: family.count,
1585                expand: token_savings_expand_command(&case.surface, &family.canonical),
1586            }
1587        })
1588        .collect()
1589}
1590
1591fn token_savings_context_pack_raw_bytes(inputs: &TokenSavingsContextPackInputs) -> Result<usize> {
1592    Ok(serde_json::to_vec(inputs)?.len())
1593}
1594
1595fn token_savings_session_review_raw_bytes(
1596    inputs: &TokenSavingsSessionReviewInputs,
1597) -> Result<usize> {
1598    Ok(serde_json::to_vec(inputs)?.len())
1599}
1600
1601fn token_savings_source_read_raw_bytes(inputs: &TokenSavingsSourceReadInputs) -> Result<usize> {
1602    Ok(serde_json::to_vec(&inputs.reads)?.len())
1603}
1604
1605fn token_savings_session_review_envelope(
1606    case: &TokenSavingsFixtureCase,
1607    inputs: &TokenSavingsSessionReviewInputs,
1608) -> Vec<TokenSavingsSessionReviewEnvelope<'static>> {
1609    let mut rows = vec![
1610        TokenSavingsSessionReviewEnvelope {
1611            section: "prompt_targets",
1612            handle: stable_handle("tsr", &format!("{}:prompt_targets", case.name)),
1613            count: inputs.prompt_targets.len(),
1614            expand: "tsift session-review <target> --json".to_string(),
1615        },
1616        TokenSavingsSessionReviewEnvelope {
1617            section: "sessions",
1618            handle: stable_handle("tsr", &format!("{}:sessions", case.name)),
1619            count: inputs.sessions.len(),
1620            expand: "tsift session-review <target> --json".to_string(),
1621        },
1622        TokenSavingsSessionReviewEnvelope {
1623            section: "commands",
1624            handle: stable_handle("tsr", &format!("{}:commands", case.name)),
1625            count: inputs.commands.len(),
1626            expand: "tsift session-digest --source auto --input <transcript> --json".to_string(),
1627        },
1628        TokenSavingsSessionReviewEnvelope {
1629            section: "files",
1630            handle: stable_handle("tsr", &format!("{}:files", case.name)),
1631            count: inputs.touched_files.len(),
1632            expand: "tsift session-review <target> --json".to_string(),
1633        },
1634        TokenSavingsSessionReviewEnvelope {
1635            section: "symbols",
1636            handle: stable_handle("tsr", &format!("{}:symbols", case.name)),
1637            count: inputs.touched_symbols.len(),
1638            expand: "tsift --envelope search <symbol> --budget normal".to_string(),
1639        },
1640        TokenSavingsSessionReviewEnvelope {
1641            section: "failures",
1642            handle: stable_handle("tsr", &format!("{}:failures", case.name)),
1643            count: inputs.failures.len(),
1644            expand: "tsift session-review <target> --json".to_string(),
1645        },
1646        TokenSavingsSessionReviewEnvelope {
1647            section: "guardrails",
1648            handle: stable_handle("tsr", &format!("{}:guardrails", case.name)),
1649            count: inputs.guardrails.len(),
1650            expand: "tsift session-cost --input <transcript> --json".to_string(),
1651        },
1652        TokenSavingsSessionReviewEnvelope {
1653            section: "largest_turns",
1654            handle: stable_handle("tsr", &format!("{}:largest_turns", case.name)),
1655            count: inputs.largest_turns.len(),
1656            expand: "tsift session-cost --input <transcript> --json".to_string(),
1657        },
1658    ];
1659    rows.retain(|row| row.count > 0);
1660    rows
1661}
1662
1663fn token_savings_context_pack_envelope(
1664    case: &TokenSavingsFixtureCase,
1665    inputs: &TokenSavingsContextPackInputs,
1666) -> Vec<TokenSavingsContextPackEnvelope<'static>> {
1667    let mut rows = vec![
1668        TokenSavingsContextPackEnvelope {
1669            section: "next_context",
1670            handle: stable_handle("tcp", &format!("{}:next_context", case.name)),
1671            count: inputs.next_context.len(),
1672            expand: "tsift session-review --next-context <target> --json".to_string(),
1673        },
1674        TokenSavingsContextPackEnvelope {
1675            section: "diff",
1676            handle: stable_handle("tcp", &format!("{}:diff", case.name)),
1677            count: inputs.diff.len(),
1678            expand: "tsift diff-digest . --json".to_string(),
1679        },
1680        TokenSavingsContextPackEnvelope {
1681            section: "test",
1682            handle: stable_handle("tcp", &format!("{}:test", case.name)),
1683            count: inputs.test.len(),
1684            expand: "tsift test-digest --path . < test.log".to_string(),
1685        },
1686        TokenSavingsContextPackEnvelope {
1687            section: "log",
1688            handle: stable_handle("tcp", &format!("{}:log", case.name)),
1689            count: inputs.log.len(),
1690            expand: "tsift log-digest --path . < build.log".to_string(),
1691        },
1692    ];
1693    rows.retain(|row| row.count > 0);
1694    rows
1695}
1696
1697fn token_savings_source_read_envelope(
1698    case: &TokenSavingsFixtureCase,
1699    inputs: &TokenSavingsSourceReadInputs,
1700) -> Result<Vec<TokenSavingsSourceReadEnvelope>> {
1701    inputs
1702        .reads
1703        .iter()
1704        .map(|read| {
1705            if read.envelope_lines == 0 {
1706                bail!(
1707                    "source-read fixture {} has an empty envelope window for {}",
1708                    case.name,
1709                    read.command
1710                );
1711            }
1712            let envelope_end = read
1713                .envelope_start
1714                .saturating_add(read.envelope_lines)
1715                .saturating_sub(1);
1716            for anchor in &read.required_line_anchors {
1717                if *anchor < read.envelope_start || *anchor > envelope_end {
1718                    bail!(
1719                        "source-read fixture {} hides required line anchor {} for {} outside {}-{}",
1720                        case.name,
1721                        anchor,
1722                        read.command,
1723                        read.envelope_start,
1724                        envelope_end
1725                    );
1726                }
1727            }
1728            Ok(TokenSavingsSourceReadEnvelope {
1729                handle: stable_handle("tsrc", &format!("{}:{}", case.name, read.command)),
1730                file: read.file.clone(),
1731                start: read.envelope_start,
1732                lines: read.envelope_lines,
1733                required_line_anchors: read.required_line_anchors.clone(),
1734                expand: format!(
1735                    "tsift --envelope source-read {} --start {} --lines {} --budget normal",
1736                    shell_quote(&read.file),
1737                    read.envelope_start,
1738                    read.envelope_lines
1739                ),
1740            })
1741        })
1742        .collect()
1743}
1744
1745fn build_token_savings_report(fixture: &TokenSavingsFixture) -> Result<TokenSavingsReport> {
1746    let mut cases = Vec::new();
1747    let mut total_raw_bytes = 0;
1748    let mut total_envelope_bytes = 0;
1749
1750    for case in &fixture.cases {
1751        let mut raw_bytes = serde_json::to_vec(&case.raw_symbols)?.len();
1752        let envelope = token_savings_envelope_families(case);
1753        let mut envelope_bytes = serde_json::to_vec(&envelope)?.len();
1754        if let Some(inputs) = &case.session_review_inputs {
1755            raw_bytes += token_savings_session_review_raw_bytes(inputs)?;
1756            envelope_bytes +=
1757                serde_json::to_vec(&token_savings_session_review_envelope(case, inputs))?.len();
1758        }
1759        if let Some(inputs) = &case.context_pack_inputs {
1760            raw_bytes += token_savings_context_pack_raw_bytes(inputs)?;
1761            envelope_bytes +=
1762                serde_json::to_vec(&token_savings_context_pack_envelope(case, inputs))?.len();
1763        }
1764        if let Some(inputs) = &case.source_read_inputs {
1765            raw_bytes += token_savings_source_read_raw_bytes(inputs)?;
1766            envelope_bytes +=
1767                serde_json::to_vec(&token_savings_source_read_envelope(case, inputs)?)?.len();
1768        }
1769        let byte_delta = raw_bytes.saturating_sub(envelope_bytes);
1770        let raw_estimated_tokens = estimated_tokens_from_bytes(raw_bytes);
1771        let envelope_estimated_tokens = estimated_tokens_from_bytes(envelope_bytes);
1772        let estimated_token_delta = raw_estimated_tokens.saturating_sub(envelope_estimated_tokens);
1773        let savings_percent = savings_percent(raw_bytes, envelope_bytes);
1774        let pass = savings_percent >= case.minimum_savings_percent;
1775
1776        total_raw_bytes += raw_bytes;
1777        total_envelope_bytes += envelope_bytes;
1778        cases.push(TokenSavingsCaseReport {
1779            name: case.name.clone(),
1780            surface: case.surface.clone(),
1781            raw_symbol_count: case.raw_symbols.len(),
1782            family_count: case.tagpath_families.len(),
1783            raw_bytes,
1784            envelope_bytes,
1785            byte_delta,
1786            raw_estimated_tokens,
1787            envelope_estimated_tokens,
1788            estimated_token_delta,
1789            savings_percent,
1790            minimum_savings_percent: case.minimum_savings_percent,
1791            status: if pass { "pass" } else { "fail" }.to_string(),
1792        });
1793    }
1794
1795    let total_byte_delta = total_raw_bytes.saturating_sub(total_envelope_bytes);
1796    let total_raw_estimated_tokens = estimated_tokens_from_bytes(total_raw_bytes);
1797    let total_envelope_estimated_tokens = estimated_tokens_from_bytes(total_envelope_bytes);
1798    let total_estimated_token_delta =
1799        total_raw_estimated_tokens.saturating_sub(total_envelope_estimated_tokens);
1800    let pass = cases.iter().all(|case| case.status == "pass");
1801
1802    Ok(TokenSavingsReport {
1803        schema_version: fixture.schema_version,
1804        token_estimate: fixture.token_estimate.clone(),
1805        pass,
1806        totals: TokenSavingsTotals {
1807            cases: cases.len(),
1808            raw_bytes: total_raw_bytes,
1809            envelope_bytes: total_envelope_bytes,
1810            byte_delta: total_byte_delta,
1811            raw_estimated_tokens: total_raw_estimated_tokens,
1812            envelope_estimated_tokens: total_envelope_estimated_tokens,
1813            estimated_token_delta: total_estimated_token_delta,
1814            savings_percent: savings_percent(total_raw_bytes, total_envelope_bytes),
1815        },
1816        cases,
1817    })
1818}
1819
1820fn print_token_savings_human(report: &TokenSavingsReport) {
1821    println!(
1822        "surface\tcase\traw_bytes\tenvelope_bytes\tbyte_delta\traw_tokens\tenvelope_tokens\ttoken_delta\tsavings_percent\tminimum_percent\tstatus"
1823    );
1824    for case in &report.cases {
1825        println!(
1826            "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{:.1}\t{:.1}\t{}",
1827            case.surface,
1828            case.name,
1829            case.raw_bytes,
1830            case.envelope_bytes,
1831            case.byte_delta,
1832            case.raw_estimated_tokens,
1833            case.envelope_estimated_tokens,
1834            case.estimated_token_delta,
1835            case.savings_percent,
1836            case.minimum_savings_percent,
1837            case.status
1838        );
1839    }
1840    println!(
1841        "total\tall\t{}\t{}\t{}\t{}\t{}\t{}\t{:.1}\t-\t{}",
1842        report.totals.raw_bytes,
1843        report.totals.envelope_bytes,
1844        report.totals.byte_delta,
1845        report.totals.raw_estimated_tokens,
1846        report.totals.envelope_estimated_tokens,
1847        report.totals.estimated_token_delta,
1848        report.totals.savings_percent,
1849        if report.pass { "pass" } else { "fail" }
1850    );
1851}
1852
1853fn cmd_token_savings(fixture_path: &Path, fail_under: bool, format: OutputFormat) -> Result<()> {
1854    let fixture_body = fs::read_to_string(fixture_path)
1855        .with_context(|| format!("reading token-savings fixture: {}", fixture_path.display()))?;
1856    let fixture: TokenSavingsFixture = serde_json::from_str(&fixture_body)
1857        .with_context(|| format!("parsing token-savings fixture: {}", fixture_path.display()))?;
1858    let report = build_token_savings_report(&fixture)?;
1859
1860    if format.json_output {
1861        print_json_or_envelope(
1862            &report,
1863            &format,
1864            "token-savings",
1865            "report",
1866            ToolEnvelopeSummary {
1867                text: "token-savings report".to_string(),
1868                metrics: vec![
1869                    envelope_metric("cases", report.totals.cases),
1870                    envelope_metric("raw_tokens", report.totals.raw_estimated_tokens),
1871                    envelope_metric("envelope_tokens", report.totals.envelope_estimated_tokens),
1872                    envelope_metric("token_delta", report.totals.estimated_token_delta),
1873                    envelope_metric(
1874                        "savings_percent",
1875                        format!("{:.1}", report.totals.savings_percent),
1876                    ),
1877                ],
1878            },
1879            false,
1880            vec![],
1881        )?;
1882    } else {
1883        print_token_savings_human(&report);
1884    }
1885
1886    if fail_under && !report.pass {
1887        bail!("token-savings threshold failed");
1888    }
1889    Ok(())
1890}
1891
1892fn persist_transcript_artifact(
1893    root: &Path,
1894    prefix: &str,
1895    suffix: &str,
1896    key: &str,
1897    body: &str,
1898    expand: String,
1899) -> Result<TranscriptArtifactRef> {
1900    let handle = stable_handle(prefix, key);
1901    let artifacts_dir = root.join(".tsift/artifacts");
1902    fs::create_dir_all(&artifacts_dir).with_context(|| {
1903        format!(
1904            "creating transcript artifacts dir: {}",
1905            artifacts_dir.display()
1906        )
1907    })?;
1908    let file_name = format!("{handle}.{suffix}");
1909    let artifact_path = artifacts_dir.join(file_name);
1910    fs::write(&artifact_path, body)
1911        .with_context(|| format!("writing transcript artifact: {}", artifact_path.display()))?;
1912    let rel_path = relativize_pathbuf(&artifact_path, root);
1913    Ok(TranscriptArtifactRef {
1914        handle,
1915        path: rel_path.display().to_string(),
1916        bytes: body.len(),
1917        lines: body.lines().count(),
1918        expand,
1919    })
1920}
1921
/// Maps a long JSON field name to its abbreviated ("terse") form.
///
/// Names without an abbreviation are returned unchanged. Two distinct long
/// names, `success` and `healthy`, share the short form `ok`, and `refs`
/// maps to itself.
fn terse_key(key: &str) -> &str {
    match key {
        "name" => "n",
        "kind" => "k",
        "file" => "f",
        "line" => "l",
        "path" => "p",
        "from" => "fr",
        "type" => "ty",
        "text" => "tx",
        "new" => "nw",
        "run" => "r",
        "use" => "u",
        "score" => "sc",
        "language" => "la",
        "status" => "st",
        "state" => "stt",
        "error" => "err",
        "errors" => "ers",
        "hops" => "hp",
        "tags" => "tg",
        "model" => "ml",
        "skill" => "sk",
        "count" => "ct",
        "total" => "tot",
        "column" => "col",
        "description" => "dsc",
        "end_line" => "el",
        "signature" => "sig",
        "parent_module" => "pm",
        "visibility" => "vis",
        "match_type" => "mt",
        "caller_file" => "cf",
        "caller_name" => "cn",
        "caller_line" => "cl",
        "callee_name" => "en",
        "call_site_line" => "csl",
        "members" => "m",
        "refs" => "refs",
        "role" => "rl",
        "peer" => "pr",
        "modularity" => "q",
        "modularity_contribution" => "mc",
        "iterations" => "it",
        "node_count" => "nc",
        "edge_count" => "ec",
        "community_count" => "cc",
        "communities" => "cms",
        "community" => "cm",
        "community_diagnostics" => "cd",
        "cache_hit" => "cah",
        "tagpath_state" => "tps",
        "tagpath_stale_reason" => "tsr",
        "annotated_community_count" => "acc",
        "annotated_member_count" => "amc",
        "ambiguous_member_count" => "ambc",
        "ambiguous_members" => "amb",
        "candidate_count" => "cand",
        "tagpath_candidate_count" => "tcand",
        "evidence" => "ev",
        "chosen_file" => "chf",
        "symbol" => "s",
        "symbols" => "sy",
        "definitions" => "df",
        "callers" => "crs",
        "callees" => "ces",
        "total_tracked" => "tt",
        "modified" => "md",
        "deleted" => "dl",
        "unchanged" => "uc",
        "changes" => "ch",
        "prune_stats" => "ps",
        "hits" => "h",
        "rank" => "rk",
        "snippet" => "sn",
        "confidence" => "co",
        "index" => "ix",
        "summaries" => "sms",
        "recommendations" => "rec",
        "total_files" => "tf",
        "stale_files" => "sf",
        "last_indexed_secs_ago" => "age",
        "cached_files" => "caf",
        "total_indexed_files" => "tif",
        "coverage_pct" => "cov",
        "symbol_name" => "syn",
        "file_path" => "fp",
        "content_hash" => "hsh",
        "summary" => "sum",
        "tool" => "tl",
        "view" => "vw",
        "truncated" => "tr",
        "follow_up" => "fu",
        "report" => "rp",
        "metrics" => "ms",
        "label" => "lb",
        "value" => "v",
        "command" => "cmd",
        "exit_code" => "xc",
        "success" => "ok",
        "artifact" => "art",
        "digest" => "dg",
        "bytes" => "bt",
        "lines" => "lns",
        "expand" => "xp",
        "entities" => "ent",
        "relationships" => "rel",
        "concept_labels" => "cls",
        "extracted_at" => "at",
        "tokens_input" => "ti",
        "tokens_output" => "tout",
        "total_summaries" => "ts",
        "stale_count" => "stc",
        "total_tokens_input" => "tti",
        "total_tokens_output" => "tto",
        "estimated_tokens_saved" => "ets",
        "files_processed" => "fps",
        "symbols_extracted" => "se",
        "skills_dir" => "sd",
        // Shares its short form with "success" above.
        "healthy" => "ok",
        "broken" => "brk",
        "skills" => "sks",
        "manifest_diffs" => "mdf",
        "similar_pairs" => "sim",
        "usage" => "usg",
        "cleanup" => "cln",
        "has_skill_md" => "hsm",
        "is_symlink" => "isl",
        "issues" => "iss",
        "invocation_count" => "inv",
        "reasons" => "rsn",
        "token_estimate" => "te",
        "skill_a" => "sa",
        "skill_b" => "sb",
        "desc_a" => "da",
        "desc_b" => "db",
        "annotations" => "ann",
        "entity" => "ety",
        "suggestion" => "sug",
        "columns" => "cols",
        "row_count" => "rc",
        "notnull" => "nn",
        "default_value" => "dv",
        "replace_all" => "ra",
        // No abbreviation: hand the caller's key back untouched.
        _ => key,
    }
}
2069
2070fn terse_transform(val: serde_json::Value) -> serde_json::Value {
2071    match val {
2072        serde_json::Value::Object(map) => {
2073            let mut new_map = serde_json::Map::new();
2074            for (k, v) in map {
2075                new_map.insert(terse_key(&k).to_string(), terse_transform(v));
2076            }
2077            serde_json::Value::Object(new_map)
2078        }
2079        serde_json::Value::Array(arr) => {
2080            serde_json::Value::Array(arr.into_iter().map(terse_transform).collect())
2081        }
2082        other => other,
2083    }
2084}
2085
2086fn terse_schema_for(val: &serde_json::Value) -> serde_json::Value {
2087    let mut keys = HashSet::new();
2088    collect_terse_keys(val, &mut keys);
2089    let mut schema = serde_json::Map::new();
2090    for (long, short) in TERSE_PAIRS {
2091        if keys.contains(*short) {
2092            schema.insert(
2093                short.to_string(),
2094                serde_json::Value::String(long.to_string()),
2095            );
2096        }
2097    }
2098    serde_json::Value::Object(schema)
2099}
2100
2101fn collect_terse_keys(val: &serde_json::Value, keys: &mut HashSet<String>) {
2102    match val {
2103        serde_json::Value::Object(map) => {
2104            for (k, v) in map {
2105                keys.insert(k.clone());
2106                collect_terse_keys(v, keys);
2107            }
2108        }
2109        serde_json::Value::Array(arr) => {
2110            for v in arr {
2111                collect_terse_keys(v, keys);
2112            }
2113        }
2114        _ => {}
2115    }
2116}
2117
2118fn schema_transform(val: serde_json::Value) -> serde_json::Value {
2119    match val {
2120        serde_json::Value::Array(arr) if arr.len() >= 2 => {
2121            if let Some(cols) = homogeneous_keys(&arr) {
2122                let rows: Vec<serde_json::Value> = arr
2123                    .into_iter()
2124                    .map(|item| {
2125                        if let serde_json::Value::Object(map) = item {
2126                            let vals: Vec<serde_json::Value> = cols
2127                                .iter()
2128                                .map(|c| map.get(c).cloned().unwrap_or(serde_json::Value::Null))
2129                                .collect();
2130                            serde_json::Value::Array(vals)
2131                        } else {
2132                            item
2133                        }
2134                    })
2135                    .collect();
2136                let col_vals: Vec<serde_json::Value> =
2137                    cols.into_iter().map(serde_json::Value::String).collect();
2138                serde_json::json!({"_c": col_vals, "_r": rows})
2139            } else {
2140                serde_json::Value::Array(arr.into_iter().map(schema_transform).collect())
2141            }
2142        }
2143        serde_json::Value::Array(arr) => {
2144            serde_json::Value::Array(arr.into_iter().map(schema_transform).collect())
2145        }
2146        serde_json::Value::Object(map) => {
2147            let new_map: serde_json::Map<String, serde_json::Value> = map
2148                .into_iter()
2149                .map(|(k, v)| (k, schema_transform(v)))
2150                .collect();
2151            serde_json::Value::Object(new_map)
2152        }
2153        other => other,
2154    }
2155}
2156
2157fn homogeneous_keys(arr: &[serde_json::Value]) -> Option<Vec<String>> {
2158    let first = arr.first()?.as_object()?;
2159    let keys: Vec<String> = first.keys().cloned().collect();
2160    for item in &arr[1..] {
2161        let obj = item.as_object()?;
2162        if obj.len() != keys.len() {
2163            return None;
2164        }
2165        for k in &keys {
2166            if !obj.contains_key(k) {
2167                return None;
2168            }
2169        }
2170    }
2171    Some(keys)
2172}
2173
/// `(long, short)` field-name pairs used to build the terse-key legend.
///
/// Entry order is significant to readers that iterate the table. The short
/// form `ok` appears twice (`success` and `healthy`), and `refs` maps to
/// itself.
const TERSE_PAIRS: &[(&str, &str)] = &[
    ("name", "n"),
    ("kind", "k"),
    ("file", "f"),
    ("line", "l"),
    ("path", "p"),
    ("from", "fr"),
    ("type", "ty"),
    ("text", "tx"),
    ("new", "nw"),
    ("run", "r"),
    ("use", "u"),
    ("score", "sc"),
    ("language", "la"),
    ("status", "st"),
    ("state", "stt"),
    ("error", "err"),
    ("errors", "ers"),
    ("hops", "hp"),
    ("tags", "tg"),
    ("model", "ml"),
    ("skill", "sk"),
    ("count", "ct"),
    ("total", "tot"),
    ("column", "col"),
    ("description", "dsc"),
    ("end_line", "el"),
    ("signature", "sig"),
    ("parent_module", "pm"),
    ("visibility", "vis"),
    ("match_type", "mt"),
    ("caller_file", "cf"),
    ("caller_name", "cn"),
    ("caller_line", "cl"),
    ("callee_name", "en"),
    ("call_site_line", "csl"),
    ("members", "m"),
    ("refs", "refs"),
    ("role", "rl"),
    ("peer", "pr"),
    ("modularity", "q"),
    ("modularity_contribution", "mc"),
    ("iterations", "it"),
    ("node_count", "nc"),
    ("edge_count", "ec"),
    ("community_count", "cc"),
    ("communities", "cms"),
    ("community", "cm"),
    ("community_diagnostics", "cd"),
    ("cache_hit", "cah"),
    ("tagpath_state", "tps"),
    ("tagpath_stale_reason", "tsr"),
    ("annotated_community_count", "acc"),
    ("annotated_member_count", "amc"),
    ("ambiguous_member_count", "ambc"),
    ("ambiguous_members", "amb"),
    ("candidate_count", "cand"),
    ("tagpath_candidate_count", "tcand"),
    ("evidence", "ev"),
    ("chosen_file", "chf"),
    ("symbol", "s"),
    ("symbols", "sy"),
    ("definitions", "df"),
    ("callers", "crs"),
    ("callees", "ces"),
    ("total_tracked", "tt"),
    ("modified", "md"),
    ("deleted", "dl"),
    ("unchanged", "uc"),
    ("changes", "ch"),
    ("prune_stats", "ps"),
    ("hits", "h"),
    ("rank", "rk"),
    ("snippet", "sn"),
    ("confidence", "co"),
    ("index", "ix"),
    ("summaries", "sms"),
    ("recommendations", "rec"),
    ("total_files", "tf"),
    ("stale_files", "sf"),
    ("last_indexed_secs_ago", "age"),
    ("cached_files", "caf"),
    ("total_indexed_files", "tif"),
    ("coverage_pct", "cov"),
    ("symbol_name", "syn"),
    ("file_path", "fp"),
    ("content_hash", "hsh"),
    ("summary", "sum"),
    ("tool", "tl"),
    ("view", "vw"),
    ("truncated", "tr"),
    ("follow_up", "fu"),
    ("report", "rp"),
    ("metrics", "ms"),
    ("label", "lb"),
    ("value", "v"),
    ("command", "cmd"),
    ("exit_code", "xc"),
    ("success", "ok"),
    ("artifact", "art"),
    ("digest", "dg"),
    ("bytes", "bt"),
    ("lines", "lns"),
    ("expand", "xp"),
    ("entities", "ent"),
    ("relationships", "rel"),
    ("concept_labels", "cls"),
    ("extracted_at", "at"),
    ("tokens_input", "ti"),
    ("tokens_output", "tout"),
    ("total_summaries", "ts"),
    ("stale_count", "stc"),
    ("total_tokens_input", "tti"),
    ("total_tokens_output", "tto"),
    ("estimated_tokens_saved", "ets"),
    ("files_processed", "fps"),
    ("symbols_extracted", "se"),
    ("skills_dir", "sd"),
    // Shares its short form with ("success", "ok") above.
    ("healthy", "ok"),
    ("broken", "brk"),
    ("skills", "sks"),
    ("manifest_diffs", "mdf"),
    ("similar_pairs", "sim"),
    ("usage", "usg"),
    ("cleanup", "cln"),
    ("has_skill_md", "hsm"),
    ("is_symlink", "isl"),
    ("issues", "iss"),
    ("invocation_count", "inv"),
    ("reasons", "rsn"),
    ("token_estimate", "te"),
    ("skill_a", "sa"),
    ("skill_b", "sb"),
    ("desc_a", "da"),
    ("desc_b", "db"),
    ("annotations", "ann"),
    ("entity", "ety"),
    ("suggestion", "sug"),
    ("columns", "cols"),
    ("row_count", "rc"),
    ("notnull", "nn"),
    ("default_value", "dv"),
    ("replace_all", "ra"),
];
2318
/// Strips the `root` directory prefix from `path` using plain string matching.
///
/// The prefix is `root` with trailing `/` characters trimmed, followed by one
/// `/`. When `path` does not start with that prefix it is returned unchanged,
/// which includes the case where `path` equals `root` itself.
pub(crate) fn relativize(path: &str, root: &std::path::Path) -> String {
    let mut prefix = root.to_string_lossy().trim_end_matches('/').to_owned();
    prefix.push('/');
    match path.strip_prefix(prefix.as_str()) {
        Some(rest) => rest.to_owned(),
        None => path.to_owned(),
    }
}
2324
2325fn transcript_artifact_root(path: &Path) -> Result<PathBuf> {
2326    let canonical = path
2327        .canonicalize()
2328        .with_context(|| format!("canonicalizing {}", path.display()))?;
2329    let start = if canonical.is_dir() {
2330        canonical.clone()
2331    } else {
2332        canonical
2333            .parent()
2334            .map(Path::to_path_buf)
2335            .unwrap_or_else(|| canonical.clone())
2336    };
2337
2338    for ancestor in start.ancestors() {
2339        if ancestor.join(".git").exists() || ancestor.join(".gitmodules").is_file() {
2340            return Ok(ancestor.to_path_buf());
2341        }
2342    }
2343
2344    Ok(start)
2345}
2346
/// Returns `path` relative to `root`, or an owned copy of `path` unchanged
/// when `root` is not a prefix of it.
fn relativize_pathbuf(path: &std::path::Path, root: &std::path::Path) -> PathBuf {
    match path.strip_prefix(root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}
2352
2353pub(crate) fn relativize_edges(edges: &mut [index::StoredEdge], root: &std::path::Path) {
2354    for edge in edges {
2355        edge.caller_file = relativize(&edge.caller_file, root);
2356    }
2357}
2358
2359pub(crate) fn relativize_symbols(symbols: &mut [index::StoredSymbol], root: &std::path::Path) {
2360    for sym in symbols {
2361        sym.file = relativize(&sym.file, root);
2362    }
2363}
2364
2365pub(crate) fn relativize_symbol_hits(hits: &mut [index::SymbolHit], root: &std::path::Path) {
2366    for hit in hits {
2367        hit.file = relativize(&hit.file, root);
2368    }
2369}
2370
2371const COMMUNITY_DETECTION_CACHE_VERSION: &str = "community-detection-cache-v1";
2372
2373static COMMUNITY_DETECTION_CACHE: OnceLock<Mutex<BTreeMap<String, graph::CommunityResult>>> =
2374    OnceLock::new();
2375
2376#[derive(Debug, Clone, Serialize)]
2377struct CommunityDetectionDiagnostics {
2378    cache_hit: bool,
2379    edge_count: usize,
2380    iterations: usize,
2381    tagpath_state: String,
2382    tagpath_readiness: GraphEffectivenessReadiness,
2383    #[serde(skip_serializing_if = "Option::is_none")]
2384    tagpath_stale_reason: Option<String>,
2385    annotated_community_count: usize,
2386    annotated_member_count: usize,
2387    ambiguous_member_count: usize,
2388    #[serde(skip_serializing_if = "Vec::is_empty")]
2389    ambiguous_members: Vec<CommunityMemberAmbiguityDiagnostic>,
2390}
2391
2392#[derive(Debug, Clone)]
2393pub(crate) struct CommunityDetectionReport {
2394    result: graph::CommunityResult,
2395    diagnostics: CommunityDetectionDiagnostics,
2396}
2397
/// The tagpath contribution to a community-detection cache key.
#[derive(Debug, Clone)]
struct CommunityTagpathCachePart {
    /// State label: `fresh`, `disabled`, `stale` or `missing`.
    state: String,
    /// Staleness reason; only set for the `stale` state.
    reason: Option<String>,
    /// Text mixed into the cache key for this tagpath state.
    key: String,
}
2404
2405#[derive(Debug, Clone, Serialize, Deserialize)]
2406struct CommunityDetectionCacheEntry {
2407    version: String,
2408    key: String,
2409    result: graph::CommunityResult,
2410}
2411
2412fn community_detection_cache() -> &'static Mutex<BTreeMap<String, graph::CommunityResult>> {
2413    COMMUNITY_DETECTION_CACHE.get_or_init(|| Mutex::new(BTreeMap::new()))
2414}
2415
2416pub(crate) fn community_tagpath_cache_part_for_loaded(
2417    adapter: &tagpath_adapter::TagpathAdapter,
2418) -> CommunityTagpathCachePart {
2419    let index_path = tagpath::index::index_path(&adapter.project_root);
2420    let index_hash = fs::read(&index_path)
2421        .map(|bytes| hash_bytes_hex(&bytes))
2422        .unwrap_or_else(|err| hash_bytes_hex(format!("fresh-index-unreadable:{err:#}").as_bytes()));
2423    CommunityTagpathCachePart {
2424        state: "fresh".to_string(),
2425        reason: None,
2426        key: format!("fresh:{index_hash}"),
2427    }
2428}
2429
2430pub(crate) fn community_tagpath_cache_part(
2431    root: &std::path::Path,
2432    opts: &TagpathSearchOpts,
2433) -> Result<CommunityTagpathCachePart> {
2434    if opts.no_tagpath {
2435        return Ok(CommunityTagpathCachePart {
2436            state: "disabled".to_string(),
2437            reason: None,
2438            key: "disabled".to_string(),
2439        });
2440    }
2441    match tagpath_adapter::try_load(root) {
2442        tagpath_adapter::LoadResult::Loaded(adapter) => {
2443            Ok(community_tagpath_cache_part_for_loaded(&adapter))
2444        }
2445        tagpath_adapter::LoadResult::Stale { reason, .. } => {
2446            if opts.strict {
2447                anyhow::bail!(
2448                    "tagpath index is stale (reason={reason}); rerun `tagpath index --update` or drop --tagpath-strict"
2449                );
2450            }
2451            Ok(CommunityTagpathCachePart {
2452                state: "stale".to_string(),
2453                key: format!("stale:{reason}"),
2454                reason: Some(reason),
2455            })
2456        }
2457        tagpath_adapter::LoadResult::Missing => Ok(CommunityTagpathCachePart {
2458            state: "missing".to_string(),
2459            reason: None,
2460            key: "missing".to_string(),
2461        }),
2462    }
2463}
2464
2465fn graph_effectiveness_ready(reason: impl Into<String>) -> GraphEffectivenessReadiness {
2466    GraphEffectivenessReadiness {
2467        status: "ready".to_string(),
2468        fail_closed: false,
2469        reason: reason.into(),
2470        diagnostics: Vec::new(),
2471        next_commands: Vec::new(),
2472    }
2473}
2474
2475fn graph_effectiveness_blocked(
2476    reason: impl Into<String>,
2477    diagnostics: Vec<String>,
2478    next_commands: Vec<String>,
2479) -> GraphEffectivenessReadiness {
2480    GraphEffectivenessReadiness {
2481        status: "blocked".to_string(),
2482        fail_closed: true,
2483        reason: reason.into(),
2484        diagnostics,
2485        next_commands,
2486    }
2487}
2488
2489fn tagpath_index_update_command(root: &std::path::Path) -> String {
2490    format!(
2491        "cd {} && tagpath index --update",
2492        shell_quote(root.to_string_lossy().as_ref())
2493    )
2494}
2495
2496fn graph_tagpath_readiness(
2497    root: &std::path::Path,
2498    tagpath: &CommunityTagpathCachePart,
2499) -> GraphEffectivenessReadiness {
2500    match tagpath.state.as_str() {
2501        "fresh" => graph_effectiveness_ready("tagpath_handles_available"),
2502        "disabled" => GraphEffectivenessReadiness {
2503            status: "disabled".to_string(),
2504            fail_closed: false,
2505            reason: "tagpath_lookup_disabled".to_string(),
2506            diagnostics: Vec::new(),
2507            next_commands: Vec::new(),
2508        },
2509        "stale" => graph_effectiveness_blocked(
2510            "tagpath_state_stale",
2511            vec![format!(
2512                "tagpath_state=stale{}: community members may miss stable tagpath_handle citations; rebuild the tagpath index before relying on handle coverage",
2513                tagpath
2514                    .reason
2515                    .as_ref()
2516                    .map(|reason| format!(" (reason={reason})"))
2517                    .unwrap_or_default()
2518            )],
2519            vec![tagpath_index_update_command(root)],
2520        ),
2521        "missing" => graph_effectiveness_blocked(
2522            "tagpath_state_missing",
2523            vec![format!(
2524                "tagpath_state=missing: community members cannot emit stable tagpath_handle citations; create .naming.toml if needed, then run tagpath indexing from {}",
2525                root.display()
2526            )],
2527            vec![tagpath_index_update_command(root)],
2528        ),
2529        state => graph_effectiveness_blocked(
2530            format!("tagpath_state_{state}"),
2531            vec![format!(
2532                "tagpath_state={state}: community tagpath_handle readiness is unknown"
2533            )],
2534            vec![tagpath_index_update_command(root)],
2535        ),
2536    }
2537}
2538
2539fn community_graph_watermark(db: &index::IndexDb) -> Result<String> {
2540    let source_snapshot = db.source_snapshot_parts()?;
2541    let edge_rows = db.edge_count()?;
2542    let symbol_rows = db.symbol_count()?;
2543    content_hash(&serde_json::json!({
2544        "source_snapshot": source_snapshot,
2545        "edge_rows": edge_rows,
2546        "symbol_rows": symbol_rows,
2547    }))
2548}
2549
2550fn community_detection_cache_key(
2551    root: &std::path::Path,
2552    scope: Option<&str>,
2553    graph_watermark: &str,
2554    tagpath: &CommunityTagpathCachePart,
2555) -> Result<String> {
2556    content_hash(&serde_json::json!({
2557        "version": COMMUNITY_DETECTION_CACHE_VERSION,
2558        "root": root.display().to_string(),
2559        "scope": scope.unwrap_or("root"),
2560        "graph_watermark": graph_watermark,
2561        "tagpath": tagpath.key,
2562    }))
2563}
2564
/// Returns the on-disk location of a cache entry:
/// `<root>/.tsift/community-cache/<scope>/<key>.json`, where a missing scope
/// is stored under `root`.
fn community_detection_cache_path(
    root: &std::path::Path,
    scope: Option<&str>,
    key: &str,
) -> PathBuf {
    let mut location = root.join(".tsift/community-cache");
    location.push(scope.unwrap_or("root"));
    location.push(format!("{key}.json"));
    location
}
2574
2575fn read_community_detection_cache(
2576    root: &std::path::Path,
2577    scope: Option<&str>,
2578    key: &str,
2579) -> Option<graph::CommunityResult> {
2580    let path = community_detection_cache_path(root, scope, key);
2581    let bytes = fs::read(path).ok()?;
2582    let entry: CommunityDetectionCacheEntry = serde_json::from_slice(&bytes).ok()?;
2583    if entry.version == COMMUNITY_DETECTION_CACHE_VERSION && entry.key == key {
2584        Some(entry.result)
2585    } else {
2586        None
2587    }
2588}
2589
2590fn write_community_detection_cache(
2591    root: &std::path::Path,
2592    scope: Option<&str>,
2593    key: &str,
2594    result: &graph::CommunityResult,
2595) {
2596    let path = community_detection_cache_path(root, scope, key);
2597    let Some(parent) = path.parent() else {
2598        return;
2599    };
2600    if fs::create_dir_all(parent).is_err() {
2601        return;
2602    }
2603    let entry = CommunityDetectionCacheEntry {
2604        version: COMMUNITY_DETECTION_CACHE_VERSION.to_string(),
2605        key: key.to_string(),
2606        result: result.clone(),
2607    };
2608    if let Ok(bytes) = serde_json::to_vec(&entry) {
2609        let _ = fs::write(path, bytes);
2610    }
2611}
2612
2613fn community_detection_diagnostics(
2614    cache_hit: bool,
2615    result: &graph::CommunityResult,
2616    tagpath: &CommunityTagpathCachePart,
2617    tagpath_root: &std::path::Path,
2618) -> CommunityDetectionDiagnostics {
2619    CommunityDetectionDiagnostics {
2620        cache_hit,
2621        edge_count: result.edge_count,
2622        iterations: result.iterations,
2623        tagpath_state: tagpath.state.clone(),
2624        tagpath_readiness: graph_tagpath_readiness(tagpath_root, tagpath),
2625        tagpath_stale_reason: tagpath.reason.clone(),
2626        annotated_community_count: 0,
2627        annotated_member_count: 0,
2628        ambiguous_member_count: 0,
2629        ambiguous_members: Vec::new(),
2630    }
2631}
2632
2633pub(crate) fn update_community_annotation_diagnostics(
2634    diagnostics: &mut CommunityDetectionDiagnostics,
2635    communities: &[graph::Community],
2636    annotation: Option<&TagpathAnnotationDiagnostic>,
2637) {
2638    diagnostics.annotated_community_count = communities
2639        .iter()
2640        .filter(|community| {
2641            community
2642                .members
2643                .iter()
2644                .any(|member| member.tagpath_handle.is_some())
2645        })
2646        .count();
2647    diagnostics.annotated_member_count = communities
2648        .iter()
2649        .flat_map(|community| community.members.iter())
2650        .filter(|member| member.tagpath_handle.is_some())
2651        .count();
2652    if let Some(annotation) = annotation {
2653        diagnostics.ambiguous_member_count = annotation.ambiguous_members.len();
2654        diagnostics.ambiguous_members = annotation.ambiguous_members.clone();
2655    } else {
2656        diagnostics.ambiguous_member_count = 0;
2657        diagnostics.ambiguous_members.clear();
2658    }
2659}
2660
2661pub(crate) fn detect_communities_cached(
2662    db: &index::IndexDb,
2663    root: &std::path::Path,
2664    scope: Option<&str>,
2665    tagpath: &CommunityTagpathCachePart,
2666    tagpath_root: &std::path::Path,
2667) -> Result<CommunityDetectionReport> {
2668    let graph_watermark = community_graph_watermark(db)?;
2669    let cache_key = community_detection_cache_key(root, scope, &graph_watermark, tagpath)?;
2670
2671    if let Some(result) = community_detection_cache()
2672        .lock()
2673        .ok()
2674        .and_then(|cache| cache.get(&cache_key).cloned())
2675    {
2676        return Ok(CommunityDetectionReport {
2677            diagnostics: community_detection_diagnostics(true, &result, tagpath, tagpath_root),
2678            result,
2679        });
2680    }
2681
2682    if let Some(result) = read_community_detection_cache(root, scope, &cache_key) {
2683        if let Ok(mut cache) = community_detection_cache().lock() {
2684            cache.insert(cache_key.clone(), result.clone());
2685        }
2686        return Ok(CommunityDetectionReport {
2687            diagnostics: community_detection_diagnostics(true, &result, tagpath, tagpath_root),
2688            result,
2689        });
2690    }
2691
2692    let edges = db.all_edges()?;
2693    let result = graph::detect_communities(&edges);
2694    write_community_detection_cache(root, scope, &cache_key, &result);
2695    if let Ok(mut cache) = community_detection_cache().lock() {
2696        cache.insert(cache_key, result.clone());
2697    }
2698    Ok(CommunityDetectionReport {
2699        diagnostics: community_detection_diagnostics(false, &result, tagpath, tagpath_root),
2700        result,
2701    })
2702}
2703
/// Resolves an index file entry to a path: absolute entries are returned as
/// they are, relative entries are joined onto `root`.
fn index_file_abs(file: &str, root: &std::path::Path) -> std::path::PathBuf {
    // `Path::join` replaces the base entirely when the argument is absolute,
    // so a single join covers both the absolute and the relative case.
    root.join(file)
}
2711
/// Normalizes an index file entry into a lookup key.
///
/// Absolute entries under `root` have the root stripped; every other entry is
/// kept as given. Backslashes in the result are replaced with `/`.
fn index_file_key(file: &str, root: &std::path::Path) -> String {
    let path = std::path::Path::new(file);
    // Relative entries are never stripped, even if they start with `root`.
    let relative = match path.strip_prefix(root) {
        Ok(rest) if path.is_absolute() => rest,
        _ => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
2721
2722fn tagpath_handle_for_index_file(
2723    file: &str,
2724    name: &str,
2725    root: &std::path::Path,
2726    adapter: &tagpath_adapter::TagpathAdapter,
2727) -> Option<String> {
2728    adapter.handle_for_member(&index_file_abs(file, root), name)
2729}
2730
/// A symbol row for which a tagpath handle was found.
#[derive(Debug, Clone)]
struct TagpathHandleCandidate {
    /// Normalized file key of the symbol row.
    file: String,
    /// Line copied from the symbol row.
    line: i64,
    /// Tagpath handle resolved for the symbol in that file.
    handle: String,
}
2737
2738fn tagpath_handle_candidates_for_symbol_rows(
2739    name: &str,
2740    syms: &[index::StoredSymbol],
2741    root: &std::path::Path,
2742    adapter: &tagpath_adapter::TagpathAdapter,
2743) -> Vec<TagpathHandleCandidate> {
2744    syms.iter()
2745        .filter_map(|sym| {
2746            let handle = tagpath_handle_for_index_file(&sym.file, name, root, adapter)?;
2747            Some(TagpathHandleCandidate {
2748                file: index_file_key(&sym.file, root),
2749                line: sym.line,
2750                handle,
2751            })
2752        })
2753        .collect()
2754}
2755
2756pub(crate) fn file_communities_from_callers(
2757    db: &index::IndexDb,
2758    root: &std::path::Path,
2759    scope: Option<&str>,
2760    tagpath: &CommunityTagpathCachePart,
2761) -> Result<std::collections::HashMap<String, std::collections::HashSet<usize>>> {
2762    let community_report = detect_communities_cached(db, root, scope, tagpath, root)?;
2763    if community_report.result.communities.is_empty() {
2764        return Ok(std::collections::HashMap::new());
2765    }
2766    let mut community_by_symbol = std::collections::HashMap::new();
2767    for community in community_report.result.communities {
2768        for member in community.members {
2769            community_by_symbol.insert(member.name, community.id);
2770        }
2771    }
2772
2773    let mut communities_by_file: std::collections::HashMap<
2774        String,
2775        std::collections::HashSet<usize>,
2776    > = std::collections::HashMap::new();
2777    for sym in db.all_symbols()? {
2778        if let Some(community_id) = community_by_symbol.get(&sym.name) {
2779            communities_by_file
2780                .entry(index_file_key(&sym.file, root))
2781                .or_default()
2782                .insert(*community_id);
2783        }
2784    }
2785    for edge in db.all_stored_edges()? {
2786        if let Some(community_id) = community_by_symbol.get(&edge.caller_name) {
2787            communities_by_file
2788                .entry(index_file_key(&edge.caller_file, root))
2789                .or_default()
2790                .insert(*community_id);
2791        }
2792    }
2793    Ok(communities_by_file)
2794}
2795
2796pub(crate) fn resolve_tagpath_handle_for_callee_edge(
2797    edge: &index::StoredEdge,
2798    db: &index::IndexDb,
2799    root: &std::path::Path,
2800    adapter: &tagpath_adapter::TagpathAdapter,
2801    communities_by_file: &std::collections::HashMap<String, std::collections::HashSet<usize>>,
2802) -> Option<String> {
2803    let syms = db.symbol_info(&edge.callee_name).ok()?;
2804    let candidates =
2805        tagpath_handle_candidates_for_symbol_rows(&edge.callee_name, &syms, root, adapter);
2806    let caller_file = index_file_key(&edge.caller_file, root);
2807
2808    if let Some(candidate) = candidates
2809        .iter()
2810        .find(|candidate| candidate.file == caller_file)
2811    {
2812        return Some(candidate.handle.clone());
2813    }
2814
2815    if let Some(caller_communities) = communities_by_file.get(&caller_file) {
2816        for candidate in &candidates {
2817            if let Some(candidate_communities) = communities_by_file.get(&candidate.file)
2818                && !caller_communities.is_disjoint(candidate_communities)
2819            {
2820                return Some(candidate.handle.clone());
2821            }
2822        }
2823    }
2824
2825    candidates.first().map(|candidate| candidate.handle.clone())
2826}
2827
2828fn push_bounded_community_member_ref(
2829    refs_by_member: &mut HashMap<(usize, String), Vec<graph::CommunityMemberRef>>,
2830    community_id: usize,
2831    name: &str,
2832    reference: graph::CommunityMemberRef,
2833) {
2834    let refs = refs_by_member
2835        .entry((community_id, name.to_string()))
2836        .or_default();
2837    if refs.iter().any(|existing| {
2838        existing.file == reference.file
2839            && existing.line == reference.line
2840            && existing.role == reference.role
2841            && existing.peer == reference.peer
2842    }) {
2843        return;
2844    }
2845    if refs.len() < 6 {
2846        refs.push(reference);
2847    }
2848}
2849
2850fn choose_symbol_row_by_files<'a>(
2851    syms: &'a [index::StoredSymbol],
2852    files: &BTreeSet<String>,
2853    root: &std::path::Path,
2854) -> Option<(&'a index::StoredSymbol, &'static str)> {
2855    let matches: Vec<&index::StoredSymbol> = syms
2856        .iter()
2857        .filter(|sym| files.contains(&index_file_key(&sym.file, root)))
2858        .collect();
2859    if matches.len() == 1 {
2860        Some((matches[0], "edge_file"))
2861    } else {
2862        None
2863    }
2864}
2865
2866fn choose_tagpath_candidate_by_files<'a>(
2867    candidates: &'a [TagpathHandleCandidate],
2868    files: &BTreeSet<String>,
2869    evidence: &'static str,
2870) -> Option<(&'a TagpathHandleCandidate, &'static str)> {
2871    let matches: Vec<&TagpathHandleCandidate> = candidates
2872        .iter()
2873        .filter(|candidate| files.contains(&candidate.file))
2874        .collect();
2875    if matches.len() == 1 {
2876        Some((matches[0], evidence))
2877    } else {
2878        None
2879    }
2880}
2881
2882pub(crate) fn annotate_community_members_with_context(
2883    communities: &mut [graph::Community],
2884    db: &index::IndexDb,
2885    root: &std::path::Path,
2886    adapter: Option<&tagpath_adapter::TagpathAdapter>,
2887) -> Result<Vec<CommunityMemberAmbiguityDiagnostic>> {
2888    let mut community_by_name = HashMap::<String, usize>::new();
2889    for community in communities.iter() {
2890        for member in &community.members {
2891            community_by_name.insert(member.name.clone(), community.id);
2892        }
2893    }
2894
2895    let mut symbols_by_name = HashMap::<String, Vec<index::StoredSymbol>>::new();
2896    for sym in db.all_symbols()? {
2897        symbols_by_name
2898            .entry(sym.name.clone())
2899            .or_default()
2900            .push(sym);
2901    }
2902
2903    let mut refs_by_member = HashMap::<(usize, String), Vec<graph::CommunityMemberRef>>::new();
2904    let mut evidence_files_by_member = HashMap::<(usize, String), BTreeSet<String>>::new();
2905    let mut context_files_by_community = HashMap::<usize, BTreeSet<String>>::new();
2906
2907    for edge in db.all_stored_edges()? {
2908        let Some(&caller_community) = community_by_name.get(&edge.caller_name) else {
2909            continue;
2910        };
2911        let Some(&callee_community) = community_by_name.get(&edge.callee_name) else {
2912            continue;
2913        };
2914        if caller_community != callee_community {
2915            continue;
2916        }
2917
2918        let file = index_file_key(&edge.caller_file, root);
2919        context_files_by_community
2920            .entry(caller_community)
2921            .or_default()
2922            .insert(file.clone());
2923
2924        evidence_files_by_member
2925            .entry((caller_community, edge.caller_name.clone()))
2926            .or_default()
2927            .insert(file.clone());
2928        push_bounded_community_member_ref(
2929            &mut refs_by_member,
2930            caller_community,
2931            &edge.caller_name,
2932            graph::CommunityMemberRef {
2933                file: file.clone(),
2934                line: edge.caller_line,
2935                role: "caller".to_string(),
2936                peer: edge.callee_name.clone(),
2937            },
2938        );
2939
2940        evidence_files_by_member
2941            .entry((callee_community, edge.callee_name.clone()))
2942            .or_default()
2943            .insert(file.clone());
2944        push_bounded_community_member_ref(
2945            &mut refs_by_member,
2946            callee_community,
2947            &edge.callee_name,
2948            graph::CommunityMemberRef {
2949                file,
2950                line: edge.call_site_line,
2951                role: "callee".to_string(),
2952                peer: edge.caller_name.clone(),
2953            },
2954        );
2955    }
2956
2957    let mut diagnostics = Vec::new();
2958    for community in communities.iter_mut() {
2959        let community_files = context_files_by_community
2960            .get(&community.id)
2961            .cloned()
2962            .unwrap_or_default();
2963        for member in community.members.iter_mut() {
2964            member.file = None;
2965            member.line = None;
2966            member.tagpath_handle = None;
2967            let key = (community.id, member.name.clone());
2968            member.refs = refs_by_member.remove(&key).unwrap_or_default();
2969
2970            let syms = symbols_by_name
2971                .get(&member.name)
2972                .map(Vec::as_slice)
2973                .unwrap_or(&[]);
2974            let evidence_files = evidence_files_by_member
2975                .get(&key)
2976                .cloned()
2977                .unwrap_or_default();
2978            let candidates = adapter
2979                .map(|adapter| {
2980                    tagpath_handle_candidates_for_symbol_rows(&member.name, syms, root, adapter)
2981                })
2982                .unwrap_or_default();
2983
2984            let mut selected_file: Option<String> = None;
2985            let mut selected_line: Option<i64> = None;
2986            let mut selected_handle: Option<String> = None;
2987            let mut selected_evidence: Option<&'static str> = None;
2988
2989            if let Some(candidate) = candidates.first().filter(|_| candidates.len() == 1) {
2990                selected_file = Some(candidate.file.clone());
2991                selected_line = Some(candidate.line);
2992                selected_handle = Some(candidate.handle.clone());
2993                selected_evidence = Some("unique_tagpath_handle");
2994            } else if let Some((candidate, evidence)) =
2995                choose_tagpath_candidate_by_files(&candidates, &evidence_files, "edge_file")
2996            {
2997                selected_file = Some(candidate.file.clone());
2998                selected_line = Some(candidate.line);
2999                selected_handle = Some(candidate.handle.clone());
3000                selected_evidence = Some(evidence);
3001            } else if let Some((candidate, evidence)) =
3002                choose_tagpath_candidate_by_files(&candidates, &community_files, "community_file")
3003            {
3004                selected_file = Some(candidate.file.clone());
3005                selected_line = Some(candidate.line);
3006                selected_handle = Some(candidate.handle.clone());
3007                selected_evidence = Some(evidence);
3008            }
3009
3010            if selected_file.is_none() {
3011                if let Some(sym) = syms.first().filter(|_| syms.len() == 1) {
3012                    selected_file = Some(index_file_key(&sym.file, root));
3013                    selected_line = Some(sym.line);
3014                    selected_evidence = Some("unique_symbol_row");
3015                } else if let Some((sym, evidence)) =
3016                    choose_symbol_row_by_files(syms, &evidence_files, root)
3017                {
3018                    selected_file = Some(index_file_key(&sym.file, root));
3019                    selected_line = Some(sym.line);
3020                    selected_evidence = Some(evidence);
3021                } else if let Some((sym, _)) =
3022                    choose_symbol_row_by_files(syms, &community_files, root)
3023                {
3024                    selected_file = Some(index_file_key(&sym.file, root));
3025                    selected_line = Some(sym.line);
3026                    selected_evidence = Some("community_file");
3027                }
3028            }
3029
3030            member.file = selected_file.clone();
3031            member.line = selected_line;
3032            member.tagpath_handle = selected_handle;
3033
3034            if syms.len() > 1 || candidates.len() > 1 {
3035                diagnostics.push(CommunityMemberAmbiguityDiagnostic {
3036                    community_id: community.id,
3037                    name: member.name.clone(),
3038                    candidate_count: syms.len(),
3039                    tagpath_candidate_count: candidates.len(),
3040                    evidence: selected_evidence
3041                        .unwrap_or("ambiguous_no_evidence")
3042                        .to_string(),
3043                    chosen_file: selected_file,
3044                });
3045            }
3046        }
3047    }
3048
3049    Ok(diagnostics)
3050}
3051
/// Which endpoint of a `StoredEdge` is the row's primary symbol — caller
/// (caller list) or callee (callee list).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EdgeSide {
    /// The caller endpoint is the row's primary symbol.
    Caller,
    /// The callee endpoint is the row's primary symbol.
    Callee,
}
3059
/// JSON object keys whose string values `relativize_json_inner` treats as
/// file paths and strips the root prefix from.
const JSON_PATH_KEYS: &[&str] = &["file", "path", "caller_file", "file_path"];
3061
3062pub(crate) fn relativize_json_paths(val: &mut serde_json::Value, root: &std::path::Path) {
3063    let root_str = root.to_string_lossy();
3064    let prefix = format!("{}/", root_str.trim_end_matches('/'));
3065    relativize_json_inner(val, &prefix);
3066}
3067
3068fn relativize_json_inner(val: &mut serde_json::Value, prefix: &str) {
3069    match val {
3070        serde_json::Value::Array(arr) => {
3071            for v in arr {
3072                relativize_json_inner(v, prefix);
3073            }
3074        }
3075        serde_json::Value::Object(map) => {
3076            for (k, v) in map.iter_mut() {
3077                if JSON_PATH_KEYS.contains(&k.as_str())
3078                    && let serde_json::Value::String(s) = v
3079                    && let Some(rest) = s.strip_prefix(prefix)
3080                {
3081                    *s = rest.to_string();
3082                }
3083                relativize_json_inner(v, prefix);
3084            }
3085        }
3086        _ => {}
3087    }
3088}
3089
/// Formats `score` with two decimals in compact mode and four otherwise.
pub(crate) fn format_score(score: f64, compact: bool) -> String {
    let precision: usize = if compact { 2 } else { 4 };
    format!("{score:.precision$}")
}
3097
/// Trims `input` and shortens it to at most `max_chars` characters.
///
/// Text that already fits is returned as trimmed. Longer text is cut and
/// terminated with `...`, the ellipsis counting towards the limit. When
/// `max_chars` is three or less there is no room for any text, so the result
/// is `max_chars` dots; this keeps the output within the limit, matching what
/// `truncate_for_budget` does for byte budgets.
pub(crate) fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    const ELLIPSIS_CHARS: usize = 3;

    let trimmed = input.trim();
    if trimmed.chars().count() <= max_chars {
        return trimmed.to_string();
    }
    if max_chars <= ELLIPSIS_CHARS {
        // Previously this returned a full "..." even for limits below three.
        return ".".repeat(max_chars);
    }
    let prefix: String = trimmed.chars().take(max_chars - ELLIPSIS_CHARS).collect();
    format!("{prefix}...")
}
3107
3108pub(crate) fn compact_snippet(snippet: &str) -> Option<String> {
3109    snippet
3110        .lines()
3111        .find(|line| !line.trim().is_empty())
3112        .map(|line| truncate_for_compact(line, 100))
3113}
3114
3115pub(crate) fn compact_members(members: &[graph::CommunityMember], limit: usize) -> String {
3116    let names: Vec<&str> = members.iter().map(|m| m.name.as_str()).collect();
3117    if names.len() <= limit {
3118        return names.join(", ");
3119    }
3120    format!(
3121        "{} (+{} more)",
3122        names[..limit].join(", "),
3123        names.len() - limit
3124    )
3125}
3126
3127fn stable_handle(prefix: &str, key: &str) -> String {
3128    let mut hasher = blake3::Hasher::new();
3129    hasher.update(prefix.as_bytes());
3130    hasher.update(&[0]);
3131    hasher.update(key.as_bytes());
3132    let hex = hasher.finalize().to_hex();
3133    format!("{prefix}-{}", &hex[..10])
3134}
3135
/// Canonical name of a tag family together with its display alias.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CanonicalTagFamily {
    /// Canonical form as reported by `tagpath_family`.
    canonical: String,
    /// Alias built from the family's tags: `.` joins tags inside a dimension,
    /// `/` joins dimensions (or the flat tag list when there are no dimensions).
    tag_alias: String,
}
3141
3142fn canonical_family_from_tagpath_family(
3143    family: tagpath_family::TagFamily,
3144) -> Option<CanonicalTagFamily> {
3145    let tag_alias = if family.dimensions.is_empty() {
3146        family.tags.join("/")
3147    } else {
3148        family
3149            .dimensions
3150            .iter()
3151            .filter(|dimension| !dimension.tags.is_empty())
3152            .map(|dimension| dimension.tags.join("."))
3153            .collect::<Vec<_>>()
3154            .join("/")
3155    };
3156
3157    if tag_alias.is_empty() {
3158        None
3159    } else {
3160        Some(CanonicalTagFamily {
3161            canonical: family.canonical,
3162            tag_alias,
3163        })
3164    }
3165}
3166
3167fn canonical_tag_family_from_name(name: &str) -> Option<CanonicalTagFamily> {
3168    let trimmed = name.trim();
3169    if trimmed.is_empty() {
3170        return None;
3171    }
3172
3173    canonical_family_from_tagpath_family(tagpath_family::generate_family(trimmed))
3174}
3175
3176fn canonical_tag_family_from_tags(tags: &str) -> Option<CanonicalTagFamily> {
3177    let canonical = tags
3178        .split(',')
3179        .map(str::trim)
3180        .filter(|tag| !tag.is_empty())
3181        .collect::<Vec<_>>()
3182        .join("_");
3183    if canonical.is_empty() {
3184        None
3185    } else {
3186        canonical_family_from_tagpath_family(tagpath_family::generate_family(&canonical))
3187    }
3188}
3189
3190fn canonical_tag_family_from_symbol(name: &str, tags: Option<&str>) -> Option<CanonicalTagFamily> {
3191    tags.and_then(canonical_tag_family_from_tags)
3192        .or_else(|| canonical_tag_family_from_name(name))
3193}
3194
3195fn tag_alias_from_name(name: &str) -> Option<String> {
3196    canonical_tag_family_from_name(name).map(|family| family.tag_alias)
3197}
3198
3199fn tag_alias_from_tags(name: &str, tags: Option<&str>) -> Option<String> {
3200    canonical_tag_family_from_symbol(name, tags).map(|family| family.tag_alias)
3201}
3202
/// Turns a tag alias into a space-separated query by splitting on `/` and
/// `.`, trimming each part and dropping blanks. `None` when nothing is left.
fn family_query_from_tag_alias(tag_alias: &str) -> Option<String> {
    let mut query = String::new();
    let parts = tag_alias
        .split(|c: char| c == '/' || c == '.')
        .map(str::trim)
        .filter(|part| !part.is_empty());
    for part in parts {
        if !query.is_empty() {
            query.push(' ');
        }
        query.push_str(part);
    }
    if query.is_empty() { None } else { Some(query) }
}
3212
/// Compact, serializable reference to one ontology tag.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactOntologyRefPreview {
    /// Stable handle for the ontology tag.
    handle: String,
    /// Name of the ontology tag.
    tag: String,
    /// Path associated with the ontology tag.
    path: String,
    /// Optional title; omitted from serialized output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    title: Option<String>,
    /// Optional domain; omitted from serialized output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    domain: Option<String>,
}
3223
/// Ontology data loaded for a project, used to attach ontology references to
/// symbol previews.
#[derive(Clone, Debug)]
struct TagOntologyPreviewContext {
    /// Project path reported by the ontology loader.
    project_root: PathBuf,
    /// Ontology tags keyed by tag name.
    tags: BTreeMap<String, tagpath_ontology::OntologyTag>,
}
3229
/// Compact, serializable reference to a symbol.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactSymbolRefPreview {
    /// Stable handle identifying the symbol.
    handle: String,
    /// Symbol name.
    name: String,
    /// Tag alias for the symbol, when one exists; omitted from serialized
    /// output otherwise.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Ontology tags related to the symbol; omitted from serialized output
    /// when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ontology_refs: Vec<CompactOntologyRefPreview>,
}
3239
3240fn build_compact_symbol_ref(
3241    prefix: &str,
3242    key: &str,
3243    name: &str,
3244    tags: Option<&str>,
3245    max_bytes: usize,
3246) -> CompactSymbolRefPreview {
3247    build_compact_symbol_ref_with_ontology(prefix, key, name, tags, max_bytes, None)
3248}
3249
3250fn build_compact_symbol_ref_with_ontology(
3251    prefix: &str,
3252    key: &str,
3253    name: &str,
3254    tags: Option<&str>,
3255    max_bytes: usize,
3256    ontology: Option<&TagOntologyPreviewContext>,
3257) -> CompactSymbolRefPreview {
3258    let tag_alias = tag_alias_from_tags(name, tags);
3259    let ontology_refs = tag_alias
3260        .as_deref()
3261        .map(|alias| ontology_refs_for_alias(ontology, alias))
3262        .unwrap_or_default();
3263    CompactSymbolRefPreview {
3264        handle: stable_handle(prefix, key),
3265        name: truncate_for_budget(name, max_bytes),
3266        tag_alias: tag_alias.map(|alias| truncate_for_budget(&alias, max_bytes)),
3267        ontology_refs,
3268    }
3269}
3270
3271fn load_tag_ontology_preview_context(root: &Path) -> Option<TagOntologyPreviewContext> {
3272    let report = tagpath_ontology::load_project(root).ok()?;
3273    if report.tags.is_empty() {
3274        return None;
3275    }
3276    Some(TagOntologyPreviewContext {
3277        project_root: report.project_path,
3278        tags: report
3279            .tags
3280            .into_iter()
3281            .map(|tag| (tag.tag.clone(), tag))
3282            .collect(),
3283    })
3284}
3285
3286fn ontology_refs_for_alias(
3287    ontology: Option<&TagOntologyPreviewContext>,
3288    alias: &str,
3289) -> Vec<CompactOntologyRefPreview> {
3290    let Some(ontology) = ontology else {
3291        return Vec::new();
3292    };
3293    let mut seen = BTreeSet::new();
3294    alias
3295        .split('/')
3296        .flat_map(|part| part.split('.'))
3297        .map(str::trim)
3298        .filter(|tag| !tag.is_empty())
3299        .filter_map(|tag| {
3300            let key = tag.to_ascii_lowercase();
3301            if !seen.insert(key.clone()) {
3302                return None;
3303            }
3304            let ontology_tag = ontology.tags.get(&key)?;
3305            let path = relativize_ontology_path(&ontology_tag.path, &ontology.project_root);
3306            Some(CompactOntologyRefPreview {
3307                handle: stable_handle("tont", &format!("{}:{path}", ontology_tag.tag)),
3308                tag: ontology_tag.tag.clone(),
3309                path,
3310                title: ontology_tag.title.clone(),
3311                domain: ontology_tag.domain.clone(),
3312            })
3313        })
3314        .collect()
3315}
3316
/// Renders `path` relative to `root` when it lies beneath it (otherwise as
/// given), using forward slashes only.
fn relativize_ontology_path(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
3323
/// Renders `{handle} {name}`, followed by ` tag:{alias}` when an alias is given.
fn format_symbol_preview_line(handle: &str, name: &str, tag_alias: Option<&str>) -> String {
    let mut line = format!("{handle} {name}");
    if let Some(alias) = tag_alias {
        line.push_str(" tag:");
        line.push_str(alias);
    }
    line
}
3330
3331fn format_summary_ref_line(summary: &ContextPackSummaryRefPreview) -> String {
3332    match summary.tag_alias.as_deref() {
3333        Some(alias) => format!(
3334            "{} {} tag:{} expand:{}",
3335            summary.handle, summary.symbol, alias, summary.expand
3336        ),
3337        None => format!(
3338            "{} {} expand:{}",
3339            summary.handle, summary.symbol, summary.expand
3340        ),
3341    }
3342}
3343
3344fn compact_symbol_ref_token(symbol: &CompactSymbolRefPreview) -> String {
3345    match symbol.tag_alias.as_deref() {
3346        Some(alias) => format!("{}@{}", symbol.handle, alias),
3347        None => format!("{}@{}", symbol.handle, symbol.name),
3348    }
3349}
3350
/// Trims `input` and shortens it to at most `max_bytes` bytes of UTF-8.
///
/// Text that fits is returned as trimmed. Budgets of three bytes or fewer
/// yield that many dots. Otherwise the longest whole-character prefix that
/// leaves room for a trailing `...` is kept (possibly none).
fn truncate_for_budget(input: &str, max_bytes: usize) -> String {
    let trimmed = input.trim();
    if trimmed.len() <= max_bytes {
        return trimmed.to_string();
    }
    if max_bytes <= 3 {
        return ".".repeat(max_bytes);
    }

    // Bytes available for text once the ellipsis is accounted for.
    let text_budget = max_bytes - 3;
    let cut = trimmed
        .char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
        .take_while(|&char_end| char_end <= text_budget)
        .last()
        .unwrap_or(0);
    format!("{}...", &trimmed[..cut])
}
3375
/// Maps a symbol kind to its short display form.
///
/// Kinds without a shorter form (for example `mod`, `struct`, `trait`, `impl`,
/// `alias`, or anything unrecognised) are returned unchanged.
pub(crate) fn abbreviate_kind(kind: &str) -> &str {
    const ABBREVIATIONS: &[(&str, &str)] = &[
        ("function", "fn"),
        ("method", "meth"),
        ("module", "mod"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
    ];

    ABBREVIATIONS
        .iter()
        .find(|&&(full, _)| full == kind)
        .map_or(kind, |&(_, short)| short)
}
3398
/// Shortens a match-type label: `exact_name` becomes `exact`, `partial_tags`
/// becomes `partial`; every other label is returned unchanged.
pub(crate) fn abbreviate_match_type(mt: &str) -> &str {
    if mt == "exact_name" {
        "exact"
    } else if mt == "partial_tags" {
        "partial"
    } else {
        mt
    }
}
3407
3408pub(crate) fn symbol_path_summary(path: &[graph::PathNode]) -> String {
3409    path.iter()
3410        .map(|n| n.name.as_str())
3411        .collect::<Vec<_>>()
3412        .join(" -> ")
3413}
3414
/// Maximum number of sample snippets kept per file group in `group_search_hits`.
const SEARCH_GROUP_SAMPLE_LIMIT: usize = 2;
3416
/// Search hits that share one file path, aggregated by `group_search_hits`.
struct SearchHitGroup {
    /// File path shared by the grouped hits (absolute or root-relative).
    path: String,
    /// Lowest rank among the grouped hits.
    first_rank: usize,
    /// Highest score among the grouped hits.
    top_score: f64,
    /// Debug rendering of the confidence of the first hit seen for this path.
    confidence: String,
    /// Number of hits in the group.
    hits: usize,
    /// Distinct sample snippets, at most `SEARCH_GROUP_SAMPLE_LIMIT`.
    samples: Vec<String>,
}
3425
3426fn format_search_sample(hit: &sift::SearchHit) -> Option<String> {
3427    let snippet = compact_snippet(&hit.snippet)?;
3428    Some(match hit.location.as_deref() {
3429        Some(location) => format!("{location}: {snippet}"),
3430        None => snippet,
3431    })
3432}
3433
3434pub(crate) fn group_search_hits(
3435    hits: &[sift::SearchHit],
3436    root: &Path,
3437    absolute: bool,
3438) -> Vec<SearchHitGroup> {
3439    let mut positions = BTreeMap::new();
3440    let mut groups = Vec::new();
3441    for hit in hits {
3442        let path = if absolute {
3443            hit.path.clone()
3444        } else {
3445            relativize(&hit.path, root)
3446        };
3447        let entry = positions.entry(path.clone()).or_insert_with(|| {
3448            groups.push(SearchHitGroup {
3449                path: path.clone(),
3450                first_rank: hit.rank,
3451                top_score: hit.score,
3452                confidence: format!("{:?}", hit.confidence),
3453                hits: 0,
3454                samples: Vec::new(),
3455            });
3456            groups.len() - 1
3457        });
3458        let group = &mut groups[*entry];
3459        group.hits += 1;
3460        if hit.rank < group.first_rank {
3461            group.first_rank = hit.rank;
3462        }
3463        if hit.score > group.top_score {
3464            group.top_score = hit.score;
3465        }
3466        if let Some(sample) = format_search_sample(hit)
3467            && group.samples.len() < SEARCH_GROUP_SAMPLE_LIMIT
3468            && !group.samples.contains(&sample)
3469        {
3470            group.samples.push(sample);
3471        }
3472    }
3473    groups.sort_by_key(|group| group.first_rank);
3474    groups
3475}
3476
3477pub(crate) fn should_collapse_search_hits(
3478    hits: &[sift::SearchHit],
3479    root: &Path,
3480    absolute: bool,
3481) -> bool {
3482    let groups = group_search_hits(hits, root, absolute);
3483    let max_hits_per_file = groups.iter().map(|group| group.hits).max().unwrap_or(0);
3484    max_hits_per_file >= 3 || (hits.len() >= 6 && groups.len() < hits.len())
3485}
3486
3487pub(crate) fn format_edge_groups(edges: &[index::StoredEdge], use_callers: bool) -> Vec<String> {
3488    let mut grouped: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
3489    for edge in edges {
3490        let key = edge.caller_file.as_str();
3491        let name = if use_callers {
3492            edge.caller_name.as_str()
3493        } else {
3494            edge.callee_name.as_str()
3495        };
3496        let names = grouped.entry(key).or_default();
3497        if !names.contains(&name) {
3498            names.push(name);
3499        }
3500    }
3501
3502    grouped
3503        .into_iter()
3504        .map(|(file, names)| format!("  {} ({}): {}", file, names.len(), names.join(", ")))
3505        .collect()
3506}
3507
3508pub(crate) fn should_collapse_edge_groups(edges: &[index::StoredEdge]) -> bool {
3509    let mut grouped: BTreeMap<&str, usize> = BTreeMap::new();
3510    for edge in edges {
3511        *grouped.entry(edge.caller_file.as_str()).or_default() += 1;
3512    }
3513    let max_hits_per_file = grouped.values().copied().max().unwrap_or(0);
3514    max_hits_per_file >= 3 || (edges.len() >= 6 && grouped.len() < edges.len())
3515}
3516
3517/// Apply a single edit operation to file contents. Returns new content.
3518pub(crate) fn apply_edit_op(content: &str, op: &EditOp) -> Result<(String, usize)> {
3519    if op.old == op.new {
3520        bail!("old and new strings are identical");
3521    }
3522    let count = content.matches(op.old.as_str()).count();
3523    if count == 0 {
3524        bail!("old_string not found");
3525    }
3526    if count > 1 && !op.replace_all {
3527        bail!(
3528            "old_string matches {} times (use replace_all or provide more context)",
3529            count
3530        );
3531    }
3532    let replaced = if op.replace_all {
3533        content.replace(op.old.as_str(), &op.new)
3534    } else {
3535        content.replacen(op.old.as_str(), &op.new, 1)
3536    };
3537    Ok((replaced, count))
3538}
3539
3540pub(crate) fn build_edit_plan(batch: &EditBatch) -> Result<Vec<PlannedEdit>> {
3541    let mut plan = Vec::with_capacity(batch.edits.len());
3542    for (i, op) in batch.edits.iter().enumerate() {
3543        let content = fs::read_to_string(&op.file)
3544            .with_context(|| format!("edit #{}: reading {}", i + 1, op.file.display()))?;
3545        let (replaced, count) = apply_edit_op(&content, op)
3546            .with_context(|| format!("edit #{}: {}", i + 1, op.file.display()))?;
3547        plan.push(PlannedEdit {
3548            index: i,
3549            file: op.file.clone(),
3550            new_content: replaced,
3551            replacements: count,
3552        });
3553    }
3554    Ok(plan)
3555}
3556
3557fn stage_edit_plan(plan: Vec<PlannedEdit>) -> Result<Vec<StagedEdit>> {
3558    let mut staged = Vec::with_capacity(plan.len());
3559    for planned in plan {
3560        let parent = planned.file.parent().unwrap_or_else(|| Path::new("."));
3561        let mut staged_file = NamedTempFile::new_in(parent)
3562            .with_context(|| format!("staging {}", planned.file.display()))?;
3563        staged_file
3564            .write_all(planned.new_content.as_bytes())
3565            .with_context(|| format!("staging {}", planned.file.display()))?;
3566        staged_file
3567            .as_file_mut()
3568            .sync_all()
3569            .with_context(|| format!("flushing staged edit for {}", planned.file.display()))?;
3570        staged.push(StagedEdit {
3571            index: planned.index,
3572            file: planned.file,
3573            replacements: planned.replacements,
3574            staged_file,
3575        });
3576    }
3577    Ok(staged)
3578}
3579
/// Builds the path of the hidden backup file for `file`.
///
/// The backup lives in the same directory and is named
/// `.{file name}.tsift-edit-{nanoseconds since epoch}-{process id}-{index}.bak`.
fn edit_backup_path(file: &Path, index: usize) -> PathBuf {
    let parent = file.parent().unwrap_or_else(|| Path::new("."));
    let name = match file.file_name() {
        Some(value) => value.to_string_lossy().into_owned(),
        None => "edit-target".to_string(),
    };
    let stamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_nanos();
    let pid = std::process::id();
    parent.join(format!(".{name}.tsift-edit-{stamp}-{pid}-{index}.bak"))
}
3595
3596fn rollback_applied_edits(applied: &[AppliedEdit]) -> Result<()> {
3597    let mut rollback_errors = Vec::new();
3598    for entry in applied.iter().rev() {
3599        if let Err(err) = fs::remove_file(&entry.file)
3600            && err.kind() != std::io::ErrorKind::NotFound
3601        {
3602            rollback_errors.push(format!(
3603                "removing {} during rollback: {}",
3604                entry.file.display(),
3605                err
3606            ));
3607            continue;
3608        }
3609        if let Err(err) = fs::rename(&entry.backup_path, &entry.file) {
3610            rollback_errors.push(format!(
3611                "restoring {} during rollback: {}",
3612                entry.file.display(),
3613                err
3614            ));
3615        }
3616    }
3617    if rollback_errors.is_empty() {
3618        Ok(())
3619    } else {
3620        bail!(rollback_errors.join("; "));
3621    }
3622}
3623
3624fn cleanup_edit_backups(applied: &[AppliedEdit]) {
3625    for entry in applied {
3626        let _ = fs::remove_file(&entry.backup_path);
3627    }
3628}
3629
3630fn ok_results_from_applied(applied: &[AppliedEdit]) -> Vec<EditResult> {
3631    applied
3632        .iter()
3633        .map(|entry| EditResult {
3634            file: entry.file.clone(),
3635            status: EditStatus::Ok,
3636            error: None,
3637            replacements: Some(entry.replacements),
3638        })
3639        .collect()
3640}
3641
3642pub(crate) fn apply_edit_plan_atomically(plan: Vec<PlannedEdit>) -> Result<Vec<EditResult>> {
3643    apply_edit_plan_atomically_inner(plan, |_, _| Ok(()))
3644}
3645
/// Applies every planned edit as an all-or-nothing batch.
///
/// The whole plan is staged first via `stage_edit_plan`. Each staged edit is then committed in
/// order: `before_swap(commit_index, file)` is called, the current file is renamed to a backup
/// path, and the staged temp file is persisted at the original path.
///
/// If any of those steps fails, the edits applied so far are rolled back with
/// `rollback_applied_edits` and an error is returned. Backups are only cleaned up when the
/// rollback itself succeeded; a failed rollback is appended to the error message.
///
/// On success the results are ordered by the plan `index`, the backups are removed, and one
/// `EditStatus::Ok` result per edit is returned.
///
/// NOTE(review): `before_swap` is a no-op in `apply_edit_plan_atomically`; presumably it exists
/// so tests can inject a failure before a given commit — confirm against the callers.
fn apply_edit_plan_atomically_inner<F>(
    plan: Vec<PlannedEdit>,
    mut before_swap: F,
) -> Result<Vec<EditResult>>
where
    F: FnMut(usize, &Path) -> Result<()>,
{
    let staged = stage_edit_plan(plan)?;
    let mut applied = Vec::with_capacity(staged.len());

    for (commit_index, staged_edit) in staged.into_iter().enumerate() {
        // Hook failure: nothing has been touched for this edit yet, only undo earlier commits.
        if let Err(err) = before_swap(commit_index, &staged_edit.file) {
            match rollback_applied_edits(&applied) {
                Ok(()) => cleanup_edit_backups(&applied),
                Err(rollback_error) => {
                    return Err(err.context(format!("rollback also failed: {rollback_error}")));
                }
            }
            return Err(err);
        }

        // Step 1: move the current file out of the way so it can be restored later.
        let backup_path = edit_backup_path(&staged_edit.file, staged_edit.index);
        if let Err(err) = fs::rename(&staged_edit.file, &backup_path) {
            match rollback_applied_edits(&applied) {
                Ok(()) => cleanup_edit_backups(&applied),
                Err(rollback_error) => {
                    bail!(
                        "moving {} into backup slot failed: {}; rollback also failed: {}",
                        staged_edit.file.display(),
                        err,
                        rollback_error
                    );
                }
            }
            bail!(
                "moving {} into backup slot failed: {}",
                staged_edit.file.display(),
                err
            );
        }
        // Step 2: move the staged content into the original location.
        match staged_edit.staged_file.persist(&staged_edit.file) {
            Ok(_) => applied.push(AppliedEdit {
                index: staged_edit.index,
                file: staged_edit.file,
                replacements: staged_edit.replacements,
                backup_path,
            }),
            Err(err) => {
                let persist_error = err.error;
                // Release the temp file handed back by the failed persist.
                drop(err.file);
                // This edit is not in `applied`, so its backup must be restored separately
                // before the earlier commits are rolled back.
                let restore_error = fs::rename(&backup_path, &staged_edit.file).err();
                let rollback_error = rollback_applied_edits(&applied).err();
                if rollback_error.is_none() {
                    cleanup_edit_backups(&applied);
                }
                let mut message = format!(
                    "committing {} failed: {}",
                    staged_edit.file.display(),
                    persist_error
                );
                if let Some(restore_error) = restore_error {
                    message.push_str(&format!(
                        "; restoring original {} failed: {}",
                        staged_edit.file.display(),
                        restore_error
                    ));
                }
                if let Some(rollback_error) = rollback_error {
                    message.push_str(&format!("; rollback also failed: {rollback_error}"));
                }
                bail!(message);
            }
        }
    }

    // Report results in plan order rather than commit order.
    applied.sort_by_key(|entry| entry.index);
    let results = ok_results_from_applied(&applied);
    cleanup_edit_backups(&applied);
    Ok(results)
}
3726
3727fn resolve_query_index_target(
3728    root: &Path,
3729    path_hint: &Path,
3730    scope: Option<&str>,
3731) -> Result<SearchIndexTarget> {
3732    let cfg = config::Config::load(root)?;
3733    if let Some(scope_name) = scope {
3734        let scope = config::Config::resolve_submodule(root, scope_name)?;
3735        return Ok(SearchIndexTarget {
3736            label: format!("submodule `{}` index", scope.id),
3737            db_path: cfg.db_path_for(root, &scope.id),
3738            source_root: scope.source_root.clone(),
3739            scope_name: Some(scope.id.clone()),
3740            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
3741        });
3742    }
3743
3744    if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
3745        return Ok(SearchIndexTarget {
3746            label: format!("submodule `{}` index", scope.id),
3747            db_path: cfg.db_path_for(root, &scope.id),
3748            source_root: scope.source_root.clone(),
3749            scope_name: Some(scope.id.clone()),
3750            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
3751        });
3752    }
3753
3754    if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
3755        return Ok(SearchIndexTarget {
3756            label: format!("submodule `{}` index", scope.id),
3757            db_path: cfg.db_path_for(root, &scope.id),
3758            source_root: scope.source_root.clone(),
3759            scope_name: Some(scope.id.clone()),
3760            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
3761        });
3762    }
3763
3764    let db_path = root.join(".tsift/index.db");
3765    if db_path.exists() {
3766        return Ok(SearchIndexTarget {
3767            label: "index".to_string(),
3768            db_path,
3769            source_root: root.to_path_buf(),
3770            scope_name: None,
3771            reindex_cmd: format!("tsift index {}", root.display()),
3772        });
3773    }
3774
3775    let scopes = config::Config::submodule_dirs(root)?;
3776    if scopes.is_empty() {
3777        return Ok(SearchIndexTarget {
3778            label: "index".to_string(),
3779            db_path,
3780            source_root: root.to_path_buf(),
3781            scope_name: None,
3782            reindex_cmd: format!("tsift index {}", root.display()),
3783        });
3784    }
3785
3786    let available_scopes = scopes
3787        .iter()
3788        .map(|scope| scope.id.as_str())
3789        .collect::<Vec<_>>()
3790        .join(", ");
3791    let indexed_scopes = scopes
3792        .iter()
3793        .filter(|scope| cfg.db_path_for(root, &scope.id).exists())
3794        .map(|scope| scope.id.as_str())
3795        .collect::<Vec<_>>();
3796    let indexed_label = if indexed_scopes.is_empty() {
3797        "none".to_string()
3798    } else {
3799        indexed_scopes.join(", ")
3800    };
3801
3802    bail!(
3803        "workspace root {} has no shared root index at {}. Read-only graph queries require `--scope <scope>` when the workspace is indexed into `.tsift/indexes/*/index.db`. Available scopes: {}. Indexed scopes: {}.",
3804        root.display(),
3805        db_path.display(),
3806        available_scopes,
3807        indexed_label
3808    );
3809}
3810
3811fn resolve_query_db_path(root: &Path, path_hint: &Path, scope: Option<&str>) -> Result<PathBuf> {
3812    Ok(resolve_query_index_target(root, path_hint, scope)?.db_path)
3813}
3814
3815fn ensure_query_index_current(root: &Path, target: &SearchIndexTarget) -> Result<()> {
3816    let state = inspect_search_index(target)?;
3817    let Some(reason) = index_reason_for_state(state) else {
3818        return Ok(());
3819    };
3820
3821    match apply_search_index_update(root, target) {
3822        Ok(_) => {
3823            index::inspect_scope_invalidate_all();
3824            Ok(())
3825        }
3826        Err(err) if is_active_writer_lock_error(&err) && target.db_path.exists() => {
3827            eprintln!(
3828                "note: active tsift writer detected; skipping graph-query autoindex because {}. \
3829                 Continuing with the current read-only index snapshot; graph results may lag. \
3830                 Retry `{}` after the active writer finishes for fresh graph results.",
3831                index_reason_detail(target, reason),
3832                target.reindex_cmd
3833            );
3834            Ok(())
3835        }
3836        Err(err) => Err(err),
3837    }
3838}
3839
3840pub(crate) fn open_index_db(path: &std::path::Path, scope: Option<&str>) -> Result<index::IndexDb> {
3841    let root = lint::resolve_project_root_or_canonical_path(path)?;
3842    let target = resolve_query_index_target(&root, path, scope)?;
3843    ensure_query_index_current(&root, &target)?;
3844    let db_path = target.db_path;
3845    if !db_path.exists() {
3846        bail!(
3847            "no index found at {}. Run `tsift index` first.",
3848            db_path.display()
3849        );
3850    }
3851    index::IndexDb::open_read_only_resilient(&db_path)
3852}
3853
3854pub(crate) fn query_tagpath_root(
3855    root: &std::path::Path,
3856    path_hint: &std::path::Path,
3857    scope: Option<&str>,
3858) -> Result<PathBuf> {
3859    if let Some(scope_name) = scope {
3860        return Ok(config::Config::resolve_submodule(root, scope_name)?.source_root);
3861    }
3862    if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
3863        return Ok(scope.source_root);
3864    }
3865    Ok(root.to_path_buf())
3866}
3867
/// One node of the traversal graph as it is serialized into reports.
///
/// Nodes are keyed by `handle` inside `TraversalGraphBuild`. Optional fields are omitted from
/// the serialized output when they are `None`, and `properties` is omitted when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalNode {
    /// Identifier of the node; used as the key of `TraversalGraphBuild::nodes`.
    handle: String,
    /// Node category string (code in this file filters on `"file"` and `"symbol"`).
    kind: String,
    /// Display label of the node.
    label: String,
    /// Optional reference identifier.
    #[serde(skip_serializing_if = "Option::is_none")]
    ref_id: Option<String>,
    /// Optional path associated with the node.
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<String>,
    /// Optional line number associated with the node.
    #[serde(skip_serializing_if = "Option::is_none")]
    line: Option<i64>,
    /// Optional free-form detail text.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    /// Additional string properties, kept in key order.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    properties: BTreeMap<String, String>,
    /// NOTE(review): presumably a follow-up command that expands this node — confirm.
    expand: String,
}
3885
/// A directed, typed edge between two traversal nodes.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalEdge {
    /// Handle of the source node.
    from: String,
    /// Handle of the destination node.
    to: String,
    /// Relation name; part of the edge's deduplication key in `TraversalGraphBuild`.
    relation: String,
    /// Optional label, omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    label: Option<String>,
    /// Edge weight supplied by the caller of `TraversalGraphBuild::add_edge`.
    weight: usize,
}
3895
/// Accumulator used while assembling a traversal graph.
#[derive(Clone, Debug, Default)]
struct TraversalGraphBuild {
    /// Nodes keyed by their handle.
    nodes: BTreeMap<String, TraversalNode>,
    /// Edges in insertion order.
    edges: Vec<TraversalEdge>,
    /// `(from, to, relation)` keys of the edges already recorded, used to reject duplicates.
    edge_keys: BTreeSet<(String, String, String)>,
    /// Warning messages collected during the build.
    warnings: Vec<String>,
}
3903
/// Version tag of the traversal graph projection; it is part of the payload hashed by
/// `projection_content_hash`.
pub(crate) const GRAPH_PROJECTION_VERSION: &str = "tsift-traversal-v1";
// Contract version identifiers for the individual output formats.
// NOTE(review): the consumers of these tags are outside this section of the file.
const GRAPH_DB_EVIDENCE_CONTRACT_VERSION: &str = "graph-db-evidence-v1";
const WORKER_PROMPT_PACKET_CONTRACT_VERSION: &str = "worker-prompt-packet-v1";
const CONFLICT_MATRIX_CONTRACT_VERSION: &str = "conflict-matrix-v1";
const CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION: &str =
    "context-pack-graph-orchestration-v1";
const SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION: &str = "session-review-follow-up-v1";
const DISPATCH_TRACE_CONTRACT_VERSION: &str = "dispatch-trace-v1";
const DEPENDENCY_DAG_CONTRACT_VERSION: &str = "dependency-dag-v1";
// Node kind string for projection metadata.
// NOTE(review): presumably the kind of the node identified by `graph_projection_meta_id` — confirm.
const GRAPH_PROJECTION_META_KIND: &str = "projection_meta";
// Numeric caps for graph-db queries.
// NOTE(review): presumably a neighbor-count limit and lower/upper bounds for a semantic edge
// scan, judging by the names — confirm at the use sites.
const GRAPH_DB_RANKED_NEIGHBOR_CAP: usize = 12;
const GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP: usize = 16;
const GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP: usize = 64;
3917
/// Node and edge counts reported alongside a traversal.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalTotals {
    /// Number of nodes.
    nodes: usize,
    /// Number of edges.
    edges: usize,
}
3923
/// A path between two traversal nodes, reported as `TraversalReport::shortest_path`.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalPathReport {
    /// Start node of the path.
    from: TraversalNode,
    /// End node of the path.
    to: TraversalNode,
    /// Hop count of the path.
    hops: usize,
    /// Nodes on the path.
    nodes: Vec<TraversalNode>,
    /// Edges on the path.
    edges: Vec<TraversalEdge>,
}
3932
/// A suggested node to look at next, with the reason and score behind the suggestion.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalRecommendation {
    /// Handle of the recommended node.
    handle: String,
    /// Kind of the recommended node.
    kind: String,
    /// Label of the recommended node.
    label: String,
    /// Why the node is recommended.
    reason: String,
    /// Score of the recommendation.
    score: usize,
    /// NOTE(review): presumably a follow-up command that expands this node — confirm.
    expand: String,
}
3942
/// Serialized result of a traversal query.
///
/// `scope`, `query`, `target` and `shortest_path` are omitted from the output when `None`;
/// `warnings` is omitted when empty.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalReport {
    /// Root the report was produced for, as a string.
    root: String,
    /// Scope name, when the query was scoped.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Traversal mode name.
    mode: String,
    /// Node and edge totals.
    totals: TraversalTotals,
    /// Query text, when one was given.
    #[serde(skip_serializing_if = "Option::is_none")]
    query: Option<String>,
    /// Target text, when one was given.
    #[serde(skip_serializing_if = "Option::is_none")]
    target: Option<String>,
    /// Nodes included in the report.
    nodes: Vec<TraversalNode>,
    /// Edges included in the report.
    edges: Vec<TraversalEdge>,
    /// Path between two nodes, when one was computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    shortest_path: Option<TraversalPathReport>,
    /// Suggested next nodes.
    recommendations: Vec<TraversalRecommendation>,
    /// Exploration packet attached to the report.
    exploration: ExplorationPacket,
    /// Whether the reported data was truncated.
    truncated: bool,
    /// Warning messages.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
3964
/// Serialized result of a semantic "related items" query.
///
/// `scope` is omitted from the output when `None`; `warnings` is omitted when empty.
#[derive(Debug, Serialize, PartialEq)]
struct SemanticRelatedReport {
    /// Root the report was produced for, as a string.
    root: String,
    /// Scope name, when the query was scoped.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Query text.
    query: String,
    /// Name of the embedding model used.
    embedding_model: String,
    /// NOTE(review): presumably the number of entries in `items` — confirm at the construction site.
    count: usize,
    /// Related items.
    items: Vec<SemanticRelatedItem>,
    /// Warning messages.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
3977
/// One entry of a `SemanticRelatedReport`.
///
/// The optional fields are omitted from the serialized output when `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct SemanticRelatedItem {
    /// Handle of the related node.
    handle: String,
    /// Kind of the related node.
    kind: String,
    /// Label of the related node.
    label: String,
    /// Relatedness score.
    score: f64,
    /// Source file path, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    file_path: Option<String>,
    /// Source symbol name, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    source_symbol: Option<String>,
    /// Detail text, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    /// NOTE(review): presumably a follow-up command that expands this item — confirm.
    expand: String,
}
3992
/// Lookup entry for a symbol node: its handle, the node itself, and a token set.
/// NOTE(review): `tokens` presumably holds the searchable tokens of the node — confirm.
#[derive(Clone)]
struct TraversalSymbolIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}

/// Lookup entry for a file node; same shape as `TraversalSymbolIndexEntry`.
#[derive(Clone)]
struct TraversalFileIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}

/// Lookup entry for a route node; same shape as `TraversalSymbolIndexEntry`.
#[derive(Clone)]
struct TraversalRouteIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
4013
/// Borrowed view over the symbol, file and route index entries plus lookup maps.
///
/// NOTE(review): each `*_index` map presumably maps a lookup key to positions in the matching
/// slice, and `file_path_index` a file path to a handle — confirm against the builder, which
/// is outside this section.
struct TraversalCodeLookup<'a> {
    symbols: &'a [TraversalSymbolIndexEntry],
    files: &'a [TraversalFileIndexEntry],
    routes: &'a [TraversalRouteIndexEntry],
    symbol_index: HashMap<String, Vec<usize>>,
    file_index: HashMap<String, Vec<usize>>,
    route_index: HashMap<String, Vec<usize>>,
    file_path_index: HashMap<String, String>,
}
4023
/// Limits attached to an `ExplorationPacket`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationBudget {
    /// Project size label.
    project_size: String,
    /// Maximum number of source windows.
    max_source_windows: usize,
    /// Number of lines per source window.
    lines_per_window: usize,
    /// Limit on reported relationships.
    relationship_limit: usize,
}
4031
/// One `from --relation--> to` entry of an exploration relationship map.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationRelation {
    from: String,
    relation: String,
    to: String,
    /// Optional label, omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    label: Option<String>,
}
4040
/// A range within a file that an exploration packet points at.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationSourceWindow {
    /// Handle the window belongs to.
    handle: String,
    /// File the window refers to.
    file: String,
    /// Start of the range. NOTE(review): presumably a line number — confirm, including whether it is 1-based.
    start: usize,
    /// End of the range. NOTE(review): presumably a line number — confirm, including inclusivity.
    end: usize,
    /// Why the window was selected.
    reason: String,
    /// NOTE(review): presumably a follow-up command that shows this window — confirm.
    expand: String,
}
4050
/// Worker-oriented context entry of an exploration packet.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationWorkerContext {
    handle: String,
    target: String,
    summary: String,
    /// NOTE(review): presumably a follow-up command that expands this entry — confirm.
    expand: String,
}
4058
/// Exploration data embedded in a `TraversalReport`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationPacket {
    /// Limits that apply to this packet.
    budget: ExplorationBudget,
    /// Relationship entries.
    relationship_map: Vec<ExplorationRelation>,
    /// Source windows.
    source_windows: Vec<ExplorationSourceWindow>,
    /// Worker context entries, omitted from serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    worker_context: Vec<ExplorationWorkerContext>,
    /// Guidance text about not re-reading material.
    no_reread_guidance: String,
}
4068
4069impl TraversalGraphBuild {
4070    fn add_node(&mut self, node: TraversalNode) {
4071        self.nodes.entry(node.handle.clone()).or_insert(node);
4072    }
4073
4074    fn add_edge(
4075        &mut self,
4076        from: &str,
4077        to: &str,
4078        relation: &str,
4079        label: Option<String>,
4080        weight: usize,
4081    ) {
4082        if from == to || !self.nodes.contains_key(from) || !self.nodes.contains_key(to) {
4083            return;
4084        }
4085        let key = (from.to_string(), to.to_string(), relation.to_string());
4086        if self.edge_keys.insert(key) {
4087            self.edges.push(TraversalEdge {
4088                from: from.to_string(),
4089                to: to.to_string(),
4090                relation: relation.to_string(),
4091                label,
4092                weight,
4093            });
4094        }
4095    }
4096}
4097
/// Location of the graph substrate database under `root`.
///
/// With a scope the database is `.tsift/indexes/<scope>/graph.db`; without one it is
/// `.tsift/graph.db`.
pub(crate) fn graph_substrate_db_path(root: &Path, scope: Option<&str>) -> PathBuf {
    scope.map_or_else(
        || root.join(".tsift/graph.db"),
        |scope_name| root.join(".tsift/indexes").join(scope_name).join("graph.db"),
    )
}
4104
/// Identifier of the projection metadata entry for `scope`; a missing scope is named `root`.
fn graph_projection_meta_id(scope: Option<&str>) -> String {
    let scope_label = match scope {
        Some(scope_name) => scope_name,
        None => "root",
    };
    format!("projection:tsift-traversal:{}", scope_label)
}
4108
4109fn content_hash<T: Serialize>(value: &T) -> Result<String> {
4110    let bytes = serde_json::to_vec(value)?;
4111    Ok(blake3::hash(&bytes).to_hex().to_string())
4112}
4113
4114fn node_with_content_freshness(mut node: SubstrateGraphNode) -> Result<SubstrateGraphNode> {
4115    let mut hashable = node.clone();
4116    hashable.freshness = None;
4117    node.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
4118    Ok(node)
4119}
4120
4121fn edge_with_content_freshness(mut edge: SubstrateGraphEdge) -> Result<SubstrateGraphEdge> {
4122    let mut hashable = edge.clone();
4123    hashable.freshness = None;
4124    edge.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
4125    Ok(edge)
4126}
4127
/// Length of every vector produced by `semantic_embedding`, and the length required by
/// `parse_semantic_embedding_property`.
const SEMANTIC_EMBEDDING_DIM: usize = 32;
/// Model name stored in the `embedding_model` property of semantic nodes.
const SEMANTIC_EMBEDDING_MODEL: &str = "tsift-local-hash-v1";
4130
4131fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
4132    match kind {
4133        SemanticRelatedKind::Concept => "concept",
4134        SemanticRelatedKind::Entity => "entity",
4135        SemanticRelatedKind::All => "all",
4136    }
4137}
4138
4139fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
4140    format!(
4141        "tsift semantic {} --path {} --kind {} --limit 10",
4142        shell_quote(query),
4143        shell_quote(root.to_string_lossy().as_ref()),
4144        semantic_related_kind_name(kind)
4145    )
4146}
4147
4148fn semantic_embedding(input: &str) -> Vec<f64> {
4149    let mut vector = vec![0.0; SEMANTIC_EMBEDDING_DIM];
4150    let mut tokens = traversal_tokens(input);
4151    if tokens.is_empty() {
4152        let trimmed = input.trim().to_ascii_lowercase();
4153        if !trimmed.is_empty() {
4154            tokens.insert(trimmed);
4155        }
4156    }
4157
4158    for token in tokens {
4159        let hash = blake3::hash(token.as_bytes());
4160        let bytes = hash.as_bytes();
4161        let idx = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
4162        let sign = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
4163        vector[idx] += sign;
4164    }
4165
4166    let norm = vector.iter().map(|value| value * value).sum::<f64>().sqrt();
4167    if norm > 0.0 {
4168        for value in &mut vector {
4169            *value /= norm;
4170        }
4171    }
4172    vector
4173}
4174
4175fn semantic_embedding_property(input: &str) -> String {
4176    semantic_embedding(input)
4177        .iter()
4178        .map(|value| format!("{value:.6}"))
4179        .collect::<Vec<_>>()
4180        .join(",")
4181}
4182
4183fn parse_semantic_embedding_property(value: &str) -> Option<Vec<f64>> {
4184    let parsed = value
4185        .split(',')
4186        .map(str::trim)
4187        .map(str::parse::<f64>)
4188        .collect::<std::result::Result<Vec<_>, _>>()
4189        .ok()?;
4190    (parsed.len() == SEMANTIC_EMBEDDING_DIM).then_some(parsed)
4191}
4192
/// Dot product of two equally long vectors; `0.0` when the lengths differ.
///
/// No normalization is applied here, so the result is a cosine similarity only when both
/// inputs already have unit length.
fn semantic_cosine(left: &[f64], right: &[f64]) -> f64 {
    if left.len() == right.len() {
        let products = left.iter().zip(right).map(|(a, b)| a * b);
        products.sum::<f64>()
    } else {
        0.0
    }
}
4202
4203fn semantic_entity_handle(name: &str, kind: &str) -> String {
4204    stable_handle(
4205        "gent",
4206        &format!(
4207            "entity:{}:{}",
4208            kind.trim().to_ascii_lowercase(),
4209            name.trim().to_ascii_lowercase()
4210        ),
4211    )
4212}
4213
4214fn semantic_concept_handle(label: &str) -> String {
4215    stable_handle(
4216        "gcon",
4217        &format!("concept:{}", label.trim().to_ascii_lowercase()),
4218    )
4219}
4220
4221fn summary_source_handles(
4222    summary: &summarize::Summary,
4223    file_node_by_path: &BTreeMap<String, String>,
4224    symbol_node_by_file_label: &BTreeMap<(String, String), String>,
4225) -> Vec<String> {
4226    let mut handles = Vec::new();
4227    if let Some(handle) = file_node_by_path.get(&summary.file_path) {
4228        handles.push(handle.clone());
4229    }
4230    if let Some(handle) =
4231        symbol_node_by_file_label.get(&(summary.file_path.clone(), summary.symbol_name.clone()))
4232        && !handles.iter().any(|existing| existing == handle)
4233    {
4234        handles.push(handle.clone());
4235    }
4236    handles
4237}
4238
4239fn semantic_entity_node(
4240    root: &Path,
4241    summary: &summarize::Summary,
4242    name: &str,
4243    kind: &str,
4244    description: &str,
4245    provenance: &GraphProvenance,
4246) -> SubstrateGraphNode {
4247    let handle = semantic_entity_handle(name, kind);
4248    let detail = if description.trim().is_empty() {
4249        format!("{kind} entity from cached summaries")
4250    } else {
4251        format!("{kind}: {description}")
4252    };
4253    SubstrateGraphNode::new(handle.clone(), "semantic_entity", name.to_string())
4254        .with_property("handle", handle)
4255        .with_property("ref_id", name.to_string())
4256        .with_property("detail", detail)
4257        .with_property("entity_kind", kind.to_string())
4258        .with_property("description", description.to_string())
4259        .with_property("source_file", summary.file_path.clone())
4260        .with_property("source_symbol", summary.symbol_name.clone())
4261        .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
4262        .with_property(
4263            "embedding",
4264            semantic_embedding_property(&format!("{name} {kind} {description}")),
4265        )
4266        .with_property(
4267            "expand",
4268            semantic_related_command(root, name, SemanticRelatedKind::Entity),
4269        )
4270        .with_provenance(provenance.clone())
4271}
4272
4273fn semantic_concept_node(
4274    root: &Path,
4275    summary: &summarize::Summary,
4276    label: &str,
4277    provenance: &GraphProvenance,
4278) -> SubstrateGraphNode {
4279    let handle = semantic_concept_handle(label);
4280    SubstrateGraphNode::new(handle.clone(), "semantic_concept", label.to_string())
4281        .with_property("handle", handle)
4282        .with_property("ref_id", label.to_string())
4283        .with_property("detail", "concept label from cached summaries".to_string())
4284        .with_property("source_file", summary.file_path.clone())
4285        .with_property("source_symbol", summary.symbol_name.clone())
4286        .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
4287        .with_property("embedding", semantic_embedding_property(label))
4288        .with_property(
4289            "expand",
4290            semantic_related_command(root, label, SemanticRelatedKind::Concept),
4291        )
4292        .with_provenance(provenance.clone())
4293}
4294
4295fn insert_semantic_edge(
4296    edge_map: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
4297    edge: SubstrateGraphEdge,
4298) {
4299    edge_map
4300        .entry((edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone()))
4301        .or_insert(edge);
4302}
4303
/// Appends semantic nodes and edges derived from cached summaries to a projection.
///
/// Reads every summary from `<root>/.tsift/summaries.db`; when that file is missing or holds
/// no summaries the function returns without touching `nodes` or `edges`. For each summary it
/// derives:
/// - one `semantic_entity` node per entity and a `mentions_entity` edge from each source handle;
/// - one `semantic_concept` node per non-blank concept label and a `mentions_concept` edge
///   from each source handle;
/// - a `tagged_concept` edge from every entity to every concept of the same summary;
/// - a `related_concept` edge for every pair of concepts of the same summary;
/// - a `semantic_relation` edge per relationship, creating an `unknown`-kind entity node for
///   an endpoint that is not among the summary's entities.
///
/// Nodes are deduplicated by id and edges by `(from, to, kind)`; the first occurrence wins.
/// Every appended row is stamped with a content-hash freshness marker.
///
/// # Errors
///
/// Fails when the summaries database cannot be opened or read, or when a row cannot be hashed.
fn append_summary_semantic_projection_rows(
    root: &Path,
    graph: &TraversalGraphBuild,
    provenance: &GraphProvenance,
    nodes: &mut Vec<SubstrateGraphNode>,
    edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<()> {
    let summaries_db = root.join(".tsift/summaries.db");
    if !summaries_db.exists() {
        return Ok(());
    }

    let summary_db = summarize::SummaryDb::open_read_only_resilient(&summaries_db)?;
    let summaries = summary_db.all()?;
    if summaries.is_empty() {
        return Ok(());
    }

    // Lookup tables from the traversal graph: file path -> file node handle, and
    // (file path, symbol label) -> symbol node handle.
    let file_node_by_path = graph
        .nodes
        .values()
        .filter(|node| node.kind == "file")
        .filter_map(|node| {
            node.path
                .as_ref()
                .map(|path| (path.clone(), node.handle.clone()))
        })
        .collect::<BTreeMap<_, _>>();
    let symbol_node_by_file_label = graph
        .nodes
        .values()
        .filter(|node| node.kind == "symbol")
        .filter_map(|node| {
            Some((
                (node.path.clone()?, node.label.clone()),
                node.handle.clone(),
            ))
        })
        .collect::<BTreeMap<_, _>>();

    // Deduplicating accumulators shared across all summaries.
    let mut semantic_nodes = BTreeMap::<String, SubstrateGraphNode>::new();
    let mut semantic_edges = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();

    for summary in &summaries {
        let source_handles =
            summary_source_handles(summary, &file_node_by_path, &symbol_node_by_file_label);
        // Lower-cased entity name -> node id, local to this summary; used to resolve
        // relationship endpoints below.
        let mut entity_ids_by_name = BTreeMap::<String, String>::new();

        if let Some(entities) = &summary.entities {
            for entity in entities {
                let node = semantic_entity_node(
                    root,
                    summary,
                    &entity.name,
                    &entity.kind,
                    &entity.description,
                    provenance,
                );
                let entity_id = node.id.clone();
                entity_ids_by_name.insert(entity.name.to_ascii_lowercase(), entity_id.clone());
                semantic_nodes.entry(entity_id.clone()).or_insert(node);

                for source_handle in &source_handles {
                    insert_semantic_edge(
                        &mut semantic_edges,
                        SubstrateGraphEdge::new(
                            source_handle.clone(),
                            entity_id.clone(),
                            "mentions_entity",
                        )
                        .with_property("label", format!("summary entity: {}", entity.name))
                        .with_property("source_file", summary.file_path.clone())
                        .with_provenance(provenance.clone()),
                    );
                }
            }
        }

        let mut concept_ids = Vec::new();
        if let Some(labels) = &summary.concept_labels {
            // Blank labels are skipped after trimming.
            for label in labels
                .iter()
                .map(|label| label.trim())
                .filter(|label| !label.is_empty())
            {
                let node = semantic_concept_node(root, summary, label, provenance);
                let concept_id = node.id.clone();
                semantic_nodes.entry(concept_id.clone()).or_insert(node);
                concept_ids.push(concept_id.clone());

                for source_handle in &source_handles {
                    insert_semantic_edge(
                        &mut semantic_edges,
                        SubstrateGraphEdge::new(
                            source_handle.clone(),
                            concept_id.clone(),
                            "mentions_concept",
                        )
                        .with_property("label", format!("summary concept: {label}"))
                        .with_property("source_file", summary.file_path.clone())
                        .with_provenance(provenance.clone()),
                    );
                }
            }
        }

        // Every entity of this summary is tagged with every concept of this summary.
        for entity_id in entity_ids_by_name.values() {
            for concept_id in &concept_ids {
                insert_semantic_edge(
                    &mut semantic_edges,
                    SubstrateGraphEdge::new(
                        entity_id.clone(),
                        concept_id.clone(),
                        "tagged_concept",
                    )
                    .with_property("label", "entity concept label".to_string())
                    .with_property("source_file", summary.file_path.clone())
                    .with_provenance(provenance.clone()),
                );
            }
        }

        // Link each pair of concepts once, from the earlier label to the later one.
        for idx in 0..concept_ids.len() {
            for next_idx in (idx + 1)..concept_ids.len() {
                insert_semantic_edge(
                    &mut semantic_edges,
                    SubstrateGraphEdge::new(
                        concept_ids[idx].clone(),
                        concept_ids[next_idx].clone(),
                        "related_concept",
                    )
                    .with_property("label", format!("co-occurs in {}", summary.symbol_name))
                    .with_property("source_file", summary.file_path.clone())
                    .with_provenance(provenance.clone()),
                );
            }
        }

        if let Some(relationships) = &summary.relationships {
            for relationship in relationships {
                // An endpoint that is not a declared entity of this summary gets a
                // placeholder entity node of kind "unknown".
                let from_id = entity_ids_by_name
                    .get(&relationship.from.to_ascii_lowercase())
                    .cloned()
                    .unwrap_or_else(|| {
                        let node = semantic_entity_node(
                            root,
                            summary,
                            &relationship.from,
                            "unknown",
                            "",
                            provenance,
                        );
                        let id = node.id.clone();
                        semantic_nodes.entry(id.clone()).or_insert(node);
                        id
                    });
                let to_id = entity_ids_by_name
                    .get(&relationship.to.to_ascii_lowercase())
                    .cloned()
                    .unwrap_or_else(|| {
                        let node = semantic_entity_node(
                            root,
                            summary,
                            &relationship.to,
                            "unknown",
                            "",
                            provenance,
                        );
                        let id = node.id.clone();
                        semantic_nodes.entry(id.clone()).or_insert(node);
                        id
                    });
                insert_semantic_edge(
                    &mut semantic_edges,
                    SubstrateGraphEdge::new(from_id, to_id, "semantic_relation")
                        .with_property("relationship_kind", relationship.kind.clone())
                        .with_property("label", relationship.kind.clone())
                        .with_property("source_file", summary.file_path.clone())
                        .with_property("source_symbol", summary.symbol_name.clone())
                        .with_provenance(provenance.clone()),
                );
            }
        }
    }

    // Emit in key order of the accumulators, stamping each row with its content hash.
    for node in semantic_nodes.into_values() {
        nodes.push(node_with_content_freshness(node)?);
    }
    for edge in semantic_edges.into_values() {
        edges.push(edge_with_content_freshness(edge)?);
    }

    Ok(())
}
4498
4499fn projection_content_hash(
4500    nodes: &[SubstrateGraphNode],
4501    edges: &[SubstrateGraphEdge],
4502) -> Result<String> {
4503    #[derive(Serialize)]
4504    struct Payload<'a> {
4505        version: &'static str,
4506        nodes: &'a [SubstrateGraphNode],
4507        edges: &'a [SubstrateGraphEdge],
4508    }
4509
4510    content_hash(&Payload {
4511        version: GRAPH_PROJECTION_VERSION,
4512        nodes,
4513        edges,
4514    })
4515}
4516
4517pub(crate) fn graph_projection_content_hash(projection: &GraphProjection) -> Option<String> {
4518    projection
4519        .nodes
4520        .iter()
4521        .find(|node| node.kind == GRAPH_PROJECTION_META_KIND)
4522        .and_then(|node| node.properties.get("content_hash").cloned())
4523}
4524
4525fn traversal_projection_from_graph(
4526    root: &Path,
4527    scope: Option<&str>,
4528    graph: &TraversalGraphBuild,
4529) -> Result<GraphProjection> {
4530    let provenance = GraphProvenance::new(
4531        "tsift.traverse",
4532        format!("{}:{}", root.display(), scope.unwrap_or("root")),
4533    );
4534    let mut nodes = Vec::with_capacity(graph.nodes.len() + 1);
4535    for node in graph.nodes.values() {
4536        let mut projected =
4537            SubstrateGraphNode::new(node.handle.clone(), node.kind.clone(), node.label.clone())
4538                .with_property("handle", node.handle.clone())
4539                .with_property("expand", node.expand.clone())
4540                .with_provenance(provenance.clone());
4541        if let Some(ref_id) = &node.ref_id {
4542            projected = projected.with_property("ref_id", ref_id.clone());
4543        }
4544        if let Some(path) = &node.path {
4545            projected = projected.with_property("path", path.clone());
4546        }
4547        if let Some(line) = node.line {
4548            projected = projected.with_property("line", line.to_string());
4549        }
4550        if let Some(detail) = &node.detail {
4551            projected = projected.with_property("detail", detail.clone());
4552        }
4553        for (key, value) in &node.properties {
4554            projected = projected.with_property(key.clone(), value.clone());
4555        }
4556        nodes.push(node_with_content_freshness(projected)?);
4557    }
4558
4559    let mut edges = Vec::with_capacity(graph.edges.len());
4560    for edge in &graph.edges {
4561        let mut projected =
4562            SubstrateGraphEdge::new(edge.from.clone(), edge.to.clone(), edge.relation.clone())
4563                .with_property("weight", edge.weight.to_string())
4564                .with_provenance(provenance.clone());
4565        if let Some(label) = &edge.label {
4566            projected = projected.with_property("label", label.clone());
4567        }
4568        edges.push(edge_with_content_freshness(projected)?);
4569    }
4570
4571    append_traversal_context_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
4572    append_summary_semantic_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
4573
4574    let projection_hash = projection_content_hash(&nodes, &edges)?;
4575    let meta = SubstrateGraphNode::new(
4576        graph_projection_meta_id(scope),
4577        GRAPH_PROJECTION_META_KIND,
4578        "tsift traversal projection",
4579    )
4580    .with_property("projection_version", GRAPH_PROJECTION_VERSION)
4581    .with_property("content_hash", projection_hash.clone())
4582    .with_property("root", root.to_string_lossy().to_string())
4583    .with_property("scope", scope.unwrap_or("root"))
4584    .with_property("node_count", graph.nodes.len().to_string())
4585    .with_property("edge_count", graph.edges.len().to_string())
4586    .with_provenance(provenance)
4587    .with_freshness(GraphFreshness::content_hash(projection_hash));
4588    nodes.push(meta);
4589
4590    Ok(GraphProjection { nodes, edges })
4591}
4592
4593#[allow(clippy::too_many_arguments)]
4594fn ensure_traversal_source_handle(
4595    root: &Path,
4596    provenance: &GraphProvenance,
4597    file_node_by_path: &BTreeMap<String, String>,
4598    node: &TraversalNode,
4599    budget: &ExplorationBudget,
4600    source_handle_by_node: &mut BTreeMap<String, String>,
4601    seen_windows: &mut BTreeMap<(String, usize, usize), String>,
4602    nodes: &mut Vec<SubstrateGraphNode>,
4603    edges: &mut Vec<SubstrateGraphEdge>,
4604) -> Result<Option<String>> {
4605    if let Some(handle) = source_handle_by_node.get(&node.handle) {
4606        return Ok(Some(handle.clone()));
4607    }
4608    let Some(window) = exploration_source_window_for_node(root, node, budget) else {
4609        return Ok(None);
4610    };
4611    let window_key = (window.file.clone(), window.start, window.end);
4612    let handle = if let Some(handle) = seen_windows.get(&window_key) {
4613        handle.clone()
4614    } else {
4615        let label = format!("{}:{}-{}", window.file, window.start, window.end);
4616        let projected = SubstrateGraphNode::new(window.handle.clone(), "source_handle", label)
4617            .with_property("handle", window.handle.clone())
4618            .with_property("file", window.file.clone())
4619            .with_property("start", window.start.to_string())
4620            .with_property("end", window.end.to_string())
4621            .with_property("reason", window.reason.clone())
4622            .with_property("expand", window.expand.clone())
4623            .with_provenance(provenance.clone());
4624        nodes.push(node_with_content_freshness(projected)?);
4625
4626        if let Some(file_handle) = file_node_by_path.get(&window.file) {
4627            let edge = SubstrateGraphEdge::new(
4628                window.handle.clone(),
4629                file_handle.clone(),
4630                "expands_source",
4631            )
4632            .with_property("label", window.reason.clone())
4633            .with_provenance(provenance.clone());
4634            edges.push(edge_with_content_freshness(edge)?);
4635        }
4636        if node.kind != "file" {
4637            let edge = SubstrateGraphEdge::new(
4638                window.handle.clone(),
4639                node.handle.clone(),
4640                "anchors_source",
4641            )
4642            .with_property("label", window.reason.clone())
4643            .with_provenance(provenance.clone());
4644            edges.push(edge_with_content_freshness(edge)?);
4645        }
4646        seen_windows.insert(window_key, window.handle.clone());
4647        window.handle
4648    };
4649    source_handle_by_node.insert(node.handle.clone(), handle.clone());
4650    Ok(Some(handle))
4651}
4652
4653fn push_traversal_backlog_target_handles<'a>(
4654    backlog: &TraversalNode,
4655    edges_by_from: &BTreeMap<&'a str, Vec<&'a TraversalEdge>>,
4656    node_by_handle: &BTreeMap<&'a str, &'a TraversalNode>,
4657    max_handles: usize,
4658    seen_target_nodes: &mut BTreeSet<String>,
4659    target_node_handles: &mut Vec<String>,
4660) {
4661    for edge in edges_by_from
4662        .get(backlog.handle.as_str())
4663        .into_iter()
4664        .flatten()
4665        .filter(|edge| edge.relation == "mentions")
4666    {
4667        let Some(target_node) = node_by_handle.get(edge.to.as_str()) else {
4668            continue;
4669        };
4670        if !matches!(target_node.kind.as_str(), "file" | "symbol" | "route") {
4671            continue;
4672        }
4673        if target_node
4674            .path
4675            .as_deref()
4676            .zip(backlog.path.as_deref())
4677            .is_some_and(|(target_path, backlog_path)| {
4678                target_path == backlog_path && target_path.ends_with(".md")
4679            })
4680        {
4681            continue;
4682        }
4683        if seen_target_nodes.insert(target_node.handle.clone()) {
4684            target_node_handles.push(target_node.handle.clone());
4685        }
4686        if target_node_handles.len() >= max_handles {
4687            break;
4688        }
4689    }
4690}
4691
/// Appends `worker_context` rows (and the source-window rows they depend on)
/// for every `backlog`, `job_packet` and `worker_result` node in `graph`.
///
/// For each such node the function:
/// 1. collects target nodes — the `file`/`symbol`/`route` nodes reached through
///    `mentions` edges (for `job_packet` nodes, via the nodes they `targets`);
/// 2. falls back to the node itself (or the targeted nodes) when no mention
///    target was found;
/// 3. resolves each target to a source-window handle through
///    `ensure_traversal_source_handle`, which pushes the window rows on demand;
/// 4. pushes one `worker_context` node, a `requests_context` edge from the
///    originating node, and one `scopes_source` edge per source window.
///
/// Nodes for which no source window can be resolved produce no rows. Errors
/// from the freshness helpers and from `ensure_traversal_source_handle` are
/// propagated.
fn append_traversal_context_projection_rows(
    root: &Path,
    graph: &TraversalGraphBuild,
    provenance: &GraphProvenance,
    nodes: &mut Vec<SubstrateGraphNode>,
    edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<()> {
    let budget = exploration_budget_for_counts(graph.nodes.len(), graph.edges.len());
    // path -> handle, for `file` nodes that carry a path.
    let file_node_by_path = graph
        .nodes
        .values()
        .filter(|node| node.kind == "file")
        .filter_map(|node| {
            node.path
                .as_ref()
                .map(|path| (path.clone(), node.handle.clone()))
        })
        .collect::<BTreeMap<_, _>>();

    let node_by_handle = graph
        .nodes
        .values()
        .map(|node| (node.handle.as_str(), node))
        .collect::<BTreeMap<_, _>>();
    // Outgoing edges grouped by source handle.
    let mut edges_by_from = BTreeMap::<&str, Vec<&TraversalEdge>>::new();
    for edge in &graph.edges {
        edges_by_from
            .entry(edge.from.as_str())
            .or_default()
            .push(edge);
    }
    // Heaviest edges first; relation and target break ties so the order is
    // deterministic.
    for rows in edges_by_from.values_mut() {
        rows.sort_by(|left, right| {
            right
                .weight
                .cmp(&left.weight)
                .then(left.relation.cmp(&right.relation))
                .then(left.to.cmp(&right.to))
        });
    }

    // Shared across all iterations so identical windows / nodes are projected once.
    let mut seen_windows = BTreeMap::<(String, usize, usize), String>::new();
    let mut source_handle_by_node = BTreeMap::<String, String>::new();

    // Number of contexts emitted from real mention targets (not fallbacks),
    // capped at the smaller of the relationship limit and 8.
    let mut code_context_count = 0usize;
    let code_context_limit = budget.relationship_limit.min(8);
    for node in graph.nodes.values() {
        if !matches!(
            node.kind.as_str(),
            "backlog" | "job_packet" | "worker_result"
        ) {
            continue;
        }
        let mut target_node_handles = Vec::new();
        let mut fallback_target_handles = Vec::new();
        let mut seen_target_nodes = BTreeSet::new();
        if node.kind == "backlog" || node.kind == "worker_result" {
            // These nodes use their own `mentions` edges; the fallback is the node itself.
            push_traversal_backlog_target_handles(
                node,
                &edges_by_from,
                &node_by_handle,
                budget.max_source_windows,
                &mut seen_target_nodes,
                &mut target_node_handles,
            );
            fallback_target_handles.push(node.handle.clone());
        } else {
            // `job_packet`: go through each node it `targets` and gather that
            // node's mention targets; the targeted nodes are the fallback.
            for edge in edges_by_from
                .get(node.handle.as_str())
                .into_iter()
                .flatten()
                .filter(|edge| edge.relation == "targets")
            {
                let Some(backlog) = node_by_handle.get(edge.to.as_str()) else {
                    continue;
                };
                fallback_target_handles.push(backlog.handle.clone());
                push_traversal_backlog_target_handles(
                    backlog,
                    &edges_by_from,
                    &node_by_handle,
                    budget.max_source_windows,
                    &mut seen_target_nodes,
                    &mut target_node_handles,
                );
                if target_node_handles.len() >= budget.max_source_windows {
                    break;
                }
            }
            // A packet that targets nothing resolvable yields no context.
            if fallback_target_handles.is_empty() {
                continue;
            }
        }
        // `code_context` records whether real mention targets were found
        // before the fallback list possibly replaces them.
        let code_context = !target_node_handles.is_empty();
        if target_node_handles.is_empty() {
            target_node_handles = dedupe_preserve_order(fallback_target_handles);
        } else if code_context_count >= code_context_limit {
            // The code-context cap only applies to mention-derived contexts.
            continue;
        }

        let mut worker_source_handles = Vec::new();
        let mut seen_worker_handles = BTreeSet::new();
        for target_handle in target_node_handles {
            if worker_source_handles.len() >= budget.max_source_windows {
                break;
            }
            let Some(target_node) = node_by_handle.get(target_handle.as_str()) else {
                continue;
            };
            let Some(handle) = ensure_traversal_source_handle(
                root,
                provenance,
                &file_node_by_path,
                target_node,
                &budget,
                &mut source_handle_by_node,
                &mut seen_windows,
                nodes,
                edges,
            )?
            else {
                continue;
            };
            // Different targets can resolve to the same window; keep each once.
            if seen_worker_handles.insert(handle.clone()) {
                worker_source_handles.push(handle);
            }
        }
        if worker_source_handles.is_empty() {
            continue;
        }
        // Nodes without a path use the root as the context-pack target.
        let target = node
            .path
            .clone()
            .unwrap_or_else(|| root.to_string_lossy().to_string());
        let summary = node.detail.clone().unwrap_or_else(|| node.label.clone());
        let handle = stable_handle("xwrk", &format!("{}:{}:{}", target, node.handle, summary));
        let projected = SubstrateGraphNode::new(handle.clone(), "worker_context", summary.clone())
            .with_property("handle", handle.clone())
            .with_property("target", target.clone())
            .with_property("summary", summary)
            .with_property(
                "source_handle_count",
                worker_source_handles.len().to_string(),
            )
            .with_property(
                "expand",
                format!(
                    "tsift --envelope context-pack {} --budget normal",
                    shell_quote(&target)
                ),
            )
            .with_provenance(provenance.clone());
        nodes.push(node_with_content_freshness(projected)?);

        let request_edge =
            SubstrateGraphEdge::new(node.handle.clone(), handle.clone(), "requests_context")
                .with_property("label", "bounded worker context".to_string())
                .with_provenance(provenance.clone());
        edges.push(edge_with_content_freshness(request_edge)?);

        for source_handle in &worker_source_handles {
            let scope_edge =
                SubstrateGraphEdge::new(handle.clone(), source_handle.clone(), "scopes_source")
                    .with_property("label", "bounded worker source window".to_string())
                    .with_provenance(provenance.clone());
            edges.push(edge_with_content_freshness(scope_edge)?);
        }
        // Counted only after the context rows were actually emitted.
        if code_context {
            code_context_count += 1;
        }
    }

    Ok(())
}
4866
4867fn traversal_node_from_graph_node(root: &Path, node: SubstrateGraphNode) -> TraversalNode {
4868    let handle = node
4869        .properties
4870        .get("handle")
4871        .cloned()
4872        .unwrap_or_else(|| node.id.clone());
4873    TraversalNode {
4874        expand: node
4875            .properties
4876            .get("expand")
4877            .cloned()
4878            .unwrap_or_else(|| traversal_expand_command(root, &handle)),
4879        handle,
4880        kind: node.kind,
4881        label: node.label,
4882        ref_id: node.properties.get("ref_id").cloned(),
4883        path: node.properties.get("path").cloned(),
4884        line: node
4885            .properties
4886            .get("line")
4887            .and_then(|value| value.parse::<i64>().ok()),
4888        detail: node.properties.get("detail").cloned(),
4889        properties: node.properties,
4890    }
4891}
4892
4893fn traversal_graph_from_store(root: &Path, store: &impl GraphStore) -> Result<TraversalGraphBuild> {
4894    let mut graph = TraversalGraphBuild::default();
4895    for node in store.all_nodes()? {
4896        if node.kind == GRAPH_PROJECTION_META_KIND {
4897            continue;
4898        }
4899        graph.add_node(traversal_node_from_graph_node(root, node));
4900    }
4901    for edge in store.all_edges()? {
4902        graph.add_edge(
4903            &edge.from_id,
4904            &edge.to_id,
4905            &edge.kind,
4906            edge.properties.get("label").cloned(),
4907            edge.properties
4908                .get("weight")
4909                .and_then(|value| value.parse::<usize>().ok())
4910                .unwrap_or(1),
4911        );
4912    }
4913    Ok(graph)
4914}
4915
4916pub(crate) fn convex_rows_from_graph_store(
4917    store: &impl GraphStore,
4918) -> Result<ConvexProjectionRows> {
4919    Ok(GraphProjection {
4920        nodes: store.all_nodes()?,
4921        edges: store.all_edges()?,
4922    }
4923    .to_convex_rows())
4924}
4925
/// Description of one index on a Convex table: which table it belongs to,
/// the index name, and the indexed fields in order.
///
/// `convex_required_indexes` returns the set this tool expects; the same shape
/// is also deserialized from snapshot metadata.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
struct ConvexRequiredIndex {
    // Table the index is defined on (e.g. "nodes", "edges").
    table: String,
    // Index name (e.g. "by_external_id").
    name: String,
    // Indexed field names, in index order.
    fields: Vec<String>,
}
4932
/// Serializable record of one chunk of a Convex sync, as listed in
/// `ConvexSyncReport::chunks`.
// NOTE(review): the code that fills these fields is outside this section, so
// the field notes below follow the field names only — confirm against it.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncChunk {
    // Name of the operation this chunk belongs to.
    operation: String,
    // Chunk number within the operation.
    chunk: usize,
    // Number of entries in the chunk; presumably equals `keys.len()` — TODO confirm.
    count: usize,
    // Keys of the rows covered by the chunk.
    keys: Vec<String>,
    max_attempts: usize,
    retry_policy: String,
}
4942
/// Serializable summary of how the Convex transport was configured and used.
///
/// Built by `ConvexHttpTransport::summary`; it reports environment variable
/// names and whether values were present, never the values themselves.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportSummary {
    // Name of the environment variable that can supply the endpoint URL.
    endpoint_env: String,
    endpoint_configured: bool,
    // Name of the environment variable that can supply the auth token.
    auth_token_env: String,
    // True when a non-blank auth token was found.
    auth_configured: bool,
    remote_snapshot: bool,
    applied_chunks: usize,
}
4952
/// Serializable record of one transport attempt, as listed in
/// `ConvexSyncReport::receipts`.
// NOTE(review): receipts are produced outside this section; the meaning of
// `status` values should be confirmed against that code.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportReceipt {
    operation: String,
    chunk: usize,
    attempt: usize,
    status: String,
    // Optional free-form message accompanying the status.
    message: Option<String>,
}
4961
/// JSON body sent to the Convex graph endpoint by `ConvexHttpTransport::post`.
///
/// Field names are serialized in camelCase. `projection_meta_id`, `cursor`
/// and `limit` are left out of the JSON entirely when `None`;
/// `projection_hash` has no such attribute and is always serialized.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportRequest<'a> {
    // Operation name, e.g. "snapshot", "snapshot_meta", "snapshot_nodes_page".
    operation: &'a str,
    chunk: usize,
    projection_version: &'a str,
    projection_hash: Option<&'a str>,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_meta_id: Option<&'a str>,
    node_rows: Vec<ConvexNodeRow>,
    edge_rows: Vec<ConvexEdgeRow>,
    keys: Vec<String>,
    // Pagination cursor for the paged snapshot operations.
    #[serde(skip_serializing_if = "Option::is_none")]
    cursor: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    limit: Option<usize>,
}
4979
/// JSON body returned by the Convex graph endpoint, with camelCase field names.
///
/// Which optional part is populated depends on the operation: the legacy
/// `snapshot` call reads `rows` and `snapshot_meta` reads `meta`. `page` is
/// presumably filled by the paged snapshot operations — the code that reads
/// it is outside this section.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportResponse {
    // A value of "error" is treated as a failure by the snapshot_meta caller.
    status: Option<String>,
    message: Option<String>,
    rows: Option<ConvexProjectionRows>,
    #[serde(default)]
    meta: Option<ConvexSnapshotMeta>,
    #[serde(default)]
    page: Option<ConvexSnapshotPage>,
}
4991
/// Metadata returned by the `snapshot_meta` operation (camelCase on the wire).
///
/// Every field is optional on the wire and defaults to empty/`None`.
/// `projection_hash` is compared against the local projection hash to decide
/// whether the row pages need to be fetched at all.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotMeta {
    // Captured for completeness/debugging; not currently consumed by the
    // freshness diff (indexes are already validated against the required set
    // via `convex_required_indexes`, and `page_size` is informational only).
    #[serde(default)]
    #[allow(dead_code)]
    indexes: Vec<ConvexRequiredIndex>,
    // Remote node row count; used only as a capacity hint when paging nodes.
    #[serde(default)]
    #[allow(dead_code)]
    node_count: Option<usize>,
    #[serde(default)]
    #[allow(dead_code)]
    edge_count: Option<usize>,
    // Content hash of the remote projection, when the backend reports one.
    #[serde(default)]
    projection_hash: Option<String>,
    #[serde(default)]
    #[allow(dead_code)]
    page_size: Option<usize>,
}
5013
/// Paginated snapshot page response. `rows` is either node rows or edge rows
/// depending on which operation was called; we deserialize as raw values to
/// keep the transport struct shared between both shapes, then narrow per call
/// site.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotPage {
    // Untyped rows of this page; the caller converts them to node or edge rows.
    rows: Vec<serde_json::Value>,
    // Cursor for the following page; defaults to `None` when the field is absent.
    #[serde(default)]
    next_cursor: Option<String>,
}
5025
/// Serializable result of comparing the local projection with a Convex
/// snapshot, as embedded in `ConvexSyncReport::freshness`.
///
/// `ConvexProjectionFreshness::current` builds the "nothing differs" value.
// NOTE(review): the diff that fills the missing/stale lists is outside this
// section; confirm the exact meaning of each list against it.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexProjectionFreshness {
    // Overall verdict; "current" when nothing differs.
    status: String,
    fail_closed: bool,
    // Content hash of the local projection, if known.
    local_hash: Option<String>,
    // Content hash reported for the snapshot, if known.
    snapshot_hash: Option<String>,
    missing_nodes: Vec<String>,
    stale_nodes: Vec<String>,
    missing_edges: Vec<String>,
    stale_edges: Vec<String>,
    diagnostics: Vec<String>,
}
5038
/// Name of the environment variable consulted for the Convex graph endpoint
/// when no explicit endpoint is passed to `ConvexHttpTransport::from_options`.
const DEFAULT_CONVEX_GRAPH_URL_ENV: &str = "TSIFT_CONVEX_GRAPH_URL";
5040
5041impl ConvexProjectionFreshness {
5042    fn current(local_hash: Option<String>, snapshot_hash: Option<String>) -> Self {
5043        Self {
5044            status: "current".to_string(),
5045            fail_closed: false,
5046            local_hash,
5047            snapshot_hash,
5048            missing_nodes: Vec::new(),
5049            stale_nodes: Vec::new(),
5050            missing_edges: Vec::new(),
5051            stale_edges: Vec::new(),
5052            diagnostics: Vec::new(),
5053        }
5054    }
5055}
5056
/// Serializable report describing one Convex sync run (or dry run).
///
/// `scope` is left out of the serialized output when `None`.
// NOTE(review): the report is assembled outside this section; the field notes
// below follow the field names and types only.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    graph_db: String,
    dry_run: bool,
    projection_version: String,
    projection_hash: Option<String>,
    // Indexes the Convex backend is expected to provide.
    required_indexes: Vec<ConvexRequiredIndex>,
    // Rows to write.
    node_upserts: Vec<ConvexNodeRow>,
    edge_upserts: Vec<ConvexEdgeRow>,
    // Keys of rows to remove.
    node_tombstones: Vec<String>,
    edge_tombstones: Vec<String>,
    chunks: Vec<ConvexSyncChunk>,
    freshness: ConvexProjectionFreshness,
    // Transport summary, when one is attached to the report.
    transport: Option<ConvexTransportSummary>,
    receipts: Vec<ConvexTransportReceipt>,
    diagnostics: Vec<String>,
    warnings: Vec<String>,
}
5078
5079fn convex_required_indexes() -> Vec<ConvexRequiredIndex> {
5080    vec![
5081        ConvexRequiredIndex {
5082            table: "nodes".to_string(),
5083            name: "by_external_id".to_string(),
5084            fields: vec!["externalId".to_string()],
5085        },
5086        ConvexRequiredIndex {
5087            table: "nodes".to_string(),
5088            name: "by_kind".to_string(),
5089            fields: vec!["kind".to_string()],
5090        },
5091        ConvexRequiredIndex {
5092            table: "edges".to_string(),
5093            name: "by_edge_key".to_string(),
5094            fields: vec!["edgeKey".to_string()],
5095        },
5096        ConvexRequiredIndex {
5097            table: "edges".to_string(),
5098            name: "by_from_kind".to_string(),
5099            fields: vec!["fromExternalId".to_string(), "kind".to_string()],
5100        },
5101        ConvexRequiredIndex {
5102            table: "edges".to_string(),
5103            name: "by_to_kind".to_string(),
5104            fields: vec!["toExternalId".to_string(), "kind".to_string()],
5105        },
5106    ]
5107}
5108
5109pub(crate) fn load_convex_projection_rows(path: &Path) -> Result<ConvexProjectionRows> {
5110    let content = fs::read_to_string(path)
5111        .with_context(|| format!("reading Convex projection snapshot {}", path.display()))?;
5112    serde_json::from_str(&content)
5113        .with_context(|| format!("parsing Convex projection snapshot {}", path.display()))
5114}
5115
5116fn convex_projection_row_diagnostics(rows: &ConvexProjectionRows) -> Vec<String> {
5117    let mut diagnostics = Vec::new();
5118    let mut node_counts = BTreeMap::<&str, usize>::new();
5119    for row in &rows.nodes {
5120        *node_counts.entry(row.external_id.as_str()).or_default() += 1;
5121    }
5122    for (external_id, count) in node_counts.iter().filter(|(_, count)| **count > 1) {
5123        diagnostics.push(format!(
5124            "Convex snapshot contains duplicate node externalId {external_id} ({count} rows)"
5125        ));
5126    }
5127
5128    let node_ids = node_counts.keys().copied().collect::<BTreeSet<_>>();
5129    let mut edge_counts = BTreeMap::<&str, usize>::new();
5130    for edge in &rows.edges {
5131        *edge_counts.entry(edge.edge_key.as_str()).or_default() += 1;
5132        if !node_ids.contains(edge.from_external_id.as_str()) {
5133            diagnostics.push(format!(
5134                "Convex snapshot edge {} references missing from node {}",
5135                edge.edge_key, edge.from_external_id
5136            ));
5137        }
5138        if !node_ids.contains(edge.to_external_id.as_str()) {
5139            diagnostics.push(format!(
5140                "Convex snapshot edge {} references missing to node {}",
5141                edge.edge_key, edge.to_external_id
5142            ));
5143        }
5144        let expected_key =
5145            ConvexEdgeRow::stable_key(&edge.from_external_id, &edge.to_external_id, &edge.kind);
5146        if edge.edge_key != expected_key {
5147            diagnostics.push(format!(
5148                "Convex snapshot edge {} has non-canonical key; expected {} for ({}, {}, {})",
5149                edge.edge_key, expected_key, edge.from_external_id, edge.kind, edge.to_external_id
5150            ));
5151        }
5152    }
5153    for (edge_key, count) in edge_counts.iter().filter(|(_, count)| **count > 1) {
5154        diagnostics.push(format!(
5155            "Convex snapshot contains duplicate edgeKey {edge_key} ({count} rows)"
5156        ));
5157    }
5158    diagnostics
5159}
5160
5161pub(crate) fn validate_convex_projection_rows(rows: &ConvexProjectionRows) -> Result<()> {
5162    let diagnostics = convex_projection_row_diagnostics(rows);
5163    if diagnostics.is_empty() {
5164        Ok(())
5165    } else {
5166        bail!("{}", diagnostics.join("; "))
5167    }
5168}
5169
/// HTTP client configuration for talking to the Convex graph endpoint.
///
/// Created through `ConvexHttpTransport::from_options`.
pub(crate) struct ConvexHttpTransport {
    // URL every request is POSTed to.
    endpoint: String,
    // Name of the environment variable the token was looked up in.
    auth_token_env: String,
    // Bearer token, when the environment variable held a non-blank value.
    auth_token: Option<String>,
}
5175
5176impl ConvexHttpTransport {
5177    fn from_options(endpoint: Option<&str>, auth_token_env: &str) -> Result<Self> {
5178        let endpoint = endpoint
5179            .map(str::to_string)
5180            .or_else(|| env::var(DEFAULT_CONVEX_GRAPH_URL_ENV).ok())
5181            .context("Convex transport requires --endpoint or TSIFT_CONVEX_GRAPH_URL")?;
5182        let auth_token = env::var(auth_token_env)
5183            .ok()
5184            .filter(|value| !value.trim().is_empty());
5185        Ok(Self {
5186            endpoint,
5187            auth_token_env: auth_token_env.to_string(),
5188            auth_token,
5189        })
5190    }
5191
5192    fn summary(&self, remote_snapshot: bool, applied_chunks: usize) -> ConvexTransportSummary {
5193        ConvexTransportSummary {
5194            endpoint_env: DEFAULT_CONVEX_GRAPH_URL_ENV.to_string(),
5195            endpoint_configured: true,
5196            auth_token_env: self.auth_token_env.clone(),
5197            auth_configured: self.auth_token.is_some(),
5198            remote_snapshot,
5199            applied_chunks,
5200        }
5201    }
5202
5203    fn post(&self, request: &ConvexTransportRequest<'_>) -> Result<ConvexTransportResponse> {
5204        let mut builder = ureq::post(&self.endpoint);
5205        if let Some(token) = &self.auth_token {
5206            builder = builder.header("Authorization", &format!("Bearer {token}"));
5207        }
5208        builder
5209            .send_json(request)
5210            .with_context(|| format!("calling Convex graph transport {}", self.endpoint))?
5211            .body_mut()
5212            .read_json::<ConvexTransportResponse>()
5213            .with_context(|| format!("parsing Convex graph transport response {}", self.endpoint))
5214    }
5215
5216    /// Fetch a full snapshot of the Convex graph backend.
5217    ///
5218    /// Uses the paginated `snapshot_meta` + `snapshot_nodes_page` +
5219    /// `snapshot_edges_page` triplet so the call works on tables larger than
5220    /// ~5k rows (the single-shot `snapshot` query hits Convex's 15s per-request
5221    /// syscall budget at that scale; see `#convexsnapshotscale`).
5222    ///
5223    /// Falls back to the legacy single-shot `snapshot` operation if the
5224    /// backend doesn't recognize `snapshot_meta` (older deployments that
5225    /// haven't redeployed the new schema).
5226    fn fetch_snapshot(
5227        &self,
5228        projection_version: &str,
5229        scope: Option<&str>,
5230        local_hash: Option<&str>,
5231        local_rows: Option<&ConvexProjectionRows>,
5232    ) -> Result<(ConvexProjectionRows, Vec<String>)> {
5233        match self.fetch_snapshot_paginated(projection_version, scope, local_hash, local_rows) {
5234            Ok(rows) => Ok(rows),
5235            Err(err) => {
5236                // Only fall through to the legacy path if the failure looks
5237                // like "operation unknown" (older backend). Any other failure
5238                // (HTTP timeout, deserialization mismatch) should surface so
5239                // the operator sees the real cause.
5240                let msg = format!("{err:#}");
5241                let is_unknown_op = msg.contains("unknown operation")
5242                    || msg.contains("snapshot_meta")
5243                    || msg.contains("404");
5244                if !is_unknown_op {
5245                    return Err(err);
5246                }
5247                self.fetch_snapshot_legacy(projection_version)
5248                    .map(|rows| (rows, Vec::new()))
5249            }
5250        }
5251    }
5252
5253    fn fetch_snapshot_legacy(&self, projection_version: &str) -> Result<ConvexProjectionRows> {
5254        let response = self.post(&ConvexTransportRequest {
5255            operation: "snapshot",
5256            chunk: 0,
5257            projection_version,
5258            projection_hash: None,
5259            projection_meta_id: None,
5260            node_rows: Vec::new(),
5261            edge_rows: Vec::new(),
5262            keys: Vec::new(),
5263            cursor: None,
5264            limit: None,
5265        })?;
5266        response
5267            .rows
5268            .context("Convex snapshot response did not include rows")
5269    }
5270
5271    fn fetch_snapshot_paginated(
5272        &self,
5273        projection_version: &str,
5274        scope: Option<&str>,
5275        local_hash: Option<&str>,
5276        local_rows: Option<&ConvexProjectionRows>,
5277    ) -> Result<(ConvexProjectionRows, Vec<String>)> {
5278        let projection_meta_id = graph_projection_meta_id(scope);
5279        let meta_response = self.post(&ConvexTransportRequest {
5280            operation: "snapshot_meta",
5281            chunk: 0,
5282            projection_version,
5283            projection_hash: None,
5284            projection_meta_id: Some(&projection_meta_id),
5285            node_rows: Vec::new(),
5286            edge_rows: Vec::new(),
5287            keys: Vec::new(),
5288            cursor: None,
5289            limit: None,
5290        })?;
5291        if matches!(meta_response.status.as_deref(), Some("error")) {
5292            anyhow::bail!(
5293                "Convex snapshot_meta returned error: {}",
5294                meta_response.message.unwrap_or_default()
5295            );
5296        }
5297        let meta = meta_response
5298            .meta
5299            .context("Convex snapshot_meta response did not include meta")?;
5300        if let (Some(remote_hash), Some(local_hash), Some(local_rows)) =
5301            (meta.projection_hash.as_deref(), local_hash, local_rows)
5302            && remote_hash == local_hash
5303        {
5304            return Ok((
5305                local_rows.clone(),
5306                vec![
5307                    "remote projection hash matched local graph; skipped full row-page snapshot diff"
5308                        .to_string(),
5309                ],
5310            ));
5311        }
5312
5313        let mut nodes: Vec<ConvexNodeRow> = Vec::with_capacity(meta.node_count.unwrap_or_default());
5314        let mut node_cursor: Option<String> = None;
5315        loop {
5316            let response = self.post(&ConvexTransportRequest {
5317                operation: "snapshot_nodes_page",
5318                chunk: 0,
5319                projection_version,
5320                projection_hash: None,
5321                projection_meta_id: None,
5322                node_rows: Vec::new(),
5323                edge_rows: Vec::new(),
5324                keys: Vec::new(),
5325                cursor: node_cursor.clone(),
5326                limit: None,
5327            })?;
5328            let page = response
5329                .page
5330                .context("Convex snapshot_nodes_page response did not include page")?;
5331            for raw in page.rows {
5332                let row: ConvexNodeRow =
5333                    serde_json::from_value(raw).context("decoding Convex snapshot node row")?;
5334                nodes.push(row);
5335            }
5336            match page.next_cursor {
5337                Some(next) => node_cursor = Some(next),
5338                None => break,
5339            }
5340        }
5341
5342        let mut edges: Vec<ConvexEdgeRow> = Vec::with_capacity(meta.edge_count.unwrap_or_default());
5343        let mut edge_cursor: Option<String> = None;
5344        loop {
5345            let response = self.post(&ConvexTransportRequest {
5346                operation: "snapshot_edges_page",
5347                chunk: 0,
5348                projection_version,
5349                projection_hash: None,
5350                projection_meta_id: None,
5351                node_rows: Vec::new(),
5352                edge_rows: Vec::new(),
5353                keys: Vec::new(),
5354                cursor: edge_cursor.clone(),
5355                limit: None,
5356            })?;
5357            let page = response
5358                .page
5359                .context("Convex snapshot_edges_page response did not include page")?;
5360            for raw in page.rows {
5361                let row: ConvexEdgeRow =
5362                    serde_json::from_value(raw).context("decoding Convex snapshot edge row")?;
5363                edges.push(row);
5364            }
5365            match page.next_cursor {
5366                Some(next) => edge_cursor = Some(next),
5367                None => break,
5368            }
5369        }
5370
5371        Ok((ConvexProjectionRows { nodes, edges }, Vec::new()))
5372    }
5373
5374    fn apply_chunk(
5375        &self,
5376        report: &ConvexSyncReport,
5377        chunk: &ConvexSyncChunk,
5378    ) -> Result<ConvexTransportReceipt> {
5379        let node_rows = if chunk.operation == "upsert_nodes" {
5380            report
5381                .node_upserts
5382                .iter()
5383                .filter(|row| chunk.keys.contains(&row.external_id))
5384                .cloned()
5385                .collect()
5386        } else {
5387            Vec::new()
5388        };
5389        let edge_rows = if chunk.operation == "upsert_edges" {
5390            report
5391                .edge_upserts
5392                .iter()
5393                .filter(|row| chunk.keys.contains(&row.edge_key))
5394                .cloned()
5395                .collect()
5396        } else {
5397            Vec::new()
5398        };
5399        let request = ConvexTransportRequest {
5400            operation: &chunk.operation,
5401            chunk: chunk.chunk,
5402            projection_version: &report.projection_version,
5403            projection_hash: report.projection_hash.as_deref(),
5404            projection_meta_id: None,
5405            node_rows,
5406            edge_rows,
5407            keys: chunk.keys.clone(),
5408            cursor: None,
5409            limit: None,
5410        };
5411        let mut last_error = None;
5412        for attempt in 1..=chunk.max_attempts {
5413            match self.post(&request) {
5414                Ok(response) => {
5415                    return Ok(ConvexTransportReceipt {
5416                        operation: chunk.operation.clone(),
5417                        chunk: chunk.chunk,
5418                        attempt,
5419                        status: response.status.unwrap_or_else(|| "ok".to_string()),
5420                        message: response.message,
5421                    });
5422                }
5423                Err(err) => {
5424                    last_error = Some(err);
5425                    if attempt < chunk.max_attempts {
5426                        std::thread::sleep(Duration::from_millis(100 * attempt as u64));
5427                    }
5428                }
5429            }
5430        }
5431        Err(last_error.unwrap_or_else(|| anyhow::anyhow!("Convex transport chunk failed")))
5432            .with_context(|| format!("applying Convex {} chunk {}", chunk.operation, chunk.chunk))
5433    }
5434}
5435
5436fn convex_projection_hash(rows: &ConvexProjectionRows, scope: Option<&str>) -> Option<String> {
5437    let meta_id = graph_projection_meta_id(scope);
5438    rows.nodes
5439        .iter()
5440        .find(|row| row.external_id == meta_id && row.kind == GRAPH_PROJECTION_META_KIND)
5441        .and_then(|row| row.properties.get("content_hash").cloned())
5442}
5443
5444fn convex_projection_freshness(
5445    local: &ConvexProjectionRows,
5446    snapshot: Option<&ConvexProjectionRows>,
5447    scope: Option<&str>,
5448) -> ConvexProjectionFreshness {
5449    let local_hash = convex_projection_hash(local, scope);
5450    let Some(snapshot) = snapshot else {
5451        return ConvexProjectionFreshness {
5452            status: "unchecked".to_string(),
5453            fail_closed: false,
5454            local_hash,
5455            snapshot_hash: None,
5456            missing_nodes: Vec::new(),
5457            stale_nodes: Vec::new(),
5458            missing_edges: Vec::new(),
5459            stale_edges: Vec::new(),
5460            diagnostics: vec![
5461                "no Convex snapshot supplied; sync output is a local dry-run plan".to_string(),
5462            ],
5463        };
5464    };
5465
5466    let snapshot_hash = convex_projection_hash(snapshot, scope);
5467    let snapshot_nodes = snapshot
5468        .nodes
5469        .iter()
5470        .map(|row| (row.external_id.as_str(), row))
5471        .collect::<BTreeMap<_, _>>();
5472    let snapshot_edges = snapshot
5473        .edges
5474        .iter()
5475        .map(|row| (row.edge_key.as_str(), row))
5476        .collect::<BTreeMap<_, _>>();
5477
5478    let mut missing_nodes = Vec::new();
5479    let mut stale_nodes = Vec::new();
5480    for row in &local.nodes {
5481        match snapshot_nodes.get(row.external_id.as_str()) {
5482            Some(snapshot_row) if *snapshot_row == row => {}
5483            Some(_) => stale_nodes.push(row.external_id.clone()),
5484            None => missing_nodes.push(row.external_id.clone()),
5485        }
5486    }
5487
5488    let mut missing_edges = Vec::new();
5489    let mut stale_edges = Vec::new();
5490    for row in &local.edges {
5491        match snapshot_edges.get(row.edge_key.as_str()) {
5492            Some(snapshot_row) if *snapshot_row == row => {}
5493            Some(_) => stale_edges.push(row.edge_key.clone()),
5494            None => missing_edges.push(row.edge_key.clone()),
5495        }
5496    }
5497
5498    let hash_current = local_hash.is_some() && local_hash == snapshot_hash;
5499    let rows_current = missing_nodes.is_empty()
5500        && stale_nodes.is_empty()
5501        && missing_edges.is_empty()
5502        && stale_edges.is_empty();
5503    if hash_current && rows_current {
5504        return ConvexProjectionFreshness::current(local_hash, snapshot_hash);
5505    }
5506
5507    let mut diagnostics = Vec::new();
5508    if local_hash != snapshot_hash {
5509        diagnostics.push(format!(
5510            "projection hash mismatch: local={} snapshot={}",
5511            local_hash.as_deref().unwrap_or("missing"),
5512            snapshot_hash.as_deref().unwrap_or("missing")
5513        ));
5514    }
5515    if !missing_nodes.is_empty() || !missing_edges.is_empty() {
5516        diagnostics.push(format!(
5517            "Convex snapshot is missing {} node(s) and {} edge(s)",
5518            missing_nodes.len(),
5519            missing_edges.len()
5520        ));
5521    }
5522    if !stale_nodes.is_empty() || !stale_edges.is_empty() {
5523        diagnostics.push(format!(
5524            "Convex snapshot has {} stale node row(s) and {} stale edge row(s)",
5525            stale_nodes.len(),
5526            stale_edges.len()
5527        ));
5528    }
5529
5530    ConvexProjectionFreshness {
5531        status: "stale".to_string(),
5532        fail_closed: true,
5533        local_hash,
5534        snapshot_hash,
5535        missing_nodes,
5536        stale_nodes,
5537        missing_edges,
5538        stale_edges,
5539        diagnostics,
5540    }
5541}
5542
5543pub(crate) fn verify_convex_projection_snapshot(
5544    root: &Path,
5545    scope: Option<&str>,
5546    snapshot_path: &Path,
5547) -> Result<()> {
5548    let graph_db = graph_substrate_db_path(root, scope);
5549    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
5550    let local = convex_rows_from_graph_store(&store)?;
5551    let snapshot = load_convex_projection_rows(snapshot_path)?;
5552    validate_convex_projection_rows(&snapshot)?;
5553    let freshness = convex_projection_freshness(&local, Some(&snapshot), scope);
5554    if freshness.fail_closed {
5555        bail!(
5556            "Convex graph projection is not current for {}: {}",
5557            root.display(),
5558            freshness.diagnostics.join("; ")
5559        );
5560    }
5561    Ok(())
5562}
5563
5564fn convex_rows_diff(
5565    local: &ConvexProjectionRows,
5566    snapshot: Option<&ConvexProjectionRows>,
5567) -> (
5568    Vec<ConvexNodeRow>,
5569    Vec<ConvexEdgeRow>,
5570    Vec<String>,
5571    Vec<String>,
5572) {
5573    let Some(snapshot) = snapshot else {
5574        return (
5575            local.nodes.clone(),
5576            local.edges.clone(),
5577            Vec::new(),
5578            Vec::new(),
5579        );
5580    };
5581    let local_nodes = local
5582        .nodes
5583        .iter()
5584        .map(|row| (row.external_id.as_str(), row))
5585        .collect::<BTreeMap<_, _>>();
5586    let local_edges = local
5587        .edges
5588        .iter()
5589        .map(|row| (row.edge_key.as_str(), row))
5590        .collect::<BTreeMap<_, _>>();
5591    let snapshot_nodes = snapshot
5592        .nodes
5593        .iter()
5594        .map(|row| (row.external_id.as_str(), row))
5595        .collect::<BTreeMap<_, _>>();
5596    let snapshot_edges = snapshot
5597        .edges
5598        .iter()
5599        .map(|row| (row.edge_key.as_str(), row))
5600        .collect::<BTreeMap<_, _>>();
5601
5602    let node_upserts = local
5603        .nodes
5604        .iter()
5605        .filter(|row| {
5606            snapshot_nodes
5607                .get(row.external_id.as_str())
5608                .is_none_or(|snapshot_row| *snapshot_row != *row)
5609        })
5610        .cloned()
5611        .collect::<Vec<_>>();
5612    let edge_upserts = local
5613        .edges
5614        .iter()
5615        .filter(|row| {
5616            snapshot_edges
5617                .get(row.edge_key.as_str())
5618                .is_none_or(|snapshot_row| *snapshot_row != *row)
5619        })
5620        .cloned()
5621        .collect::<Vec<_>>();
5622    let node_tombstones = snapshot
5623        .nodes
5624        .iter()
5625        .filter(|row| !local_nodes.contains_key(row.external_id.as_str()))
5626        .map(|row| row.external_id.clone())
5627        .collect::<Vec<_>>();
5628    let edge_tombstones = snapshot
5629        .edges
5630        .iter()
5631        .filter(|row| !local_edges.contains_key(row.edge_key.as_str()))
5632        .map(|row| row.edge_key.clone())
5633        .collect::<Vec<_>>();
5634
5635    (node_upserts, edge_upserts, node_tombstones, edge_tombstones)
5636}
5637
5638fn push_sync_chunks(
5639    chunks: &mut Vec<ConvexSyncChunk>,
5640    operation: &str,
5641    keys: Vec<String>,
5642    size: usize,
5643) {
5644    if keys.is_empty() {
5645        return;
5646    }
5647    for (idx, chunk) in keys.chunks(size).enumerate() {
5648        chunks.push(ConvexSyncChunk {
5649            operation: operation.to_string(),
5650            chunk: idx + 1,
5651            count: chunk.len(),
5652            keys: chunk.to_vec(),
5653            max_attempts: 3,
5654            retry_policy:
5655                "retry the whole chunk; rows are idempotent by externalId/edgeKey, stop on a repeated partial failure"
5656                    .to_string(),
5657        });
5658    }
5659}
5660
5661pub(crate) fn build_convex_sync_report_with_snapshot(
5662    path: &Path,
5663    scope: Option<&str>,
5664    snapshot: Option<ConvexProjectionRows>,
5665    chunk_size: usize,
5666    dry_run: bool,
5667) -> Result<ConvexSyncReport> {
5668    if chunk_size == 0 {
5669        bail!("--chunk-size must be greater than zero");
5670    }
5671    let root = lint::resolve_project_root_or_canonical_path(path)?;
5672    let (graph, _refresh) = write_traversal_graph_store(&root, path, scope)?;
5673    let graph_db = graph_substrate_db_path(&root, scope);
5674    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
5675    let local = convex_rows_from_graph_store(&store)?;
5676    let freshness = convex_projection_freshness(&local, snapshot.as_ref(), scope);
5677    let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
5678        convex_rows_diff(&local, snapshot.as_ref());
5679
5680    let mut chunks = Vec::new();
5681    push_sync_chunks(
5682        &mut chunks,
5683        "delete_edges",
5684        edge_tombstones.clone(),
5685        chunk_size,
5686    );
5687    push_sync_chunks(
5688        &mut chunks,
5689        "upsert_nodes",
5690        node_upserts
5691            .iter()
5692            .map(|row| row.external_id.clone())
5693            .collect(),
5694        chunk_size,
5695    );
5696    push_sync_chunks(
5697        &mut chunks,
5698        "upsert_edges",
5699        edge_upserts
5700            .iter()
5701            .map(|row| row.edge_key.clone())
5702            .collect(),
5703        chunk_size,
5704    );
5705    push_sync_chunks(
5706        &mut chunks,
5707        "delete_nodes",
5708        node_tombstones.clone(),
5709        chunk_size,
5710    );
5711
5712    let mut diagnostics = vec![
5713        "apply node upserts before edge upserts; apply edge tombstones before node tombstones"
5714            .to_string(),
5715    ];
5716    if dry_run {
5717        diagnostics.push("dry-run only: no Convex network mutation was attempted".to_string());
5718    }
5719    if freshness.fail_closed {
5720        diagnostics.push(
5721            "Convex-backed traverse/context-pack reads must fail closed until this plan is applied"
5722                .to_string(),
5723        );
5724    }
5725
5726    Ok(ConvexSyncReport {
5727        root: root.to_string_lossy().to_string(),
5728        scope: scope.map(str::to_string),
5729        graph_db: graph_db.to_string_lossy().to_string(),
5730        dry_run,
5731        projection_version: GRAPH_PROJECTION_VERSION.to_string(),
5732        projection_hash: convex_projection_hash(&local, scope),
5733        required_indexes: convex_required_indexes(),
5734        node_upserts,
5735        edge_upserts,
5736        node_tombstones,
5737        edge_tombstones,
5738        chunks,
5739        freshness,
5740        transport: None,
5741        receipts: Vec::new(),
5742        diagnostics,
5743        warnings: graph.warnings,
5744    })
5745}
5746
/// Test-only wrapper: loads the optional snapshot file and builds a dry-run
/// sync report from it.
#[cfg(test)]
fn build_convex_sync_report(
    path: &Path,
    scope: Option<&str>,
    snapshot_path: Option<&Path>,
    chunk_size: usize,
) -> Result<ConvexSyncReport> {
    let snapshot = match snapshot_path {
        Some(file) => Some(load_convex_projection_rows(file)?),
        None => None,
    };
    build_convex_sync_report_with_snapshot(path, scope, snapshot, chunk_size, true)
}
5757
5758pub(crate) fn print_convex_sync_human(report: &ConvexSyncReport, compact: bool) {
5759    if compact {
5760        println!(
5761            "convex-sync nodes:+{} -{} edges:+{} -{} chunks:{} freshness:{}",
5762            report.node_upserts.len(),
5763            report.node_tombstones.len(),
5764            report.edge_upserts.len(),
5765            report.edge_tombstones.len(),
5766            report.chunks.len(),
5767            report.freshness.status
5768        );
5769        return;
5770    }
5771
5772    println!(
5773        "Convex graph sync {}",
5774        if report.dry_run { "dry-run" } else { "apply" }
5775    );
5776    println!("root: {}", report.root);
5777    println!("graph_db: {}", report.graph_db);
5778    println!(
5779        "upserts: {} node(s), {} edge(s)",
5780        report.node_upserts.len(),
5781        report.edge_upserts.len()
5782    );
5783    println!(
5784        "tombstones: {} node(s), {} edge(s)",
5785        report.node_tombstones.len(),
5786        report.edge_tombstones.len()
5787    );
5788    println!("chunks: {}", report.chunks.len());
5789    println!("freshness: {}", report.freshness.status);
5790    if let Some(transport) = &report.transport {
5791        println!(
5792            "transport: endpoint_env={} auth_env={} applied_chunks={}",
5793            transport.endpoint_env, transport.auth_token_env, transport.applied_chunks
5794        );
5795    }
5796    for receipt in &report.receipts {
5797        println!(
5798            "receipt: {} chunk {} attempt {} {}",
5799            receipt.operation, receipt.chunk, receipt.attempt, receipt.status
5800        );
5801    }
5802    for diagnostic in report
5803        .diagnostics
5804        .iter()
5805        .chain(report.freshness.diagnostics.iter())
5806    {
5807        println!("- {}", diagnostic);
5808    }
5809}
5810
/// Borrowed option bundle for a Convex sync invocation.
///
/// NOTE(review): the consumer is not visible in this part of the file; the
/// field notes below are inferred from names and types and should be
/// confirmed at the call site.
pub(crate) struct ConvexSyncOptions<'a> {
    // Path the sync is run for.
    path: &'a Path,
    // Optional graph scope.
    scope: Option<&'a str>,
    // Optional snapshot file path.
    snapshot: Option<&'a Path>,
    // Number of keys per sync chunk.
    chunk_size: usize,
    // Presumably requests fetching the snapshot remotely — TODO confirm.
    remote_snapshot: bool,
    // Presumably selects applying the plan instead of a dry run — TODO confirm.
    apply: bool,
    // Optional endpoint value — presumably the Convex endpoint; TODO confirm.
    endpoint: Option<&'a str>,
    // Name of the environment variable holding the auth token (per the field name).
    auth_token_env: &'a str,
}
5821
/// Serializable description of a single schema field; the element type of
/// `GraphDbSchema::node_fields` and `GraphDbSchema::edge_fields`.
#[derive(Serialize)]
struct GraphDbSchemaField {
    name: &'static str,
    value_type: &'static str,
    description: &'static str,
}
5828
/// Serializable description of one command; the element type of
/// `GraphDbSchema::operations`.
#[derive(Serialize)]
struct GraphDbSchemaOperation {
    command: &'static str,
    description: &'static str,
}
5834
/// Serializable name/version/description triple; the element type of
/// `GraphDbSchema::contract_versions`.
#[derive(Serialize)]
struct GraphDbSchemaContract {
    name: &'static str,
    version: &'static str,
    description: &'static str,
}
5841
/// Serializable schema description carried in `GraphDbReport::schema`:
/// contract versions, node fields, edge fields, and operations.
#[derive(Serialize)]
struct GraphDbSchema {
    contract_versions: Vec<GraphDbSchemaContract>,
    node_fields: Vec<GraphDbSchemaField>,
    edge_fields: Vec<GraphDbSchemaField>,
    operations: Vec<GraphDbSchemaOperation>,
}
5849
/// Freshness summary carried in `GraphDbReport::freshness`; it can be both
/// serialized and deserialized.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFreshnessReport {
    status: String,
    fail_closed: bool,
    projection_version: Option<String>,
    content_hash: Option<String>,
    source_watermark: Option<String>,
    diagnostics: Vec<String>,
}
5859
/// Serializable readiness verdict: a status and fail-closed flag with a
/// reason, diagnostics, and suggested follow-up commands.
///
/// NOTE(review): where this is produced is not visible in this part of the
/// file.
#[derive(Clone, Debug, Serialize)]
struct GraphEffectivenessReadiness {
    status: String,
    fail_closed: bool,
    reason: String,
    diagnostics: Vec<String>,
    next_commands: Vec<String>,
}
5868
/// A property key/value pair used as a filter; held by
/// `GraphDbQueryOptions::property_filters` and echoed in
/// `GraphDbPageReport::property_filters`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPropertyFilter {
    key: String,
    value: String,
}
5874
/// Optional paging and filtering inputs for a graph DB query. `Default`
/// yields no cursor, no limit, and no property filters.
#[derive(Clone, Debug, Default)]
struct GraphDbQueryOptions {
    cursor: Option<String>,
    limit: Option<usize>,
    property_filters: Vec<GraphDbPropertyFilter>,
}
5881
/// Paging summary carried in `GraphDbReport::page`.
///
/// `cursor`, `limit`, and `next_cursor` are omitted from serialized output
/// when `None`; `diagnostics` is omitted when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPageReport {
    #[serde(skip_serializing_if = "Option::is_none")]
    cursor: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    limit: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
    returned_nodes: usize,
    returned_edges: usize,
    truncated: bool,
    property_filters: Vec<GraphDbPropertyFilter>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    diagnostics: Vec<String>,
}
5897
/// Local alias for `resolution::RankedNeighbor`; the element type of
/// `GraphDbReport::ranked_neighbors`.
type GraphDbRankedNeighbor = resolution::RankedNeighbor;
5899
/// Serializable summary of a knowledge-retrieval query, carried in
/// `GraphDbReport::knowledge_retrieval`: the query and seed parameters, the
/// resulting node and edge counts, a truncation flag, and descriptive text
/// fields plus diagnostics.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbKnowledgeRetrieval {
    mode: String,
    query: String,
    seed_kind: String,
    seed_limit: usize,
    seed_count: usize,
    depth: usize,
    limit: usize,
    node_count: usize,
    edge_count: usize,
    truncated: bool,
    traversal: String,
    freshness_boundary: String,
    privacy_boundary: String,
    diagnostics: Vec<String>,
}
5917
/// In-memory (non-serialized) subgraph result: nodes and edges plus a
/// truncation flag and diagnostics.
///
/// NOTE(review): presumably the output of a semantic-seeded expansion, per
/// the type name — the producer is not visible here.
struct GraphDbSemanticSeededSubgraph {
    nodes: Vec<SubstrateGraphNode>,
    edges: Vec<SubstrateGraphEdge>,
    truncated: bool,
    diagnostics: Vec<String>,
}
5924
/// Local alias for `resolution::NeighborhoodRankingGate`; the type of
/// `GraphDbReport::neighborhood_ranking_gate`.
type GraphDbNeighborhoodRankingGate = resolution::NeighborhoodRankingGate;
5926
/// Serializable result of a graph DB query.
///
/// `root`, `backend`, `query`, and `freshness` are always serialized. Every
/// `Option` field is omitted when `None`, and every `Vec` field is omitted
/// when empty, so the output only contains the sections that were filled in.
#[derive(Serialize)]
struct GraphDbReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    backend: String,
    query: String,
    freshness: GraphDbFreshnessReport,
    #[serde(skip_serializing_if = "Option::is_none")]
    schema: Option<GraphDbSchema>,
    #[serde(skip_serializing_if = "Option::is_none")]
    node: Option<SubstrateGraphNode>,
    #[serde(skip_serializing_if = "Option::is_none")]
    edge: Option<SubstrateGraphEdge>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    nodes: Vec<SubstrateGraphNode>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    edges: Vec<SubstrateGraphEdge>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ranked_neighbors: Vec<GraphDbRankedNeighbor>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    semantic_related: Vec<SemanticRelatedItem>,
    #[serde(skip_serializing_if = "Option::is_none")]
    neighborhood_ranking_gate: Option<GraphDbNeighborhoodRankingGate>,
    #[serde(skip_serializing_if = "Option::is_none")]
    knowledge_retrieval: Option<GraphDbKnowledgeRetrieval>,
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<substrate::GraphPath>,
    #[serde(skip_serializing_if = "Option::is_none")]
    page: Option<GraphDbPageReport>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
5960
/// In-memory, read-only graph store built from Convex projection rows by
/// `ExperimentalReadOnlyGraphStore::from_rows`.
struct ExperimentalReadOnlyGraphStore {
    // Backend identity; its `name()` appears in the read-only error messages.
    backend: GraphDbExperimentalBackend,
    // Nodes keyed by node id.
    nodes: BTreeMap<String, SubstrateGraphNode>,
    // Edges keyed by `graph_db_edge_key`.
    edges: BTreeMap<String, SubstrateGraphEdge>,
    // Node kind -> sorted node ids of that kind.
    node_ids_by_kind: BTreeMap<String, Vec<String>>,
    // Source node id -> keys into `edges`, sorted by (to_id, kind, edge key).
    outgoing_edge_keys_by_from: BTreeMap<String, Vec<String>>,
}
5968
5969impl ExperimentalReadOnlyGraphStore {
5970    fn from_rows(backend: GraphDbExperimentalBackend, rows: &ConvexProjectionRows) -> Result<Self> {
5971        validate_convex_projection_rows(rows)?;
5972        let nodes = rows
5973            .nodes
5974            .iter()
5975            .map(|row| {
5976                let node = SubstrateGraphNode {
5977                    id: row.external_id.clone(),
5978                    kind: row.kind.clone(),
5979                    label: row.label.clone(),
5980                    properties: row.properties.clone(),
5981                    provenance: row.provenance.clone(),
5982                    freshness: row.freshness.clone(),
5983                };
5984                (node.id.clone(), node)
5985            })
5986            .collect::<BTreeMap<_, _>>();
5987        let edges = rows
5988            .edges
5989            .iter()
5990            .map(|row| {
5991                let edge = SubstrateGraphEdge {
5992                    id: row.edge_key.clone(),
5993                    from_id: row.from_external_id.clone(),
5994                    to_id: row.to_external_id.clone(),
5995                    kind: row.kind.clone(),
5996                    properties: row.properties.clone(),
5997                    provenance: row.provenance.clone(),
5998                    freshness: row.freshness.clone(),
5999                };
6000                (graph_db_edge_key(&edge), edge)
6001            })
6002            .collect::<BTreeMap<_, _>>();
6003        let mut node_ids_by_kind = BTreeMap::<String, Vec<String>>::new();
6004        for node in nodes.values() {
6005            node_ids_by_kind
6006                .entry(node.kind.clone())
6007                .or_default()
6008                .push(node.id.clone());
6009        }
6010        for ids in node_ids_by_kind.values_mut() {
6011            ids.sort();
6012        }
6013        let mut outgoing_edge_keys_by_from = BTreeMap::<String, Vec<String>>::new();
6014        for edge in edges.values() {
6015            outgoing_edge_keys_by_from
6016                .entry(edge.from_id.clone())
6017                .or_default()
6018                .push(graph_db_edge_key(edge));
6019        }
6020        for edge_keys in outgoing_edge_keys_by_from.values_mut() {
6021            edge_keys.sort_by(|left_key, right_key| {
6022                let left = &edges[left_key];
6023                let right = &edges[right_key];
6024                left.to_id
6025                    .cmp(&right.to_id)
6026                    .then(left.kind.cmp(&right.kind))
6027                    .then(left_key.cmp(right_key))
6028            });
6029        }
6030        Ok(Self {
6031            backend,
6032            nodes,
6033            edges,
6034            node_ids_by_kind,
6035            outgoing_edge_keys_by_from,
6036        })
6037    }
6038}
6039
6040impl GraphStore for ExperimentalReadOnlyGraphStore {
6041    fn upsert_node(&self, _node: &SubstrateGraphNode) -> Result<()> {
6042        bail!("{} backend-eval adapter is read-only", self.backend.name())
6043    }
6044
6045    fn upsert_edge(&self, _edge: &SubstrateGraphEdge) -> Result<()> {
6046        bail!("{} backend-eval adapter is read-only", self.backend.name())
6047    }
6048
6049    fn delete_node(&self, _id: &str) -> Result<usize> {
6050        bail!("{} backend-eval adapter is read-only", self.backend.name())
6051    }
6052
6053    fn delete_edge(&self, _from_id: &str, _to_id: &str, _kind: &str) -> Result<usize> {
6054        bail!("{} backend-eval adapter is read-only", self.backend.name())
6055    }
6056
6057    fn node(&self, id: &str) -> Result<Option<SubstrateGraphNode>> {
6058        Ok(self.nodes.get(id).cloned())
6059    }
6060
6061    fn all_nodes(&self) -> Result<Vec<SubstrateGraphNode>> {
6062        Ok(self.nodes.values().cloned().collect())
6063    }
6064
6065    fn all_edges(&self) -> Result<Vec<SubstrateGraphEdge>> {
6066        let mut edges = self.edges.values().cloned().collect::<Vec<_>>();
6067        edges.sort_by(|left, right| {
6068            left.from_id
6069                .cmp(&right.from_id)
6070                .then(left.kind.cmp(&right.kind))
6071                .then(left.to_id.cmp(&right.to_id))
6072        });
6073        Ok(edges)
6074    }
6075
6076    fn graph_counts(&self) -> Result<(usize, usize)> {
6077        Ok((self.nodes.len(), self.edges.len()))
6078    }
6079
6080    fn sample_edge(&self, kind: Option<&str>) -> Result<Option<SubstrateGraphEdge>> {
6081        let mut edges = self
6082            .edges
6083            .values()
6084            .filter(|edge| edge.from_id != edge.to_id)
6085            .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
6086            .cloned()
6087            .collect::<Vec<_>>();
6088        edges.sort_by(|left, right| {
6089            left.from_id
6090                .cmp(&right.from_id)
6091                .then(left.kind.cmp(&right.kind))
6092                .then(left.to_id.cmp(&right.to_id))
6093        });
6094        Ok(edges.into_iter().next())
6095    }
6096
6097    fn sample_edge_with_property(
6098        &self,
6099    ) -> Result<Option<(SubstrateGraphEdge, GraphPropertyFilter)>> {
6100        Ok(self
6101            .edges
6102            .values()
6103            .filter(|edge| edge.from_id != edge.to_id)
6104            .filter_map(|edge| {
6105                edge.properties.iter().next().map(|(key, value)| {
6106                    (
6107                        edge,
6108                        GraphPropertyFilter {
6109                            key: key.clone(),
6110                            value: value.clone(),
6111                        },
6112                    )
6113                })
6114            })
6115            .min_by(|(left_edge, left_filter), (right_edge, right_filter)| {
6116                left_filter
6117                    .key
6118                    .cmp(&right_filter.key)
6119                    .then(left_filter.value.cmp(&right_filter.value))
6120                    .then_with(|| graph_db_edge_key(left_edge).cmp(&graph_db_edge_key(right_edge)))
6121            })
6122            .map(|(edge, filter)| (edge.clone(), filter)))
6123    }
6124
6125    fn nodes_by_kind(&self, kind: &str) -> Result<Vec<SubstrateGraphNode>> {
6126        Ok(self
6127            .node_ids_by_kind
6128            .get(kind)
6129            .into_iter()
6130            .flatten()
6131            .filter_map(|id| self.nodes.get(id).cloned())
6132            .collect())
6133    }
6134
6135    fn outgoing_edges(&self, from_id: &str, kind: Option<&str>) -> Result<Vec<SubstrateGraphEdge>> {
6136        Ok(self
6137            .outgoing_edge_keys_by_from
6138            .get(from_id)
6139            .into_iter()
6140            .flatten()
6141            .filter_map(|key| self.edges.get(key))
6142            .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
6143            .cloned()
6144            .collect())
6145    }
6146
6147    fn edges_between_nodes(&self, node_ids: &BTreeSet<String>) -> Result<Vec<SubstrateGraphEdge>> {
6148        Ok(self
6149            .edges
6150            .values()
6151            .filter(|edge| node_ids.contains(&edge.from_id) && node_ids.contains(&edge.to_id))
6152            .cloned()
6153            .collect())
6154    }
6155
6156    fn shortest_path(
6157        &self,
6158        from_id: &str,
6159        to_id: &str,
6160        kind: Option<&str>,
6161    ) -> Result<Option<substrate::GraphPath>> {
6162        if from_id == to_id {
6163            return Ok(Some(substrate::GraphPath {
6164                nodes: vec![from_id.to_string()],
6165                hops: 0,
6166            }));
6167        }
6168
6169        let mut queue = VecDeque::new();
6170        let mut parent = BTreeMap::<String, String>::new();
6171        parent.insert(from_id.to_string(), String::new());
6172        queue.push_back(from_id.to_string());
6173
6174        while let Some(current) = queue.pop_front() {
6175            for edge in self.outgoing_edges(&current, kind)? {
6176                if parent.contains_key(&edge.to_id) {
6177                    continue;
6178                }
6179                parent.insert(edge.to_id.clone(), current.clone());
6180                if edge.to_id == to_id {
6181                    let mut nodes = vec![to_id.to_string()];
6182                    let mut cursor = to_id;
6183                    while let Some(previous) = parent.get(cursor) {
6184                        if previous.is_empty() {
6185                            break;
6186                        }
6187                        nodes.push(previous.clone());
6188                        cursor = previous;
6189                    }
6190                    nodes.reverse();
6191                    return Ok(Some(substrate::GraphPath {
6192                        hops: nodes.len().saturating_sub(1),
6193                        nodes,
6194                    }));
6195                }
6196                queue.push_back(edge.to_id);
6197            }
6198        }
6199
6200        Ok(None)
6201    }
6202
6203    fn reachable_nodes_by_kinds(
6204        &self,
6205        from_id: &str,
6206        kinds: &[&str],
6207        depth: usize,
6208        limit: usize,
6209    ) -> Result<BTreeMap<String, Vec<(SubstrateGraphNode, substrate::GraphPath)>>> {
6210        let requested = kinds.iter().copied().collect::<BTreeSet<_>>();
6211        let mut rows = requested
6212            .iter()
6213            .map(|kind| {
6214                (
6215                    (*kind).to_string(),
6216                    BTreeMap::<String, (SubstrateGraphNode, substrate::GraphPath)>::new(),
6217                )
6218            })
6219            .collect::<BTreeMap<_, _>>();
6220        if requested.is_empty() {
6221            return Ok(BTreeMap::new());
6222        }
6223
6224        let mut seen = BTreeSet::from([from_id.to_string()]);
6225        let mut queue = VecDeque::from([(from_id.to_string(), vec![from_id.to_string()])]);
6226        while let Some((current, path)) = queue.pop_front() {
6227            let current_depth = path.len().saturating_sub(1);
6228            if current_depth >= depth {
6229                continue;
6230            }
6231            for edge in self.outgoing_edges(&current, None)? {
6232                if !seen.insert(edge.to_id.clone()) {
6233                    continue;
6234                }
6235                let Some(node) = self.nodes.get(&edge.to_id).cloned() else {
6236                    continue;
6237                };
6238                let mut next_path = path.clone();
6239                next_path.push(edge.to_id.clone());
6240                let graph_path = substrate::GraphPath {
6241                    hops: next_path.len().saturating_sub(1),
6242                    nodes: next_path.clone(),
6243                };
6244                if requested.contains(node.kind.as_str()) {
6245                    rows.entry(node.kind.clone())
6246                        .or_default()
6247                        .entry(node.id.clone())
6248                        .or_insert((node.clone(), graph_path));
6249                }
6250                queue.push_back((edge.to_id, next_path));
6251            }
6252        }
6253
6254        Ok(rows
6255            .into_iter()
6256            .map(|(kind, values)| {
6257                let mut values = values.into_values().collect::<Vec<_>>();
6258                values.sort_by(|(left_node, left_path), (right_node, right_path)| {
6259                    left_path
6260                        .hops
6261                        .cmp(&right_path.hops)
6262                        .then(left_node.label.cmp(&right_node.label))
6263                        .then(left_node.id.cmp(&right_node.id))
6264                });
6265                if limit > 0 && values.len() > limit {
6266                    values.truncate(limit);
6267                }
6268                (kind, values)
6269            })
6270            .collect())
6271    }
6272}
6273
// Tuning constants and cache-version tags for graph-db backend evaluation and
// conflict-matrix preparation.
// NOTE(review): the descriptions below are inferred from the constant names; the
// use sites are outside this excerpt, so confirm before relying on them.

/// Hop cap for backend-eval path probes (presumably the default maximum).
pub(crate) const GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS: usize = 64;
/// Larger hop budgets, in ascending order (presumably probed beyond the default cap).
pub(crate) const GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS: [usize; 3] = [128, 256, 512];
/// Hop budget of one (presumably for direct-neighbour path probes).
pub(crate) const GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS: usize = 1;
/// Regression allowance expressed as a percentage.
const GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT: f64 = 10.0;
/// Row unit used when normalizing metrics (presumably "per 1000 rows").
pub(crate) const GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT: f64 = 1000.0;
/// Minimum number of sample runs (presumably required before a gate is evaluated).
const GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS: usize = 3;
/// Version tag for the conflict-matrix preparation cache.
const CONFLICT_MATRIX_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-prep-v1";
/// Version tag for the conflict-matrix graph preparation cache.
const CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-graph-prep-v1";
/// Version tag for the backend-eval full-projection cache.
const GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION: &str = "backend-eval-full-projection-v5";
6283
/// One named timing entry: a phase `name`, its `duration_micros`, and a free-form
/// `detail` string. Embedded in both `GraphDbBackendEvalReport` and
/// `GraphDbRefreshSummary` as `phase_timings`.
#[derive(Clone, Serialize, Deserialize)]
pub(crate) struct GraphDbBackendEvalPhaseTiming {
    name: String,
    // Duration in microseconds, per the field name.
    duration_micros: u128,
    detail: String,
}
6290
/// Serializable cache record holding a full `GraphProjection` together with the
/// `version`, `key`, and `source_watermark` stored alongside it and any `warnings`.
// NOTE(review): presumably `version` is compared against
// `GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION` on load — confirm at the reader.
#[derive(Serialize, Deserialize)]
struct GraphDbBackendEvalFullProjectionCache {
    version: String,
    key: String,
    source_watermark: String,
    projection: GraphProjection,
    warnings: Vec<String>,
}
6299
/// Counters describing one use of the full-projection cache; `Default` yields a
/// miss with all sizes and prune counts at zero.
#[derive(Clone, Default)]
struct GraphDbBackendEvalFullProjectionCacheStats {
    // Whether the cache lookup was a hit.
    hit: bool,
    disk_bytes: u64,
    json_bytes: u64,
    // Number and total size of pruned cache files, per the field names.
    pruned_files: usize,
    pruned_bytes: u64,
}
6308
/// Serializable row describing one source file by `path`, size in `bytes`, and
/// `content_hash`.
// NOTE(review): presumably these rows are combined into the source watermark — confirm.
#[derive(Serialize)]
struct GraphDbBackendEvalRawSourceWatermarkRow {
    path: String,
    bytes: u64,
    content_hash: String,
}
6315
/// A computed source watermark: the watermark `value` plus a human-readable
/// `detail` string describing it.
#[derive(Clone)]
struct GraphDbBackendEvalFullProjectionSourceWatermark {
    value: String,
    detail: String,
}
6321
/// Serializable snapshot of the parameters a backend evaluation ran with:
/// synthetic dataset sizes, traversal depth/limits, path hop budgets and policy
/// labels, and full-projection settings.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalConfig {
    high_degree_nodes: usize,
    high_degree_fanout: usize,
    deep_chain_nodes: usize,
    deep_chain_fanout: usize,
    depth: usize,
    limit: usize,
    impact_limit: usize,
    path_max_hops: usize,
    path_direct_hop_budget: usize,
    path_deep_chain_hop_budget: usize,
    path_extended_hop_budgets: Vec<usize>,
    path_hop_policy: String,
    path_probe_strategy: String,
    path_query_plan_checks: Vec<String>,
    full_projection_enabled: bool,
    full_projection_profile: String,
    // NOTE(review): typed `usize` here although
    // `GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT` is an `f64` constant.
    normalization_row_unit: usize,
}
6342
/// Result signature for one evaluated `operation`, carried as an arbitrary JSON
/// `value`.
// NOTE(review): presumably compared across backends for parity — confirm at use sites.
#[derive(Clone)]
struct GraphDbBackendEvalSignature {
    operation: String,
    value: serde_json::Value,
}
6348
/// Serializable outcome of a single evaluated operation: its `name`, whether the
/// backend supports it, a `status` label, and the measured `duration_micros`.
#[derive(Serialize)]
struct GraphDbBackendEvalOperation {
    name: String,
    supported: bool,
    status: String,
    duration_micros: u128,
    // Row count; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    rows: Option<usize>,
    // Error text; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
}
6360
/// Parity verdict for a backend: whether its results match SQLite, plus the
/// diagnostics collected for the comparison.
#[derive(Serialize)]
struct GraphDbBackendEvalParity {
    matches_sqlite: bool,
    diagnostics: Vec<String>,
}
6366
/// Serializable per-backend evaluation result: identity (`backend`, `adapter`),
/// per-operation outcomes with their `total_micros`, the parity verdict, and
/// descriptive notes on projection loading, locking, and install portability.
#[derive(Serialize)]
struct GraphDbBackendEvalBackendReport {
    backend: String,
    adapter: String,
    read_only: bool,
    projection_load: String,
    operations: Vec<GraphDbBackendEvalOperation>,
    total_micros: u128,
    parity: GraphDbBackendEvalParity,
    lock_behavior: String,
    install_portability: String,
}
6379
/// Serializable evaluation result for one dataset: its `name`, size counters
/// (`target_count`, `nodes`, `edges`), and one report per evaluated backend.
#[derive(Serialize)]
struct GraphDbBackendEvalDataset {
    name: String,
    target_count: usize,
    nodes: usize,
    edges: usize,
    backends: Vec<GraphDbBackendEvalBackendReport>,
}
6388
/// Serializable promotion verdict for one `backend`: the `decision` label, the
/// `reasons` behind it, and the `GraphDbBackendPromotionGate` it was judged against.
#[derive(Serialize)]
struct GraphDbBackendPromotionDecision {
    backend: String,
    decision: String,
    reasons: Vec<String>,
    gate: GraphDbBackendPromotionGate,
}
6396
/// Serializable description of the performance gate attached to an evaluation
/// report: baseline/profile labels, the regression allowance and minimum sample
/// count, required metric names, helper commands, and the nested hop-cap and
/// adapter-spike gates.
#[derive(Serialize)]
struct GraphDbBackendEvalPerformanceGate {
    baseline_fixture: String,
    ci_profile: String,
    opt_in_real_profile: String,
    full_projection_cache_hit_gate: String,
    allowed_regression_percent: f64,
    minimum_sample_runs: usize,
    normalized_metric_unit: String,
    required_metrics: Vec<String>,
    digest_command: String,
    repeated_sample_command: String,
    hop_cap_promotion: GraphDbHopCapPromotionGate,
    backend_adapter_spike: GraphDbBackendAdapterSpikeGate,
}
6412
/// Serializable gate describing what must hold before the default path hop cap is
/// raised: the current default, candidate tiers, required backend/workloads/metrics,
/// tolerance settings, and the textual decision rule.
#[derive(Serialize)]
struct GraphDbHopCapPromotionGate {
    status: String,
    current_default_hops: usize,
    candidate_hop_tiers: Vec<usize>,
    required_backend: String,
    required_workloads: Vec<String>,
    required_metrics: Vec<String>,
    allowed_regression_percent: f64,
    minimum_sample_runs: usize,
    decision_rule: String,
}
6425
/// Serializable gate for backend-adapter spikes: a `status` label, the candidate
/// backends, the workloads and checks required of them, and the textual decision
/// rule and evidence plan.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeGate {
    status: String,
    candidate_backends: Vec<GraphDbBackendAdapterSpikeCandidate>,
    required_workloads: Vec<String>,
    required_checks: Vec<String>,
    decision_rule: String,
    evidence_plan: String,
}
6435
/// Serializable description of one candidate backend listed in
/// `GraphDbBackendAdapterSpikeGate::candidate_backends`.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeCandidate {
    backend: String,
    adapter_label: String,
    projection_load: String,
    lock_behavior: String,
    install_portability: String,
}
6444
/// Top-level serializable report of a graph-db backend evaluation: where it ran
/// (`root`, optional `scope`), what was compared (`baseline_backend`,
/// `candidates`, `targets`), the configuration, per-phase timings, per-dataset
/// results, promotion decisions, the performance gate, flat metrics, and warnings.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalReport {
    root: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    label: String,
    baseline_backend: String,
    candidates: Vec<String>,
    targets: Vec<String>,
    config: GraphDbBackendEvalConfig,
    phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
    datasets: Vec<GraphDbBackendEvalDataset>,
    promotion: Vec<GraphDbBackendPromotionDecision>,
    performance_gate: GraphDbBackendEvalPerformanceGate,
    // Metric name -> value; a `BTreeMap`, so it serializes in key order.
    metrics: BTreeMap<String, f64>,
    metric_digest_command: String,
    warnings: Vec<String>,
}
6463
/// Result of one doctor check. `graph_db_doctor_check` builds these so that a
/// check is `fail_closed` (status `"fail_closed"`) exactly when it has
/// diagnostics, and carries repair commands only in that case.
#[derive(Clone, Debug, Serialize)]
struct GraphDbDoctorCheck {
    name: String,
    status: String,
    fail_closed: bool,
    diagnostics: Vec<String>,
    repair_commands: Vec<String>,
}
6472
/// Aggregated doctor report for a graph database. `new` starts it as `"ok"` with
/// no checks; `finalize` recomputes `status`, `fail_closed`, and the deduplicated
/// `repair_commands` from the pushed `checks`.
#[derive(Serialize)]
pub(crate) struct GraphDbDoctorReport {
    root: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    backend: String,
    graph_db: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    convex_snapshot: Option<String>,
    status: String,
    fail_closed: bool,
    checks: Vec<GraphDbDoctorCheck>,
    repair_commands: Vec<String>,
    // Omitted from serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    required_indexes: Vec<ConvexRequiredIndex>,
}
6489
/// Serializable counters summarizing a drift comparison; embedded in
/// `GraphDbDriftReport::summary`. Each field is a count for the category its
/// name describes.
#[derive(Serialize)]
struct GraphDbDriftSummary {
    node_upserts: usize,
    edge_upserts: usize,
    node_tombstones: usize,
    edge_tombstones: usize,
    stale_nodes: usize,
    stale_edges: usize,
    stale_projection_metadata: usize,
    duplicate_failures: usize,
    orphan_failures: usize,
    missing_required_indexes: usize,
}
6503
/// Serializable drift report between a local graph database (`graph_db`) and a
/// Convex snapshot (`convex_snapshot`): overall `status`, whether graph reads are
/// allowed, both content hashes, the counter `summary`, the itemized upsert /
/// tombstone / stale lists, diagnostics, and follow-up commands.
#[derive(Serialize)]
struct GraphDbDriftReport {
    root: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    graph_db: String,
    convex_snapshot: String,
    status: String,
    graph_reads_allowed: bool,
    projection_version: String,
    // Unlike `scope`, these two have no skip attribute, so `None` is serialized.
    local_hash: Option<String>,
    snapshot_hash: Option<String>,
    summary: GraphDbDriftSummary,
    node_upserts: Vec<String>,
    edge_upserts: Vec<String>,
    node_tombstones: Vec<String>,
    edge_tombstones: Vec<String>,
    stale_nodes: Vec<String>,
    stale_edges: Vec<String>,
    diagnostics: Vec<String>,
    next_commands: Vec<String>,
    required_indexes: Vec<ConvexRequiredIndex>,
    // Omitted from serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6529
/// Tombstone row counts split by row kind. Both `sqlite_tombstone_counts` and
/// `sqlite_graph_counts_from_cache` fill `total` as `nodes + edges`.
#[derive(Clone, Serialize)]
struct GraphDbTombstoneCounts {
    nodes: usize,
    edges: usize,
    total: usize,
}
6536
/// Row and size counters for a graph database: live `nodes` and `edges`,
/// retained `tombstones`, and optional file/freelist sizes in bytes.
#[derive(Clone, Serialize)]
struct GraphDbOperatorCounts {
    nodes: usize,
    edges: usize,
    tombstones: GraphDbTombstoneCounts,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes: Option<u64>,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    freelist_bytes: Option<u64>,
}
6547
/// Compaction advice produced by `graph_db_compaction_policy`: a `status` of
/// `"recommended"` or `"not_needed"`, the counters it was derived from, the
/// tombstone-pruning safety flags, suggested commands, and `proof` strings
/// restating the inputs.
#[derive(Clone, Serialize)]
struct GraphDbCompactionPolicy {
    status: String,
    tombstone_scan_rows: usize,
    live_rows: usize,
    // No skip attribute here, so `None` is serialized for these two fields.
    file_size_bytes: Option<u64>,
    freelist_bytes: Option<u64>,
    safe_to_prune_tombstones: bool,
    requires_convex_reconciliation: bool,
    recommendations: Vec<String>,
    proof: Vec<String>,
}
6560
/// Serializable summary of one graph-db refresh: the `scope`, projection version
/// and `mode`, per-category row counters (tombstoned / upserted / unchanged /
/// deleted / pruned), optional file sizes before and after, and phase timings.
#[derive(Serialize)]
pub(crate) struct GraphDbRefreshSummary {
    scope: String,
    projection_version: String,
    mode: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    source_watermark: Option<String>,
    tombstoned_nodes: usize,
    tombstoned_edges: usize,
    upserted_nodes: usize,
    upserted_edges: usize,
    unchanged_nodes: usize,
    unchanged_edges: usize,
    upserted_properties: usize,
    unchanged_properties: usize,
    deleted_properties: usize,
    deleted_nodes: usize,
    deleted_edges: usize,
    pruned_tombstones: usize,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes_before: Option<u64>,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes_after: Option<u64>,
    // Omitted from serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
}
6587
/// Serializable report for an operator-facing graph-db `operation`: its `status`,
/// whether the database is materialized, freshness and readiness sub-reports, row
/// counts, compaction advice, and optional refresh / recovery details.
#[derive(Serialize)]
struct GraphDbOperatorReport {
    root: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    graph_db: String,
    operation: String,
    status: String,
    materialized: bool,
    freshness: GraphDbFreshnessReport,
    readiness: GraphEffectivenessReadiness,
    counts: GraphDbOperatorCounts,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    refresh: Option<GraphDbRefreshSummary>,
    compaction: GraphDbCompactionPolicy,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    recovery: Option<index::ReadOnlyRecovery>,
    next_commands: Vec<String>,
    // Omitted from serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6609
/// Serializable result of a compaction run: whether it was `applied`, how many
/// tombstones were pruned, and counts plus compaction advice captured before and
/// after.
#[derive(Serialize)]
pub(crate) struct GraphDbCompactionReport {
    root: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    graph_db: String,
    applied: bool,
    pruned_tombstones: usize,
    counts_before: GraphDbOperatorCounts,
    counts_after: GraphDbOperatorCounts,
    compaction_before: GraphDbCompactionPolicy,
    compaction_after: GraphDbCompactionPolicy,
    // Signed, so the value can be negative (presumably when the file grew — confirm
    // where it is computed).
    reclaimed_bytes: i64,
    next_commands: Vec<String>,
    // Omitted from serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6627
/// One entry of `GraphDbEvidenceReport::shortest_paths`: the destination `to`,
/// its `kind` and `label`, and optionally the path itself and an `expand` string.
// NOTE(review): `expand` looks like a follow-up command — confirm at the producer.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidencePath {
    to: String,
    kind: String,
    label: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<substrate::GraphPath>,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    expand: Option<String>,
}
6638
/// Names the `test` and `fixture` that cover an evidence packet, together with
/// the `assertions` they make; embedded in `GraphDbEvidenceReport`.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFixtureCoverage {
    test: String,
    fixture: String,
    assertions: Vec<String>,
}
6645
/// Serializable evidence packet for one `target` node: identity and versioning
/// fields, freshness, the target node itself, related node groups (worker
/// context, source handles, worker results, semantic neighbours), shortest paths,
/// and follow-up / replay / repair commands.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidenceReport {
    root: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    backend: String,
    contract_version: String,
    target: String,
    packet_id: String,
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    freshness: GraphDbFreshnessReport,
    target_node: SubstrateGraphNode,
    worker_context: Vec<SubstrateGraphNode>,
    source_handles: Vec<SubstrateGraphNode>,
    worker_results: Vec<SubstrateGraphNode>,
    semantic_related: Vec<SubstrateGraphNode>,
    shortest_paths: Vec<GraphDbEvidencePath>,
    next_commands: Vec<String>,
    replay_commands: Vec<String>,
    repair_commands: Vec<String>,
    fixture_coverage: GraphDbFixtureCoverage,
    // Omitted when empty on output; defaults to empty when absent on input.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6671
/// Borrowed inputs for building an evidence report over any `GraphStore`: the
/// request parameters (`root`, `scope`, `backend`, `target`, `depth`, `limit`), a
/// reference to the store, and the already-computed `freshness` and `warnings`,
/// which are owned.
pub(crate) struct GraphDbEvidenceInput<'a, S: GraphStore> {
    root: &'a Path,
    scope: Option<&'a str>,
    backend: &'a str,
    target: &'a str,
    depth: usize,
    limit: usize,
    store: &'a S,
    freshness: GraphDbFreshnessReport,
    warnings: Vec<String>,
}
6683
6684impl GraphDbDoctorReport {
6685    fn new(
6686        root: &Path,
6687        scope: Option<&str>,
6688        backend: &str,
6689        graph_db: &Path,
6690        convex_snapshot: Option<&Path>,
6691    ) -> Self {
6692        Self {
6693            root: root.to_string_lossy().to_string(),
6694            scope: scope.map(str::to_string),
6695            backend: backend.to_string(),
6696            graph_db: graph_db.to_string_lossy().to_string(),
6697            convex_snapshot: convex_snapshot.map(|path| path.to_string_lossy().to_string()),
6698            status: "ok".to_string(),
6699            fail_closed: false,
6700            checks: Vec::new(),
6701            repair_commands: Vec::new(),
6702            required_indexes: Vec::new(),
6703        }
6704    }
6705
6706    fn push_check(&mut self, check: GraphDbDoctorCheck) {
6707        self.checks.push(check);
6708    }
6709
6710    fn finalize(&mut self) {
6711        self.fail_closed = self.checks.iter().any(|check| check.fail_closed);
6712        self.status = if self.fail_closed {
6713            "fail_closed"
6714        } else {
6715            "ok"
6716        }
6717        .to_string();
6718        let mut commands = BTreeSet::new();
6719        for check in &self.checks {
6720            commands.extend(check.repair_commands.iter().cloned());
6721        }
6722        self.repair_commands = commands.into_iter().collect();
6723    }
6724
6725    fn summary(&self) -> String {
6726        self.checks
6727            .iter()
6728            .filter(|check| check.fail_closed)
6729            .flat_map(|check| check.diagnostics.iter())
6730            .take(3)
6731            .cloned()
6732            .collect::<Vec<_>>()
6733            .join("; ")
6734    }
6735}
6736
6737fn graph_db_doctor_check(
6738    name: impl Into<String>,
6739    diagnostics: Vec<String>,
6740    repair_commands: Vec<String>,
6741) -> GraphDbDoctorCheck {
6742    let fail_closed = !diagnostics.is_empty();
6743    GraphDbDoctorCheck {
6744        name: name.into(),
6745        status: if fail_closed { "fail_closed" } else { "ok" }.to_string(),
6746        fail_closed,
6747        diagnostics,
6748        repair_commands: if fail_closed {
6749            repair_commands
6750        } else {
6751            Vec::new()
6752        },
6753    }
6754}
6755
6756pub(crate) fn graph_db_scope_arg(scope: Option<&str>) -> String {
6757    scope
6758        .map(|scope| format!(" --scope {}", shell_quote(scope)))
6759        .unwrap_or_default()
6760}
6761
6762fn graph_db_refresh_command(root: &Path, scope: Option<&str>) -> String {
6763    format!(
6764        "tsift graph-db --path {}{} refresh --json",
6765        shell_quote(root.to_string_lossy().as_ref()),
6766        graph_db_scope_arg(scope)
6767    )
6768}
6769
/// Returns the command used to rebuild the graph database. This is currently the
/// same command as `graph_db_refresh_command`, to which it delegates unchanged.
fn graph_db_rebuild_command(root: &Path, scope: Option<&str>) -> String {
    graph_db_refresh_command(root, scope)
}
6773
6774fn graph_db_backup_rebuild_command(root: &Path, scope: Option<&str>, graph_db: &Path) -> String {
6775    let backup = format!("{}.bak", graph_db.to_string_lossy());
6776    format!(
6777        "mv {} {} && {}",
6778        shell_quote(graph_db.to_string_lossy().as_ref()),
6779        shell_quote(&backup),
6780        graph_db_rebuild_command(root, scope)
6781    )
6782}
6783
6784fn convex_refresh_command(root: &Path, scope: Option<&str>) -> String {
6785    format!(
6786        "tsift convex-sync {}{} --remote-snapshot --apply --json",
6787        shell_quote(root.to_string_lossy().as_ref()),
6788        graph_db_scope_arg(scope)
6789    )
6790}
6791
/// Opens `graph_db` for reading by delegating to
/// `substrate::open_graph_read_only_connection_resilient`.
// NOTE(review): the "resilient" recovery behaviour lives in `substrate` and is not
// visible here — consult that function before relying on specifics.
fn open_sqlite_graph_db_readonly(graph_db: &Path) -> Result<substrate::SqliteReadOnlyConnection> {
    substrate::open_graph_read_only_connection_resilient(graph_db)
}
6795
6796fn sqlite_table_exists(conn: &Connection, table: &str) -> Result<bool> {
6797    conn.query_row(
6798        "SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1)",
6799        [table],
6800        |row| row.get::<_, bool>(0),
6801    )
6802    .map_err(Into::into)
6803}
6804
6805fn sqlite_known_table_count(conn: &Connection, table: &str) -> Result<usize> {
6806    let sql = match table {
6807        "graph_nodes" => "SELECT COUNT(*) FROM graph_nodes",
6808        "graph_edges" => "SELECT COUNT(*) FROM graph_edges",
6809        "graph_tombstones" => "SELECT COUNT(*) FROM graph_tombstones",
6810        other => bail!("unsupported graph count table {other}"),
6811    };
6812    conn.query_row(sql, [], |row| row.get::<_, usize>(0))
6813        .map_err(Into::into)
6814}
6815
6816fn sqlite_tombstone_counts(conn: &Connection) -> Result<GraphDbTombstoneCounts> {
6817    if !sqlite_table_exists(conn, "graph_tombstones")? {
6818        return Ok(GraphDbTombstoneCounts {
6819            nodes: 0,
6820            edges: 0,
6821            total: 0,
6822        });
6823    }
6824    let mut stmt =
6825        conn.prepare("SELECT row_kind, COUNT(*) FROM graph_tombstones GROUP BY row_kind")?;
6826    let mut rows = stmt.query([])?;
6827    let mut nodes = 0usize;
6828    let mut edges = 0usize;
6829    while let Some(row) = rows.next()? {
6830        let row_kind: String = row.get(0)?;
6831        let count: usize = row.get(1)?;
6832        match row_kind.as_str() {
6833            "node" => nodes = count,
6834            "edge" => edges = count,
6835            _ => {}
6836        }
6837    }
6838    Ok(GraphDbTombstoneCounts {
6839        nodes,
6840        edges,
6841        total: nodes + edges,
6842    })
6843}
6844
6845fn sqlite_graph_counts_from_cache(
6846    conn: &Connection,
6847    scope: &str,
6848) -> Result<Option<GraphDbOperatorCounts>> {
6849    if !sqlite_table_exists(conn, "graph_operator_stats")? {
6850        return Ok(None);
6851    }
6852    let row = conn
6853        .query_row(
6854            r#"
6855        SELECT nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes
6856        FROM graph_operator_stats
6857        WHERE scope = ?1
6858        "#,
6859            [scope],
6860            |row| {
6861                Ok((
6862                    row.get::<_, usize>(0)?,
6863                    row.get::<_, usize>(1)?,
6864                    row.get::<_, usize>(2)?,
6865                    row.get::<_, usize>(3)?,
6866                    row.get::<_, Option<i64>>(4)?,
6867                    row.get::<_, Option<i64>>(5)?,
6868                ))
6869            },
6870        )
6871        .optional()?;
6872    Ok(row.map(
6873        |(nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes)| {
6874            GraphDbOperatorCounts {
6875                nodes,
6876                edges,
6877                tombstones: GraphDbTombstoneCounts {
6878                    nodes: tombstone_nodes,
6879                    edges: tombstone_edges,
6880                    total: tombstone_nodes + tombstone_edges,
6881                },
6882                file_size_bytes: file_size_bytes
6883                    .and_then(|value| u64::try_from(value).ok())
6884                    .or_else(|| sqlite_database_size_bytes(conn).ok()),
6885                freelist_bytes: freelist_bytes
6886                    .and_then(|value| u64::try_from(value).ok())
6887                    .or_else(|| sqlite_database_freelist_bytes(conn).ok()),
6888            }
6889        },
6890    ))
6891}
6892
6893fn sqlite_graph_counts(conn: &Connection, scope: &str) -> Result<GraphDbOperatorCounts> {
6894    if let Some(counts) = sqlite_graph_counts_from_cache(conn, scope)? {
6895        return Ok(counts);
6896    }
6897    let nodes = if sqlite_table_exists(conn, "graph_nodes")? {
6898        sqlite_known_table_count(conn, "graph_nodes")?
6899    } else {
6900        0
6901    };
6902    let edges = if sqlite_table_exists(conn, "graph_edges")? {
6903        sqlite_known_table_count(conn, "graph_edges")?
6904    } else {
6905        0
6906    };
6907    Ok(GraphDbOperatorCounts {
6908        nodes,
6909        edges,
6910        tombstones: sqlite_tombstone_counts(conn)?,
6911        file_size_bytes: sqlite_database_size_bytes(conn).ok(),
6912        freelist_bytes: sqlite_database_freelist_bytes(conn).ok(),
6913    })
6914}
6915
6916pub(crate) fn graph_db_compaction_policy(
6917    root: &Path,
6918    scope: Option<&str>,
6919    counts: &GraphDbOperatorCounts,
6920    prune_confirmed: bool,
6921) -> GraphDbCompactionPolicy {
6922    let live_rows = counts.nodes + counts.edges;
6923    let tombstone_scan_rows = counts.tombstones.total;
6924    let tombstone_heavy = tombstone_scan_rows > live_rows.max(1);
6925    let freelist_heavy = counts
6926        .file_size_bytes
6927        .zip(counts.freelist_bytes)
6928        .is_some_and(|(file_size, freelist)| freelist > 0 && freelist >= file_size / 20);
6929    let status = if tombstone_heavy || freelist_heavy {
6930        "recommended"
6931    } else {
6932        "not_needed"
6933    }
6934    .to_string();
6935    let mut recommendations = vec![
6936        convex_refresh_command(root, scope),
6937        graph_db_refresh_command(root, scope),
6938        format!(
6939            "tsift graph-db --path {}{} compact --apply --json",
6940            shell_quote(root.to_string_lossy().as_ref()),
6941            graph_db_scope_arg(scope)
6942        ),
6943    ];
6944    if prune_confirmed {
6945        recommendations.push(format!(
6946            "tsift graph-db --path {}{} compact --apply --prune-tombstones --confirmed-convex-reconciled --json",
6947            shell_quote(root.to_string_lossy().as_ref()),
6948            graph_db_scope_arg(scope)
6949        ));
6950    }
6951    let proof = vec![
6952        format!("{live_rows} live graph row(s)"),
6953        format!("{tombstone_scan_rows} retained tombstone row(s) scanned by status/doctor"),
6954        format!(
6955            "graph.db file_size={} byte(s), freelist={} byte(s)",
6956            counts.file_size_bytes.unwrap_or(0),
6957            counts.freelist_bytes.unwrap_or(0)
6958        ),
6959    ];
6960    GraphDbCompactionPolicy {
6961        status,
6962        tombstone_scan_rows,
6963        live_rows,
6964        file_size_bytes: counts.file_size_bytes,
6965        freelist_bytes: counts.freelist_bytes,
6966        safe_to_prune_tombstones: prune_confirmed,
6967        requires_convex_reconciliation: tombstone_scan_rows > 0 && !prune_confirmed,
6968        recommendations,
6969        proof,
6970    }
6971}
6972
6973fn sqlite_database_size_bytes(conn: &Connection) -> Result<u64> {
6974    let page_count: u64 = conn.query_row("PRAGMA page_count", [], |row| row.get(0))?;
6975    let page_size: u64 = conn.query_row("PRAGMA page_size", [], |row| row.get(0))?;
6976    Ok(page_count.saturating_mul(page_size))
6977}
6978
6979fn sqlite_database_freelist_bytes(conn: &Connection) -> Result<u64> {
6980    let freelist_count: u64 = conn.query_row("PRAGMA freelist_count", [], |row| row.get(0))?;
6981    let page_size: u64 = conn.query_row("PRAGMA page_size", [], |row| row.get(0))?;
6982    Ok(freelist_count.saturating_mul(page_size))
6983}
6984
6985fn sqlite_graph_tombstone_retention_diagnostics(
6986    conn: &Connection,
6987    scope: &str,
6988) -> Result<Vec<String>> {
6989    if !sqlite_table_exists(conn, "graph_tombstones")? {
6990        return Ok(Vec::new());
6991    }
6992    let cached = sqlite_graph_counts_from_cache(conn, scope)?;
6993    let counts = match cached.clone() {
6994        Some(counts) => counts,
6995        None => sqlite_graph_counts(conn, scope)?,
6996    };
6997    let live_rows = counts.nodes + counts.edges;
6998    let file_size = counts.file_size_bytes.unwrap_or(0);
6999    let freelist = counts.freelist_bytes.unwrap_or(0);
7000    let stale_live_tombstones = if cached.is_some() {
7001        0
7002    } else {
7003        let mut live_keys = BTreeSet::new();
7004        if sqlite_table_exists(conn, "graph_nodes")? {
7005            let mut stmt = conn.prepare("SELECT id FROM graph_nodes")?;
7006            for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
7007                live_keys.insert(format!("node:{}", row?));
7008            }
7009        }
7010        if sqlite_table_exists(conn, "graph_edges")? {
7011            let mut stmt = conn.prepare("SELECT edge_key FROM graph_edges")?;
7012            for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
7013                live_keys.insert(format!("edge:{}", row?));
7014            }
7015        }
7016        let mut stale_live_tombstones = 0usize;
7017        let mut stmt = conn.prepare("SELECT row_key FROM graph_tombstones ORDER BY row_key")?;
7018        for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
7019            if live_keys.contains(&row?) {
7020                stale_live_tombstones += 1;
7021            }
7022        }
7023        stale_live_tombstones
7024    };
7025
7026    let mut diagnostics = Vec::new();
7027    if stale_live_tombstones > 0 {
7028        diagnostics.push(format!(
7029            "{stale_live_tombstones} tombstone(s) reference rows that are live again; the next graph-db refresh prunes those stale tombstones before inserting new deletion markers"
7030        ));
7031    }
7032    if counts.tombstones.total > live_rows.max(1) {
7033        let source = if cached.is_some() {
7034            "cached refresh stats"
7035        } else {
7036            "live row scan"
7037        };
7038        diagnostics.push(format!(
7039            "tombstone retention exceeds live graph rows: {} tombstone(s) vs {} live row(s) from {}; graph.db file_size={} byte(s), freelist={} byte(s), status/doctor tombstone scans inspect {} extra row(s). Run convex-sync against the remote snapshot before rebuild/compaction if a remote consumer may still need deletion reconciliation.",
7040            counts.tombstones.total,
7041            live_rows,
7042            source,
7043            file_size,
7044            freelist,
7045            counts.tombstones.total
7046        ));
7047    }
7048    Ok(diagnostics)
7049}
7050
7051fn sqlite_graph_freshness_from_conn(
7052    conn: &Connection,
7053    scope: &str,
7054) -> Result<GraphDbFreshnessReport> {
7055    if !sqlite_table_exists(conn, "graph_projection_versions")? {
7056        return Ok(GraphDbFreshnessReport {
7057            status: "missing".to_string(),
7058            fail_closed: true,
7059            projection_version: None,
7060            content_hash: None,
7061            source_watermark: None,
7062            diagnostics: vec![
7063                "graph projection metadata table is missing; refresh graph.db before trusting reads"
7064                    .to_string(),
7065            ],
7066        });
7067    }
7068    let version = conn
7069        .query_row(
7070            r#"
7071            SELECT projection_version, content_hash, source_watermark
7072            FROM graph_projection_versions
7073            WHERE scope = ?1
7074            "#,
7075            [scope],
7076            |row| {
7077                Ok((
7078                    row.get::<_, String>(0)?,
7079                    row.get::<_, Option<String>>(1)?,
7080                    row.get::<_, Option<String>>(2)?,
7081                ))
7082            },
7083        )
7084        .optional()?;
7085    let Some((projection_version, content_hash, source_watermark)) = version else {
7086        return Ok(GraphDbFreshnessReport {
7087            status: "missing".to_string(),
7088            fail_closed: true,
7089            projection_version: None,
7090            content_hash: None,
7091            source_watermark: None,
7092            diagnostics: vec![
7093                "graph projection metadata is missing; refresh graph.db before trusting reads"
7094                    .to_string(),
7095            ],
7096        });
7097    };
7098
7099    let mut diagnostics = Vec::new();
7100    if projection_version != GRAPH_PROJECTION_VERSION {
7101        diagnostics.push(format!(
7102            "projection version mismatch: expected {} got {}",
7103            GRAPH_PROJECTION_VERSION, projection_version
7104        ));
7105    }
7106    if content_hash.is_none() {
7107        diagnostics.push("projection content hash is missing".to_string());
7108    }
7109    let fail_closed = !diagnostics.is_empty();
7110    Ok(GraphDbFreshnessReport {
7111        status: if fail_closed { "stale" } else { "current" }.to_string(),
7112        fail_closed,
7113        projection_version: Some(projection_version),
7114        content_hash,
7115        source_watermark,
7116        diagnostics,
7117    })
7118}
7119
7120fn graph_db_operator_next_commands(
7121    root: &Path,
7122    scope: Option<&str>,
7123    include_refresh: bool,
7124) -> Vec<String> {
7125    let mut commands = Vec::new();
7126    if include_refresh {
7127        commands.push(graph_db_refresh_command(root, scope));
7128    }
7129    commands.push(format!(
7130        "tsift graph-db --path {}{} doctor --json",
7131        shell_quote(root.to_string_lossy().as_ref()),
7132        graph_db_scope_arg(scope)
7133    ));
7134    commands.push(format!(
7135        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot <rows.json> drift --json",
7136        shell_quote(root.to_string_lossy().as_ref()),
7137        graph_db_scope_arg(scope)
7138    ));
7139    commands.push(format!(
7140        "tsift convex-sync {}{} --remote-snapshot --apply --json",
7141        shell_quote(root.to_string_lossy().as_ref()),
7142        graph_db_scope_arg(scope)
7143    ));
7144    commands
7145}
7146
7147pub(crate) fn graph_db_read_recovery_diagnostic(recovery: index::ReadOnlyRecovery) -> String {
7148    match recovery {
7149        index::ReadOnlyRecovery::SnapshotFallback => {
7150            "graph.db read recovered through snapshot fallback after a rollback-journal lock on the live database".to_string()
7151        }
7152        index::ReadOnlyRecovery::SnapshotFallbackWal => {
7153            "graph.db read recovered through WAL-aware snapshot fallback after copying live -wal/-shm sidecars".to_string()
7154        }
7155    }
7156}
7157
7158fn sqlite_string_set(conn: &Connection, sql: &str) -> Result<BTreeSet<String>> {
7159    let mut stmt = conn.prepare(sql)?;
7160    let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
7161    let mut values = BTreeSet::new();
7162    for row in rows {
7163        values.insert(row?);
7164    }
7165    Ok(values)
7166}
7167
7168fn sqlite_column_names(conn: &Connection, table: &str) -> Result<BTreeSet<String>> {
7169    let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
7170    let rows = stmt.query_map([], |row| row.get::<_, String>(1))?;
7171    let mut columns = BTreeSet::new();
7172    for row in rows {
7173        columns.insert(row?);
7174    }
7175    Ok(columns)
7176}
7177
7178fn sqlite_graph_schema_diagnostics(conn: &Connection) -> Result<Vec<String>> {
7179    let mut diagnostics = Vec::new();
7180    let user_version: i64 =
7181        conn.pragma_query_value(None, "user_version", |row| row.get::<_, i64>(0))?;
7182    if user_version > SQLITE_GRAPH_SCHEMA_VERSION {
7183        diagnostics.push(format!(
7184            "graph.db schema version {user_version} is newer than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
7185        ));
7186    } else if user_version < SQLITE_GRAPH_SCHEMA_VERSION {
7187        diagnostics.push(format!(
7188            "graph.db schema version {user_version} is older than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
7189        ));
7190    }
7191
7192    let tables = sqlite_string_set(
7193        conn,
7194        "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name",
7195    )?;
7196    let required_tables = [
7197        (
7198            "graph_nodes",
7199            vec![
7200                "id",
7201                "kind",
7202                "label",
7203                "properties_json",
7204                "provenance_json",
7205                "freshness_json",
7206                "row_hash",
7207                "source_watermark",
7208            ],
7209        ),
7210        (
7211            "graph_edges",
7212            vec![
7213                "edge_key",
7214                "from_id",
7215                "to_id",
7216                "kind",
7217                "properties_json",
7218                "provenance_json",
7219                "freshness_json",
7220                "row_hash",
7221                "source_watermark",
7222            ],
7223        ),
7224        (
7225            "graph_projection_versions",
7226            vec![
7227                "scope",
7228                "projection_version",
7229                "content_hash",
7230                "source_watermark",
7231                "observed_at_unix",
7232            ],
7233        ),
7234        (
7235            "graph_tombstones",
7236            vec!["row_key", "row_kind", "deleted_at_unix"],
7237        ),
7238        ("graph_node_properties", vec!["node_id", "key", "value"]),
7239        ("graph_edge_properties", vec!["edge_key", "key", "value"]),
7240    ];
7241    for (table, required_columns) in required_tables {
7242        if !tables.contains(table) {
7243            diagnostics.push(format!("graph.db schema drift: missing table {table}"));
7244            continue;
7245        }
7246        let columns = sqlite_column_names(conn, table)?;
7247        for column in required_columns {
7248            if !columns.contains(column) {
7249                diagnostics.push(format!(
7250                    "graph.db schema drift: missing column {table}.{column}"
7251                ));
7252            }
7253        }
7254    }
7255
7256    let indexes = sqlite_string_set(
7257        conn,
7258        "SELECT name FROM sqlite_master WHERE type = 'index' ORDER BY name",
7259    )?;
7260    for index in [
7261        "idx_graph_nodes_kind",
7262        "idx_graph_edges_from_kind",
7263        "idx_graph_edges_to_kind",
7264        "idx_graph_edges_edge_key",
7265        "idx_graph_node_properties_key_value_node",
7266        "idx_graph_edge_properties_key_value_edge",
7267    ] {
7268        if !indexes.contains(index) {
7269            diagnostics.push(format!("graph.db schema drift: missing index {index}"));
7270        }
7271    }
7272
7273    if tables.contains("graph_edges") {
7274        let mut stmt = conn.prepare("PRAGMA foreign_key_list(graph_edges)")?;
7275        let rows = stmt.query_map([], |row| {
7276            Ok((row.get::<_, String>(3)?, row.get::<_, String>(4)?))
7277        })?;
7278        let mut fks = BTreeSet::new();
7279        for row in rows {
7280            fks.insert(row?);
7281        }
7282        for expected in [
7283            ("from_id".to_string(), "id".to_string()),
7284            ("to_id".to_string(), "id".to_string()),
7285        ] {
7286            if !fks.contains(&expected) {
7287                diagnostics.push(format!(
7288                    "graph.db schema drift: missing graph_edges foreign key {} -> graph_nodes.{}",
7289                    expected.0, expected.1
7290                ));
7291            }
7292        }
7293    }
7294
7295    Ok(diagnostics)
7296}
7297
7298fn sqlite_query_diagnostics(conn: &Connection, sql: &str) -> Result<Vec<String>> {
7299    let mut stmt = conn.prepare(sql)?;
7300    let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
7301    let mut diagnostics = Vec::new();
7302    for row in rows {
7303        diagnostics.push(row?);
7304    }
7305    Ok(diagnostics)
7306}
7307
7308fn sqlite_graph_duplicate_diagnostics(conn: &Connection) -> Result<Vec<String>> {
7309    let mut diagnostics = sqlite_query_diagnostics(
7310        conn,
7311        r#"
7312        SELECT 'duplicate graph_nodes.id ' || id || ' (' || COUNT(*) || ' rows)'
7313        FROM graph_nodes
7314        GROUP BY id
7315        HAVING COUNT(*) > 1
7316        ORDER BY id
7317        "#,
7318    )?;
7319    diagnostics.extend(sqlite_query_diagnostics(
7320        conn,
7321        r#"
7322        SELECT 'duplicate graph_edges key ' || from_id || ' -' || kind || '-> ' || to_id || ' (' || COUNT(*) || ' rows)'
7323        FROM graph_edges
7324        GROUP BY from_id, to_id, kind
7325        HAVING COUNT(*) > 1
7326        ORDER BY from_id, kind, to_id
7327        "#,
7328    )?);
7329    diagnostics.extend(sqlite_query_diagnostics(
7330        conn,
7331        r#"
7332        SELECT 'duplicate graph_edges.edge_key ' || edge_key || ' (' || COUNT(*) || ' rows)'
7333        FROM graph_edges
7334        GROUP BY edge_key
7335        HAVING COUNT(*) > 1
7336        ORDER BY edge_key
7337        "#,
7338    )?);
7339    Ok(diagnostics)
7340}
7341
7342fn sqlite_graph_orphan_diagnostics(conn: &Connection) -> Result<Vec<String>> {
7343    sqlite_query_diagnostics(
7344        conn,
7345        r#"
7346        SELECT 'orphan edge missing from node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
7347        FROM graph_edges e
7348        LEFT JOIN graph_nodes n ON n.id = e.from_id
7349        WHERE n.id IS NULL
7350        UNION ALL
7351        SELECT 'orphan edge missing to node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
7352        FROM graph_edges e
7353        LEFT JOIN graph_nodes n ON n.id = e.to_id
7354        WHERE n.id IS NULL
7355        ORDER BY 1
7356        "#,
7357    )
7358}
7359
7360fn sqlite_graph_json_diagnostics(conn: &Connection) -> Result<Vec<String>> {
7361    let mut diagnostics = Vec::new();
7362    let mut node_stmt = conn.prepare(
7363        "SELECT id, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
7364    )?;
7365    let node_rows = node_stmt.query_map([], |row| {
7366        Ok((
7367            row.get::<_, String>(0)?,
7368            row.get::<_, String>(1)?,
7369            row.get::<_, String>(2)?,
7370            row.get::<_, Option<String>>(3)?,
7371        ))
7372    })?;
7373    for row in node_rows {
7374        let (id, properties_json, provenance_json, freshness_json) = row?;
7375        if let Err(err) = serde_json::from_str::<BTreeMap<String, String>>(&properties_json) {
7376            diagnostics.push(format!(
7377                "graph_nodes {id} properties_json is invalid: {err}"
7378            ));
7379        }
7380        if let Err(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json) {
7381            diagnostics.push(format!(
7382                "graph_nodes {id} provenance_json is invalid: {err}"
7383            ));
7384        }
7385        if let Some(freshness_json) = freshness_json
7386            && let Err(err) = serde_json::from_str::<GraphFreshness>(&freshness_json)
7387        {
7388            diagnostics.push(format!("graph_nodes {id} freshness_json is invalid: {err}"));
7389        }
7390    }
7391
7392    let mut edge_stmt = conn.prepare(
7393        "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
7394    )?;
7395    let edge_rows = edge_stmt.query_map([], |row| {
7396        Ok((
7397            row.get::<_, String>(0)?,
7398            row.get::<_, String>(1)?,
7399            row.get::<_, String>(2)?,
7400            row.get::<_, String>(3)?,
7401            row.get::<_, String>(4)?,
7402            row.get::<_, String>(5)?,
7403            row.get::<_, Option<String>>(6)?,
7404        ))
7405    })?;
7406    for row in edge_rows {
7407        let (edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json) =
7408            row?;
7409        let edge = format!("{edge_key} {from_id} -{kind}-> {to_id}");
7410        if let Err(err) = serde_json::from_str::<BTreeMap<String, String>>(&properties_json) {
7411            diagnostics.push(format!(
7412                "graph_edges {edge} properties_json is invalid: {err}"
7413            ));
7414        }
7415        if let Err(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json) {
7416            diagnostics.push(format!(
7417                "graph_edges {edge} provenance_json is invalid: {err}"
7418            ));
7419        }
7420        if let Some(freshness_json) = freshness_json
7421            && let Err(err) = serde_json::from_str::<GraphFreshness>(&freshness_json)
7422        {
7423            diagnostics.push(format!(
7424                "graph_edges {edge} freshness_json is invalid: {err}"
7425            ));
7426        }
7427    }
7428    Ok(diagnostics)
7429}
7430
7431fn sqlite_graph_projection_metadata_diagnostics(
7432    conn: &Connection,
7433    scope: Option<&str>,
7434) -> Result<Vec<String>> {
7435    let mut diagnostics = Vec::new();
7436    let scope_key = scope.unwrap_or("root");
7437    let version = conn
7438        .query_row(
7439            r#"
7440            SELECT projection_version, content_hash, source_watermark
7441            FROM graph_projection_versions
7442            WHERE scope = ?1
7443            "#,
7444            [scope_key],
7445            |row| {
7446                Ok((
7447                    row.get::<_, String>(0)?,
7448                    row.get::<_, Option<String>>(1)?,
7449                    row.get::<_, Option<String>>(2)?,
7450                ))
7451            },
7452        )
7453        .optional()?;
7454    let Some((projection_version, content_hash, _source_watermark)) = version else {
7455        diagnostics.push(format!(
7456            "graph projection metadata is missing for scope {scope_key}"
7457        ));
7458        return Ok(diagnostics);
7459    };
7460    if projection_version != GRAPH_PROJECTION_VERSION {
7461        diagnostics.push(format!(
7462            "projection version mismatch: expected {GRAPH_PROJECTION_VERSION} got {projection_version}"
7463        ));
7464    }
7465    if content_hash.is_none() {
7466        diagnostics.push("projection content hash is missing".to_string());
7467    }
7468
7469    let meta_id = graph_projection_meta_id(scope);
7470    let meta_properties = conn
7471        .query_row(
7472            "SELECT properties_json FROM graph_nodes WHERE id = ?1 AND kind = ?2",
7473            (&meta_id, GRAPH_PROJECTION_META_KIND),
7474            |row| row.get::<_, String>(0),
7475        )
7476        .optional()?;
7477    let Some(meta_properties) = meta_properties else {
7478        diagnostics.push(format!("projection_meta node {meta_id} is missing"));
7479        return Ok(diagnostics);
7480    };
7481    let properties = serde_json::from_str::<BTreeMap<String, String>>(&meta_properties)
7482        .with_context(|| format!("parsing projection_meta properties for {meta_id}"))?;
7483    if properties.get("projection_version").map(String::as_str) != Some(GRAPH_PROJECTION_VERSION) {
7484        diagnostics.push(format!(
7485            "projection_meta node {meta_id} has stale projection_version"
7486        ));
7487    }
7488    if properties.get("content_hash") != content_hash.as_ref() {
7489        diagnostics.push(format!(
7490            "projection_meta node {meta_id} content_hash does not match graph_projection_versions"
7491        ));
7492    }
7493    Ok(diagnostics)
7494}
7495
7496pub(crate) fn sqlite_convex_rows_from_conn(conn: &Connection) -> Result<ConvexProjectionRows> {
7497    let mut node_stmt = conn.prepare(
7498        "SELECT id, kind, label, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
7499    )?;
7500    let node_rows = node_stmt.query_map([], |row| {
7501        let properties_json: String = row.get(3)?;
7502        let provenance_json: String = row.get(4)?;
7503        let freshness_json: Option<String> = row.get(5)?;
7504        Ok((
7505            row.get::<_, String>(0)?,
7506            row.get::<_, String>(1)?,
7507            row.get::<_, String>(2)?,
7508            properties_json,
7509            provenance_json,
7510            freshness_json,
7511        ))
7512    })?;
7513    let mut nodes = Vec::new();
7514    for row in node_rows {
7515        let (external_id, kind, label, properties_json, provenance_json, freshness_json) = row?;
7516        nodes.push(ConvexNodeRow {
7517            external_id,
7518            kind,
7519            label,
7520            properties: serde_json::from_str(&properties_json)?,
7521            provenance: serde_json::from_str(&provenance_json)?,
7522            freshness: freshness_json
7523                .map(|value| serde_json::from_str(&value))
7524                .transpose()?,
7525        });
7526    }
7527
7528    let mut edge_stmt = conn.prepare(
7529        "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
7530    )?;
7531    let edge_rows = edge_stmt.query_map([], |row| {
7532        let properties_json: String = row.get(4)?;
7533        let provenance_json: String = row.get(5)?;
7534        let freshness_json: Option<String> = row.get(6)?;
7535        Ok((
7536            row.get::<_, String>(0)?,
7537            row.get::<_, String>(1)?,
7538            row.get::<_, String>(2)?,
7539            row.get::<_, String>(3)?,
7540            properties_json,
7541            provenance_json,
7542            freshness_json,
7543        ))
7544    })?;
7545    let mut edges = Vec::new();
7546    for row in edge_rows {
7547        let (
7548            edge_key,
7549            from_external_id,
7550            to_external_id,
7551            kind,
7552            properties_json,
7553            provenance_json,
7554            freshness_json,
7555        ) = row?;
7556        edges.push(ConvexEdgeRow {
7557            edge_key,
7558            from_external_id,
7559            to_external_id,
7560            kind,
7561            properties: serde_json::from_str(&properties_json)?,
7562            provenance: serde_json::from_str(&provenance_json)?,
7563            freshness: freshness_json
7564                .map(|value| serde_json::from_str(&value))
7565                .transpose()?,
7566        });
7567    }
7568    Ok(ConvexProjectionRows { nodes, edges })
7569}
7570
7571fn convex_required_index_label(index: &ConvexRequiredIndex) -> String {
7572    format!("{}.{}({})", index.table, index.name, index.fields.join(","))
7573}
7574
7575fn convex_snapshot_index_value(value: &serde_json::Value) -> Option<&serde_json::Value> {
7576    value
7577        .get("indexes")
7578        .or_else(|| value.get("requiredIndexes"))
7579        .or_else(|| {
7580            value
7581                .get("metadata")
7582                .and_then(|metadata| metadata.get("indexes"))
7583        })
7584}
7585
7586fn convex_snapshot_declared_indexes(
7587    value: &serde_json::Value,
7588) -> Result<Option<Vec<ConvexRequiredIndex>>> {
7589    convex_snapshot_index_value(value)
7590        .map(|indexes| {
7591            serde_json::from_value::<Vec<ConvexRequiredIndex>>(indexes.clone())
7592                .context("parsing Convex snapshot index metadata")
7593        })
7594        .transpose()
7595}
7596
7597fn convex_snapshot_index_diagnostics(value: &serde_json::Value) -> Result<Vec<String>> {
7598    let required = convex_required_indexes();
7599    let Some(declared) = convex_snapshot_declared_indexes(value)? else {
7600        return Ok(vec![format!(
7601            "Convex snapshot index metadata is missing; required indexes not confirmed: {}",
7602            required
7603                .iter()
7604                .map(convex_required_index_label)
7605                .collect::<Vec<_>>()
7606                .join(", ")
7607        )]);
7608    };
7609    let declared = declared.into_iter().collect::<BTreeSet<_>>();
7610    let missing = required
7611        .iter()
7612        .filter(|index| !declared.contains(*index))
7613        .map(convex_required_index_label)
7614        .collect::<Vec<_>>();
7615    if missing.is_empty() {
7616        Ok(Vec::new())
7617    } else {
7618        Ok(vec![format!(
7619            "Convex snapshot is missing required index metadata: {}",
7620            missing.join(", ")
7621        )])
7622    }
7623}
7624
7625pub(crate) fn load_convex_projection_snapshot_value(
7626    snapshot_path: &Path,
7627) -> Result<(ConvexProjectionRows, serde_json::Value)> {
7628    let content = fs::read_to_string(snapshot_path).with_context(|| {
7629        format!(
7630            "reading Convex projection snapshot {}",
7631            snapshot_path.display()
7632        )
7633    })?;
7634    let value = serde_json::from_str::<serde_json::Value>(&content).with_context(|| {
7635        format!(
7636            "parsing Convex projection snapshot {}",
7637            snapshot_path.display()
7638        )
7639    })?;
7640    let rows = serde_json::from_value::<ConvexProjectionRows>(value.clone())
7641        .with_context(|| format!("parsing Convex projection rows {}", snapshot_path.display()))?;
7642    Ok((rows, value))
7643}
7644
7645pub(crate) fn append_sqlite_graph_doctor_checks(
7646    report: &mut GraphDbDoctorReport,
7647    root: &Path,
7648    scope: Option<&str>,
7649    graph_db: &Path,
7650) -> Option<substrate::SqliteReadOnlyConnection> {
7651    let rebuild = graph_db_rebuild_command(root, scope);
7652    let backup_rebuild = graph_db_backup_rebuild_command(root, scope, graph_db);
7653    if !graph_db.exists() {
7654        report.push_check(graph_db_doctor_check(
7655            "sqlite_graph_db_exists",
7656            vec![format!("graph.db is missing at {}", graph_db.display())],
7657            vec![rebuild],
7658        ));
7659        return None;
7660    }
7661    report.push_check(graph_db_doctor_check(
7662        "sqlite_graph_db_exists",
7663        Vec::new(),
7664        vec![rebuild.clone()],
7665    ));
7666
7667    let conn = match open_sqlite_graph_db_readonly(graph_db) {
7668        Ok(conn) => conn,
7669        Err(err) => {
7670            report.push_check(graph_db_doctor_check(
7671                "sqlite_graph_db_open",
7672                vec![err.to_string()],
7673                vec![backup_rebuild],
7674            ));
7675            return None;
7676        }
7677    };
7678    report.push_check(graph_db_doctor_check(
7679        "sqlite_graph_db_open",
7680        Vec::new(),
7681        vec![rebuild.clone()],
7682    ));
7683    if let Some(recovery) = conn.recovery() {
7684        report.push_check(GraphDbDoctorCheck {
7685            name: "sqlite_graph_db_read_recovery".to_string(),
7686            status: "recovered".to_string(),
7687            fail_closed: false,
7688            diagnostics: vec![graph_db_read_recovery_diagnostic(recovery)],
7689            repair_commands: Vec::new(),
7690        });
7691    }
7692
7693    let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
7694        .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
7695    report.push_check(graph_db_doctor_check(
7696        "sqlite_schema",
7697        schema_diagnostics,
7698        vec![backup_rebuild.clone()],
7699    ));
7700
7701    let metadata_diagnostics = sqlite_graph_projection_metadata_diagnostics(conn.conn(), scope)
7702        .unwrap_or_else(|err| {
7703            vec![format!(
7704                "graph projection metadata inspection failed: {err}"
7705            )]
7706        });
7707    report.push_check(graph_db_doctor_check(
7708        "sqlite_projection_metadata",
7709        metadata_diagnostics,
7710        vec![rebuild.clone()],
7711    ));
7712
7713    let duplicate_diagnostics = sqlite_graph_duplicate_diagnostics(conn.conn())
7714        .unwrap_or_else(|err| vec![format!("duplicate id inspection failed: {err}")]);
7715    report.push_check(graph_db_doctor_check(
7716        "sqlite_duplicate_ids",
7717        duplicate_diagnostics,
7718        vec![backup_rebuild.clone()],
7719    ));
7720
7721    let orphan_diagnostics = sqlite_graph_orphan_diagnostics(conn.conn())
7722        .unwrap_or_else(|err| vec![format!("orphan edge inspection failed: {err}")]);
7723    report.push_check(graph_db_doctor_check(
7724        "sqlite_orphan_edges",
7725        orphan_diagnostics,
7726        vec![rebuild.clone()],
7727    ));
7728
7729    let json_diagnostics = sqlite_graph_json_diagnostics(conn.conn())
7730        .unwrap_or_else(|err| vec![format!("graph row JSON inspection failed: {err}")]);
7731    report.push_check(graph_db_doctor_check(
7732        "sqlite_row_json",
7733        json_diagnostics,
7734        vec![backup_rebuild],
7735    ));
7736
7737    let tombstone_diagnostics =
7738        sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
7739            .unwrap_or_else(|err| {
7740                vec![format!(
7741                    "graph tombstone retention inspection failed: {err}"
7742                )]
7743            });
7744    report.push_check(GraphDbDoctorCheck {
7745        name: "sqlite_tombstone_retention".to_string(),
7746        status: if tombstone_diagnostics.is_empty() {
7747            "ok".to_string()
7748        } else {
7749            "warning".to_string()
7750        },
7751        fail_closed: false,
7752        diagnostics: tombstone_diagnostics,
7753        repair_commands: Vec::new(),
7754    });
7755    let compaction_check = match sqlite_graph_counts(conn.conn(), scope.unwrap_or("root")) {
7756        Ok(counts) => {
7757            let policy = graph_db_compaction_policy(root, scope, &counts, false);
7758            GraphDbDoctorCheck {
7759                name: "sqlite_compaction_policy".to_string(),
7760                status: policy.status.clone(),
7761                fail_closed: false,
7762                diagnostics: policy.proof,
7763                repair_commands: if policy.status == "recommended" {
7764                    policy.recommendations
7765                } else {
7766                    Vec::new()
7767                },
7768            }
7769        }
7770        Err(err) => GraphDbDoctorCheck {
7771            name: "sqlite_compaction_policy".to_string(),
7772            status: "warning".to_string(),
7773            fail_closed: false,
7774            diagnostics: vec![format!("graph compaction policy inspection failed: {err}")],
7775            repair_commands: Vec::new(),
7776        },
7777    };
7778    report.push_check(compaction_check);
7779
7780    Some(conn)
7781}
7782
7783pub(crate) fn append_convex_snapshot_doctor_checks(
7784    report: &mut GraphDbDoctorReport,
7785    root: &Path,
7786    scope: Option<&str>,
7787    local_rows: Option<&ConvexProjectionRows>,
7788    snapshot_path: Option<&Path>,
7789) {
7790    let repair = convex_refresh_command(root, scope);
7791    let Some(snapshot_path) = snapshot_path else {
7792        report.push_check(graph_db_doctor_check(
7793            "convex_snapshot_present",
7794            vec!["--backend convex-snapshot requires --convex-snapshot <rows.json>".to_string()],
7795            vec![format!(
7796                "tsift convex-sync {}{} --json > convex-rows.json",
7797                shell_quote(root.to_string_lossy().as_ref()),
7798                graph_db_scope_arg(scope)
7799            )],
7800        ));
7801        return;
7802    };
7803    report.push_check(graph_db_doctor_check(
7804        "convex_snapshot_present",
7805        Vec::new(),
7806        vec![repair.clone()],
7807    ));
7808
7809    let (snapshot, snapshot_value) = match load_convex_projection_snapshot_value(snapshot_path) {
7810        Ok(snapshot) => snapshot,
7811        Err(err) => {
7812            report.push_check(graph_db_doctor_check(
7813                "convex_snapshot_parse",
7814                vec![err.to_string()],
7815                vec![repair],
7816            ));
7817            return;
7818        }
7819    };
7820    report.push_check(graph_db_doctor_check(
7821        "convex_snapshot_parse",
7822        Vec::new(),
7823        vec![repair.clone()],
7824    ));
7825
7826    let row_diagnostics = convex_projection_row_diagnostics(&snapshot);
7827    report.push_check(graph_db_doctor_check(
7828        "convex_snapshot_rows",
7829        row_diagnostics,
7830        vec![repair.clone()],
7831    ));
7832
7833    let index_diagnostics = convex_snapshot_index_diagnostics(&snapshot_value)
7834        .unwrap_or_else(|err| vec![err.to_string()]);
7835    report.required_indexes = convex_required_indexes();
7836    report.push_check(graph_db_doctor_check(
7837        "convex_required_indexes",
7838        index_diagnostics,
7839        vec![
7840            "Add the indexes from examples/convex-graph/schema.ts, then redeploy the Convex app"
7841                .to_string(),
7842        ],
7843    ));
7844
7845    if let Some(local_rows) = local_rows {
7846        let freshness = convex_projection_freshness(local_rows, Some(&snapshot), scope);
7847        report.push_check(graph_db_doctor_check(
7848            "convex_projection_freshness",
7849            freshness.diagnostics,
7850            vec![repair],
7851        ));
7852    } else {
7853        report.push_check(graph_db_doctor_check(
7854            "convex_projection_freshness",
7855            vec![
7856                "local SQLite graph.db could not be read, so Convex freshness cannot be verified"
7857                    .to_string(),
7858            ],
7859            vec![graph_db_rebuild_command(root, scope)],
7860        ));
7861    }
7862}
7863
7864fn graph_db_convex_snapshot_doctor_command(
7865    root: &Path,
7866    scope: Option<&str>,
7867    snapshot_path: &Path,
7868) -> String {
7869    format!(
7870        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} doctor --json",
7871        shell_quote(root.to_string_lossy().as_ref()),
7872        graph_db_scope_arg(scope),
7873        shell_quote(snapshot_path.to_string_lossy().as_ref())
7874    )
7875}
7876
7877fn graph_db_convex_snapshot_read_command(
7878    root: &Path,
7879    scope: Option<&str>,
7880    snapshot_path: &Path,
7881) -> String {
7882    format!(
7883        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} schema --json",
7884        shell_quote(root.to_string_lossy().as_ref()),
7885        graph_db_scope_arg(scope),
7886        shell_quote(snapshot_path.to_string_lossy().as_ref())
7887    )
7888}
7889
7890fn convex_sync_snapshot_diff_command(
7891    root: &Path,
7892    scope: Option<&str>,
7893    snapshot_path: &Path,
7894) -> String {
7895    format!(
7896        "tsift convex-sync {}{} --snapshot {} --json",
7897        shell_quote(root.to_string_lossy().as_ref()),
7898        graph_db_scope_arg(scope),
7899        shell_quote(snapshot_path.to_string_lossy().as_ref())
7900    )
7901}
7902
/// Borrowed inputs consumed by `graph_db_drift_report`.
pub(crate) struct GraphDbDriftInput<'a> {
    /// Project root; echoed in the report and used to build follow-up commands.
    root: &'a Path,
    /// Optional scope forwarded to the freshness check and follow-up commands.
    scope: Option<&'a str>,
    /// Path of the local graph database; only echoed in the report.
    graph_db: &'a Path,
    /// Path of the Convex snapshot; echoed in the report and in follow-up commands.
    snapshot_path: &'a Path,
    /// Projection rows on the local side of the comparison.
    local: &'a ConvexProjectionRows,
    /// Projection rows on the snapshot side of the comparison.
    snapshot: &'a ConvexProjectionRows,
    /// JSON value handed to the snapshot index diagnostics.
    snapshot_value: &'a serde_json::Value,
    /// Warnings passed through unchanged into the report.
    warnings: Vec<String>,
}
7913
7914pub(crate) fn graph_db_drift_report(input: GraphDbDriftInput<'_>) -> GraphDbDriftReport {
7915    let GraphDbDriftInput {
7916        root,
7917        scope,
7918        graph_db,
7919        snapshot_path,
7920        local,
7921        snapshot,
7922        snapshot_value,
7923        warnings,
7924    } = input;
7925    let freshness = convex_projection_freshness(local, Some(snapshot), scope);
7926    let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
7927        convex_rows_diff(local, Some(snapshot));
7928    let row_diagnostics = convex_projection_row_diagnostics(snapshot);
7929    let index_diagnostics = convex_snapshot_index_diagnostics(snapshot_value)
7930        .unwrap_or_else(|err| vec![format!("Convex snapshot index metadata failed: {err}")]);
7931    let local_hash = freshness.local_hash.clone();
7932    let snapshot_hash = freshness.snapshot_hash.clone();
7933    let stale_nodes = freshness.stale_nodes.clone();
7934    let stale_edges = freshness.stale_edges.clone();
7935
7936    let duplicate_failures = row_diagnostics
7937        .iter()
7938        .filter(|diagnostic| diagnostic.contains("duplicate"))
7939        .count();
7940    let orphan_failures = row_diagnostics
7941        .iter()
7942        .filter(|diagnostic| diagnostic.contains("references missing"))
7943        .count();
7944    let missing_required_indexes = index_diagnostics.len();
7945    let stale_projection_metadata =
7946        usize::from(local_hash != snapshot_hash || snapshot_hash.is_none());
7947    let hard_failures = duplicate_failures + orphan_failures + missing_required_indexes;
7948    let has_drift = freshness.fail_closed
7949        || !node_upserts.is_empty()
7950        || !edge_upserts.is_empty()
7951        || !node_tombstones.is_empty()
7952        || !edge_tombstones.is_empty();
7953    let status = if hard_failures > 0 {
7954        "fail_closed"
7955    } else if has_drift {
7956        "drift"
7957    } else {
7958        "current"
7959    }
7960    .to_string();
7961
7962    let mut diagnostics = Vec::new();
7963    diagnostics.extend(row_diagnostics);
7964    diagnostics.extend(index_diagnostics);
7965    diagnostics.extend(freshness.diagnostics.clone());
7966    if has_drift {
7967        diagnostics.push(format!(
7968            "projection diff: {} node upsert(s), {} edge upsert(s), {} node tombstone(s), {} edge tombstone(s)",
7969            node_upserts.len(),
7970            edge_upserts.len(),
7971            node_tombstones.len(),
7972            edge_tombstones.len()
7973        ));
7974    }
7975
7976    let mut next_commands = vec![graph_db_convex_snapshot_doctor_command(
7977        root,
7978        scope,
7979        snapshot_path,
7980    )];
7981    if status == "current" {
7982        next_commands.push(graph_db_convex_snapshot_read_command(
7983            root,
7984            scope,
7985            snapshot_path,
7986        ));
7987    } else {
7988        next_commands.push(convex_sync_snapshot_diff_command(
7989            root,
7990            scope,
7991            snapshot_path,
7992        ));
7993        next_commands.push(convex_refresh_command(root, scope));
7994    }
7995
7996    GraphDbDriftReport {
7997        root: root.to_string_lossy().to_string(),
7998        scope: scope.map(str::to_string),
7999        graph_db: graph_db.to_string_lossy().to_string(),
8000        convex_snapshot: snapshot_path.to_string_lossy().to_string(),
8001        status: status.clone(),
8002        graph_reads_allowed: status == "current",
8003        projection_version: GRAPH_PROJECTION_VERSION.to_string(),
8004        local_hash,
8005        snapshot_hash,
8006        summary: GraphDbDriftSummary {
8007            node_upserts: node_upserts.len(),
8008            edge_upserts: edge_upserts.len(),
8009            node_tombstones: node_tombstones.len(),
8010            edge_tombstones: edge_tombstones.len(),
8011            stale_nodes: stale_nodes.len(),
8012            stale_edges: stale_edges.len(),
8013            stale_projection_metadata,
8014            duplicate_failures,
8015            orphan_failures,
8016            missing_required_indexes,
8017        },
8018        node_upserts: node_upserts
8019            .into_iter()
8020            .map(|row| row.external_id)
8021            .collect(),
8022        edge_upserts: edge_upserts.into_iter().map(|row| row.edge_key).collect(),
8023        node_tombstones,
8024        edge_tombstones,
8025        stale_nodes,
8026        stale_edges,
8027        diagnostics,
8028        next_commands,
8029        required_indexes: convex_required_indexes(),
8030        warnings,
8031    }
8032}
8033
8034pub(crate) fn print_graph_db_drift_human(report: &GraphDbDriftReport) {
8035    println!(
8036        "graph-db drift status: {} reads_allowed: {}",
8037        report.status, report.graph_reads_allowed
8038    );
8039    println!("graph_db: {}", report.graph_db);
8040    println!("convex_snapshot: {}", report.convex_snapshot);
8041    println!(
8042        "upserts: {} node(s), {} edge(s)",
8043        report.summary.node_upserts, report.summary.edge_upserts
8044    );
8045    println!(
8046        "tombstones: {} node(s), {} edge(s)",
8047        report.summary.node_tombstones, report.summary.edge_tombstones
8048    );
8049    for diagnostic in &report.diagnostics {
8050        println!("diagnostic: {diagnostic}");
8051    }
8052    for command in &report.next_commands {
8053        println!("next: {command}");
8054    }
8055}
8056
8057pub(crate) fn print_graph_db_doctor_human(report: &GraphDbDoctorReport) {
8058    println!(
8059        "graph-db doctor backend: {} status: {}",
8060        report.backend, report.status
8061    );
8062    println!("graph_db: {}", report.graph_db);
8063    if let Some(snapshot) = &report.convex_snapshot {
8064        println!("convex_snapshot: {snapshot}");
8065    }
8066    for check in &report.checks {
8067        println!("check: {} {}", check.name, check.status);
8068        for diagnostic in &check.diagnostics {
8069            println!("  diagnostic: {diagnostic}");
8070        }
8071    }
8072    for command in &report.repair_commands {
8073        println!("repair: {command}");
8074    }
8075}
8076
8077pub(crate) fn graph_db_operator_report_from_disk(
8078    root: &Path,
8079    scope: Option<&str>,
8080    graph_db: &Path,
8081    operation: &str,
8082    refresh: Option<GraphDbRefreshSummary>,
8083    warnings: Vec<String>,
8084) -> Result<GraphDbOperatorReport> {
8085    if !graph_db.exists() {
8086        let next_commands = graph_db_operator_next_commands(root, scope, true);
8087        let counts = GraphDbOperatorCounts {
8088            nodes: 0,
8089            edges: 0,
8090            tombstones: GraphDbTombstoneCounts {
8091                nodes: 0,
8092                edges: 0,
8093                total: 0,
8094            },
8095            file_size_bytes: None,
8096            freelist_bytes: None,
8097        };
8098        return Ok(GraphDbOperatorReport {
8099            root: root.to_string_lossy().to_string(),
8100            scope: scope.map(str::to_string),
8101            graph_db: graph_db.to_string_lossy().to_string(),
8102            operation: operation.to_string(),
8103            status: "missing".to_string(),
8104            materialized: false,
8105            freshness: GraphDbFreshnessReport {
8106                status: "missing".to_string(),
8107                fail_closed: true,
8108                projection_version: None,
8109                content_hash: None,
8110                source_watermark: None,
8111                diagnostics: vec![
8112                    "graph.db is missing; run graph-db refresh before trusting graph reads"
8113                        .to_string(),
8114                ],
8115            },
8116            readiness: graph_effectiveness_blocked(
8117                "graph_db_missing",
8118                vec![
8119                    "graph.db is missing; materialize the projection before relying on graph effectiveness".to_string(),
8120                ],
8121                next_commands.clone(),
8122            ),
8123            counts: counts.clone(),
8124            refresh,
8125            compaction: graph_db_compaction_policy(root, scope, &counts, false),
8126            recovery: None,
8127            next_commands,
8128            warnings,
8129        });
8130    }
8131
8132    let conn = open_sqlite_graph_db_readonly(graph_db)?;
8133    let recovery = conn.recovery();
8134    let mut warnings = warnings;
8135    if let Some(recovery) = recovery {
8136        warnings.push(graph_db_read_recovery_diagnostic(recovery));
8137    }
8138    let mut freshness = sqlite_graph_freshness_from_conn(conn.conn(), scope.unwrap_or("root"))?;
8139    let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
8140        .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
8141    if !schema_diagnostics.is_empty() {
8142        freshness.diagnostics.extend(schema_diagnostics);
8143        freshness.fail_closed = true;
8144        freshness.status = "stale".to_string();
8145    }
8146    let counts = sqlite_graph_counts(conn.conn(), scope.unwrap_or("root"))?;
8147    warnings.extend(
8148        sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
8149            .unwrap_or_else(|err| {
8150                vec![format!(
8151                    "graph tombstone retention inspection failed: {err}"
8152                )]
8153            }),
8154    );
8155    let status = if freshness.fail_closed {
8156        "stale"
8157    } else {
8158        "current"
8159    }
8160    .to_string();
8161
8162    Ok(GraphDbOperatorReport {
8163        root: root.to_string_lossy().to_string(),
8164        scope: scope.map(str::to_string),
8165        graph_db: graph_db.to_string_lossy().to_string(),
8166        operation: operation.to_string(),
8167        status,
8168        materialized: true,
8169        freshness,
8170        readiness: graph_db_semantic_readiness(root, scope),
8171        compaction: graph_db_compaction_policy(root, scope, &counts, false),
8172        counts,
8173        refresh,
8174        recovery,
8175        next_commands: graph_db_operator_next_commands(root, scope, false),
8176        warnings,
8177    })
8178}
8179
8180fn print_graph_db_operator_human(report: &GraphDbOperatorReport) {
8181    println!(
8182        "graph-db {} status: {} materialized: {}",
8183        report.operation, report.status, report.materialized
8184    );
8185    println!("graph_db: {}", report.graph_db);
8186    println!(
8187        "projection: version={} hash={} watermark={}",
8188        report
8189            .freshness
8190            .projection_version
8191            .as_deref()
8192            .unwrap_or("<missing>"),
8193        report
8194            .freshness
8195            .content_hash
8196            .as_deref()
8197            .unwrap_or("<missing>"),
8198        report
8199            .freshness
8200            .source_watermark
8201            .as_deref()
8202            .unwrap_or("<missing>")
8203    );
8204    println!(
8205        "rows: {} node(s), {} edge(s), {} tombstone(s)",
8206        report.counts.nodes, report.counts.edges, report.counts.tombstones.total
8207    );
8208    println!(
8209        "readiness: {} reason: {} fail_closed: {}",
8210        report.readiness.status, report.readiness.reason, report.readiness.fail_closed
8211    );
8212    if let Some(file_size) = report.counts.file_size_bytes {
8213        println!(
8214            "storage: {} byte(s), {} free byte(s)",
8215            file_size,
8216            report.counts.freelist_bytes.unwrap_or(0)
8217        );
8218    }
8219    if let Some(refresh) = &report.refresh {
8220        println!(
8221            "refresh: {} tombstoned node(s), {} tombstoned edge(s)",
8222            refresh.tombstoned_nodes, refresh.tombstoned_edges
8223        );
8224        println!(
8225            "delta: {} node upsert(s), {} edge upsert(s), {} property row upsert(s), {} unchanged node(s), {} unchanged edge(s), {} unchanged property row(s), {} deleted property row(s), {} pruned tombstone(s)",
8226            refresh.upserted_nodes,
8227            refresh.upserted_edges,
8228            refresh.upserted_properties,
8229            refresh.unchanged_nodes,
8230            refresh.unchanged_edges,
8231            refresh.unchanged_properties,
8232            refresh.deleted_properties,
8233            refresh.pruned_tombstones
8234        );
8235    }
8236    println!(
8237        "compaction: {} tombstone_scan_rows={} live_rows={}",
8238        report.compaction.status,
8239        report.compaction.tombstone_scan_rows,
8240        report.compaction.live_rows
8241    );
8242    for proof in &report.compaction.proof {
8243        println!("compaction proof: {proof}");
8244    }
8245    if let Some(recovery) = report.recovery {
8246        println!("recovery: {}", graph_db_read_recovery_diagnostic(recovery));
8247    }
8248    for diagnostic in &report.freshness.diagnostics {
8249        println!("diagnostic: {diagnostic}");
8250    }
8251    for diagnostic in &report.readiness.diagnostics {
8252        println!("readiness diagnostic: {diagnostic}");
8253    }
8254    for warning in &report.warnings {
8255        println!("warning: {warning}");
8256    }
8257    for command in &report.readiness.next_commands {
8258        println!("readiness next: {command}");
8259    }
8260    for command in &report.next_commands {
8261        println!("next: {command}");
8262    }
8263}
8264
8265pub(crate) fn print_graph_db_operator_report(
8266    report: &GraphDbOperatorReport,
8267    format: OutputFormat,
8268) -> Result<()> {
8269    if format.json_output {
8270        print_json_or_envelope(
8271            report,
8272            &format,
8273            "graph-db",
8274            &report.operation,
8275            ToolEnvelopeSummary {
8276                text: format!(
8277                    "Graph DB {} status {} with {} node(s), {} edge(s), {} tombstone(s)",
8278                    report.operation,
8279                    report.status,
8280                    report.counts.nodes,
8281                    report.counts.edges,
8282                    report.counts.tombstones.total
8283                ),
8284                metrics: vec![
8285                    envelope_metric("operation", &report.operation),
8286                    envelope_metric("status", &report.status),
8287                    envelope_metric("nodes", report.counts.nodes),
8288                    envelope_metric("edges", report.counts.edges),
8289                    envelope_metric("tombstones", report.counts.tombstones.total),
8290                    envelope_metric("compaction", &report.compaction.status),
8291                    envelope_metric("readiness", &report.readiness.status),
8292                ],
8293            },
8294            false,
8295            report.next_commands.clone(),
8296        )
8297    } else {
8298        print_graph_db_operator_human(report);
8299        Ok(())
8300    }
8301}
8302
/// Returns the part of `run` that precedes the first `"  ("` (two spaces
/// followed by an opening parenthesis), or all of `run` when that separator
/// does not occur.
fn status_run_command_without_notes(run: &str) -> &str {
    match run.split_once("  (") {
        Some((command, _notes)) => command,
        None => run,
    }
}
8308
8309fn graph_db_status_summarize_command(report: &status::StatusReport) -> String {
8310    report
8311        .recommendations
8312        .run
8313        .as_deref()
8314        .filter(|command| command.contains("summarize --extract"))
8315        .map(status_run_command_without_notes)
8316        .unwrap_or("tsift summarize --extract .")
8317        .to_string()
8318}
8319
8320fn graph_db_semantic_readiness(root: &Path, scope: Option<&str>) -> GraphEffectivenessReadiness {
8321    let report = match status::check_status(root) {
8322        Ok(report) => report,
8323        Err(err) => {
8324            return graph_effectiveness_blocked(
8325                "status_check_unavailable",
8326                vec![format!(
8327                    "semantic readiness could not inspect summary cache after graph-db refresh: {err:#}"
8328                )],
8329                vec![graph_db_refresh_command(root, scope)],
8330            );
8331        }
8332    };
8333
8334    match &report.summaries {
8335        status::SummaryStatus::Available {
8336            cached_files,
8337            total_indexed_files,
8338            coverage_pct,
8339            ..
8340        } => {
8341            let mut readiness = graph_effectiveness_ready("semantic_rows_available");
8342            readiness.diagnostics.push(format!(
8343                "summary cache has {cached_files}/{total_indexed_files} indexed file(s) cached ({coverage_pct}% coverage); graph semantic rows are available"
8344            ));
8345            readiness
8346        }
8347        status::SummaryStatus::None { .. } => {
8348            let summarize = graph_db_status_summarize_command(&report);
8349            graph_effectiveness_blocked(
8350                "summary_cache_empty",
8351                vec![format!(
8352                    "summary cache empty: graph-db materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
8353                    summarize,
8354                    root.display(),
8355                    graph_db_refresh_command(root, scope)
8356                )],
8357                vec![summarize, graph_db_refresh_command(root, scope)],
8358            )
8359        }
8360        status::SummaryStatus::Unavailable => graph_effectiveness_blocked(
8361            "summary_cache_unavailable",
8362            vec![
8363                "summary cache unavailable because the source index is missing; build the index before relying on semantic graph evidence".to_string(),
8364            ],
8365            report
8366                .recommendations
8367                .run
8368                .clone()
8369                .into_iter()
8370                .chain(std::iter::once(graph_db_refresh_command(root, scope)))
8371                .collect(),
8372        ),
8373    }
8374}
8375
8376pub(crate) fn graph_db_operator_status_warnings(root: &Path, scope: Option<&str>) -> Vec<String> {
8377    let report = match status::check_status(root) {
8378        Ok(report) => report,
8379        Err(err) => {
8380            return vec![format!(
8381                "status check unavailable after graph-db refresh: {err:#}"
8382            )];
8383        }
8384    };
8385
8386    let summarize_run = if matches!(report.summaries, status::SummaryStatus::None { .. }) {
8387        Some(graph_db_status_summarize_command(&report))
8388    } else {
8389        None
8390    };
8391    let mut warnings = report.reminders;
8392    if matches!(report.summaries, status::SummaryStatus::None { .. }) {
8393        let run = summarize_run.unwrap_or_else(|| "tsift summarize --extract .".to_string());
8394        warnings.push(format!(
8395            "summary cache empty: graph-db refresh materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
8396            run,
8397            root.display(),
8398            graph_db_refresh_command(root, scope)
8399        ));
8400    }
8401    dedupe_preserve_order(warnings)
8402}
8403
8404pub(crate) fn print_graph_db_compaction_human(report: &GraphDbCompactionReport) {
8405    println!(
8406        "graph-db compact applied:{} pruned_tombstones:{} reclaimed:{} byte(s)",
8407        report.applied, report.pruned_tombstones, report.reclaimed_bytes
8408    );
8409    println!("graph_db: {}", report.graph_db);
8410    println!(
8411        "before: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
8412        report.counts_before.nodes,
8413        report.counts_before.edges,
8414        report.counts_before.tombstones.total,
8415        report.counts_before.file_size_bytes.unwrap_or(0),
8416        report.counts_before.freelist_bytes.unwrap_or(0)
8417    );
8418    println!(
8419        "after: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
8420        report.counts_after.nodes,
8421        report.counts_after.edges,
8422        report.counts_after.tombstones.total,
8423        report.counts_after.file_size_bytes.unwrap_or(0),
8424        report.counts_after.freelist_bytes.unwrap_or(0)
8425    );
8426    for proof in &report.compaction_after.proof {
8427        println!("proof: {proof}");
8428    }
8429    for warning in &report.warnings {
8430        println!("warning: {warning}");
8431    }
8432    for command in &report.next_commands {
8433        println!("next: {command}");
8434    }
8435}
8436
8437fn parse_graph_db_property_filters(raw: &[String]) -> Result<Vec<GraphDbPropertyFilter>> {
8438    raw.iter()
8439        .map(|value| {
8440            let (key, filter_value) = value
8441                .split_once('=')
8442                .with_context(|| format!("graph-db --property expects KEY=VALUE, got {value:?}"))?;
8443            let key = key.trim();
8444            let filter_value = filter_value.trim();
8445            if key.is_empty() || filter_value.is_empty() {
8446                bail!("graph-db --property expects non-empty KEY=VALUE, got {value:?}");
8447            }
8448            Ok(GraphDbPropertyFilter {
8449                key: key.to_string(),
8450                value: filter_value.to_string(),
8451            })
8452        })
8453        .collect()
8454}
8455
8456fn graph_db_query_options(
8457    cursor: Option<String>,
8458    limit: Option<usize>,
8459    property_filters: &[String],
8460) -> Result<GraphDbQueryOptions> {
8461    Ok(GraphDbQueryOptions {
8462        cursor,
8463        limit: limit.filter(|limit| *limit > 0),
8464        property_filters: parse_graph_db_property_filters(property_filters)?,
8465    })
8466}
8467
8468fn graph_db_query_options_for_store(options: &GraphDbQueryOptions) -> GraphQueryOptions {
8469    GraphQueryOptions {
8470        cursor: options.cursor.clone(),
8471        limit: options.limit,
8472        property_filters: options
8473            .property_filters
8474            .iter()
8475            .map(|filter| GraphPropertyFilter {
8476                key: filter.key.clone(),
8477                value: filter.value.clone(),
8478            })
8479            .collect(),
8480    }
8481}
8482
8483fn graph_db_page_report_from_store(
8484    page: GraphQueryPage,
8485    property_filters: Vec<GraphDbPropertyFilter>,
8486) -> GraphDbPageReport {
8487    GraphDbPageReport {
8488        cursor: page.cursor,
8489        limit: page.limit,
8490        next_cursor: page.next_cursor,
8491        returned_nodes: page.returned_nodes,
8492        returned_edges: page.returned_edges,
8493        truncated: page.truncated,
8494        property_filters,
8495        diagnostics: page.diagnostics,
8496    }
8497}
8498
8499fn graph_db_neighborhood_ranking_gate(
8500    ranked_neighbor_cap: usize,
8501) -> GraphDbNeighborhoodRankingGate {
8502    GraphDbNeighborhoodRankingGate {
8503        status: "held_default_order_unchanged".to_string(),
8504        ranked_output_default: false,
8505        default_order: "stable_node_id".to_string(),
8506        default_change_gate: "community_search_quality_metrics".to_string(),
8507        required_workloads: metric_digest::COMMUNITY_SEARCH_WORKLOADS
8508            .iter()
8509            .map(|workload| (*workload).to_string())
8510            .collect(),
8511        required_metrics: metric_digest::COMMUNITY_SEARCH_REQUIRED_METRICS
8512            .iter()
8513            .map(|metric| (*metric).to_string())
8514            .collect(),
8515        max_duration_regression_percent: metric_digest::COMMUNITY_MAX_DURATION_REGRESSION_PERCENT,
8516        min_handle_coverage_pct: metric_digest::COMMUNITY_MIN_HANDLE_COVERAGE_PCT,
8517        min_duplicate_name_precision: metric_digest::COMMUNITY_MIN_DUPLICATE_NAME_PRECISION,
8518        min_top_community_stability: metric_digest::COMMUNITY_MIN_TOP_COMMUNITY_STABILITY,
8519        diagnostics: vec![
8520            "ranked_neighbors is additive; neighborhood nodes remain ordered by stable node id for cursor pagination".to_string(),
8521            format!(
8522                "ranked_neighbors is score-capped at {ranked_neighbor_cap} entries so previews stay bounded while cursor pagination remains exhaustive"
8523            ),
8524            "changing the default neighborhood order requires the community-search gate to pass for every required workload".to_string(),
8525        ],
8526    }
8527}
8528
8529fn graph_db_ranked_neighbor_cap(limit: Option<usize>) -> usize {
8530    match limit {
8531        Some(0) | None => GRAPH_DB_RANKED_NEIGHBOR_CAP,
8532        Some(limit) => limit.clamp(1, GRAPH_DB_RANKED_NEIGHBOR_CAP),
8533    }
8534}
8535
/// Forwards to `resolution::ranked_neighbors_capped` with the same arguments
/// and returns its result unchanged.
fn graph_db_ranked_neighbors(
    center_id: &str,
    nodes: &[SubstrateGraphNode],
    edges: &[SubstrateGraphEdge],
    cap: usize,
) -> Vec<GraphDbRankedNeighbor> {
    resolution::ranked_neighbors_capped(center_id, nodes, edges, cap)
}
8544
8545fn graph_db_edge_key(edge: &SubstrateGraphEdge) -> String {
8546    if edge.id.is_empty() {
8547        substrate::ConvexEdgeRow::stable_key(&edge.from_id, &edge.to_id, &edge.kind)
8548    } else {
8549        edge.id.clone()
8550    }
8551}
8552
8553fn graph_db_schema() -> GraphDbSchema {
8554    GraphDbSchema {
8555        contract_versions: vec![
8556            GraphDbSchemaContract {
8557                name: "graph_db_evidence",
8558                version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
8559                description: "graph-db evidence JSON packet including packet_id, projection hash, worker context, source handles, worker results, semantic rows, replay commands, and repair commands",
8560            },
8561            GraphDbSchemaContract {
8562                name: "worker_prompt_packet",
8563                version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
8564                description: "conflict-matrix worker prompt packet with owned scope, scheduler fields, stable graph handles, expected tests, expansion commands, token budget, semantic ranking reasons, worker feedback closure controls, and fail-closed prompt text",
8565            },
8566            GraphDbSchemaContract {
8567                name: "conflict_matrix",
8568                version: CONFLICT_MATRIX_CONTRACT_VERSION,
8569                description: "parallel-dispatch decision report keyed by graph evidence packets, scheduler block fields, hard file/symbol/test/config gates, and soft worker-feedback closure ranking",
8570            },
8571            GraphDbSchemaContract {
8572                name: "context_pack_graph_orchestration",
8573                version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
8574                description: "context-pack graph orchestration summary with projection freshness, evidence packet ids, ownership blocks, and follow-up graph commands",
8575            },
8576            GraphDbSchemaContract {
8577                name: "session_review_follow_up",
8578                version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
8579                description: "session-review next-context follow-up command contract for resumable digest/context-pack commands",
8580            },
8581            GraphDbSchemaContract {
8582                name: "dispatch_trace",
8583                version: DISPATCH_TRACE_CONTRACT_VERSION,
8584                description: "operator review trace linking backlog, job packets, worker results, source handles, semantic rows, scheduler fields, evidence packet ids, worker feedback closure controls, and worker prompt packets",
8585            },
8586            GraphDbSchemaContract {
8587                name: "dependency_dag",
8588                version: DEPENDENCY_DAG_CONTRACT_VERSION,
8589                description: "topological planning DAG for agent-doc backlog targets with replayable dependency edges, topo batches, and cycle diagnostics",
8590            },
8591        ],
8592        node_fields: vec![
8593            GraphDbSchemaField {
8594                name: "id",
8595                value_type: "string",
8596                description: "Stable provider-neutral node id",
8597            },
8598            GraphDbSchemaField {
8599                name: "kind",
8600                value_type: "string",
8601                description: "Application-defined node family such as file, symbol, or backlog",
8602            },
8603            GraphDbSchemaField {
8604                name: "label",
8605                value_type: "string",
8606                description: "Human-readable label",
8607            },
8608            GraphDbSchemaField {
8609                name: "properties",
8610                value_type: "object<string,string>",
8611                description: "Adapter-specific string properties",
8612            },
8613            GraphDbSchemaField {
8614                name: "provenance",
8615                value_type: "array",
8616                description: "Source system and source reference metadata",
8617            },
8618            GraphDbSchemaField {
8619                name: "freshness",
8620                value_type: "object|null",
8621                description: "Optional content hash and observed timestamp",
8622            },
8623        ],
8624        edge_fields: vec![
8625            GraphDbSchemaField {
8626                name: "id",
8627                value_type: "string",
8628                description: "Stable provider-neutral edge id derived from from_id, kind, and to_id",
8629            },
8630            GraphDbSchemaField {
8631                name: "from_id",
8632                value_type: "string",
8633                description: "Source node id",
8634            },
8635            GraphDbSchemaField {
8636                name: "to_id",
8637                value_type: "string",
8638                description: "Target node id",
8639            },
8640            GraphDbSchemaField {
8641                name: "kind",
8642                value_type: "string",
8643                description: "Application-defined edge relation",
8644            },
8645            GraphDbSchemaField {
8646                name: "properties",
8647                value_type: "object<string,string>",
8648                description: "Adapter-specific string properties",
8649            },
8650            GraphDbSchemaField {
8651                name: "provenance",
8652                value_type: "array",
8653                description: "Source system and source reference metadata",
8654            },
8655            GraphDbSchemaField {
8656                name: "freshness",
8657                value_type: "object|null",
8658                description: "Optional content hash and observed timestamp",
8659            },
8660        ],
8661        operations: vec![
8662            GraphDbSchemaOperation {
8663                command: "refresh",
8664                description: "Materialize .tsift/graph.db explicitly with delta upserts/deletes, row hash watermarks, tombstone pruning, projection metadata, row counts, and operator next commands",
8665            },
8666            GraphDbSchemaOperation {
8667                command: "status",
8668                description: "Inspect .tsift/graph.db freshness, projection metadata, row counts, tombstone counts, file-size impact, and operator next commands without refreshing",
8669            },
8670            GraphDbSchemaOperation {
8671                command: "doctor",
8672                description: "Validate graph.db or Convex snapshot health and return fail-closed repair diagnostics plus non-fatal SQLite tombstone-retention warnings",
8673            },
8674            GraphDbSchemaOperation {
8675                command: "drift",
8676                description: "Compare local SQLite projection rows with a Convex snapshot and return upsert, tombstone, metadata, duplicate, orphan, and next-command diagnostics",
8677            },
8678            GraphDbSchemaOperation {
8679                command: "compact [--apply] [--prune-tombstones --confirmed-convex-reconciled]",
8680                description: "Return or apply the post-reconciliation SQLite graph compaction policy, including WAL checkpoint/VACUUM proof and guarded tombstone pruning",
8681            },
8682            GraphDbSchemaOperation {
8683                command: "backend-eval [--candidate duckdb-duckpgq|falkordb|ladybug|kuzu] [--target ID] [--full-projection]",
8684                description: "Benchmark experimental read-only GraphStore backend prototypes against SQLite on bounded real, optional full-project, and synthetic projections across refresh/status/path tiers/evidence/conflict-matrix/dispatch-trace and emit promotion hold/eligibility gates",
8685            },
8686            GraphDbSchemaOperation {
8687                command: "evidence <target> [--depth N] [--limit N]",
8688                description: "Return a bounded versioned graph-db handoff packet for a backlog id or job packet handle, including packet_id, projection hash, worker_context rows, source_handle rows, worker_result rows, semantic_concept/entity rows, shortest paths, replay commands, repair commands, and next commands",
8689            },
8690            GraphDbSchemaOperation {
8691                command: "related <phrase> [--kind concept|entity|all] [--depth N] [--seed-limit N] [--limit N]",
8692                description: "Resolve a natural-language phrase to cached semantic concept/entity seed nodes, then return an incident/outgoing GraphStore neighborhood around those seeds for general knowledge retrieval without changing stable neighborhood pagination defaults",
8693            },
8694            GraphDbSchemaOperation {
8695                command: "dispatch-trace [target...] --path <session> [--format json|html]",
8696                description: "Export a compact graph-backed dispatch trace with evidence packet ids, worker-result feedback closure summaries, graph links, and conflict-matrix worker prompt packets",
8697            },
8698            GraphDbSchemaOperation {
8699                command: "dependency-dag [target...] --path <session>",
8700                description: "Extract a versioned agent-doc dependency DAG from backlog ids, explicit depends-on text, shared file/symbol/test/config evidence, semantic overlap, and worker-result follow-up ids",
8701            },
8702            GraphDbSchemaOperation {
8703                command: "schema",
8704                description: "Return record and operation schemas",
8705            },
8706            GraphDbSchemaOperation {
8707                command: "node <id>",
8708                description: "Return one node by stable id",
8709            },
8710            GraphDbSchemaOperation {
8711                command: "edge <id>",
8712                description: "Return one edge by stable edge id",
8713            },
8714            GraphDbSchemaOperation {
8715                command: "edges [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
8716                description: "Return edge records ordered by stable edge id with SQLite-pushed edge-property filtering and cursor pagination",
8717            },
8718            GraphDbSchemaOperation {
8719                command: "incident <id> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
8720                description: "Return incoming and outgoing edges incident to one node, ordered by stable edge id with optional kind and edge-property filters",
8721            },
8722            GraphDbSchemaOperation {
8723                command: "kind <kind> [--property KEY=VALUE] [--cursor ID] [--limit N]",
8724                description: "Return nodes of one kind ordered by id with SQLite-pushed property filtering/cursor pagination and query-plan diagnostics",
8725            },
8726            GraphDbSchemaOperation {
8727                command: "neighborhood <id> --depth <n> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor ID] [--limit N]",
8728                description: "Return a directed outgoing subgraph around a node using batched SQLite recursive traversal plus pushed filters/paging when available; JSON also includes additive ranked_neighbors while default nodes remain stable-id ordered",
8729            },
8730            GraphDbSchemaOperation {
8731                command: "path <from> <to> [--edge-kind <kind>] [--max-hops N]",
8732                description: "Return the shortest directed path by node id, optionally bounded by hop count",
8733            },
8734        ],
8735    }
8736}
8737
8738pub(crate) fn sqlite_graph_freshness(
8739    store: &SqliteGraphStore,
8740    scope: &str,
8741) -> Result<GraphDbFreshnessReport> {
8742    let version = store.projection_version(scope)?;
8743    let Some(version) = version else {
8744        return Ok(GraphDbFreshnessReport {
8745            status: "missing".to_string(),
8746            fail_closed: true,
8747            projection_version: None,
8748            content_hash: None,
8749            source_watermark: None,
8750            diagnostics: vec![
8751                "graph projection metadata is missing; rebuild the graph before trusting reads"
8752                    .to_string(),
8753            ],
8754        });
8755    };
8756    let mut diagnostics = Vec::new();
8757    let fail_closed =
8758        version.projection_version != GRAPH_PROJECTION_VERSION || version.content_hash.is_none();
8759    if version.projection_version != GRAPH_PROJECTION_VERSION {
8760        diagnostics.push(format!(
8761            "projection version mismatch: expected {} got {}",
8762            GRAPH_PROJECTION_VERSION, version.projection_version
8763        ));
8764    }
8765    if version.content_hash.is_none() {
8766        diagnostics.push("projection content hash is missing".to_string());
8767    }
8768    Ok(GraphDbFreshnessReport {
8769        status: if fail_closed { "stale" } else { "current" }.to_string(),
8770        fail_closed,
8771        projection_version: Some(version.projection_version),
8772        content_hash: version.content_hash,
8773        source_watermark: version.source_watermark,
8774        diagnostics,
8775    })
8776}
8777
8778pub(crate) fn convex_graph_freshness(
8779    local: &ConvexProjectionRows,
8780    snapshot: &ConvexProjectionRows,
8781    scope: Option<&str>,
8782) -> GraphDbFreshnessReport {
8783    let freshness = convex_projection_freshness(local, Some(snapshot), scope);
8784    GraphDbFreshnessReport {
8785        status: freshness.status,
8786        fail_closed: freshness.fail_closed,
8787        projection_version: Some(GRAPH_PROJECTION_VERSION.to_string()),
8788        content_hash: freshness.snapshot_hash,
8789        source_watermark: None,
8790        diagnostics: freshness.diagnostics,
8791    }
8792}
8793
8794pub(crate) fn tokensave_graph_freshness(store: &TokensaveDb) -> Result<GraphDbFreshnessReport> {
8795    let (nodes, edges) = store.graph_counts()?;
8796    let files = store.file_count()?;
8797    Ok(GraphDbFreshnessReport {
8798        status: "current".to_string(),
8799        fail_closed: false,
8800        projection_version: Some("tokensave-readonly".to_string()),
8801        content_hash: None,
8802        source_watermark: Some(store.db_path().to_string_lossy().to_string()),
8803        diagnostics: vec![format!(
8804            "tokensave read-only adapter opened {} node(s), {} edge(s), {} file(s)",
8805            nodes, edges, files
8806        )],
8807    })
8808}
8809
8810pub(crate) fn append_tokensave_graph_doctor_checks(report: &mut GraphDbDoctorReport, root: &Path) {
8811    match TokensaveDb::discover(root) {
8812        Ok(Some(store)) => {
8813            report.push_check(GraphDbDoctorCheck {
8814                name: "tokensave_db_open".to_string(),
8815                status: "ok".to_string(),
8816                fail_closed: false,
8817                diagnostics: vec![format!(
8818                    "opened tokensave database at {}",
8819                    store.db_path().display()
8820                )],
8821                repair_commands: Vec::new(),
8822            });
8823            match (store.node_count(), store.edge_count(), store.file_count()) {
8824                (Ok(nodes), Ok(edges), Ok(files)) => {
8825                    report.push_check(GraphDbDoctorCheck {
8826                        name: "tokensave_counts".to_string(),
8827                        status: "ok".to_string(),
8828                        fail_closed: false,
8829                        diagnostics: vec![format!(
8830                            "tokensave contains {} node(s), {} edge(s), {} file(s)",
8831                            nodes, edges, files
8832                        )],
8833                        repair_commands: Vec::new(),
8834                    });
8835                }
8836                (nodes, edges, files) => {
8837                    report.push_check(graph_db_doctor_check(
8838                        "tokensave_counts",
8839                        vec![format!(
8840                            "tokensave count inspection failed: nodes={:?} edges={:?} files={:?}",
8841                            nodes.err(),
8842                            edges.err(),
8843                            files.err()
8844                        )],
8845                        Vec::new(),
8846                    ));
8847                }
8848            }
8849        }
8850        Ok(None) => report.push_check(graph_db_doctor_check(
8851            "tokensave_db_exists",
8852            vec![format!(
8853                "tokensave database is missing at {}",
8854                root.join(".tokensave").join("tokensave.db").display()
8855            )],
8856            Vec::new(),
8857        )),
8858        Err(err) => report.push_check(graph_db_doctor_check(
8859            "tokensave_db_open",
8860            vec![err.to_string()],
8861            Vec::new(),
8862        )),
8863    }
8864}
8865
8866pub(crate) fn graph_db_resolve_evidence_target(
8867    store: &impl GraphStore,
8868    target: &str,
8869) -> Result<Option<SubstrateGraphNode>> {
8870    store.resolve_evidence_target(
8871        target,
8872        &[
8873            "backlog",
8874            "job_packet",
8875            "worker_result",
8876            "worker_context",
8877            "source_handle",
8878        ],
8879    )
8880}
8881
/// Returns nodes of a single `kind` reachable from `from_id`, each paired with a
/// graph path, by delegating directly to `store.reachable_nodes_by_kind`.
///
/// `depth` and `limit` are forwarded unchanged.
/// NOTE(review): presumably they bound hop count and row count respectively —
/// confirm against the `GraphStore` implementation.
fn graph_db_reachable_nodes_by_kind(
    store: &impl GraphStore,
    from_id: &str,
    kind: &str,
    depth: usize,
    limit: usize,
) -> Result<Vec<(SubstrateGraphNode, substrate::GraphPath)>> {
    store.reachable_nodes_by_kind(from_id, kind, depth, limit)
}
8891
8892fn graph_db_evidence_completed_queue_drift_warnings(
8893    store: &impl GraphStore,
8894    target: &SubstrateGraphNode,
8895    worker_results: &[SubstrateGraphNode],
8896) -> Result<Vec<String>> {
8897    let ref_id = target.properties.get("ref_id").map(String::as_str);
8898    let has_completed_result = worker_results.iter().any(|node| {
8899        node.properties.get("status").map(String::as_str) == Some("completed")
8900            && node.properties.get("ref_id").map(String::as_str) == ref_id
8901    });
8902    if !has_completed_result {
8903        return Ok(Vec::new());
8904    }
8905    let active_jobs = store
8906        .nodes_by_kind("job_packet")?
8907        .into_iter()
8908        .filter(|node| {
8909            node.properties.get("ref_id").map(String::as_str) == ref_id
8910                && node.label.starts_with("do #")
8911        })
8912        .collect::<Vec<_>>();
8913    if active_jobs.is_empty() {
8914        return Ok(Vec::new());
8915    }
8916    let repair = match (target.properties.get("path"), ref_id) {
8917        (Some(path), Some(id)) => format!(
8918            "repair with `agent-doc write --commit {} --done {}` or the next `agent-doc finalize --done {}` closeout",
8919            shell_quote(path),
8920            shell_quote(id),
8921            shell_quote(id)
8922        ),
8923        _ => {
8924            "repair by marking the queue item done/reaping it in the agent-doc session".to_string()
8925        }
8926    };
8927    Ok(vec![format!(
8928        "queue-head drift: target {} has {} active queued do packet(s) but already has a completed worker_result; {repair}; do not redispatch or reactivate the completed item",
8929        target.label,
8930        active_jobs.len()
8931    )])
8932}
8933
8934fn graph_db_evidence_next_commands(
8935    root: &Path,
8936    scope: Option<&str>,
8937    target: &SubstrateGraphNode,
8938    worker_context: &[SubstrateGraphNode],
8939    source_handles: &[SubstrateGraphNode],
8940    worker_results: &[SubstrateGraphNode],
8941    semantic_related: &[SubstrateGraphNode],
8942) -> Vec<String> {
8943    let mut commands = BTreeSet::new();
8944    if let Some(expand) = target.properties.get("expand") {
8945        commands.insert(expand.clone());
8946    }
8947    for worker in worker_context {
8948        if let Some(expand) = worker.properties.get("expand") {
8949            commands.insert(expand.clone());
8950        }
8951    }
8952    for source in source_handles {
8953        if let Some(expand) = source.properties.get("expand") {
8954            commands.insert(expand.clone());
8955        }
8956    }
8957    for result in worker_results {
8958        if let Some(expand) = result.properties.get("expand") {
8959            commands.insert(expand.clone());
8960        }
8961    }
8962    for semantic in semantic_related {
8963        if let Some(expand) = semantic.properties.get("expand") {
8964            commands.insert(expand.clone());
8965        }
8966    }
8967    commands.insert(format!(
8968        "tsift graph-db --path {}{} status --json",
8969        shell_quote(root.to_string_lossy().as_ref()),
8970        graph_db_scope_arg(scope)
8971    ));
8972    commands.insert(format!(
8973        "tsift graph-db --path {}{} doctor --json",
8974        shell_quote(root.to_string_lossy().as_ref()),
8975        graph_db_scope_arg(scope)
8976    ));
8977    commands.into_iter().collect()
8978}
8979
8980fn graph_db_repair_commands(root: &Path, scope: Option<&str>) -> Vec<String> {
8981    vec![
8982        format!(
8983            "tsift graph-db --path {}{} refresh --json",
8984            shell_quote(root.to_string_lossy().as_ref()),
8985            graph_db_scope_arg(scope)
8986        ),
8987        format!(
8988            "tsift graph-db --path {}{} doctor --json",
8989            shell_quote(root.to_string_lossy().as_ref()),
8990            graph_db_scope_arg(scope)
8991        ),
8992    ]
8993}
8994
8995fn graph_db_evidence_replay_commands(
8996    root: &Path,
8997    scope: Option<&str>,
8998    target: &str,
8999    depth: usize,
9000    limit: usize,
9001) -> Vec<String> {
9002    vec![
9003        format!(
9004            "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
9005            shell_quote(root.to_string_lossy().as_ref()),
9006            graph_db_scope_arg(scope),
9007            shell_quote(target),
9008            depth,
9009            limit
9010        ),
9011        format!(
9012            "tsift conflict-matrix --path {} {} --json",
9013            shell_quote(root.to_string_lossy().as_ref()),
9014            shell_quote(target)
9015        ),
9016    ]
9017}
9018
9019fn graph_db_evidence_packet_id(
9020    target: &str,
9021    target_node: &SubstrateGraphNode,
9022    freshness: &GraphDbFreshnessReport,
9023) -> String {
9024    stable_handle(
9025        "gevd",
9026        &format!(
9027            "{}:{}:{}:{}",
9028            GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
9029            target,
9030            target_node.id,
9031            freshness.content_hash.as_deref().unwrap_or("no-hash")
9032        ),
9033    )
9034}
9035
9036pub(crate) fn graph_db_evidence_report_from_store<S: GraphStore>(
9037    input: GraphDbEvidenceInput<'_, S>,
9038) -> Result<GraphDbEvidenceReport> {
9039    let GraphDbEvidenceInput {
9040        root,
9041        scope,
9042        backend,
9043        target,
9044        depth,
9045        limit,
9046        store,
9047        freshness,
9048        mut warnings,
9049    } = input;
9050    let repair_commands = graph_db_repair_commands(root, scope);
9051    if freshness.fail_closed {
9052        bail!(
9053            "graph database evidence failed closed for {} backend: {}; repair: {}",
9054            backend,
9055            freshness.diagnostics.join("; "),
9056            repair_commands.join("; ")
9057        );
9058    }
9059    let target_node = graph_db_resolve_evidence_target(store, target)?
9060        .with_context(|| format!("graph-db evidence target not found: {target}"))?;
9061    let max_rows = if limit == 0 { usize::MAX } else { limit };
9062    let mut reachable = store.reachable_nodes_by_kinds(
9063        &target_node.id,
9064        &[
9065            "worker_context",
9066            "source_handle",
9067            "worker_result",
9068            "semantic_concept",
9069            "semantic_entity",
9070        ],
9071        depth,
9072        max_rows,
9073    )?;
9074    let worker_paths = reachable.remove("worker_context").unwrap_or_default();
9075    let source_paths = reachable.remove("source_handle").unwrap_or_default();
9076    let worker_result_paths = reachable.remove("worker_result").unwrap_or_default();
9077    let mut semantic_paths = reachable.remove("semantic_concept").unwrap_or_default();
9078    semantic_paths.extend(reachable.remove("semantic_entity").unwrap_or_default());
9079    semantic_paths.sort_by(|(left_node, left_path), (right_node, right_path)| {
9080        left_path
9081            .hops
9082            .cmp(&right_path.hops)
9083            .then(left_node.kind.cmp(&right_node.kind))
9084            .then(left_node.label.cmp(&right_node.label))
9085            .then(left_node.id.cmp(&right_node.id))
9086    });
9087    if max_rows != usize::MAX && semantic_paths.len() > max_rows {
9088        semantic_paths.truncate(max_rows);
9089    }
9090
9091    let worker_context = worker_paths
9092        .iter()
9093        .map(|(node, _)| node.clone())
9094        .collect::<Vec<_>>();
9095    let source_handles = source_paths
9096        .iter()
9097        .map(|(node, _)| node.clone())
9098        .collect::<Vec<_>>();
9099    let worker_results = worker_result_paths
9100        .iter()
9101        .map(|(node, _)| node.clone())
9102        .collect::<Vec<_>>();
9103    let semantic_related = semantic_paths
9104        .iter()
9105        .map(|(node, _)| node.clone())
9106        .collect::<Vec<_>>();
9107    warnings.extend(graph_db_evidence_completed_queue_drift_warnings(
9108        store,
9109        &target_node,
9110        &worker_results,
9111    )?);
9112    if worker_context.is_empty()
9113        && source_handles.is_empty()
9114        && worker_results.is_empty()
9115        && semantic_related.is_empty()
9116    {
9117        warnings.push(format!(
9118            "graph-db evidence target {} resolved to a {} node but has no projection-linked context rows; add source/file tokens to the backlog text or rerun graph-db refresh after the session document is indexed",
9119            target, target_node.kind
9120        ));
9121    }
9122    let shortest_paths = worker_paths
9123        .iter()
9124        .chain(source_paths.iter())
9125        .chain(worker_result_paths.iter())
9126        .chain(semantic_paths.iter())
9127        .map(|(node, path)| GraphDbEvidencePath {
9128            to: node.id.clone(),
9129            kind: node.kind.clone(),
9130            label: node.label.clone(),
9131            path: Some(path.clone()),
9132            expand: node.properties.get("expand").cloned(),
9133        })
9134        .collect::<Vec<_>>();
9135    let next_commands = graph_db_evidence_next_commands(
9136        root,
9137        scope,
9138        &target_node,
9139        &worker_context,
9140        &source_handles,
9141        &worker_results,
9142        &semantic_related,
9143    );
9144    let replay_commands = graph_db_evidence_replay_commands(root, scope, target, depth, limit);
9145    let packet_id = graph_db_evidence_packet_id(target, &target_node, &freshness);
9146    let projection_hash = freshness.content_hash.clone();
9147
9148    Ok(GraphDbEvidenceReport {
9149        root: root.to_string_lossy().to_string(),
9150        scope: scope.map(str::to_string),
9151        backend: backend.to_string(),
9152        contract_version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION.to_string(),
9153        target: target.to_string(),
9154        packet_id,
9155        projection_hash,
9156        freshness,
9157        target_node,
9158        worker_context,
9159        source_handles,
9160        worker_results,
9161        semantic_related,
9162        shortest_paths,
9163        next_commands,
9164        replay_commands,
9165        repair_commands,
9166        fixture_coverage: GraphDbFixtureCoverage {
9167            test: "graph_db_evidence_packet_covers_backlog_job_worker_context_and_source_handles"
9168                .to_string(),
9169            fixture: "tests/graph_db_conformance.rs::graph_db_project".to_string(),
9170            assertions: vec![
9171                "backlog id and job packet handle resolve to graph nodes".to_string(),
9172                "worker_context rows are reachable from queued work".to_string(),
9173                "source_handle rows are reachable through bounded shortest paths".to_string(),
9174                "worker_result rows are reachable from completed or blocked work".to_string(),
9175            ],
9176        },
9177        warnings,
9178    })
9179}
9180
9181fn print_graph_db_evidence_human(report: &GraphDbEvidenceReport) {
9182    println!(
9183        "graph-db evidence backend: {} target: {} [{}] packet:{}",
9184        report.backend, report.target_node.id, report.target_node.kind, report.packet_id
9185    );
9186    println!(
9187        "evidence: {} worker_context row(s), {} source_handle row(s), {} worker_result row(s), {} semantic row(s), {} path(s)",
9188        report.worker_context.len(),
9189        report.source_handles.len(),
9190        report.worker_results.len(),
9191        report.semantic_related.len(),
9192        report.shortest_paths.len()
9193    );
9194    for path in &report.shortest_paths {
9195        if let Some(graph_path) = &path.path {
9196            println!(
9197                "path: {} hop(s) {}",
9198                graph_path.hops,
9199                graph_path.nodes.join(" -> ")
9200            );
9201        }
9202    }
9203    for command in &report.next_commands {
9204        println!("next: {command}");
9205    }
9206    for warning in &report.warnings {
9207        println!("warning: {warning}");
9208    }
9209}
9210
9211pub(crate) fn print_graph_db_evidence_report(
9212    report: &GraphDbEvidenceReport,
9213    format: OutputFormat,
9214) -> Result<()> {
9215    if format.json_output {
9216        print_json_or_envelope(
9217            report,
9218            &format,
9219            "graph-db",
9220            "evidence",
9221            ToolEnvelopeSummary {
9222                text: format!(
9223                    "Graph DB evidence for {} returned {} worker context row(s), {} source handle(s), {} worker result row(s), {} semantic row(s), and {} shortest path(s)",
9224                    report.target,
9225                    report.worker_context.len(),
9226                    report.source_handles.len(),
9227                    report.worker_results.len(),
9228                    report.semantic_related.len(),
9229                    report.shortest_paths.len()
9230                ),
9231                metrics: vec![
9232                    envelope_metric("backend", &report.backend),
9233                    envelope_metric("worker_context", report.worker_context.len()),
9234                    envelope_metric("source_handles", report.source_handles.len()),
9235                    envelope_metric("worker_results", report.worker_results.len()),
9236                    envelope_metric("semantic_related", report.semantic_related.len()),
9237                    envelope_metric("paths", report.shortest_paths.len()),
9238                ],
9239            },
9240            false,
9241            report.next_commands.clone(),
9242        )
9243    } else {
9244        print_graph_db_evidence_human(report);
9245        Ok(())
9246    }
9247}
9248
9249pub(crate) fn graph_db_report_from_store(
9250    root: &Path,
9251    scope: Option<&str>,
9252    backend: &str,
9253    query: GraphDbQuery,
9254    store: &impl GraphStore,
9255    freshness: GraphDbFreshnessReport,
9256    warnings: Vec<String>,
9257) -> Result<GraphDbReport> {
9258    if freshness.fail_closed {
9259        bail!(
9260            "graph database read failed closed for {} backend: {}",
9261            backend,
9262            freshness.diagnostics.join("; ")
9263        );
9264    }
9265    let mut report = GraphDbReport {
9266        root: root.to_string_lossy().to_string(),
9267        scope: scope.map(str::to_string),
9268        backend: backend.to_string(),
9269        query: format!("{query:?}"),
9270        freshness,
9271        schema: None,
9272        node: None,
9273        edge: None,
9274        nodes: Vec::new(),
9275        edges: Vec::new(),
9276        ranked_neighbors: Vec::new(),
9277        semantic_related: Vec::new(),
9278        neighborhood_ranking_gate: None,
9279        knowledge_retrieval: None,
9280        path: None,
9281        page: None,
9282        warnings,
9283    };
9284
9285    match query {
9286        GraphDbQuery::Refresh => {
9287            bail!("graph-db refresh must be handled by the refresh command path");
9288        }
9289        GraphDbQuery::Status => {
9290            bail!("graph-db status must be handled by the status command path");
9291        }
9292        GraphDbQuery::Doctor => {
9293            bail!("graph-db doctor must be handled by the doctor command path");
9294        }
9295        GraphDbQuery::Drift => {
9296            bail!("graph-db drift must be handled by the drift command path");
9297        }
9298        GraphDbQuery::Compact { .. } => {
9299            bail!("graph-db compact must be handled by the compact command path");
9300        }
9301        GraphDbQuery::BackendEval { .. } => {
9302            bail!("graph-db backend-eval must be handled by the benchmark command path");
9303        }
9304        GraphDbQuery::Evidence { .. } => {
9305            bail!("graph-db evidence must be handled by the evidence command path");
9306        }
9307        GraphDbQuery::Related {
9308            query,
9309            kind,
9310            depth,
9311            seed_limit,
9312            limit,
9313        } => {
9314            let semantic =
9315                semantic_related_report_from_store(root, scope, &query, seed_limit, kind, store)?;
9316            let SemanticRelatedReport {
9317                items,
9318                warnings: semantic_warnings,
9319                ..
9320            } = semantic;
9321            report.warnings.extend(semantic_warnings);
9322            let seed_ids = items
9323                .iter()
9324                .map(|item| item.handle.clone())
9325                .collect::<Vec<_>>();
9326            let subgraph = graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit)?;
9327            let seed_count = seed_ids.len();
9328
9329            report.semantic_related = items;
9330            report.nodes = subgraph.nodes;
9331            report.edges = subgraph.edges;
9332            if let Some(seed_id) = seed_ids.first() {
9333                let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(Some(limit));
9334                report.ranked_neighbors = graph_db_ranked_neighbors(
9335                    seed_id,
9336                    &report.nodes,
9337                    &report.edges,
9338                    ranked_neighbor_cap,
9339                );
9340                report.neighborhood_ranking_gate =
9341                    Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
9342            }
9343            report.knowledge_retrieval = Some(GraphDbKnowledgeRetrieval {
9344                mode: "semantic_seeded_neighborhood".to_string(),
9345                query,
9346                seed_kind: semantic_related_kind_name(kind).to_string(),
9347                seed_limit,
9348                seed_count,
9349                depth,
9350                limit,
9351                node_count: report.nodes.len(),
9352                edge_count: report.edges.len(),
9353                truncated: subgraph.truncated,
9354                traversal: "incident_plus_outgoing_edges".to_string(),
9355                freshness_boundary:
9356                    "semantic rows must come from refreshed summary graph records".to_string(),
9357                privacy_boundary:
9358                    "GraphStore stores substrate records only; user consent, deletion policy, persona policy, and LiveKit session state stay in the avatar/agent adapter"
9359                        .to_string(),
9360                diagnostics: subgraph.diagnostics,
9361            });
9362        }
9363        GraphDbQuery::Schema => {
9364            report.schema = Some(graph_db_schema());
9365        }
9366        GraphDbQuery::Node { id } => {
9367            report.node = store.node(&id)?;
9368        }
9369        GraphDbQuery::Edge { id } => {
9370            report.edge = store.edge(&id)?;
9371        }
9372        GraphDbQuery::Edges {
9373            edge_kind,
9374            cursor,
9375            limit,
9376            property_filters,
9377        } => {
9378            let options = graph_db_query_options(cursor, limit, &property_filters)?;
9379            let paged = store.paged_edges(
9380                edge_kind.as_deref(),
9381                graph_db_query_options_for_store(&options),
9382            )?;
9383            report.edges = paged.edges;
9384            report.page = Some(graph_db_page_report_from_store(
9385                paged.page,
9386                options.property_filters,
9387            ));
9388        }
9389        GraphDbQuery::Incident {
9390            id,
9391            edge_kind,
9392            cursor,
9393            limit,
9394            property_filters,
9395        } => {
9396            let options = graph_db_query_options(cursor, limit, &property_filters)?;
9397            let paged = store.paged_incident_edges(
9398                &id,
9399                edge_kind.as_deref(),
9400                graph_db_query_options_for_store(&options),
9401            )?;
9402            report.edges = paged.edges;
9403            report.page = Some(graph_db_page_report_from_store(
9404                paged.page,
9405                options.property_filters,
9406            ));
9407        }
9408        GraphDbQuery::Kind {
9409            kind,
9410            cursor,
9411            limit,
9412            property_filters,
9413        } => {
9414            let options = graph_db_query_options(cursor, limit, &property_filters)?;
9415            let paged =
9416                store.paged_nodes_by_kind(&kind, graph_db_query_options_for_store(&options))?;
9417            report.nodes = paged.nodes;
9418            report.edges = paged.edges;
9419            report.page = Some(graph_db_page_report_from_store(
9420                paged.page,
9421                options.property_filters,
9422            ));
9423        }
9424        GraphDbQuery::Neighborhood {
9425            id,
9426            depth,
9427            edge_kind,
9428            cursor,
9429            limit,
9430            property_filters,
9431        } => {
9432            let options = graph_db_query_options(cursor, limit, &property_filters)?;
9433            if let Some(paged) = store.paged_neighborhood(
9434                &id,
9435                depth,
9436                edge_kind.as_deref(),
9437                graph_db_query_options_for_store(&options),
9438            )? {
9439                report.nodes = paged.nodes;
9440                report.edges = paged.edges;
9441                let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(options.limit);
9442                report.ranked_neighbors = graph_db_ranked_neighbors(
9443                    &id,
9444                    &report.nodes,
9445                    &report.edges,
9446                    ranked_neighbor_cap,
9447                );
9448                report.neighborhood_ranking_gate =
9449                    Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
9450                report.page = Some(graph_db_page_report_from_store(
9451                    paged.page,
9452                    options.property_filters,
9453                ));
9454            }
9455        }
9456        GraphDbQuery::Path {
9457            from,
9458            to,
9459            edge_kind,
9460            max_hops,
9461        } => {
9462            report.path =
9463                store.shortest_path_with_max_hops(&from, &to, edge_kind.as_deref(), max_hops)?;
9464            if let Some(max_hops) = max_hops
9465                && report.path.is_none()
9466            {
9467                report.warnings.push(format!(
9468                    "no directed path found within --max-hops {}",
9469                    max_hops
9470                ));
9471            }
9472        }
9473    }
9474    Ok(report)
9475}
9476
9477pub(crate) fn print_graph_db_human(report: &GraphDbReport, compact: bool) {
9478    if compact {
9479        println!(
9480            "graph-db backend:{} query:{} nodes:{} edges:{} freshness:{}",
9481            report.backend,
9482            report.query,
9483            report.nodes.len() + usize::from(report.node.is_some()),
9484            report.edges.len() + usize::from(report.edge.is_some()),
9485            report.freshness.status
9486        );
9487        return;
9488    }
9489    println!("graph-db backend: {}", report.backend);
9490    println!("freshness: {}", report.freshness.status);
9491    if let Some(schema) = &report.schema {
9492        println!(
9493            "schema: {} node fields, {} edge fields, {} operations",
9494            schema.node_fields.len(),
9495            schema.edge_fields.len(),
9496            schema.operations.len()
9497        );
9498    }
9499    if let Some(node) = &report.node {
9500        println!("node: {} [{}] {}", node.id, node.kind, node.label);
9501    }
9502    if let Some(edge) = &report.edge {
9503        println!(
9504            "edge: {} {} -{}-> {}",
9505            graph_db_edge_key(edge),
9506            edge.from_id,
9507            edge.kind,
9508            edge.to_id
9509        );
9510    }
9511    if let Some(knowledge) = &report.knowledge_retrieval {
9512        println!(
9513            "knowledge_retrieval: {} seeds:{} depth:{} traversal:{}",
9514            knowledge.mode, knowledge.seed_count, knowledge.depth, knowledge.traversal
9515        );
9516    }
9517    for item in &report.semantic_related {
9518        println!(
9519            "semantic_seed: {:.3} [{}] {} ({})",
9520            item.score, item.kind, item.label, item.handle
9521        );
9522    }
9523    for node in &report.nodes {
9524        println!("node: {} [{}] {}", node.id, node.kind, node.label);
9525    }
9526    for edge in &report.edges {
9527        println!(
9528            "edge: {} {} -{}-> {}",
9529            graph_db_edge_key(edge),
9530            edge.from_id,
9531            edge.kind,
9532            edge.to_id
9533        );
9534    }
9535    for neighbor in &report.ranked_neighbors {
9536        println!(
9537            "ranked_neighbor: #{} score:{} depth:{} {} [{}] {}",
9538            neighbor.rank,
9539            neighbor.score,
9540            neighbor
9541                .depth
9542                .map(|depth| depth.to_string())
9543                .unwrap_or_else(|| "unknown".to_string()),
9544            neighbor.node_id,
9545            neighbor.kind,
9546            neighbor.label
9547        );
9548    }
9549    if let Some(gate) = &report.neighborhood_ranking_gate {
9550        println!(
9551            "neighborhood_ranking_gate: {} default_order:{} ranked_output_default:{}",
9552            gate.status, gate.default_order, gate.ranked_output_default
9553        );
9554    }
9555    if let Some(path) = &report.path {
9556        println!("path: {} hop(s) {}", path.hops, path.nodes.join(" -> "));
9557    }
9558    if let Some(page) = &report.page {
9559        if let Some(next_cursor) = &page.next_cursor {
9560            println!("next_cursor: {next_cursor}");
9561        }
9562        for diagnostic in &page.diagnostics {
9563            println!("page: {diagnostic}");
9564        }
9565    }
9566    for warning in &report.warnings {
9567        println!("warning: {warning}");
9568    }
9569}
9570
9571pub(crate) fn graph_db_backend_eval_phase_timing(
9572    name: &str,
9573    duration_micros: u128,
9574    detail: &str,
9575) -> GraphDbBackendEvalPhaseTiming {
9576    GraphDbBackendEvalPhaseTiming {
9577        name: name.to_string(),
9578        duration_micros,
9579        detail: detail.to_string(),
9580    }
9581}
9582
9583pub(crate) fn graph_db_backend_eval_timed_phase<T>(
9584    phases: &mut Vec<GraphDbBackendEvalPhaseTiming>,
9585    name: &str,
9586    detail: &str,
9587    run: impl FnOnce() -> Result<T>,
9588) -> Result<T> {
9589    let started = Instant::now();
9590    let result = run();
9591    phases.push(graph_db_backend_eval_phase_timing(
9592        name,
9593        started.elapsed().as_micros(),
9594        detail,
9595    ));
9596    result
9597}
9598
9599pub(crate) fn graph_db_backend_eval_refresh_total_micros(
9600    phases: &[GraphDbBackendEvalPhaseTiming],
9601) -> u128 {
9602    phases
9603        .iter()
9604        .filter(|phase| phase.name != "conflict_matrix_preparation")
9605        .map(|phase| phase.duration_micros)
9606        .sum()
9607}
9608
9609pub(crate) fn graph_db_backend_eval_cached_refresh(
9610    root: &Path,
9611    scope: Option<&str>,
9612    source_watermark: Option<&str>,
9613) -> Result<
9614    Option<(
9615        TraversalGraphBuild,
9616        SqliteProjectionRefresh,
9617        Vec<GraphDbBackendEvalPhaseTiming>,
9618    )>,
9619> {
9620    let Some(source_watermark) = source_watermark else {
9621        return Ok(None);
9622    };
9623    let graph_db = graph_substrate_db_path(root, scope);
9624    if !graph_db.exists() {
9625        return Ok(None);
9626    }
9627
9628    let started = Instant::now();
9629    let store = match SqliteGraphStore::open_read_only_resilient(&graph_db) {
9630        Ok(store) => store,
9631        Err(_) => return Ok(None),
9632    };
9633    if store.has_user_triggers().unwrap_or(true) {
9634        return Ok(None);
9635    }
9636    let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
9637    if freshness.fail_closed || freshness.source_watermark.as_deref() != Some(source_watermark) {
9638        return Ok(None);
9639    }
9640
9641    let phases = vec![
9642        graph_db_backend_eval_phase_timing(
9643            "source_graph_build",
9644            started.elapsed().as_micros(),
9645            "reused current graph.db projection because the source watermark matched; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
9646        ),
9647        graph_db_backend_eval_phase_timing(
9648            "projection_rows",
9649            0,
9650            "reused cached provider-neutral projection rows from graph.db",
9651        ),
9652        graph_db_backend_eval_phase_timing(
9653            "sqlite_open",
9654            0,
9655            "reused existing graph.db projection without opening a write transaction",
9656        ),
9657    ];
9658    let refresh = SqliteProjectionRefresh {
9659        scope: scope.unwrap_or("root").to_string(),
9660        projection_version: freshness
9661            .projection_version
9662            .unwrap_or_else(|| GRAPH_PROJECTION_VERSION.to_string()),
9663        source_watermark: Some(source_watermark.to_string()),
9664        tombstoned_nodes: Vec::new(),
9665        tombstoned_edges: Vec::new(),
9666        upserted_nodes: 0,
9667        upserted_edges: 0,
9668        unchanged_nodes: 0,
9669        unchanged_edges: 0,
9670        upserted_properties: 0,
9671        unchanged_properties: 0,
9672        deleted_properties: 0,
9673        deleted_nodes: 0,
9674        deleted_edges: 0,
9675        pruned_tombstones: 0,
9676        file_size_bytes_before: None,
9677        file_size_bytes_after: None,
9678        phase_timings: Vec::new(),
9679    };
9680    Ok(Some((TraversalGraphBuild::default(), refresh, phases)))
9681}
9682
9683pub(crate) fn graph_db_backend_eval_reused_cached_projection(
9684    phases: &[GraphDbBackendEvalPhaseTiming],
9685) -> bool {
9686    phases.iter().any(|phase| {
9687        phase.name == "source_graph_build"
9688            && phase.detail.contains("reused current graph.db projection")
9689    })
9690}
9691
9692pub(crate) fn graph_db_backend_eval_update_source_watermark(
9693    root: &Path,
9694    path_hint: &Path,
9695    scope: Option<&str>,
9696) -> Result<()> {
9697    let Some(source_watermark) = traversal_source_watermark(root, path_hint, scope, false)? else {
9698        return Ok(());
9699    };
9700    let graph_db = graph_substrate_db_path(root, scope);
9701    let mut store = SqliteGraphStore::open(&graph_db)?;
9702    store.update_projection_source_watermark(scope.unwrap_or("root"), Some(source_watermark))?;
9703    Ok(())
9704}
9705
9706pub(crate) fn graph_db_backend_eval_refresh_with_profile(
9707    root: &Path,
9708    path_hint: &Path,
9709    scope: Option<&str>,
9710) -> Result<(
9711    TraversalGraphBuild,
9712    SqliteProjectionRefresh,
9713    Vec<GraphDbBackendEvalPhaseTiming>,
9714)> {
9715    let source_watermark = traversal_source_watermark(root, path_hint, scope, false)?;
9716    if let Some(cached) =
9717        graph_db_backend_eval_cached_refresh(root, scope, source_watermark.as_deref())?
9718    {
9719        return Ok(cached);
9720    }
9721
9722    let mut phases = Vec::new();
9723    let source_graph_detail = if hinted_markdown_file(root, path_hint).is_some() {
9724        "bounded session projection: index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads; skips global call-edge materialization because full-projection is the complete-call-graph regression guard"
9725    } else {
9726        "index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads when summaries are cached"
9727    };
9728    let source_graph = graph_db_backend_eval_timed_phase(
9729        &mut phases,
9730        "source_graph_build",
9731        source_graph_detail,
9732        || build_traversal_graph_source_with_options(root, path_hint, scope, false),
9733    )?;
9734    let projection = graph_db_backend_eval_timed_phase(
9735        &mut phases,
9736        "projection_rows",
9737        "provider-neutral GraphStore node/edge row construction before SQLite persistence",
9738        || traversal_projection_from_graph(root, scope, &source_graph),
9739    )?;
9740    let graph_db = graph_substrate_db_path(root, scope);
9741    let mut store = graph_db_backend_eval_timed_phase(
9742        &mut phases,
9743        "sqlite_open",
9744        "open the local SQLite graph.db with WAL and busy-timeout settings",
9745        || SqliteGraphStore::open(&graph_db),
9746    )?;
9747    let refreshed_source_watermark = traversal_source_watermark(root, path_hint, scope, false)
9748        .ok()
9749        .flatten();
9750    let refresh = store.replace_projection_with_version(
9751        scope.unwrap_or("root"),
9752        &projection,
9753        Some(GRAPH_PROJECTION_VERSION),
9754        refreshed_source_watermark
9755            .or(source_watermark)
9756            .or_else(|| graph_projection_content_hash(&projection)),
9757    )?;
9758    phases.extend(
9759        refresh
9760            .phase_timings
9761            .iter()
9762            .map(|phase| GraphDbBackendEvalPhaseTiming {
9763                name: phase.name.clone(),
9764                duration_micros: phase.duration_micros,
9765                detail: phase.detail.clone(),
9766            }),
9767    );
9768    Ok((source_graph, refresh, phases))
9769}
9770
/// Returns the backend-eval disk cache directory beneath `root`.
fn graph_db_backend_eval_disk_cache_dir(root: &Path) -> PathBuf {
    let mut cache_dir = root.to_path_buf();
    cache_dir.push(".tsift/backend-eval-cache");
    cache_dir
}
9774
9775fn graph_db_backend_eval_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
9776    graph_db_backend_eval_disk_cache_dir(root)
9777        .join(kind)
9778        .join(format!("{key}.json.gz"))
9779}
9780
9781fn graph_db_backend_eval_legacy_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
9782    graph_db_backend_eval_disk_cache_dir(root)
9783        .join(kind)
9784        .join(format!("{key}.json"))
9785}
9786
/// Timing breakdown collected while reading one disk cache entry.
#[derive(Clone, Default)]
struct GraphDbBackendEvalDiskCacheReadProfile {
    /// Microseconds spent reading cache bytes from disk.
    file_read_micros: u128,
    /// Microseconds spent gunzipping the bytes that were read.
    gzip_decode_micros: u128,
    /// Microseconds spent in JSON deserialization.
    serde_decode_micros: u128,
    /// True when the value was loaded from the legacy uncompressed path.
    legacy: bool,
}
9794
9795fn graph_db_backend_eval_read_disk_cache<T: for<'de> Deserialize<'de>>(
9796    root: &Path,
9797    kind: &str,
9798    key: &str,
9799) -> Option<(T, u64, u64, GraphDbBackendEvalDiskCacheReadProfile)> {
9800    let mut profile = GraphDbBackendEvalDiskCacheReadProfile::default();
9801    let path = graph_db_backend_eval_disk_cache_path(root, kind, key);
9802    let read_started = Instant::now();
9803    let read_result = fs::read(&path);
9804    profile.file_read_micros = read_started.elapsed().as_micros();
9805    if let Ok(bytes) = read_result {
9806        let decode_started = Instant::now();
9807        let mut decoder = GzDecoder::new(bytes.as_slice());
9808        let mut decoded = Vec::new();
9809        let decode_ok = decoder.read_to_end(&mut decoded).is_ok();
9810        profile.gzip_decode_micros = decode_started.elapsed().as_micros();
9811        if decode_ok {
9812            let serde_started = Instant::now();
9813            let parsed: Option<T> = serde_json::from_slice(&decoded).ok();
9814            profile.serde_decode_micros = serde_started.elapsed().as_micros();
9815            if let Some(value) = parsed {
9816                return Some((value, bytes.len() as u64, decoded.len() as u64, profile));
9817            }
9818        }
9819    }
9820
9821    let legacy_path = graph_db_backend_eval_legacy_disk_cache_path(root, kind, key);
9822    let legacy_started = Instant::now();
9823    let bytes = fs::read(legacy_path).ok()?;
9824    profile.file_read_micros = profile
9825        .file_read_micros
9826        .saturating_add(legacy_started.elapsed().as_micros());
9827    let serde_started = Instant::now();
9828    let value = serde_json::from_slice(&bytes).ok()?;
9829    profile.serde_decode_micros = profile
9830        .serde_decode_micros
9831        .saturating_add(serde_started.elapsed().as_micros());
9832    profile.legacy = true;
9833    Some((value, bytes.len() as u64, bytes.len() as u64, profile))
9834}
9835
/// Timing breakdown collected while writing one disk cache entry.
#[derive(Clone, Default)]
struct GraphDbBackendEvalDiskCacheWriteProfile {
    /// Microseconds spent serializing the value to JSON.
    serde_encode_micros: u128,
    /// Microseconds spent gzip-compressing the JSON bytes.
    gzip_encode_micros: u128,
    /// Microseconds spent writing the compressed bytes to disk.
    file_write_micros: u128,
}
9842
9843fn graph_db_backend_eval_write_disk_cache<T: Serialize>(
9844    root: &Path,
9845    kind: &str,
9846    key: &str,
9847    value: &T,
9848) -> Option<(u64, u64, GraphDbBackendEvalDiskCacheWriteProfile)> {
9849    let mut profile = GraphDbBackendEvalDiskCacheWriteProfile::default();
9850    let path = graph_db_backend_eval_disk_cache_path(root, kind, key);
9851    let parent = path.parent()?;
9852    if fs::create_dir_all(parent).is_err() {
9853        return None;
9854    }
9855    let serde_started = Instant::now();
9856    let bytes = serde_json::to_vec(value).ok()?;
9857    profile.serde_encode_micros = serde_started.elapsed().as_micros();
9858    let gzip_started = Instant::now();
9859    let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
9860    if encoder.write_all(&bytes).is_err() {
9861        return None;
9862    }
9863    let encoded = encoder.finish().ok()?;
9864    profile.gzip_encode_micros = gzip_started.elapsed().as_micros();
9865    let write_started = Instant::now();
9866    if fs::write(&path, &encoded).is_err() {
9867        return None;
9868    }
9869    profile.file_write_micros = write_started.elapsed().as_micros();
9870    Some((encoded.len() as u64, bytes.len() as u64, profile))
9871}
9872
9873fn graph_db_backend_eval_prune_disk_cache(root: &Path, kind: &str, keep_key: &str) -> (usize, u64) {
9874    let dir = graph_db_backend_eval_disk_cache_dir(root).join(kind);
9875    let Ok(entries) = fs::read_dir(dir) else {
9876        return (0, 0);
9877    };
9878    let keep_name = format!("{keep_key}.json.gz");
9879    let mut pruned_files = 0usize;
9880    let mut pruned_bytes = 0u64;
9881    for entry in entries.flatten() {
9882        let path = entry.path();
9883        if !path.is_file() {
9884            continue;
9885        }
9886        let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
9887            continue;
9888        };
9889        if name == keep_name {
9890            continue;
9891        }
9892        let is_backend_eval_cache = name.ends_with(".json") || name.ends_with(".json.gz");
9893        if !is_backend_eval_cache {
9894            continue;
9895        }
9896        let bytes = entry.metadata().map(|metadata| metadata.len()).unwrap_or(0);
9897        if fs::remove_file(&path).is_ok() {
9898            pruned_files += 1;
9899            pruned_bytes += bytes;
9900        }
9901    }
9902    (pruned_files, pruned_bytes)
9903}
9904
9905fn graph_db_backend_eval_full_projection_raw_watermark_rows(
9906    root: &Path,
9907    source_root: &Path,
9908) -> Result<Vec<GraphDbBackendEvalRawSourceWatermarkRow>> {
9909    let mut rows = Vec::new();
9910    let mut entries = walk::walk_files(source_root)?;
9911    entries.sort_by(|left, right| left.path.cmp(&right.path));
9912    for entry in entries {
9913        if traversal_path_is_generated_artifact(root, source_root, &entry.path) {
9914            continue;
9915        }
9916        let bytes = fs::read(&entry.path)
9917            .with_context(|| format!("reading source input {}", entry.path.display()))?;
9918        rows.push(GraphDbBackendEvalRawSourceWatermarkRow {
9919            path: traversal_watermark_path(root, &entry.path),
9920            bytes: bytes.len() as u64,
9921            content_hash: content_hash(&bytes)?,
9922        });
9923    }
9924    Ok(rows)
9925}
9926
9927fn graph_db_backend_eval_full_projection_source_watermark(
9928    root: &Path,
9929    scope: Option<&str>,
9930) -> Result<GraphDbBackendEvalFullProjectionSourceWatermark> {
9931    let path_hint = root;
9932    let mut detail_parts = Vec::new();
9933    let mut parts = vec![
9934        format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
9935        format!("cache_version:{GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION}"),
9936        "watermark_kind:stable_full_projection_inputs".to_string(),
9937        format!("scope:{}", scope.unwrap_or("root")),
9938        format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
9939    ];
9940
9941    let gate = prepare_agent_doc_index_gate(root, path_hint, scope, "full-projection cache key");
9942    match gate.db_path.as_ref().filter(|db_path| db_path.exists()) {
9943        Some(db_path) => {
9944            let db = index::IndexDb::open_read_only_resilient(db_path)?;
9945            parts.push("index_mode:indexed".to_string());
9946            detail_parts.push("mode=indexed".to_string());
9947            parts.push(format!(
9948                "index_source_root:{}",
9949                traversal_watermark_path(root, &gate.source_root)
9950            ));
9951
9952            let symbols = db
9953                .all_symbols()?
9954                .into_iter()
9955                .filter(|symbol| {
9956                    !traversal_path_is_generated_artifact(
9957                        root,
9958                        &gate.source_root,
9959                        Path::new(&symbol.file),
9960                    )
9961                })
9962                .collect::<Vec<_>>();
9963            let symbols_hash = content_hash(&symbols)?;
9964            detail_parts.push(format!("symbols={symbols_hash}"));
9965            parts.push(format!("index_symbols:{symbols_hash}"));
9966
9967            let edges = db
9968                .all_stored_edges()?
9969                .into_iter()
9970                .filter(|edge| {
9971                    !traversal_path_is_generated_artifact(
9972                        root,
9973                        &gate.source_root,
9974                        Path::new(&edge.caller_file),
9975                    )
9976                })
9977                .collect::<Vec<_>>();
9978            let edges_hash = content_hash(&edges)?;
9979            detail_parts.push(format!("call_edges={edges_hash}"));
9980            parts.push(format!("index_call_edges:{edges_hash}"));
9981
9982            let routes = db
9983                .all_routes()?
9984                .into_iter()
9985                .filter(|route| {
9986                    !traversal_path_is_generated_artifact(
9987                        root,
9988                        &gate.source_root,
9989                        Path::new(&route.file),
9990                    )
9991                })
9992                .collect::<Vec<_>>();
9993            let routes_hash = content_hash(&routes)?;
9994            detail_parts.push(format!("routes={routes_hash}"));
9995            parts.push(format!("index_routes:{routes_hash}"));
9996        }
9997        None => {
9998            parts.push("index_mode:raw_fallback".to_string());
9999            detail_parts.push("mode=raw_fallback".to_string());
10000            parts.push(format!(
10001                "raw_source_root:{}",
10002                traversal_watermark_path(root, &gate.source_root)
10003            ));
10004            let raw_rows =
10005                graph_db_backend_eval_full_projection_raw_watermark_rows(root, &gate.source_root)?;
10006            let raw_hash = content_hash(&raw_rows)?;
10007            detail_parts.push(format!("raw_source_files={raw_hash}"));
10008            parts.push(format!("raw_source_files:{raw_hash}"));
10009        }
10010    }
10011
10012    parts.push("agent_doc_session_markdown:bounded_real_dataset_only".to_string());
10013    detail_parts.push("session_markdown=bounded_real_dataset_only".to_string());
10014    let summaries_start = parts.len();
10015    push_traversal_summaries_watermark_part(root, &mut parts)?;
10016    let summaries_hash = content_hash(&parts[summaries_start..].to_vec())?;
10017    detail_parts.push(format!("summaries={summaries_hash}"));
10018    let value = content_hash(&parts)?;
10019    detail_parts.push(format!("watermark={value}"));
10020    Ok(GraphDbBackendEvalFullProjectionSourceWatermark {
10021        value,
10022        detail: detail_parts.join(" "),
10023    })
10024}
10025
10026fn graph_db_backend_eval_full_projection_cache_key(
10027    root: &Path,
10028    scope: Option<&str>,
10029) -> Result<(String, String, String)> {
10030    let source_watermark = graph_db_backend_eval_full_projection_source_watermark(root, scope)?;
10031    let key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
10032        root,
10033        scope,
10034        &source_watermark.value,
10035    )?;
10036    Ok((source_watermark.value, key, source_watermark.detail))
10037}
10038
10039fn graph_db_backend_eval_full_projection_cache_key_for_watermark(
10040    root: &Path,
10041    scope: Option<&str>,
10042    source_watermark: &str,
10043) -> Result<String> {
10044    content_hash(&serde_json::json!({
10045    "version": GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION,
10046    "root": root.display().to_string(),
10047    "scope": scope.unwrap_or("root"),
10048    "source_watermark": source_watermark,
10049    }))
10050}
10051
10052pub(crate) fn graph_db_backend_eval_full_projection_with_profile(
10053    root: &Path,
10054    scope: Option<&str>,
10055) -> Result<(
10056    GraphProjection,
10057    Vec<String>,
10058    Vec<GraphDbBackendEvalPhaseTiming>,
10059    GraphDbBackendEvalFullProjectionCacheStats,
10060)> {
10061    let (source_watermark, key, source_watermark_detail) =
10062        graph_db_backend_eval_full_projection_cache_key(root, scope)?;
10063    let lookup_started = Instant::now();
10064    if let Some((cached, disk_bytes, json_bytes, read_profile)) =
10065        graph_db_backend_eval_read_disk_cache::<GraphDbBackendEvalFullProjectionCache>(
10066            root,
10067            "full_projection",
10068            &key,
10069        )
10070        && cached.version == GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION
10071        && cached.key == key
10072        && cached.source_watermark == source_watermark
10073    {
10074        let lookup_overhead_micros = lookup_started
10075            .elapsed()
10076            .as_micros()
10077            .saturating_sub(read_profile.file_read_micros)
10078            .saturating_sub(read_profile.gzip_decode_micros)
10079            .saturating_sub(read_profile.serde_decode_micros);
10080        let prune_started = Instant::now();
10081        let (pruned_files, pruned_bytes) =
10082            graph_db_backend_eval_prune_disk_cache(root, "full_projection", &key);
10083        let prune_micros = prune_started.elapsed().as_micros();
10084        let cache_stats = GraphDbBackendEvalFullProjectionCacheStats {
10085            hit: true,
10086            disk_bytes,
10087            json_bytes,
10088            pruned_files,
10089            pruned_bytes,
10090        };
10091        let read_detail_suffix = if read_profile.legacy {
10092            " (legacy uncompressed cache path)"
10093        } else {
10094            ""
10095        };
10096        return Ok((
10097            cached.projection,
10098            cached.warnings,
10099            vec![
10100                graph_db_backend_eval_phase_timing(
10101                    "full_projection.cache_lookup",
10102                    lookup_overhead_micros,
10103                    &format!(
10104                        "watermark/version check overhead around the cache load phases; {source_watermark_detail}"
10105                    ),
10106                ),
10107                graph_db_backend_eval_phase_timing(
10108                    "full_projection.cache.file_read",
10109                    read_profile.file_read_micros,
10110                    &format!(
10111                        "read compressed cache bytes from .tsift/backend-eval-cache{read_detail_suffix}"
10112                    ),
10113                ),
10114                graph_db_backend_eval_phase_timing(
10115                    "full_projection.cache.gzip_decode",
10116                    read_profile.gzip_decode_micros,
10117                    "gunzip the compressed projection cache bytes",
10118                ),
10119                graph_db_backend_eval_phase_timing(
10120                    "full_projection.cache.serde_decode",
10121                    read_profile.serde_decode_micros,
10122                    "serde_json deserialize the decoded projection cache payload",
10123                ),
10124                graph_db_backend_eval_phase_timing(
10125                    "full_projection.cache.prune",
10126                    prune_micros,
10127                    "prune sibling cache files older than the current key",
10128                ),
10129                graph_db_backend_eval_phase_timing(
10130                    "full_projection.source_graph_build",
10131                    0,
10132                    "reused cached full-project source graph; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
10133                ),
10134                graph_db_backend_eval_phase_timing(
10135                    "full_projection.projection_rows",
10136                    0,
10137                    "reused cached provider-neutral full-project projection rows",
10138                ),
10139            ],
10140            cache_stats,
10141        ));
10142    }
10143
10144    let mut cache_stats = GraphDbBackendEvalFullProjectionCacheStats::default();
10145    let mut phases = vec![graph_db_backend_eval_phase_timing(
10146        "full_projection.cache_lookup",
10147        lookup_started.elapsed().as_micros(),
10148        &format!(
10149            "no full-project projection cache entry matched the source watermark; {source_watermark_detail}"
10150        ),
10151    )];
10152    let full_source = graph_db_backend_eval_timed_phase(
10153        &mut phases,
10154        "full_projection.source_graph_build",
10155        "opt-in full-project source graph build; uses the project root as the path hint so bounded session projections cannot hide full-graph regressions",
10156        || build_traversal_graph_source_with_options(root, root, scope, false),
10157    )?;
10158    let projection = graph_db_backend_eval_timed_phase(
10159        &mut phases,
10160        "full_projection.projection_rows",
10161        "provider-neutral row construction for the opt-in full-project projection dataset",
10162        || traversal_projection_from_graph(root, scope, &full_source),
10163    )?;
10164    let warnings = full_source.warnings;
10165    let refreshed_source_watermark =
10166        graph_db_backend_eval_full_projection_source_watermark(root, scope)
10167            .map(|watermark| watermark.value)
10168            .unwrap_or_else(|_| source_watermark.clone());
10169    let write_key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
10170        root,
10171        scope,
10172        &refreshed_source_watermark,
10173    )?;
10174    let cache = GraphDbBackendEvalFullProjectionCache {
10175        version: GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION.to_string(),
10176        key: write_key.clone(),
10177        source_watermark: refreshed_source_watermark,
10178        projection: projection.clone(),
10179        warnings: warnings.clone(),
10180    };
10181    if let Some((disk_bytes, json_bytes, write_profile)) =
10182        graph_db_backend_eval_write_disk_cache(root, "full_projection", &write_key, &cache)
10183    {
10184        cache_stats.disk_bytes = disk_bytes;
10185        cache_stats.json_bytes = json_bytes;
10186        phases.push(graph_db_backend_eval_phase_timing(
10187            "full_projection.cache.serde_encode",
10188            write_profile.serde_encode_micros,
10189            "serde_json serialize the projection cache payload before compression",
10190        ));
10191        phases.push(graph_db_backend_eval_phase_timing(
10192            "full_projection.cache.gzip_encode",
10193            write_profile.gzip_encode_micros,
10194            "gzip-compress the serialized projection cache payload",
10195        ));
10196        phases.push(graph_db_backend_eval_phase_timing(
10197            "full_projection.cache.file_write",
10198            write_profile.file_write_micros,
10199            "write the compressed projection cache bytes to .tsift/backend-eval-cache",
10200        ));
10201    }
10202    let prune_started = Instant::now();
10203    let (pruned_files, pruned_bytes) =
10204        graph_db_backend_eval_prune_disk_cache(root, "full_projection", &write_key);
10205    phases.push(graph_db_backend_eval_phase_timing(
10206        "full_projection.cache.prune",
10207        prune_started.elapsed().as_micros(),
10208        "prune sibling cache files older than the current key",
10209    ));
10210    cache_stats.pruned_files = pruned_files;
10211    cache_stats.pruned_bytes = pruned_bytes;
10212    Ok((projection, warnings, phases, cache_stats))
10213}
10214
10215fn graph_db_backend_eval_timed(
10216    name: &str,
10217    run: impl FnOnce() -> Result<(Option<usize>, serde_json::Value)>,
10218) -> (
10219    GraphDbBackendEvalOperation,
10220    Option<GraphDbBackendEvalSignature>,
10221) {
10222    let started = Instant::now();
10223    match run() {
10224        Ok((rows, value)) => (
10225            GraphDbBackendEvalOperation {
10226                name: name.to_string(),
10227                supported: true,
10228                status: "ok".to_string(),
10229                duration_micros: started.elapsed().as_micros(),
10230                rows,
10231                error: None,
10232            },
10233            Some(GraphDbBackendEvalSignature {
10234                operation: name.to_string(),
10235                value,
10236            }),
10237        ),
10238        Err(err) => (
10239            GraphDbBackendEvalOperation {
10240                name: name.to_string(),
10241                supported: false,
10242                status: "error".to_string(),
10243                duration_micros: started.elapsed().as_micros(),
10244                rows: None,
10245                error: Some(format!("{err:#}")),
10246            },
10247            None,
10248        ),
10249    }
10250}
10251
10252fn graph_db_backend_eval_parity(
10253    sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
10254    candidate_signatures: &[GraphDbBackendEvalSignature],
10255) -> GraphDbBackendEvalParity {
10256    let Some(sqlite_signatures) = sqlite_signatures else {
10257        return GraphDbBackendEvalParity {
10258            matches_sqlite: true,
10259            diagnostics: Vec::new(),
10260        };
10261    };
10262    let sqlite = sqlite_signatures
10263        .iter()
10264        .map(|signature| (signature.operation.as_str(), &signature.value))
10265        .collect::<BTreeMap<_, _>>();
10266    let candidate = candidate_signatures
10267        .iter()
10268        .map(|signature| (signature.operation.as_str(), &signature.value))
10269        .collect::<BTreeMap<_, _>>();
10270    let mut diagnostics = Vec::new();
10271    for (operation, sqlite_value) in sqlite {
10272        match candidate.get(operation) {
10273            Some(candidate_value) if *candidate_value == sqlite_value => {}
10274            Some(_) => diagnostics.push(format!("{operation} output differed from SQLite")),
10275            None => diagnostics.push(format!(
10276                "{operation} did not complete for candidate backend"
10277            )),
10278        }
10279    }
10280    GraphDbBackendEvalParity {
10281        matches_sqlite: diagnostics.is_empty(),
10282        diagnostics,
10283    }
10284}
10285
10286pub(crate) fn graph_db_backend_eval_targets(
10287    store: &impl GraphStore,
10288    requested: &[String],
10289) -> Result<Vec<String>> {
10290    let requested = requested
10291        .iter()
10292        .filter_map(|target| normalize_conflict_target(target))
10293        .collect::<Vec<_>>();
10294    if !requested.is_empty() {
10295        return Ok(requested);
10296    }
10297
10298    for kind in ["backlog", "job_packet"] {
10299        let nodes = store.nodes_by_kind(kind)?;
10300        if let Some(node) = nodes.first() {
10301            if let Some(ref_id) = node.properties.get("ref_id") {
10302                return Ok(vec![ref_id.clone()]);
10303            }
10304            return Ok(vec![node.id.clone()]);
10305        }
10306    }
10307    Ok(Vec::new())
10308}
10309
10310fn graph_db_backend_eval_path_targets(
10311    store: &impl GraphStore,
10312    max_hops: usize,
10313) -> Result<Option<(String, String, usize)>> {
10314    let synthetic_from = "gsym-synthetic-0000";
10315    let synthetic_to = format!("gsym-synthetic-{max_hops:04}");
10316    if store.node(synthetic_from)?.is_some() && store.node(&synthetic_to)?.is_some() {
10317        let outgoing = store.outgoing_edges(synthetic_from, None)?;
10318        if outgoing.len() > 1
10319            && let Some(edge) = outgoing.first()
10320        {
10321            return Ok(Some((
10322                edge.from_id.clone(),
10323                edge.to_id.clone(),
10324                GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
10325            )));
10326        }
10327        return Ok(Some((synthetic_from.to_string(), synthetic_to, max_hops)));
10328    }
10329
10330    Ok(store.sample_edge(None)?.map(|edge| {
10331        (
10332            edge.from_id,
10333            edge.to_id,
10334            GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
10335        )
10336    }))
10337}
10338
10339fn graph_db_backend_eval_path_operation<S: GraphStore>(
10340    store: &S,
10341    configured_max_hops: usize,
10342) -> (
10343    GraphDbBackendEvalOperation,
10344    Option<GraphDbBackendEvalSignature>,
10345) {
10346    let operation_name = if configured_max_hops == GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
10347        "path_max_hops".to_string()
10348    } else {
10349        format!("path_max_hops_{configured_max_hops}")
10350    };
10351    graph_db_backend_eval_timed(&operation_name, || {
10352        let (from, to, effective_max_hops) =
10353            graph_db_backend_eval_path_targets(store, configured_max_hops)?
10354                .context("backend-eval path probe requires at least one traversable edge")?;
10355        let path = store.shortest_path_with_max_hops(&from, &to, None, Some(effective_max_hops))?;
10356        let warning = if configured_max_hops > GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
10357            Some(format!(
10358                "{configured_max_hops}-hop tier is measured only; keep user-facing defaults at {} until repeated samples and SQLite query-plan checks pass",
10359                GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS
10360            ))
10361        } else if path.is_none() && effective_max_hops == configured_max_hops {
10362            Some(format!(
10363                "path probe truncated at {configured_max_hops} hops before a route was found"
10364            ))
10365        } else {
10366            None
10367        };
10368        Ok((
10369            path.as_ref().map(|path| path.nodes.len()),
10370            serde_json::json!({
10371                "from": from,
10372                "to": to,
10373                "configured_max_hops": configured_max_hops,
10374                "effective_max_hops": effective_max_hops,
10375                "hops": path.as_ref().map(|path| path.hops),
10376                "nodes": path.as_ref().map(|path| &path.nodes),
10377                "found": path.is_some(),
10378                "warning": warning,
10379            }),
10380        ))
10381    })
10382}
10383
10384fn graph_db_backend_eval_neighborhood_operation<S: GraphStore>(
10385    store: &S,
10386    depth: usize,
10387    limit: usize,
10388) -> (
10389    GraphDbBackendEvalOperation,
10390    Option<GraphDbBackendEvalSignature>,
10391) {
10392    graph_db_backend_eval_timed("neighborhood", || {
10393        let edge = match store.sample_edge(Some("calls"))? {
10394            Some(edge) => edge,
10395            None => store.sample_edge(None)?.context(
10396                "backend-eval neighborhood probe requires at least one traversable edge",
10397            )?,
10398        };
10399        let page = store
10400            .paged_neighborhood(
10401                &edge.from_id,
10402                depth,
10403                Some(&edge.kind),
10404                GraphQueryOptions {
10405                    limit: Some(limit.max(1)),
10406                    ..GraphQueryOptions::default()
10407                },
10408            )?
10409            .with_context(|| {
10410                format!(
10411                    "backend-eval neighborhood target not found: {}",
10412                    edge.from_id
10413                )
10414            })?;
10415        Ok((
10416            Some(page.nodes.len() + page.edges.len()),
10417            serde_json::json!({
10418                "center": edge.from_id,
10419                "kind": edge.kind,
10420                "depth": depth,
10421                "limit": limit.max(1),
10422                "node_ids": page.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10423                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10424                "truncated": page.page.truncated,
10425            }),
10426        ))
10427    })
10428}
10429
10430fn graph_db_backend_eval_related_operation<S: GraphStore>(
10431    root: &Path,
10432    scope: Option<&str>,
10433    store: &S,
10434    depth: usize,
10435    limit: usize,
10436) -> (
10437    GraphDbBackendEvalOperation,
10438    Option<GraphDbBackendEvalSignature>,
10439) {
10440    graph_db_backend_eval_timed("related", || {
10441        let query = "backend evaluation";
10442        let semantic = semantic_related_report_from_store(
10443            root,
10444            scope,
10445            query,
10446            3,
10447            SemanticRelatedKind::All,
10448            store,
10449        )?;
10450        let seed_ids = semantic
10451            .items
10452            .iter()
10453            .map(|item| item.handle.clone())
10454            .collect::<Vec<_>>();
10455        let subgraph =
10456            graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit.max(1))?;
10457        Ok((
10458            Some(subgraph.nodes.len() + subgraph.edges.len()),
10459            serde_json::json!({
10460                "query": query,
10461                "seed_ids": seed_ids,
10462                "node_ids": subgraph.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10463                "edge_ids": subgraph.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10464                "truncated": subgraph.truncated,
10465                "warnings": semantic.warnings,
10466                "diagnostics": subgraph.diagnostics,
10467            }),
10468        ))
10469    })
10470}
10471
10472fn graph_db_backend_eval_evidence_signature(report: &GraphDbEvidenceReport) -> serde_json::Value {
10473    serde_json::json!({
10474        "target": report.target,
10475        "target_node_id": report.target_node.id,
10476        "target_kind": report.target_node.kind,
10477        "worker_context": report.worker_context.iter().map(|node| &node.id).collect::<Vec<_>>(),
10478        "source_handles": report.source_handles.iter().map(|node| &node.id).collect::<Vec<_>>(),
10479        "worker_results": report.worker_results.iter().map(|node| &node.id).collect::<Vec<_>>(),
10480        "semantic_related": report.semantic_related.iter().map(|node| &node.id).collect::<Vec<_>>(),
10481        "path_count": report.shortest_paths.len(),
10482    })
10483}
10484
10485fn graph_db_backend_eval_target_resolution_signature(
10486    resolved: &[(String, SubstrateGraphNode)],
10487) -> serde_json::Value {
10488    serde_json::json!({
10489        "targets": resolved.iter().map(|(target, node)| {
10490            serde_json::json!({
10491                "target": target,
10492                "target_node_id": node.id,
10493                "target_kind": node.kind,
10494                "target_label": node.label,
10495            })
10496        }).collect::<Vec<_>>(),
10497    })
10498}
10499
10500fn graph_db_backend_eval_conflict_signature(report: &ConflictMatrixReport) -> serde_json::Value {
10501    serde_json::json!({
10502        "targets": report.targets,
10503        "can_parallel": report.can_parallel,
10504        "fail_closed": report.fail_closed,
10505        "cross_target_parallel_safe": report.cross_target_parallel_safe,
10506        "per_target_fail_closed": report.per_target_fail_closed.iter().map(|target| &target.target).collect::<Vec<_>>(),
10507        "candidates": report.candidates.iter().map(|candidate| {
10508            serde_json::json!({
10509                "target": candidate.target,
10510                "risk": conflict_risk_label(candidate.risk),
10511                "owned_files": candidate.owned_files,
10512                "owned_symbols": candidate.owned_symbols,
10513                "source_handles": candidate.source_handles.iter().map(|handle| &handle.handle).collect::<Vec<_>>(),
10514                "previously_completed": candidate.previously_completed,
10515                "parallel_safe": candidate.parallel_safe,
10516            })
10517        }).collect::<Vec<_>>(),
10518        "conflicts": report.conflicts.iter().map(|pair| {
10519            serde_json::json!({
10520                "left": pair.left,
10521                "right": pair.right,
10522                "risk": conflict_risk_label(pair.risk),
10523            })
10524        }).collect::<Vec<_>>(),
10525    })
10526}
10527
10528fn graph_db_backend_eval_dispatch_signature(report: &DispatchTraceReport) -> serde_json::Value {
10529    serde_json::json!({
10530        "targets": report.targets,
10531        "node_ids": report.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10532        "edge_keys": report.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10533        "evidence_packet_ids": report.evidence_packet_ids,
10534        "worker_prompt_targets": report.worker_prompt_packets.iter().map(|packet| &packet.target).collect::<Vec<_>>(),
10535        "truncated": report.truncated,
10536    })
10537}
10538
10539fn graph_db_backend_eval_edge_scan_probe(
10540    store: &impl GraphStore,
10541) -> Result<(SubstrateGraphEdge, Vec<GraphPropertyFilter>)> {
10542    if let Some((edge, filter)) = store.sample_edge_with_property()? {
10543        return Ok((edge, vec![filter]));
10544    }
10545    let edge = store
10546        .sample_edge(None)?
10547        .context("backend-eval edge scan requires at least one edge")?;
10548    Ok((edge, Vec::new()))
10549}
10550
10551#[allow(clippy::too_many_arguments)]
10552fn graph_db_backend_eval_report_for_store<S: GraphStore>(
10553    backend: &str,
10554    adapter: &str,
10555    read_only: bool,
10556    root: &Path,
10557    path: &Path,
10558    scope: Option<&str>,
10559    targets: &[String],
10560    depth: usize,
10561    limit: usize,
10562    impact_limit: usize,
10563    store: &S,
10564    freshness: GraphDbFreshnessReport,
10565    refresh_operation: GraphDbBackendEvalOperation,
10566    refresh_signature: Option<GraphDbBackendEvalSignature>,
10567    sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
10568    extra_warnings: Vec<String>,
10569    prepared: &ConflictMatrixPreparedInputs,
10570    projection_load: &str,
10571    lock_behavior: &str,
10572    install_portability: &str,
10573) -> (
10574    GraphDbBackendEvalBackendReport,
10575    Vec<GraphDbBackendEvalSignature>,
10576) {
10577    let mut operations = vec![refresh_operation];
10578    let mut signatures = refresh_signature.into_iter().collect::<Vec<_>>();
10579
10580    let (operation, signature) = graph_db_backend_eval_timed("status", || {
10581        let (nodes, edges) = store.graph_counts()?;
10582        Ok((
10583            Some(nodes + edges),
10584            serde_json::json!({
10585                "freshness": freshness.status,
10586                "nodes": nodes,
10587                "edges": edges,
10588            }),
10589        ))
10590    });
10591    operations.push(operation);
10592    signatures.extend(signature);
10593
10594    let (operation, signature) = graph_db_backend_eval_timed("edge_lookup", || {
10595        let edge = store
10596            .sample_edge(None)?
10597            .context("backend-eval edge lookup requires at least one edge")?;
10598        let edge_id = graph_db_edge_key(&edge);
10599        let found = store
10600            .edge(&edge_id)?
10601            .with_context(|| format!("backend-eval edge lookup missed {edge_id}"))?;
10602        Ok((
10603            Some(1),
10604            serde_json::json!({
10605                "edge_id": edge_id,
10606                "from_id": found.from_id,
10607                "to_id": found.to_id,
10608                "kind": found.kind,
10609            }),
10610        ))
10611    });
10612    operations.push(operation);
10613    signatures.extend(signature);
10614
10615    let (operation, signature) = graph_db_backend_eval_timed("edge_property_scan", || {
10616        let (edge, filters) = graph_db_backend_eval_edge_scan_probe(store)?;
10617        let page = store.paged_edges(
10618            Some(&edge.kind),
10619            GraphQueryOptions {
10620                limit: Some(limit.max(1)),
10621                property_filters: filters.clone(),
10622                ..GraphQueryOptions::default()
10623            },
10624        )?;
10625        Ok((
10626            Some(page.edges.len()),
10627            serde_json::json!({
10628                "kind": edge.kind,
10629                "filters": filters.iter().map(|filter| format!("{}={}", filter.key, filter.value)).collect::<Vec<_>>(),
10630                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10631                "truncated": page.page.truncated,
10632            }),
10633        ))
10634    });
10635    operations.push(operation);
10636    signatures.extend(signature);
10637
10638    let (operation, signature) = graph_db_backend_eval_timed("incident_edges", || {
10639        let edge = store
10640            .sample_edge(None)?
10641            .context("backend-eval incident edge scan requires at least one edge")?;
10642        let page = store.paged_incident_edges(
10643            &edge.from_id,
10644            Some(&edge.kind),
10645            GraphQueryOptions {
10646                limit: Some(limit.max(1)),
10647                ..GraphQueryOptions::default()
10648            },
10649        )?;
10650        Ok((
10651            Some(page.edges.len()),
10652            serde_json::json!({
10653                "node_id": edge.from_id,
10654                "kind": edge.kind,
10655                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10656                "truncated": page.page.truncated,
10657            }),
10658        ))
10659    });
10660    operations.push(operation);
10661    signatures.extend(signature);
10662
10663    let (operation, signature) = graph_db_backend_eval_neighborhood_operation(store, depth, limit);
10664    operations.push(operation);
10665    signatures.extend(signature);
10666
10667    let (operation, signature) =
10668        graph_db_backend_eval_related_operation(root, scope, store, depth, limit);
10669    operations.push(operation);
10670    signatures.extend(signature);
10671
10672    for configured_max_hops in std::iter::once(GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS)
10673        .chain(GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS)
10674    {
10675        let (operation, signature) =
10676            graph_db_backend_eval_path_operation(store, configured_max_hops);
10677        operations.push(operation);
10678        signatures.extend(signature);
10679    }
10680
10681    let (operation, signature) = graph_db_backend_eval_timed("evidence_target_resolution", || {
10682        let resolved = targets
10683            .iter()
10684            .map(|target| {
10685                let node = graph_db_resolve_evidence_target(store, target)?
10686                    .with_context(|| format!("backend-eval target not found: {target}"))?;
10687                Ok((target.clone(), node))
10688            })
10689            .collect::<Result<Vec<_>>>()?;
10690        let signature = graph_db_backend_eval_target_resolution_signature(&resolved);
10691        Ok((Some(resolved.len()), signature))
10692    });
10693    operations.push(operation);
10694    signatures.extend(signature);
10695
10696    let mut evidence_for_report = None;
10697    let mut graph_snapshot_for_trace = None;
10698    let (operation, signature) = graph_db_backend_eval_timed("evidence", || {
10699        let resolved_targets =
10700            resolve_conflict_matrix_targets(store, targets, &prepared.context_pack)?;
10701        let evidence = collect_conflict_matrix_evidence_packets(
10702            root,
10703            scope,
10704            backend,
10705            &resolved_targets,
10706            depth,
10707            limit,
10708            store,
10709            freshness.clone(),
10710        )?;
10711        let report = &evidence
10712            .first()
10713            .context("backend-eval evidence requires at least one target")?
10714            .report;
10715        let rows = evidence
10716            .iter()
10717            .map(|entry| {
10718                entry.report.worker_context.len()
10719                    + entry.report.source_handles.len()
10720                    + entry.report.worker_results.len()
10721                    + entry.report.semantic_related.len()
10722            })
10723            .sum();
10724        let signature = graph_db_backend_eval_evidence_signature(report);
10725        evidence_for_report = Some((resolved_targets, evidence));
10726        Ok((Some(rows), signature))
10727    });
10728    operations.push(operation);
10729    signatures.extend(signature);
10730
10731    let mut conflict_for_trace = None;
10732    let (operation, signature) = graph_db_backend_eval_timed("conflict_matrix", || {
10733        let graph_prepared = if let Some((targets, evidence)) = evidence_for_report.take() {
10734            let graph =
10735                conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
10736            let shared_preparation =
10737                conflict_matrix_shared_preparation_summary(&graph, &evidence, "memory_reuse");
10738            ConflictMatrixGraphPreparedInputs {
10739                targets,
10740                graph,
10741                evidence,
10742                shared_preparation,
10743            }
10744        } else {
10745            prepare_conflict_matrix_graph_orchestration(
10746                root,
10747                scope,
10748                backend,
10749                targets,
10750                prepared,
10751                depth,
10752                limit,
10753                store,
10754                freshness.clone(),
10755            )?
10756        };
10757        let report = build_conflict_matrix_report_from_prepared_graph(
10758            root,
10759            path,
10760            scope,
10761            depth,
10762            limit,
10763            impact_limit,
10764            freshness.clone(),
10765            extra_warnings.clone(),
10766            prepared,
10767            &graph_prepared,
10768        )?;
10769        let signature = graph_db_backend_eval_conflict_signature(&report);
10770        let rows = report.candidates.len() + report.conflicts.len();
10771        conflict_for_trace = Some(report);
10772        graph_snapshot_for_trace = Some(graph_prepared.graph);
10773        Ok((Some(rows), signature))
10774    });
10775    operations.push(operation);
10776    signatures.extend(signature);
10777
10778    let (operation, signature) = graph_db_backend_eval_timed("dispatch_trace", || {
10779        let conflict = conflict_for_trace
10780            .take()
10781            .context("backend-eval dispatch-trace requires a completed conflict-matrix report")?;
10782        let graph = graph_snapshot_for_trace
10783            .take()
10784            .context("backend-eval dispatch-trace requires conflict-matrix graph preparation")?;
10785        let report = build_dispatch_trace_report_from_conflict_snapshot(
10786            root,
10787            scope,
10788            conflict,
10789            graph.nodes,
10790            graph.edges,
10791            depth,
10792            limit,
10793            Vec::new(),
10794        )?;
10795        Ok((
10796            Some(report.nodes.len() + report.edges.len()),
10797            graph_db_backend_eval_dispatch_signature(&report),
10798        ))
10799    });
10800    operations.push(operation);
10801    signatures.extend(signature);
10802
10803    let total_micros = operations
10804        .iter()
10805        .map(|operation| operation.duration_micros)
10806        .sum();
10807    let parity = graph_db_backend_eval_parity(sqlite_signatures, &signatures);
10808    (
10809        GraphDbBackendEvalBackendReport {
10810            backend: backend.to_string(),
10811            adapter: adapter.to_string(),
10812            read_only,
10813            projection_load: projection_load.to_string(),
10814            operations,
10815            total_micros,
10816            parity,
10817            lock_behavior: lock_behavior.to_string(),
10818            install_portability: install_portability.to_string(),
10819        },
10820        signatures,
10821    )
10822}
10823
10824pub(crate) fn graph_db_backend_eval_refresh_operation(
10825    duration_micros: u128,
10826    rows: usize,
10827    value: serde_json::Value,
10828) -> (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature) {
10829    (
10830        GraphDbBackendEvalOperation {
10831            name: "refresh".to_string(),
10832            supported: true,
10833            status: "ok".to_string(),
10834            duration_micros,
10835            rows: Some(rows),
10836            error: None,
10837        },
10838        GraphDbBackendEvalSignature {
10839            operation: "refresh".to_string(),
10840            value,
10841        },
10842    )
10843}
10844
10845pub(crate) fn graph_db_backend_eval_synthetic_projection(
10846    nodes: usize,
10847    fanout: usize,
10848) -> GraphProjection {
10849    let nodes = nodes.max(12);
10850    let symbol_count = nodes.saturating_sub(9).max(1);
10851    let source = GraphProvenance::new("backend-eval", "synthetic");
10852    let mut projection_nodes = vec![
10853        SubstrateGraphNode::new(
10854            "projection:tsift-traversal:synthetic",
10855            GRAPH_PROJECTION_META_KIND,
10856            "synthetic projection",
10857        )
10858        .with_property("projection_version", GRAPH_PROJECTION_VERSION)
10859        .with_property(
10860            "content_hash",
10861            format!("synthetic-{nodes}-{fanout}-{symbol_count}"),
10862        )
10863        .with_provenance(source.clone()),
10864        SubstrateGraphNode::new("gses-synthetic", "session", "synthetic session")
10865            .with_property("ref_id", "synthetic-session"),
10866        SubstrateGraphNode::new("gbak-synthetic", "backlog", "#synthetic")
10867            .with_property("ref_id", "synthetic")
10868            .with_property("path", "tasks/software/synthetic.md")
10869            .with_property("line", "1")
10870            .with_property(
10871                "expand",
10872                "tsift source-read tasks/software/synthetic.md --start 1 --lines 40",
10873            ),
10874        SubstrateGraphNode::new("gjob-synthetic", "job_packet", "do #synthetic")
10875            .with_property("ref_id", "synthetic"),
10876        SubstrateGraphNode::new("gwctx-synthetic", "worker_context", "synthetic context")
10877            .with_property("target", "synthetic")
10878            .with_property("summary", "Synthetic worker owns synthetic.rs")
10879            .with_property(
10880                "expand",
10881                "tsift source-read synthetic.rs --start 1 --lines 80",
10882            ),
10883        SubstrateGraphNode::new("gsrc-synthetic", "source_handle", "synthetic.rs:1-80")
10884            .with_property("file", "synthetic.rs")
10885            .with_property("start", "1")
10886            .with_property("end", "80")
10887            .with_property(
10888                "expand",
10889                "tsift source-read synthetic.rs --start 1 --lines 80",
10890            ),
10891        SubstrateGraphNode::new("gfil-synthetic", "file", "synthetic.rs")
10892            .with_property("path", "synthetic.rs"),
10893        SubstrateGraphNode::new("gsem-synthetic", "semantic_concept", "backend evaluation")
10894            .with_property("handle", "gsem-synthetic")
10895            .with_property("label", "backend evaluation")
10896            .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
10897            .with_property(
10898                "embedding",
10899                semantic_embedding_property("backend evaluation"),
10900            ),
10901        SubstrateGraphNode::new("gwres-synthetic", "worker_result", "completed #synthetic")
10902            .with_property("ref_id", "synthetic")
10903            .with_property("status", "completed")
10904            .with_property("touched_files", "synthetic.rs")
10905            .with_property("expected_tests", "cargo test --test graph_db_conformance"),
10906    ];
10907    for idx in 0..symbol_count {
10908        projection_nodes.push(
10909            SubstrateGraphNode::new(
10910                format!("gsym-synthetic-{idx:04}"),
10911                "symbol",
10912                format!("synthetic_symbol_{idx:04}"),
10913            )
10914            .with_property("ref_id", format!("synthetic_symbol_{idx:04}"))
10915            .with_property("path", "synthetic.rs")
10916            .with_property("line", (idx + 1).to_string()),
10917        );
10918    }
10919
10920    let mut projection_edges = vec![
10921        SubstrateGraphEdge::new("gses-synthetic", "gbak-synthetic", "contains"),
10922        SubstrateGraphEdge::new("gses-synthetic", "gjob-synthetic", "queues"),
10923        SubstrateGraphEdge::new("gbak-synthetic", "gwctx-synthetic", "has_context"),
10924        SubstrateGraphEdge::new("gjob-synthetic", "gwctx-synthetic", "has_context"),
10925        SubstrateGraphEdge::new("gwctx-synthetic", "gsrc-synthetic", "uses_source"),
10926        SubstrateGraphEdge::new("gbak-synthetic", "gwres-synthetic", "has_worker_result"),
10927        SubstrateGraphEdge::new("gbak-synthetic", "gsem-synthetic", "mentions_concept"),
10928        SubstrateGraphEdge::new("gsrc-synthetic", "gfil-synthetic", "reads_file"),
10929        SubstrateGraphEdge::new("gfil-synthetic", "gsym-synthetic-0000", "defines"),
10930    ];
10931    for idx in 0..symbol_count {
10932        let from = format!("gsym-synthetic-{idx:04}");
10933        for offset in 1..=fanout.max(1).min(symbol_count) {
10934            let to_idx = (idx + offset) % symbol_count;
10935            if to_idx != idx {
10936                projection_edges.push(SubstrateGraphEdge::new(
10937                    from.clone(),
10938                    format!("gsym-synthetic-{to_idx:04}"),
10939                    "calls",
10940                ));
10941            }
10942        }
10943    }
10944
10945    GraphProjection {
10946        nodes: projection_nodes,
10947        edges: projection_edges
10948            .into_iter()
10949            .map(|edge| {
10950                edge.with_property("dataset", "synthetic")
10951                    .with_provenance(source.clone())
10952            })
10953            .collect(),
10954    }
10955}
10956
10957pub(crate) fn graph_db_backend_eval_promotion(
10958    datasets: &[GraphDbBackendEvalDataset],
10959    candidates: &[GraphDbExperimentalBackend],
10960) -> Vec<GraphDbBackendPromotionDecision> {
10961    let mut decisions = Vec::new();
10962    for candidate in candidates {
10963        let mut reasons = Vec::new();
10964        let mut faster_everywhere = true;
10965        let mut parity_everywhere = true;
10966        for dataset in datasets {
10967            let Some(sqlite_report) = dataset
10968                .backends
10969                .iter()
10970                .find(|backend| backend.backend == "sqlite")
10971            else {
10972                parity_everywhere = false;
10973                faster_everywhere = false;
10974                reasons.push(format!(
10975                    "{} dataset is missing SQLite baseline",
10976                    dataset.name
10977                ));
10978                continue;
10979            };
10980            let sqlite_total = sqlite_report.total_micros;
10981            let Some(candidate_report) = dataset
10982                .backends
10983                .iter()
10984                .find(|backend| backend.backend == candidate.name())
10985            else {
10986                parity_everywhere = false;
10987                reasons.push(format!("{} dataset did not run", dataset.name));
10988                continue;
10989            };
10990            if !candidate_report.parity.matches_sqlite {
10991                parity_everywhere = false;
10992                reasons.push(format!("{} parity differed from SQLite", dataset.name));
10993            }
10994            if candidate_report.total_micros >= sqlite_total {
10995                faster_everywhere = false;
10996                reasons.push(format!(
10997                    "{} total {}us did not beat SQLite {}us",
10998                    dataset.name, candidate_report.total_micros, sqlite_total
10999                ));
11000            }
11001            let sqlite_operations = sqlite_report
11002                .operations
11003                .iter()
11004                .map(|operation| (operation.name.as_str(), operation.duration_micros))
11005                .collect::<BTreeMap<_, _>>();
11006            for operation in &candidate_report.operations {
11007                if let Some(sqlite_duration) = sqlite_operations.get(operation.name.as_str())
11008                    && operation.duration_micros >= *sqlite_duration
11009                {
11010                    faster_everywhere = false;
11011                    reasons.push(format!(
11012                        "{} {} operation {}us did not beat SQLite {}us",
11013                        dataset.name, operation.name, operation.duration_micros, sqlite_duration
11014                    ));
11015                }
11016            }
11017            if candidate_report
11018                .operations
11019                .iter()
11020                .any(|operation| operation.status != "ok")
11021            {
11022                parity_everywhere = false;
11023                reasons.push(format!("{} has failed benchmark operations", dataset.name));
11024            }
11025        }
11026        let decision = if let Some(reason) = candidate.prototype_hold_reason() {
11027            reasons.push(reason.to_string());
11028            reasons.push(
11029                "current bounded prototype timings are benchmark evidence, not a backend switch approval"
11030                    .to_string(),
11031            );
11032            "hold"
11033        } else if parity_everywhere && faster_everywhere {
11034            reasons.push(
11035                "prototype gate passed; production promotion still requires the real engine adapter to preserve SQLite's bundled install and multi-process lock behavior"
11036                    .to_string(),
11037            );
11038            "eligible"
11039        } else {
11040            reasons.push(
11041                "production promotion requires SQLite parity plus lower total time for every measured operation on every dataset without worse lock behavior or install portability"
11042                    .to_string(),
11043            );
11044            "hold"
11045        };
11046        decisions.push(GraphDbBackendPromotionDecision {
11047            backend: candidate.name().to_string(),
11048            decision: decision.to_string(),
11049            reasons: dedupe_preserve_order(reasons),
11050            gate: candidate.promotion_gate(),
11051        });
11052    }
11053    decisions
11054}
11055
11056pub(crate) fn graph_db_backend_eval_metrics(
11057    datasets: &[GraphDbBackendEvalDataset],
11058) -> BTreeMap<String, f64> {
11059    let mut metrics = BTreeMap::new();
11060    for dataset in datasets {
11061        let graph_rows = graph_db_backend_eval_graph_rows(dataset);
11062        metrics.insert(format!("{}.nodes", dataset.name), dataset.nodes as f64);
11063        metrics.insert(format!("{}.edges", dataset.name), dataset.edges as f64);
11064        metrics.insert(format!("{}.graph_rows", dataset.name), graph_rows as f64);
11065        for backend in &dataset.backends {
11066            let prefix = format!("{}.{}", dataset.name, backend.backend.replace('-', "_"));
11067            metrics.insert(
11068                format!("{prefix}.total_duration_micros"),
11069                backend.total_micros as f64,
11070            );
11071            append_graph_db_backend_eval_normalized_duration_metric(
11072                &mut metrics,
11073                &format!("{prefix}.total_duration_micros_per_1k_graph_rows"),
11074                backend.total_micros,
11075                graph_rows,
11076            );
11077            for operation in &backend.operations {
11078                metrics.insert(
11079                    format!("{prefix}.{}.duration_micros", operation.name),
11080                    operation.duration_micros as f64,
11081                );
11082                append_graph_db_backend_eval_normalized_duration_metric(
11083                    &mut metrics,
11084                    &format!(
11085                        "{prefix}.{}.duration_micros_per_1k_graph_rows",
11086                        operation.name
11087                    ),
11088                    operation.duration_micros,
11089                    graph_rows,
11090                );
11091                if let Some(rows) = operation.rows {
11092                    metrics.insert(format!("{prefix}.{}.rows", operation.name), rows as f64);
11093                }
11094            }
11095        }
11096    }
11097    metrics
11098}
11099
11100pub(crate) fn graph_db_backend_eval_graph_rows(dataset: &GraphDbBackendEvalDataset) -> usize {
11101    dataset.nodes + dataset.edges
11102}
11103
11104pub(crate) fn append_graph_db_backend_eval_normalized_duration_metric(
11105    metrics: &mut BTreeMap<String, f64>,
11106    key: &str,
11107    duration_micros: u128,
11108    graph_rows: usize,
11109) {
11110    if graph_rows == 0 {
11111        return;
11112    }
11113    metrics.insert(
11114        key.to_string(),
11115        duration_micros as f64 / graph_rows as f64 * GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT,
11116    );
11117}
11118
11119pub(crate) fn append_graph_db_backend_eval_phase_metrics(
11120    metrics: &mut BTreeMap<String, f64>,
11121    dataset: &str,
11122    graph_rows: usize,
11123    phases: &[GraphDbBackendEvalPhaseTiming],
11124) {
11125    for phase in phases {
11126        metrics.insert(
11127            format!("{dataset}.refresh_phase.{}.duration_micros", phase.name),
11128            phase.duration_micros as f64,
11129        );
11130        append_graph_db_backend_eval_normalized_duration_metric(
11131            metrics,
11132            &format!(
11133                "{dataset}.refresh_phase.{}.duration_micros_per_1k_graph_rows",
11134                phase.name
11135            ),
11136            phase.duration_micros,
11137            graph_rows,
11138        );
11139    }
11140}
11141
11142fn graph_db_backend_eval_base_command(
11143    root: &Path,
11144    scope: Option<&str>,
11145    full_projection: bool,
11146) -> String {
11147    let full_projection_arg = if full_projection {
11148        " --full-projection"
11149    } else {
11150        ""
11151    };
11152    format!(
11153        "tsift graph-db --path {}{} --json backend-eval{}",
11154        shell_quote(root.to_string_lossy().as_ref()),
11155        graph_db_scope_arg(scope),
11156        full_projection_arg
11157    )
11158}
11159
11160pub(crate) fn graph_db_backend_eval_metric_digest_command(
11161    root: &Path,
11162    scope: Option<&str>,
11163    full_projection: bool,
11164) -> String {
11165    format!(
11166        "{} | tsift metric-digest --baseline fixtures/graph-db-performance-history.json",
11167        graph_db_backend_eval_base_command(root, scope, full_projection)
11168    )
11169}
11170
11171fn graph_db_backend_eval_repeated_sample_command(
11172    root: &Path,
11173    scope: Option<&str>,
11174    full_projection: bool,
11175) -> String {
11176    format!(
11177        "for sample in 1 2 3; do {}; done | tsift metric-digest --baseline fixtures/graph-db-performance-history.json",
11178        graph_db_backend_eval_base_command(root, scope, full_projection)
11179    )
11180}
11181
11182fn graph_db_backend_eval_hop_cap_promotion_gate() -> GraphDbHopCapPromotionGate {
11183    let mut required_metrics = Vec::new();
11184    for workload in perf_gate::HOP_CAP_REQUIRED_WORKLOADS {
11185        required_metrics.push(format!("{workload}.sqlite.path_max_hops.duration_micros"));
11186        required_metrics.push(format!("{workload}.sqlite.path_max_hops.rows"));
11187        for hops in perf_gate::HOP_CAP_CANDIDATE_TIERS {
11188            required_metrics.push(format!(
11189                "{workload}.sqlite.path_max_hops_{hops}.duration_micros"
11190            ));
11191            required_metrics.push(format!("{workload}.sqlite.path_max_hops_{hops}.rows"));
11192        }
11193    }
11194    GraphDbHopCapPromotionGate {
11195        status: "hold_64_default_until_gate_passes".to_string(),
11196        current_default_hops: perf_gate::HOP_CAP_CURRENT_DEFAULT,
11197        candidate_hop_tiers: perf_gate::HOP_CAP_CANDIDATE_TIERS.to_vec(),
11198        required_backend: perf_gate::BASELINE_BACKEND.to_string(),
11199        required_workloads: perf_gate::HOP_CAP_REQUIRED_WORKLOADS
11200            .iter()
11201            .map(|workload| (*workload).to_string())
11202            .collect(),
11203        required_metrics,
11204        allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
11205        minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
11206        decision_rule:
11207            "keep 64 as the user-facing default until each candidate tier has repeated real, full_projection, and synthetic_deep_chain SQLite samples within the latency-regression budget and returning useful path rows; full_projection samples are binding only after a cold populate leg proves a cache-hit leg"
11208                .to_string(),
11209    }
11210}
11211
11212fn graph_db_backend_eval_backend_adapter_spike_gate() -> GraphDbBackendAdapterSpikeGate {
11213    let candidate_backends = [
11214        GraphDbExperimentalBackend::Falkordb,
11215        GraphDbExperimentalBackend::Kuzu,
11216    ]
11217    .into_iter()
11218    .map(|backend| GraphDbBackendAdapterSpikeCandidate {
11219        backend: backend.name().to_string(),
11220        adapter_label: backend.adapter_label().to_string(),
11221        projection_load: backend.projection_load().to_string(),
11222        lock_behavior: backend.lock_behavior().to_string(),
11223        install_portability: backend.install_portability().to_string(),
11224    })
11225    .collect();
11226
11227    GraphDbBackendAdapterSpikeGate {
11228        status: "hold_real_optional_adapter_required".to_string(),
11229        candidate_backends,
11230        required_workloads: perf_gate::GATE_WORKLOAD_PREFIXES
11231            .iter()
11232            .map(|workload| (*workload).to_string())
11233            .collect(),
11234        required_checks: vec![
11235            "real_optional_adapter_behind_graphstore_without_default_build_dependency".to_string(),
11236            "projection_load_writes_provider_neutral_rows_without_sqlite_row_replay".to_string(),
11237            "freshness_and_full_parity_match_sqlite_on_every_graphstore_operation".to_string(),
11238            "lock_semantics_match_or_beat_sqlite_for_writer_and_read_only_workflows".to_string(),
11239            "install_portability_preserves_cargo_build_install_without_external_service_or_native_toolchain"
11240                .to_string(),
11241            "full_projection_cache_hit_sample_before_backend_or_hop_cap_changes".to_string(),
11242            "beats_sqlite_on_every_required_workload_and_metric_in_backend_eval".to_string(),
11243        ],
11244        decision_rule:
11245            "do not promote a read-only prototype; FalkorDB or Kuzu can only advance after a real optional adapter proves projection writes/load, lock semantics, install portability, full parity, and faster-than-SQLite results across every required workload"
11246                .to_string(),
11247        evidence_plan: "plans/gback-evidence.md".to_string(),
11248    }
11249}
11250
11251pub(crate) fn graph_db_backend_eval_performance_gate(
11252    root: &Path,
11253    scope: Option<&str>,
11254    full_projection: bool,
11255) -> GraphDbBackendEvalPerformanceGate {
11256    let mut required_metrics = vec![
11257        "real.sqlite.refresh.duration_micros".to_string(),
11258        "real.sqlite.refresh.duration_micros_per_1k_graph_rows".to_string(),
11259        "real.sqlite.edge_lookup.duration_micros_per_1k_graph_rows".to_string(),
11260        "real.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows".to_string(),
11261        "real.sqlite.incident_edges.duration_micros_per_1k_graph_rows".to_string(),
11262        "real.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11263        "real.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows".to_string(),
11264        "real.sqlite.evidence.duration_micros_per_1k_graph_rows".to_string(),
11265        "real.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11266        "real.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows".to_string(),
11267        "real.refresh_phase.sqlite_delta_write.duration_micros".to_string(),
11268        "real.refresh_phase.sqlite_property_row_staging.duration_micros".to_string(),
11269        "real.refresh_phase.sqlite_edge_property_row_staging.duration_micros".to_string(),
11270        "real.sqlite.conflict_matrix.duration_micros".to_string(),
11271        "real.sqlite.dispatch_trace.duration_micros".to_string(),
11272        "real.sqlite.path_max_hops.duration_micros".to_string(),
11273        "real.sqlite.path_max_hops_128.duration_micros".to_string(),
11274        "real.sqlite.path_max_hops_256.duration_micros".to_string(),
11275        "real.sqlite.path_max_hops_512.duration_micros".to_string(),
11276        "real.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows".to_string(),
11277        "real.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows".to_string(),
11278        "real.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows".to_string(),
11279        "synthetic_high_degree.sqlite.total_duration_micros".to_string(),
11280        "synthetic_high_degree.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11281        "synthetic_high_degree.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11282        "synthetic_high_degree.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows"
11283            .to_string(),
11284        "synthetic_high_degree.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows"
11285            .to_string(),
11286        "synthetic_deep_chain.sqlite.incident_edges.duration_micros_per_1k_graph_rows".to_string(),
11287        "synthetic_deep_chain.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11288        "synthetic_deep_chain.sqlite.path_max_hops.duration_micros".to_string(),
11289        "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros".to_string(),
11290        "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros".to_string(),
11291        "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros".to_string(),
11292        "synthetic_deep_chain.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows"
11293            .to_string(),
11294        "synthetic_deep_chain.sqlite.path_max_hops.duration_micros_per_1k_graph_rows".to_string(),
11295        "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows"
11296            .to_string(),
11297        "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows"
11298            .to_string(),
11299        "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows"
11300            .to_string(),
11301    ];
11302    if full_projection {
11303        required_metrics.extend([
11304            "full_projection.cache.hit".to_string(),
11305            "full_projection.cache.disk_bytes".to_string(),
11306            "full_projection.cache.compression_ratio".to_string(),
11307            "full_projection.refresh_phase.cache_lookup.duration_micros".to_string(),
11308            "full_projection.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11309            "full_projection.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows"
11310                .to_string(),
11311            "full_projection.refresh_phase.projection_rows.duration_micros_per_1k_graph_rows"
11312                .to_string(),
11313            "full_projection.sqlite.sqlite_delta_write.duration_micros".to_string(),
11314            "full_projection.sqlite.sqlite_edge_staging.duration_micros".to_string(),
11315            "full_projection.sqlite.post_write_reads.duration_micros".to_string(),
11316            "full_projection.sqlite.neighborhood.duration_micros".to_string(),
11317            "full_projection.sqlite.evidence_target_resolution.duration_micros".to_string(),
11318            "full_projection.sqlite.evidence.duration_micros".to_string(),
11319            "full_projection.sqlite.path_max_hops.duration_micros".to_string(),
11320            "full_projection.sqlite.path_max_hops_128.duration_micros".to_string(),
11321            "full_projection.sqlite.path_max_hops_256.duration_micros".to_string(),
11322            "full_projection.sqlite.path_max_hops_512.duration_micros".to_string(),
11323            "full_projection.sqlite.conflict_matrix.duration_micros".to_string(),
11324            "full_projection.sqlite.dispatch_trace.duration_micros".to_string(),
11325        ]);
11326    }
11327    GraphDbBackendEvalPerformanceGate {
11328        baseline_fixture: "fixtures/graph-db-performance-history.json".to_string(),
11329        ci_profile: "synthetic_high_degree + synthetic_deep_chain metrics are CI-safe and bounded"
11330            .to_string(),
11331        opt_in_real_profile:
11332            "pass --full-projection to add the full-project dataset when checking for large projection regressions"
11333                .to_string(),
11334        full_projection_cache_hit_gate: if full_projection {
11335            "binding full_projection performance evidence requires a cold populate leg followed by cache-leg samples with full_projection.cache.hit=1; cache-miss samples are diagnostics, not backend or hop-cap promotion proof"
11336                .to_string()
11337        } else {
11338            "not evaluated until --full-projection is enabled".to_string()
11339        },
11340        allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
11341        minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
11342        normalized_metric_unit: "duration_micros_per_1k_graph_rows".to_string(),
11343        required_metrics,
11344        digest_command: graph_db_backend_eval_metric_digest_command(root, scope, full_projection),
11345        repeated_sample_command: graph_db_backend_eval_repeated_sample_command(
11346            root,
11347            scope,
11348            full_projection,
11349        ),
11350        hop_cap_promotion: graph_db_backend_eval_hop_cap_promotion_gate(),
11351        backend_adapter_spike: graph_db_backend_eval_backend_adapter_spike_gate(),
11352    }
11353}
11354
/// Borrowed inputs for one backend-eval run.
///
/// NOTE(review): the consumer of this struct is outside this section; the field notes below
/// are inferred from names and types and should be confirmed against it.
pub(crate) struct GraphDbBackendEvalOptions<'a> {
    /// Filesystem path the evaluation operates on.
    path: &'a Path,
    /// Optional scope name; `None` when no scope was given.
    scope: Option<&'a str>,
    /// Candidate backend names as raw strings (presumably still unparsed; confirm).
    candidates: &'a [String],
    /// Target identifiers to evaluate against.
    targets: &'a [String],
    /// Whether the full-projection dataset is included in the run.
    full_projection: bool,
}
11362
11363#[allow(clippy::too_many_arguments)]
11364pub(crate) fn graph_db_backend_eval_dataset(
11365    name: &str,
11366    root: &Path,
11367    path: &Path,
11368    scope: Option<&str>,
11369    targets: &[String],
11370    depth: usize,
11371    limit: usize,
11372    impact_limit: usize,
11373    candidates: &[GraphDbExperimentalBackend],
11374    sqlite_store: &SqliteGraphStore,
11375    sqlite_freshness: GraphDbFreshnessReport,
11376    sqlite_refresh: (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature),
11377    sqlite_rows: ConvexProjectionRows,
11378    extra_warnings: Vec<String>,
11379    prepared: &ConflictMatrixPreparedInputs,
11380) -> Result<GraphDbBackendEvalDataset> {
11381    let (nodes, edges) = sqlite_store.graph_counts()?;
11382    let (sqlite_operation, sqlite_signature) = sqlite_refresh;
11383    let (sqlite_report, sqlite_signatures) = graph_db_backend_eval_report_for_store(
11384        "sqlite",
11385        "SQLite GraphStore correctness baseline",
11386        false,
11387        root,
11388        path,
11389        scope,
11390        targets,
11391        depth,
11392        limit,
11393        impact_limit,
11394        sqlite_store,
11395        sqlite_freshness,
11396        sqlite_operation,
11397        Some(sqlite_signature),
11398        None,
11399        extra_warnings.clone(),
11400        prepared,
11401        "SQLite refresh writes provider-neutral projection rows into graph.db transactionally",
11402        "SQLite WAL correctness store; refresh uses one transactional writer and read-only queries use snapshot recovery",
11403        "bundled rusqlite baseline; no external service or runtime required",
11404    );
11405
11406    let mut backends = vec![sqlite_report];
11407    for candidate in candidates {
11408        let started = Instant::now();
11409        let store = ExperimentalReadOnlyGraphStore::from_rows(*candidate, &sqlite_rows)?;
11410        let (candidate_nodes, candidate_edges) = store.graph_counts()?;
11411        let rows = candidate_nodes + candidate_edges;
11412        let refresh = graph_db_backend_eval_refresh_operation(
11413            started.elapsed().as_micros(),
11414            rows,
11415            serde_json::json!({
11416                "nodes": candidate_nodes,
11417                "edges": candidate_edges,
11418            }),
11419        );
11420        let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
11421        let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
11422            candidate.name(),
11423            candidate.adapter_label(),
11424            true,
11425            root,
11426            path,
11427            scope,
11428            targets,
11429            depth,
11430            limit,
11431            impact_limit,
11432            &store,
11433            freshness,
11434            refresh.0,
11435            Some(refresh.1),
11436            Some(&sqlite_signatures),
11437            extra_warnings.clone(),
11438            prepared,
11439            candidate.projection_load(),
11440            candidate.lock_behavior(),
11441            candidate.install_portability(),
11442        );
11443        backends.push(candidate_report);
11444    }
11445
11446    Ok(GraphDbBackendEvalDataset {
11447        name: name.to_string(),
11448        target_count: targets.len(),
11449        nodes,
11450        edges,
11451        backends,
11452    })
11453}
11454
11455pub(crate) fn print_graph_db_backend_eval_human(report: &GraphDbBackendEvalReport) {
11456    println!(
11457        "graph-db backend-eval baseline:{} candidates:{}",
11458        report.baseline_backend,
11459        report.candidates.join(", ")
11460    );
11461    for phase in &report.phase_timings {
11462        println!(
11463            "phase:{} {}us {}",
11464            phase.name, phase.duration_micros, phase.detail
11465        );
11466    }
11467    for dataset in &report.datasets {
11468        println!(
11469            "dataset:{} targets:{} rows:{}",
11470            dataset.name,
11471            dataset.target_count,
11472            dataset.nodes + dataset.edges
11473        );
11474        for backend in &dataset.backends {
11475            println!(
11476                "  backend:{} total:{}us parity:{}",
11477                backend.backend, backend.total_micros, backend.parity.matches_sqlite
11478            );
11479            println!("    projection-load: {}", backend.projection_load);
11480            println!("    lock-behavior: {}", backend.lock_behavior);
11481            println!("    install-portability: {}", backend.install_portability);
11482            for operation in &backend.operations {
11483                println!(
11484                    "    {} {} {}us",
11485                    operation.name, operation.status, operation.duration_micros
11486                );
11487            }
11488            for diagnostic in &backend.parity.diagnostics {
11489                println!("    parity: {diagnostic}");
11490            }
11491        }
11492    }
11493    for decision in &report.promotion {
11494        println!("promotion {}: {}", decision.backend, decision.decision);
11495        println!("  gate: {}", decision.gate.status);
11496        for reason in &decision.reasons {
11497            println!("  reason: {reason}");
11498        }
11499        for check in &decision.gate.required_checks {
11500            println!("  check: {check}");
11501        }
11502    }
11503    println!("metric-digest: {}", report.metric_digest_command);
11504    println!(
11505        "repeat-samples: {}",
11506        report.performance_gate.repeated_sample_command
11507    );
11508}
11509
11510fn traversal_expand_command(root: &Path, handle: &str) -> String {
11511    format!(
11512        "tsift traverse {} --path {} --depth 1 --limit 50",
11513        shell_quote(handle),
11514        shell_quote(root.to_string_lossy().as_ref())
11515    )
11516}
11517
11518fn traversal_file_node(root: &Path, file: &str) -> TraversalNode {
11519    let display = relativize(file, root);
11520    let handle = stable_handle("gfil", &format!("file:{display}"));
11521    TraversalNode {
11522        handle: handle.clone(),
11523        kind: "file".to_string(),
11524        label: display.clone(),
11525        ref_id: Some(display.clone()),
11526        path: Some(display),
11527        line: None,
11528        detail: None,
11529        properties: BTreeMap::new(),
11530        expand: traversal_expand_command(root, &handle),
11531    }
11532}
11533
11534fn traversal_raw_source_file_node(root: &Path, file: &str) -> TraversalNode {
11535    let mut node = traversal_file_node(root, file);
11536    if let Some(path) = node.path.clone() {
11537        node.detail = Some("raw source fallback; graph evidence unavailable".to_string());
11538        node.expand = source_read_command(root, &path, 1, 80);
11539    }
11540    node
11541}
11542
11543fn traversal_symbol_node(root: &Path, symbol: &index::StoredSymbol) -> TraversalNode {
11544    let file = relativize(&symbol.file, root);
11545    let key = format!("symbol:{file}:{}:{}", symbol.line, symbol.name);
11546    let handle = stable_handle("gsym", &key);
11547    TraversalNode {
11548        handle: handle.clone(),
11549        kind: "symbol".to_string(),
11550        label: symbol.name.clone(),
11551        ref_id: Some(symbol.name.clone()),
11552        path: Some(file),
11553        line: Some(symbol.line),
11554        detail: Some(format!("{} {}", symbol.language, symbol.kind)),
11555        properties: BTreeMap::new(),
11556        expand: traversal_expand_command(root, &handle),
11557    }
11558}
11559
11560fn traversal_unresolved_symbol_node(root: &Path, name: &str) -> TraversalNode {
11561    let handle = stable_handle("gsym", &format!("symbol:{name}"));
11562    TraversalNode {
11563        handle: handle.clone(),
11564        kind: "symbol".to_string(),
11565        label: name.to_string(),
11566        ref_id: Some(name.to_string()),
11567        path: None,
11568        line: None,
11569        detail: Some("unresolved call target".to_string()),
11570        properties: BTreeMap::new(),
11571        expand: traversal_expand_command(root, &handle),
11572    }
11573}
11574
11575fn traversal_route_node(root: &Path, route: &index::StoredRoute) -> TraversalNode {
11576    let file = relativize(&route.file, root);
11577    let method = route.method.as_deref().unwrap_or("any");
11578    let key = format!(
11579        "route:{file}:{}:{}:{}",
11580        route.line, method, route.route_path
11581    );
11582    let handle = stable_handle("grte", &key);
11583    TraversalNode {
11584        handle: handle.clone(),
11585        kind: "route".to_string(),
11586        label: format!("{} {}", method.to_uppercase(), route.route_path),
11587        ref_id: Some(route.route_path.clone()),
11588        path: Some(file),
11589        line: Some(route.line),
11590        detail: Some(format!(
11591            "{} route handled by {}",
11592            route.framework, route.handler_name
11593        )),
11594        properties: BTreeMap::new(),
11595        expand: traversal_expand_command(root, &handle),
11596    }
11597}
11598
11599fn traversal_session_node(
11600    root: &Path,
11601    markdown_path: &Path,
11602    session_id: Option<&str>,
11603) -> TraversalNode {
11604    let display = relativize_pathbuf(markdown_path, root)
11605        .to_string_lossy()
11606        .replace('\\', "/");
11607    let handle = stable_handle("gses", &format!("session:{display}"));
11608    TraversalNode {
11609        handle: handle.clone(),
11610        kind: "session".to_string(),
11611        label: session_id.unwrap_or(&display).to_string(),
11612        ref_id: session_id.map(str::to_string),
11613        path: Some(display),
11614        line: None,
11615        detail: Some("agent-doc session artifact".to_string()),
11616        properties: BTreeMap::new(),
11617        expand: traversal_expand_command(root, &handle),
11618    }
11619}
11620
11621fn traversal_backlog_node(
11622    root: &Path,
11623    markdown_path: &Path,
11624    id: &str,
11625    text: &str,
11626    line: i64,
11627) -> TraversalNode {
11628    let display = relativize_pathbuf(markdown_path, root)
11629        .to_string_lossy()
11630        .replace('\\', "/");
11631    let handle = stable_handle("gbak", &format!("backlog:{display}:#{id}"));
11632    TraversalNode {
11633        handle: handle.clone(),
11634        kind: "backlog".to_string(),
11635        label: format!("#{id}"),
11636        ref_id: Some(id.to_string()),
11637        path: Some(display),
11638        line: Some(line),
11639        detail: Some(text.to_string()),
11640        properties: BTreeMap::new(),
11641        expand: traversal_expand_command(root, &handle),
11642    }
11643}
11644
11645fn traversal_job_packet_node(
11646    root: &Path,
11647    markdown_path: &Path,
11648    label: &str,
11649    ref_id: Option<&str>,
11650    detail: &str,
11651    line: i64,
11652) -> TraversalNode {
11653    let display = relativize_pathbuf(markdown_path, root)
11654        .to_string_lossy()
11655        .replace('\\', "/");
11656    let handle = stable_handle("gjob", &format!("job:{display}:{line}:{label}"));
11657    TraversalNode {
11658        handle: handle.clone(),
11659        kind: "job_packet".to_string(),
11660        label: label.to_string(),
11661        ref_id: ref_id.map(str::to_string),
11662        path: Some(display),
11663        line: Some(line),
11664        detail: Some(detail.to_string()),
11665        properties: BTreeMap::new(),
11666        expand: traversal_expand_command(root, &handle),
11667    }
11668}
11669
/// One worker result extracted from a single markdown line.
#[derive(Debug, Clone)]
struct ParsedWorkerResult {
    /// Reference id the result is reported for.
    id: String,
    /// Outcome keyword for the result.
    status: String,
    /// File paths associated with the result line.
    touched_files: Vec<String>,
    /// Test commands associated with the result line.
    tests: Vec<String>,
    /// Reference ids recorded as follow-ups of this result.
    follow_up_ids: Vec<String>,
}
11678
11679fn traversal_worker_result_node(
11680    root: &Path,
11681    markdown_path: &Path,
11682    parsed: &ParsedWorkerResult,
11683    line_text: &str,
11684    line: i64,
11685) -> TraversalNode {
11686    let display = relativize_pathbuf(markdown_path, root)
11687        .to_string_lossy()
11688        .replace('\\', "/");
11689    let handle = stable_handle(
11690        "wres",
11691        &format!(
11692            "worker-result:{display}:{}:{}:{}",
11693            parsed.id, parsed.status, line
11694        ),
11695    );
11696    let mut properties = BTreeMap::new();
11697    properties.insert("status".to_string(), parsed.status.clone());
11698    if !parsed.touched_files.is_empty() {
11699        properties.insert("touched_files".to_string(), parsed.touched_files.join(","));
11700    }
11701    if !parsed.tests.is_empty() {
11702        properties.insert("expected_tests".to_string(), parsed.tests.join(" && "));
11703    }
11704    if !parsed.follow_up_ids.is_empty() {
11705        properties.insert("follow_up_ids".to_string(), parsed.follow_up_ids.join(","));
11706    }
11707    TraversalNode {
11708        handle: handle.clone(),
11709        kind: "worker_result".to_string(),
11710        label: format!("{} #{}", parsed.status, parsed.id),
11711        ref_id: Some(parsed.id.clone()),
11712        path: Some(display),
11713        line: Some(line),
11714        detail: Some(line_text.trim().to_string()),
11715        properties,
11716        expand: traversal_expand_command(root, &handle),
11717    }
11718}
11719
/// Splits `input` into the set of lowercase ASCII-alphanumeric tokens of length >= 3.
///
/// Every character that is not an ASCII letter or digit (including `_` and `-`) acts as a
/// separator; shorter fragments are discarded.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for word in input.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if word.len() >= 3 {
            tokens.insert(word.to_ascii_lowercase());
        }
    }
    tokens
}
11729
11730fn traversal_node_tokens(node: &TraversalNode) -> BTreeSet<String> {
11731    let mut tokens = traversal_tokens(&node.label);
11732    if let Some(ref_id) = &node.ref_id {
11733        tokens.extend(traversal_tokens(ref_id));
11734    }
11735    if let Some(path) = &node.path {
11736        tokens.extend(traversal_tokens(path));
11737    }
11738    if let Some(detail) = &node.detail {
11739        tokens.extend(traversal_tokens(detail));
11740    }
11741    tokens
11742}
11743
/// Returns the first non-empty value of an `agent_doc_session:` line in `content`.
///
/// Lines are trimmed before matching, and the value is trimmed as well. Lines whose value
/// is empty are skipped rather than ending the search.
fn parse_agent_doc_session_id(content: &str) -> Option<String> {
    for line in content.lines() {
        let Some(rest) = line.trim().strip_prefix("agent_doc_session:") else {
            continue;
        };
        let value = rest.trim();
        if !value.is_empty() {
            return Some(value.to_owned());
        }
    }
    None
}
11754
/// Parses a checklist line of the form `- [..] [#id] text` into `(id, text)`.
///
/// Returns `None` when the trimmed line does not start with `- [`, contains no `[#`
/// marker, has no closing `]` after that marker, or the id between them is blank. The
/// text is whatever follows the closing bracket, trimmed (possibly empty).
fn parse_backlog_line(line: &str) -> Option<(String, String)> {
    let item = line.trim();
    if !item.starts_with("- [") {
        return None;
    }
    let (_, after_marker) = item.split_once("[#")?;
    let (raw_id, remainder) = after_marker.split_once(']')?;
    let id = raw_id.trim();
    if id.is_empty() {
        return None;
    }
    Some((id.to_owned(), remainder.trim().to_owned()))
}
11771
/// Returns the trimmed remainder of a line that starts with `dispatch ` or `preset `.
///
/// The line is trimmed before matching; `None` is returned when neither keyword leads the
/// line or nothing follows it.
fn parse_queue_dispatch_line(line: &str) -> Option<String> {
    let entry = line.trim();
    for keyword in ["dispatch ", "preset "].iter() {
        if let Some(rest) = entry.strip_prefix(keyword) {
            let target = rest.trim();
            if !target.is_empty() {
                return Some(target.to_owned());
            }
        }
    }
    None
}
11782
/// Extracts the id from a queue line of the form `- do [#id]`.
///
/// Returns `None` when the trimmed line lacks the `- do [#` prefix, has no closing `]`,
/// or the id is blank after trimming.
fn parse_queue_do_line(line: &str) -> Option<String> {
    let (raw_id, _) = line.trim().strip_prefix("- do [#")?.split_once(']')?;
    match raw_id.trim() {
        "" => None,
        id => Some(id.to_owned()),
    }
}
11790
/// Returns the trimmed, non-empty segments that sit at odd positions when `input` is
/// split on backticks, i.e. the text following the 1st, 3rd, 5th, ... backtick.
///
/// The split is purely positional: a trailing segment after an unmatched backtick is
/// reported too.
fn markdown_code_spans(input: &str) -> Vec<String> {
    let mut spans = Vec::new();
    // Segment 0 precedes the first backtick; every second segment after it is a span.
    for segment in input.split('`').skip(1).step_by(2) {
        let span = segment.trim();
        if !span.is_empty() {
            spans.push(span.to_owned());
        }
    }
    spans
}
11800
/// Appends `entry_index` to the posting list of every token in `tokens`, creating the
/// list when the token has not been seen before.
fn push_traversal_token_index(
    index: &mut HashMap<String, Vec<usize>>,
    tokens: &BTreeSet<String>,
    entry_index: usize,
) {
    tokens.iter().for_each(|token| {
        index
            .entry(token.to_owned())
            .or_insert_with(Vec::new)
            .push(entry_index);
    });
}
11810
11811impl<'a> TraversalCodeLookup<'a> {
11812    fn new(
11813        symbols: &'a [TraversalSymbolIndexEntry],
11814        files: &'a [TraversalFileIndexEntry],
11815        routes: &'a [TraversalRouteIndexEntry],
11816    ) -> Self {
11817        let mut symbol_index = HashMap::new();
11818        for (idx, entry) in symbols.iter().enumerate() {
11819            push_traversal_token_index(&mut symbol_index, &entry.tokens, idx);
11820        }
11821        let mut file_index = HashMap::new();
11822        let mut file_path_index = HashMap::new();
11823        for (idx, entry) in files.iter().enumerate() {
11824            push_traversal_token_index(&mut file_index, &entry.tokens, idx);
11825            if let Some(path) = entry.node.path.as_ref() {
11826                file_path_index.insert(path.clone(), path.clone());
11827            }
11828        }
11829        let mut route_index = HashMap::new();
11830        for (idx, entry) in routes.iter().enumerate() {
11831            push_traversal_token_index(&mut route_index, &entry.tokens, idx);
11832        }
11833        Self {
11834            symbols,
11835            files,
11836            routes,
11837            symbol_index,
11838            file_index,
11839            route_index,
11840            file_path_index,
11841        }
11842    }
11843
11844    fn touched_files_for_line(&self, line: &str) -> Vec<String> {
11845        let mut touched_files = BTreeSet::new();
11846        for candidate in markdown_code_spans(line)
11847            .into_iter()
11848            .chain(line.split_whitespace().map(str::to_string))
11849        {
11850            for path in traversal_path_candidates(&candidate) {
11851                if let Some(file) = self.file_path_index.get(&path) {
11852                    touched_files.insert(file.clone());
11853                }
11854            }
11855        }
11856        touched_files.into_iter().collect()
11857    }
11858}
11859
/// Expands one raw token into the path spellings worth looking up in the file index.
///
/// Surrounding quotes, brackets and sentence punctuation are stripped. The result always
/// starts with the fully trimmed token (leading and trailing `.` removed as well) and, when
/// they differ, also contains:
///
/// * the token with its leading `.` preserved, so dot-directories such as `.github/ci.yml`
///   remain resolvable;
/// * that spelling without a leading `./`;
/// * every spelling with trailing `:<digits>` groups removed one at a time, so both
///   `path:line` and `path:line:column` references yield the bare path.
///
/// Returns an empty vector when nothing but punctuation remains. Candidates are unique and
/// ordered from most to least specific per spelling.
fn traversal_path_candidates(candidate: &str) -> Vec<String> {
    fn is_wrapper(ch: char) -> bool {
        matches!(
            ch,
            '`' | '"' | '\'' | ',' | ';' | '!' | '?' | '(' | ')' | '[' | ']' | '{' | '}'
        )
    }
    fn is_wrapper_or_dot(ch: char) -> bool {
        ch == '.' || is_wrapper(ch)
    }

    let trimmed = candidate.trim_matches(is_wrapper_or_dot);
    if trimmed.is_empty() {
        return Vec::new();
    }
    // A trailing `.` is sentence punctuation, but a leading `.` is part of the path.
    let dotted = candidate
        .trim_end_matches(is_wrapper_or_dot)
        .trim_start_matches(is_wrapper);
    let spellings = [Some(trimmed), Some(dotted), dotted.strip_prefix("./")];

    let mut candidates: Vec<String> = Vec::new();
    for spelling in spellings.iter().flatten() {
        let mut rest: &str = spelling;
        loop {
            if !rest.is_empty() && !candidates.iter().any(|known| known == rest) {
                candidates.push(rest.to_string());
            }
            // Peel one `:<digits>` suffix (line, then column) per iteration.
            match rest.rsplit_once(':') {
                Some((path, suffix))
                    if !path.is_empty() && suffix.chars().all(|ch| ch.is_ascii_digit()) =>
                {
                    rest = path;
                }
                _ => break,
            }
        }
    }
    candidates
}
11879
11880fn parse_worker_result_line(
11881    line: &str,
11882    lookup: &TraversalCodeLookup<'_>,
11883) -> Vec<ParsedWorkerResult> {
11884    if line.trim_start().starts_with("- [") {
11885        return Vec::new();
11886    }
11887    let lower = line.to_ascii_lowercase();
11888    let status =
11889        if lower.contains("completed") || lower.contains("code-complete") || lower.contains("done")
11890        {
11891            "completed"
11892        } else if lower.contains("blocked") || lower.contains("externally blocked") {
11893            "blocked"
11894        } else {
11895            return Vec::new();
11896        };
11897    let result_prefix_end = ["follow-up", "follow up", "next:"]
11898        .iter()
11899        .filter_map(|marker| lower.find(marker))
11900        .min()
11901        .unwrap_or(line.len());
11902    let ids = extract_conflict_target_refs(&line[..result_prefix_end]);
11903    if ids.is_empty() {
11904        return Vec::new();
11905    }
11906    let result_ids = ids.iter().cloned().collect::<BTreeSet<_>>();
11907    let all_ids = extract_conflict_target_refs(line);
11908
11909    let touched_files = lookup.touched_files_for_line(line);
11910    let tests = markdown_code_spans(line)
11911        .into_iter()
11912        .filter(|span| span.to_ascii_lowercase().contains("test"))
11913        .collect::<Vec<_>>();
11914
11915    ids.iter()
11916        .map(|id| ParsedWorkerResult {
11917            id: id.clone(),
11918            status: status.to_string(),
11919            touched_files: touched_files.clone(),
11920            tests: tests.clone(),
11921            follow_up_ids: all_ids
11922                .iter()
11923                .filter(|other| *other != id && !result_ids.contains(*other))
11924                .cloned()
11925                .collect(),
11926        })
11927        .collect()
11928}
11929
/// Resolves `path_hint` (against `root` when it is relative) and returns it only when it
/// has the `md` extension and exists as a regular file.
fn hinted_markdown_file(root: &Path, path_hint: &Path) -> Option<PathBuf> {
    let candidate = match path_hint.is_absolute() {
        true => path_hint.to_path_buf(),
        false => root.join(path_hint),
    };
    let has_md_extension = candidate.extension().and_then(|ext| ext.to_str()) == Some("md");
    // The extension is checked first so the file system is only touched for `.md` hints.
    if has_md_extension && candidate.is_file() {
        Some(candidate)
    } else {
        None
    }
}
11941
11942fn traversal_markdown_content_looks_like_session(content: &str) -> bool {
11943    parse_agent_doc_session_id(content).is_some()
11944        || content.contains("<!-- agent:exchange")
11945        || content.contains("<!-- agent:backlog")
11946        || content.contains("## Backlog")
11947}
11948
11949fn markdown_files_for_traversal(root: &Path, path_hint: &Path) -> Result<Vec<PathBuf>> {
11950    if let Some(hinted_path) = hinted_markdown_file(root, path_hint) {
11951        return Ok(vec![hinted_path]);
11952    }
11953    let mut files = Vec::new();
11954    let walker = ignore::WalkBuilder::new(root)
11955        .hidden(true)
11956        .git_ignore(true)
11957        .git_global(true)
11958        .git_exclude(true)
11959        .build();
11960    for result in walker {
11961        let entry =
11962            result.with_context(|| format!("walking markdown files under {}", root.display()))?;
11963        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
11964            continue;
11965        }
11966        if traversal_path_is_generated_artifact(root, root, entry.path()) {
11967            continue;
11968        }
11969        if entry.path().extension().and_then(|ext| ext.to_str()) == Some("md") {
11970            files.push(entry.path().to_path_buf());
11971        }
11972    }
11973    files.sort();
11974    Ok(files)
11975}
11976
/// Renders `path` for use in a watermark: relative to `root` when `root` is a prefix of
/// it, unchanged otherwise, and always with backslashes replaced by `/`.
fn traversal_watermark_path(root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
11983
11984fn push_traversal_metadata_watermark_part(
11985    root: &Path,
11986    path: &Path,
11987    label: &str,
11988    parts: &mut Vec<String>,
11989) {
11990    let display = traversal_watermark_path(root, path);
11991    match fs::metadata(path) {
11992        Ok(metadata) => {
11993            let (secs, nanos) = metadata
11994                .modified()
11995                .ok()
11996                .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
11997                .map(|duration| (duration.as_secs(), duration.subsec_nanos()))
11998                .unwrap_or((0, 0));
11999            parts.push(format!(
12000                "{label}:{display}:len={}:mtime={secs}.{nanos}",
12001                metadata.len()
12002            ));
12003        }
12004        Err(_) => parts.push(format!("{label}:{display}:missing")),
12005    }
12006}
12007
12008#[derive(Serialize)]
12009struct TraversalSummaryWatermarkRow<'a> {
12010    symbol_name: &'a str,
12011    file_path: &'a str,
12012    entities: &'a Option<Vec<summarize::Entity>>,
12013    relationships: &'a Option<Vec<summarize::Relationship>>,
12014    concept_labels: &'a Option<Vec<String>>,
12015}
12016
12017fn push_traversal_summaries_watermark_part(root: &Path, parts: &mut Vec<String>) -> Result<()> {
12018    let summaries_db = root.join(".tsift/summaries.db");
12019    if !summaries_db.exists() {
12020        parts.push("summaries_db:absent".to_string());
12021        return Ok(());
12022    }
12023
12024    match summarize::SummaryDb::open_read_only_resilient(&summaries_db)
12025        .and_then(|summary_db| summary_db.all())
12026    {
12027        Ok(summaries) => {
12028            let rows = summaries
12029                .iter()
12030                .map(|summary| TraversalSummaryWatermarkRow {
12031                    symbol_name: &summary.symbol_name,
12032                    file_path: &summary.file_path,
12033                    entities: &summary.entities,
12034                    relationships: &summary.relationships,
12035                    concept_labels: &summary.concept_labels,
12036                })
12037                .collect::<Vec<_>>();
12038            parts.push(format!(
12039                "summaries_db:rows={}:semantic_hash={}",
12040                rows.len(),
12041                content_hash(&rows)?
12042            ));
12043        }
12044        Err(_) => {
12045            push_traversal_metadata_watermark_part(
12046                root,
12047                &summaries_db,
12048                "summaries_db_unreadable",
12049                parts,
12050            );
12051        }
12052    }
12053    Ok(())
12054}
12055
/// Test-only shim that forwards to `resolution::relative_path_is_generated_artifact`.
#[cfg(test)]
fn traversal_relative_path_is_generated_artifact(relative: &str) -> bool {
    resolution::relative_path_is_generated_artifact(relative)
}
12060
12061fn traversal_path_is_generated_artifact(root: &Path, source_root: &Path, path: &Path) -> bool {
12062    resolution::path_is_generated_artifact(root, source_root, path)
12063}
12064
12065fn traversal_index_snapshot_part_is_generated(root: &Path, source_root: &Path, part: &str) -> bool {
12066    resolution::index_snapshot_part_is_generated(root, source_root, part)
12067}
12068
12069pub(crate) fn traversal_source_watermark(
12070    root: &Path,
12071    path_hint: &Path,
12072    scope: Option<&str>,
12073    session_only: bool,
12074) -> Result<Option<String>> {
12075    let mut parts = vec![
12076        format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
12077        format!("scope:{}", scope.unwrap_or("root")),
12078        format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
12079        format!("session_only:{session_only}"),
12080    ];
12081
12082    if !session_only || hinted_markdown_file(root, path_hint).is_none() {
12083        let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
12084            Ok(targets) => targets,
12085            Err(_) => return Ok(None),
12086        };
12087        let Some(target) = targets.into_iter().next() else {
12088            return Ok(None);
12089        };
12090        let db = match index::IndexDb::open_read_only_resilient(&target.db_path) {
12091            Ok(db) => db,
12092            Err(_) => return Ok(None),
12093        };
12094        parts.push(format!("index_label:{}", target.label));
12095        parts.push(format!(
12096            "index_scope:{}",
12097            target.scope_name.as_deref().unwrap_or("root")
12098        ));
12099        parts.push(format!(
12100            "index_source_root:{}",
12101            traversal_watermark_path(root, &target.source_root)
12102        ));
12103        let mut snapshot_rows = 0usize;
12104        for part in db.source_snapshot_parts()? {
12105            if traversal_index_snapshot_part_is_generated(root, &target.source_root, &part) {
12106                continue;
12107            }
12108            snapshot_rows += 1;
12109            parts.push(format!("index_snapshot:{part}"));
12110        }
12111        parts.push(format!("index_snapshot_rows:{snapshot_rows}"));
12112    }
12113
12114    let markdown_files = markdown_files_for_traversal(root, path_hint)?;
12115    parts.push(format!("markdown_count:{}", markdown_files.len()));
12116    for markdown_path in markdown_files {
12117        push_traversal_metadata_watermark_part(root, &markdown_path, "markdown", &mut parts);
12118    }
12119
12120    push_traversal_summaries_watermark_part(root, &mut parts)?;
12121
12122    Ok(Some(content_hash(&parts)?))
12123}
12124
12125fn ranked_symbol_matches<'a>(
12126    query_tokens: &BTreeSet<String>,
12127    entries: &'a [TraversalSymbolIndexEntry],
12128    index: &HashMap<String, Vec<usize>>,
12129) -> Vec<(usize, &'a TraversalSymbolIndexEntry)> {
12130    let mut scores = BTreeMap::<usize, usize>::new();
12131    for token in query_tokens {
12132        if let Some(indices) = index.get(token) {
12133            for idx in indices {
12134                *scores.entry(*idx).or_default() += 1;
12135            }
12136        }
12137    }
12138    let mut matches = scores
12139        .into_iter()
12140        .map(|(idx, score)| (score, &entries[idx]))
12141        .collect::<Vec<_>>();
12142    matches.sort_by(|(left_score, left), (right_score, right)| {
12143        right_score
12144            .cmp(left_score)
12145            .then_with(|| left.node.label.cmp(&right.node.label))
12146            .then_with(|| left.handle.cmp(&right.handle))
12147    });
12148    matches
12149}
12150
12151fn ranked_file_matches<'a>(
12152    query_tokens: &BTreeSet<String>,
12153    entries: &'a [TraversalFileIndexEntry],
12154    index: &HashMap<String, Vec<usize>>,
12155) -> Vec<(usize, &'a TraversalFileIndexEntry)> {
12156    let mut scores = BTreeMap::<usize, usize>::new();
12157    for token in query_tokens {
12158        if let Some(indices) = index.get(token) {
12159            for idx in indices {
12160                *scores.entry(*idx).or_default() += 1;
12161            }
12162        }
12163    }
12164    let mut matches = scores
12165        .into_iter()
12166        .map(|(idx, score)| (score, &entries[idx]))
12167        .collect::<Vec<_>>();
12168    matches.sort_by(|(left_score, left), (right_score, right)| {
12169        right_score
12170            .cmp(left_score)
12171            .then_with(|| left.node.label.cmp(&right.node.label))
12172            .then_with(|| left.handle.cmp(&right.handle))
12173    });
12174    matches
12175}
12176
12177fn ranked_route_matches<'a>(
12178    query_tokens: &BTreeSet<String>,
12179    entries: &'a [TraversalRouteIndexEntry],
12180    index: &HashMap<String, Vec<usize>>,
12181) -> Vec<(usize, &'a TraversalRouteIndexEntry)> {
12182    let mut scores = BTreeMap::<usize, usize>::new();
12183    for token in query_tokens {
12184        if let Some(indices) = index.get(token) {
12185            for idx in indices {
12186                *scores.entry(*idx).or_default() += 1;
12187            }
12188        }
12189    }
12190    let mut matches = scores
12191        .into_iter()
12192        .map(|(idx, score)| (score, &entries[idx]))
12193        .collect::<Vec<_>>();
12194    matches.sort_by(|(left_score, left), (right_score, right)| {
12195        right_score
12196            .cmp(left_score)
12197            .then_with(|| left.node.label.cmp(&right.node.label))
12198            .then_with(|| left.handle.cmp(&right.handle))
12199    });
12200    matches
12201}
12202
12203fn link_backlog_to_code_nodes(
12204    graph: &mut TraversalGraphBuild,
12205    backlog: &TraversalNode,
12206    text: &str,
12207    lookup: &TraversalCodeLookup<'_>,
12208    limit: usize,
12209) {
12210    let mut query_tokens = traversal_tokens(text);
12211    if let Some(ref_id) = &backlog.ref_id {
12212        query_tokens.extend(traversal_tokens(ref_id));
12213    }
12214    if query_tokens.is_empty() {
12215        return;
12216    }
12217
12218    for (score, entry) in ranked_symbol_matches(&query_tokens, lookup.symbols, &lookup.symbol_index)
12219        .into_iter()
12220        .take(limit)
12221    {
12222        graph.add_edge(
12223            &backlog.handle,
12224            &entry.handle,
12225            "mentions",
12226            Some("backlog text matches symbol tokens".to_string()),
12227            score,
12228        );
12229    }
12230
12231    for (score, entry) in ranked_file_matches(&query_tokens, lookup.files, &lookup.file_index)
12232        .into_iter()
12233        .take(limit.min(5))
12234    {
12235        graph.add_edge(
12236            &backlog.handle,
12237            &entry.handle,
12238            "mentions",
12239            Some("backlog text matches file tokens".to_string()),
12240            score,
12241        );
12242    }
12243
12244    for (score, entry) in ranked_route_matches(&query_tokens, lookup.routes, &lookup.route_index)
12245        .into_iter()
12246        .take(limit.min(5))
12247    {
12248        graph.add_edge(
12249            &backlog.handle,
12250            &entry.handle,
12251            "mentions",
12252            Some("backlog text matches route tokens".to_string()),
12253            score,
12254        );
12255    }
12256}
12257
12258fn load_agent_doc_traversal_nodes(
12259    root: &Path,
12260    path_hint: &Path,
12261    graph: &mut TraversalGraphBuild,
12262    lookup: &TraversalCodeLookup<'_>,
12263) -> Result<()> {
12264    for markdown_path in markdown_files_for_traversal(root, path_hint)? {
12265        let content = match fs::read_to_string(&markdown_path) {
12266            Ok(content) => content,
12267            Err(err) => {
12268                graph.warnings.push(format!(
12269                    "session artifact unavailable: {}: {err}",
12270                    markdown_path.display()
12271                ));
12272                continue;
12273            }
12274        };
12275        if !traversal_markdown_content_looks_like_session(&content) {
12276            continue;
12277        }
12278
12279        let session_id = parse_agent_doc_session_id(&content);
12280        let session = traversal_session_node(root, &markdown_path, session_id.as_deref());
12281        graph.add_node(session.clone());
12282        let lines = content.lines().collect::<Vec<_>>();
12283        let mut backlog_by_id = BTreeMap::<String, TraversalNode>::new();
12284        for (idx, line) in lines.iter().enumerate() {
12285            let Some((id, text)) = parse_backlog_line(line) else {
12286                continue;
12287            };
12288            let backlog = traversal_backlog_node(root, &markdown_path, &id, &text, idx as i64 + 1);
12289            graph.add_node(backlog.clone());
12290            backlog_by_id.insert(id.clone(), backlog.clone());
12291            graph.add_edge(
12292                &session.handle,
12293                &backlog.handle,
12294                "contains",
12295                Some("session backlog item".to_string()),
12296                1,
12297            );
12298            link_backlog_to_code_nodes(graph, &backlog, &text, lookup, 8);
12299        }
12300
12301        let mut in_queue = false;
12302        let mut job_by_id = BTreeMap::<String, TraversalNode>::new();
12303        for (idx, line) in lines.iter().enumerate() {
12304            let trimmed = line.trim();
12305            if trimmed.starts_with("<!-- agent:queue") {
12306                in_queue = true;
12307                continue;
12308            }
12309            if trimmed.starts_with("<!-- /agent:queue") {
12310                in_queue = false;
12311                continue;
12312            }
12313            if !in_queue {
12314                continue;
12315            }
12316            if let Some(dispatch) = parse_queue_dispatch_line(line) {
12317                let dispatch_ref = dispatch.strip_prefix('#').unwrap_or(dispatch.as_str());
12318                let node = traversal_job_packet_node(
12319                    root,
12320                    &markdown_path,
12321                    &format!("dispatch {dispatch}"),
12322                    Some(dispatch_ref),
12323                    "agent-doc dispatch preset",
12324                    idx as i64 + 1,
12325                );
12326                graph.add_node(node.clone());
12327                graph.add_edge(
12328                    &session.handle,
12329                    &node.handle,
12330                    "contains",
12331                    Some("session queued dispatch".to_string()),
12332                    1,
12333                );
12334                continue;
12335            }
12336            if let Some(id) = parse_queue_do_line(line) {
12337                let detail = backlog_by_id
12338                    .get(&id)
12339                    .and_then(|node| node.detail.clone())
12340                    .unwrap_or_else(|| "queued backlog item".to_string());
12341                let node = traversal_job_packet_node(
12342                    root,
12343                    &markdown_path,
12344                    &format!("do #{id}"),
12345                    Some(&id),
12346                    &detail,
12347                    idx as i64 + 1,
12348                );
12349                graph.add_node(node.clone());
12350                graph.add_edge(
12351                    &session.handle,
12352                    &node.handle,
12353                    "contains",
12354                    Some("session queued job packet".to_string()),
12355                    1,
12356                );
12357                if let Some(backlog) = backlog_by_id.get(&id) {
12358                    graph.add_edge(
12359                        &node.handle,
12360                        &backlog.handle,
12361                        "targets",
12362                        Some("queued backlog item".to_string()),
12363                        1,
12364                    );
12365                }
12366                job_by_id.insert(id, node);
12367            }
12368        }
12369
12370        let mut seen_results = BTreeSet::<(String, String, i64)>::new();
12371        for (idx, line) in lines.iter().enumerate() {
12372            for parsed in parse_worker_result_line(line, lookup) {
12373                let line_no = idx as i64 + 1;
12374                if !seen_results.insert((parsed.id.clone(), parsed.status.clone(), line_no)) {
12375                    continue;
12376                }
12377                let result =
12378                    traversal_worker_result_node(root, &markdown_path, &parsed, line, line_no);
12379                graph.add_node(result.clone());
12380                graph.add_edge(
12381                    &session.handle,
12382                    &result.handle,
12383                    "contains",
12384                    Some("session worker result".to_string()),
12385                    1,
12386                );
12387                if let Some(backlog) = backlog_by_id.get(&parsed.id) {
12388                    graph.add_edge(
12389                        &backlog.handle,
12390                        &result.handle,
12391                        "has_result",
12392                        Some(format!("worker result {}", parsed.status)),
12393                        1,
12394                    );
12395                }
12396                if let Some(job) = job_by_id.get(&parsed.id) {
12397                    graph.add_edge(
12398                        &job.handle,
12399                        &result.handle,
12400                        "has_result",
12401                        Some(format!("queued worker result {}", parsed.status)),
12402                        1,
12403                    );
12404                }
12405                let mut result_text = line.to_string();
12406                if !parsed.touched_files.is_empty() {
12407                    result_text.push(' ');
12408                    result_text.push_str(&parsed.touched_files.join(" "));
12409                }
12410                link_backlog_to_code_nodes(graph, &result, &result_text, lookup, 8);
12411            }
12412        }
12413    }
12414    Ok(())
12415}
12416
/// Index gate result that is stored in, and cloned out of, the in-process gate cache.
#[derive(Clone, Debug)]
struct AgentDocIndexGate {
    /// Path of the index database, when there is one.
    db_path: Option<PathBuf>,
    /// Source root associated with the gate.
    source_root: PathBuf,
    /// Diagnostic messages carried alongside the gate.
    diagnostics: Vec<String>,
}
12423
/// Key of the in-process index gate cache: one entry per combination of root, path hint,
/// scope and packet label.
#[derive(Clone, PartialEq, Eq, Hash)]
struct AgentDocIndexGateCacheKey {
    root: PathBuf,
    path_hint: PathBuf,
    scope: Option<String>,
    packet_label: String,
}
12431
12432fn agent_doc_index_gate_cache() -> &'static std::sync::Mutex<
12433    std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>,
12434> {
12435    static CACHE: std::sync::OnceLock<
12436        std::sync::Mutex<std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>>,
12437    > = std::sync::OnceLock::new();
12438    CACHE.get_or_init(|| std::sync::Mutex::new(std::collections::HashMap::new()))
12439}
12440
12441fn prepare_agent_doc_index_gate_cached(
12442    root: &Path,
12443    path_hint: &Path,
12444    scope: Option<&str>,
12445    packet_label: &str,
12446) -> (AgentDocIndexGate, String) {
12447    let key = AgentDocIndexGateCacheKey {
12448        root: root.to_path_buf(),
12449        path_hint: path_hint.to_path_buf(),
12450        scope: scope.map(str::to_string),
12451        packet_label: packet_label.to_string(),
12452    };
12453    if let Ok(cache) = agent_doc_index_gate_cache().lock()
12454        && let Some(cached) = cache.get(&key)
12455    {
12456        return (
12457            cached.clone(),
12458            "reused from in-process index gate cache by root/path_hint/scope key".to_string(),
12459        );
12460    }
12461    let gate = prepare_agent_doc_index_gate(root, path_hint, scope, packet_label);
12462    if let Ok(mut cache) = agent_doc_index_gate_cache().lock() {
12463        cache.insert(key, gate.clone());
12464    }
12465    (
12466        gate,
12467        "fresh inspection/refresh — cache miss on this preparation key".to_string(),
12468    )
12469}
12470
12471fn index_reason_for_state(state: SearchIndexState) -> Option<RebuildSearchReason> {
12472    match state {
12473        SearchIndexState::Fresh => None,
12474        SearchIndexState::Missing => Some(RebuildSearchReason::Missing),
12475        SearchIndexState::Stale { stale_files } => Some(RebuildSearchReason::Stale { stale_files }),
12476    }
12477}
12478
12479fn index_reason_detail(target: &SearchIndexTarget, reason: RebuildSearchReason) -> String {
12480    rebuild_search_target_detail(&RebuildSearchTarget {
12481        label: target.label.clone(),
12482        reason,
12483        reindex_cmd: target.reindex_cmd.clone(),
12484    })
12485}
12486
12487fn index_refresh_diagnostic(
12488    target: &SearchIndexTarget,
12489    reason: RebuildSearchReason,
12490    summary: &index::IndexSummary,
12491    packet_label: &str,
12492) -> String {
12493    let changed = summary.new + summary.modified + summary.deleted;
12494    format!(
12495        "index refreshed: {}; updated {} changed file{} before {}",
12496        index_reason_detail(target, reason),
12497        changed,
12498        if changed == 1 { "" } else { "s" },
12499        packet_label
12500    )
12501}
12502
12503fn index_refresh_fallback_diagnostic(
12504    target: &SearchIndexTarget,
12505    reason: RebuildSearchReason,
12506    err: &anyhow::Error,
12507    packet_label: &str,
12508) -> String {
12509    format!(
12510        "{}; could not refresh before {}: {err:#}; falling back to raw source file nodes",
12511        index_reason_detail(target, reason),
12512        packet_label
12513    )
12514}
12515
12516fn graph_fallback_source_root(root: &Path, path_hint: &Path, scope: Option<&str>) -> PathBuf {
12517    if let Some(scope_name) = scope
12518        && let Ok(scope) = config::Config::resolve_submodule(root, scope_name)
12519    {
12520        return scope.source_root;
12521    }
12522    if let Ok(Some(scope)) = config::Config::infer_submodule_from_path(root, path_hint) {
12523        return scope.source_root;
12524    }
12525    if let Ok(Some(scope)) = infer_agent_doc_task_submodule(root, path_hint) {
12526        return scope.source_root;
12527    }
12528    root.to_path_buf()
12529}
12530
12531fn prepare_agent_doc_index_gate(
12532    root: &Path,
12533    path_hint: &Path,
12534    scope: Option<&str>,
12535    packet_label: &str,
12536) -> AgentDocIndexGate {
12537    let fallback_source_root = graph_fallback_source_root(root, path_hint, scope);
12538    let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
12539        Ok(targets) => targets,
12540        Err(err) => {
12541            return AgentDocIndexGate {
12542                db_path: None,
12543                source_root: fallback_source_root,
12544                diagnostics: vec![format!(
12545                    "code index unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
12546                )],
12547            };
12548        }
12549    };
12550    let Some(target) = targets.into_iter().next() else {
12551        return AgentDocIndexGate {
12552            db_path: None,
12553            source_root: fallback_source_root,
12554            diagnostics: vec![format!(
12555                "code index unavailable before {packet_label}: no index target resolved; falling back to raw source file nodes"
12556            )],
12557        };
12558    };
12559
12560    let state = match inspect_search_index(&target) {
12561        Ok(state) => state,
12562        Err(err) => {
12563            return AgentDocIndexGate {
12564                db_path: None,
12565                source_root: target.source_root,
12566                diagnostics: vec![format!(
12567                    "code index freshness unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
12568                )],
12569            };
12570        }
12571    };
12572
12573    let Some(reason) = index_reason_for_state(state) else {
12574        return AgentDocIndexGate {
12575            db_path: Some(target.db_path),
12576            source_root: target.source_root,
12577            diagnostics: Vec::new(),
12578        };
12579    };
12580
12581    match apply_search_index_update(root, &target) {
12582        Ok(summary) => {
12583            // #gdbgatecold: the index was just rewritten, so any cached
12584            // pre-refresh inspection result for this scope (held by the
12585            // active `InspectScopeGuard`) is stale. Drop it so the next
12586            // `inspect_read_only` re-reads the fresh index.
12587            index::inspect_scope_invalidate_all();
12588            let diagnostics = vec![index_refresh_diagnostic(
12589                &target,
12590                reason,
12591                &summary,
12592                packet_label,
12593            )];
12594            AgentDocIndexGate {
12595                db_path: Some(target.db_path),
12596                source_root: target.source_root,
12597                diagnostics,
12598            }
12599        }
12600        Err(err) => {
12601            let diagnostics = vec![index_refresh_fallback_diagnostic(
12602                &target,
12603                reason,
12604                &err,
12605                packet_label,
12606            )];
12607            AgentDocIndexGate {
12608                db_path: None,
12609                source_root: target.source_root,
12610                diagnostics,
12611            }
12612        }
12613    }
12614}
12615
12616fn add_raw_source_file_nodes(
12617    root: &Path,
12618    source_root: &Path,
12619    graph: &mut TraversalGraphBuild,
12620    file_entries: &mut Vec<TraversalFileIndexEntry>,
12621) -> Result<()> {
12622    let mut entries = walk::walk_files(source_root)?;
12623    entries.sort_by(|left, right| left.path.cmp(&right.path));
12624    for entry in entries {
12625        let file = entry.path.to_string_lossy();
12626        let node = traversal_raw_source_file_node(root, file.as_ref());
12627        let entry = TraversalFileIndexEntry {
12628            handle: node.handle.clone(),
12629            tokens: traversal_node_tokens(&node),
12630            node: node.clone(),
12631        };
12632        graph.add_node(node);
12633        file_entries.push(entry);
12634    }
12635    Ok(())
12636}
12637
12638fn build_traversal_graph_source_with_options(
12639    root: &Path,
12640    path_hint: &Path,
12641    scope: Option<&str>,
12642    session_only: bool,
12643) -> Result<TraversalGraphBuild> {
12644    let mut graph = TraversalGraphBuild::default();
12645    let mut symbol_entries = Vec::new();
12646    let mut file_entries = Vec::new();
12647    let mut route_entries = Vec::new();
12648    let bounded_session_projection = hinted_markdown_file(root, path_hint).is_some();
12649    if !session_only || hinted_markdown_file(root, path_hint).is_none() {
12650        let (gate, _cache_detail) =
12651            prepare_agent_doc_index_gate_cached(root, path_hint, scope, "graph traversal packet");
12652        graph.warnings.extend(gate.diagnostics);
12653
12654        match gate.db_path {
12655            Some(db_path) if db_path.exists() => {
12656                let db = index::IndexDb::open_read_only_resilient(&db_path)?;
12657                let file_paths = db.file_paths()?;
12658                let mut file_handle_by_path = HashMap::<String, String>::new();
12659                for file in file_paths {
12660                    if traversal_path_is_generated_artifact(
12661                        root,
12662                        &gate.source_root,
12663                        Path::new(&file),
12664                    ) {
12665                        continue;
12666                    }
12667                    let node = traversal_file_node(root, &file);
12668                    let entry = TraversalFileIndexEntry {
12669                        handle: node.handle.clone(),
12670                        tokens: traversal_node_tokens(&node),
12671                        node: node.clone(),
12672                    };
12673                    if let Some(path) = entry.node.path.as_ref() {
12674                        file_handle_by_path.insert(path.clone(), entry.handle.clone());
12675                    }
12676                    graph.add_node(node);
12677                    file_entries.push(entry);
12678                }
12679
12680                let symbols = db.all_symbols()?;
12681                let mut symbol_by_file_name_line = HashMap::new();
12682                let mut first_symbol_by_name = BTreeMap::<String, String>::new();
12683                for symbol in symbols.iter().filter(|symbol| {
12684                    !traversal_path_is_generated_artifact(
12685                        root,
12686                        &gate.source_root,
12687                        Path::new(&symbol.file),
12688                    )
12689                }) {
12690                    let node = traversal_symbol_node(root, symbol);
12691                    let file = relativize(&symbol.file, root);
12692                    symbol_by_file_name_line.insert(
12693                        format!("{file}:{}:{}", symbol.line, symbol.name),
12694                        node.handle.clone(),
12695                    );
12696                    first_symbol_by_name
12697                        .entry(symbol.name.clone())
12698                        .or_insert_with(|| node.handle.clone());
12699                    let entry = TraversalSymbolIndexEntry {
12700                        handle: node.handle.clone(),
12701                        tokens: traversal_node_tokens(&node),
12702                        node: node.clone(),
12703                    };
12704                    graph.add_node(node.clone());
12705                    if let Some(file_handle) = file_handle_by_path.get(&file) {
12706                        graph.add_edge(
12707                            file_handle,
12708                            &node.handle,
12709                            "defines",
12710                            Some("file defines symbol".to_string()),
12711                            1,
12712                        );
12713                    }
12714                    symbol_entries.push(entry);
12715                }
12716
12717                if !bounded_session_projection {
12718                    for edge in db.all_stored_edges()? {
12719                        if traversal_path_is_generated_artifact(
12720                            root,
12721                            &gate.source_root,
12722                            Path::new(&edge.caller_file),
12723                        ) {
12724                            continue;
12725                        }
12726                        let caller_file = relativize(&edge.caller_file, root);
12727                        let caller_key =
12728                            format!("{caller_file}:{}:{}", edge.caller_line, edge.caller_name);
12729                        let Some(caller_handle) =
12730                            symbol_by_file_name_line.get(&caller_key).cloned()
12731                        else {
12732                            continue;
12733                        };
12734                        let callee_handle = if let Some(handle) =
12735                            first_symbol_by_name.get(&edge.callee_name)
12736                        {
12737                            handle.clone()
12738                        } else {
12739                            let node = traversal_unresolved_symbol_node(root, &edge.callee_name);
12740                            let handle = node.handle.clone();
12741                            graph.add_node(node);
12742                            handle
12743                        };
12744                        graph.add_edge(
12745                            &caller_handle,
12746                            &callee_handle,
12747                            "calls",
12748                            Some(format!("call site {}:{}", caller_file, edge.call_site_line)),
12749                            1,
12750                        );
12751                    }
12752                }
12753
12754                for route in db.all_routes()? {
12755                    if traversal_path_is_generated_artifact(
12756                        root,
12757                        &gate.source_root,
12758                        Path::new(&route.file),
12759                    ) {
12760                        continue;
12761                    }
12762                    let node = traversal_route_node(root, &route);
12763                    let entry = TraversalRouteIndexEntry {
12764                        handle: node.handle.clone(),
12765                        tokens: traversal_node_tokens(&node),
12766                        node: node.clone(),
12767                    };
12768                    graph.add_node(node.clone());
12769                    if let Some(path) = node.path.as_ref()
12770                        && let Some(file_handle) = file_handle_by_path.get(path)
12771                    {
12772                        graph.add_edge(
12773                            file_handle,
12774                            &node.handle,
12775                            "defines_route",
12776                            Some("file declares route".to_string()),
12777                            1,
12778                        );
12779                    }
12780                    let handler_handle =
12781                        if let Some(handle) = first_symbol_by_name.get(&route.handler_name) {
12782                            handle.clone()
12783                        } else {
12784                            let node = traversal_unresolved_symbol_node(root, &route.handler_name);
12785                            let handle = node.handle.clone();
12786                            graph.add_node(node);
12787                            handle
12788                        };
12789                    graph.add_edge(
12790                        &entry.handle,
12791                        &handler_handle,
12792                        "handled_by",
12793                        Some("route handler reference".to_string()),
12794                        1,
12795                    );
12796                    route_entries.push(entry);
12797                }
12798            }
12799            _ => {
12800                add_raw_source_file_nodes(root, &gate.source_root, &mut graph, &mut file_entries)
12801                    .with_context(|| {
12802                    format!(
12803                        "loading raw source fallback nodes from {}",
12804                        gate.source_root.display()
12805                    )
12806                })?;
12807            }
12808        }
12809    }
12810
12811    let code_lookup = TraversalCodeLookup::new(&symbol_entries, &file_entries, &route_entries);
12812    load_agent_doc_traversal_nodes(root, path_hint, &mut graph, &code_lookup)?;
12813    Ok(graph)
12814}
12815
/// Test-only shortcut: builds the traversal graph source with `session_only`
/// turned off.
#[cfg(test)]
fn build_traversal_graph_source(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<TraversalGraphBuild> {
    let session_only = false;
    build_traversal_graph_source_with_options(root, path_hint, scope, session_only)
}
12824
12825pub(crate) fn write_traversal_graph_store_with_options(
12826    root: &Path,
12827    path_hint: &Path,
12828    scope: Option<&str>,
12829    session_only: bool,
12830) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
12831    let source_graph =
12832        build_traversal_graph_source_with_options(root, path_hint, scope, session_only)?;
12833    let projection = traversal_projection_from_graph(root, scope, &source_graph)?;
12834    let graph_db = graph_substrate_db_path(root, scope);
12835    let mut store = SqliteGraphStore::open(&graph_db)?;
12836    let source_watermark = traversal_source_watermark(root, path_hint, scope, session_only)
12837        .ok()
12838        .flatten()
12839        .or_else(|| graph_projection_content_hash(&projection));
12840    let refresh = store.replace_projection_with_version(
12841        scope.unwrap_or("root"),
12842        &projection,
12843        Some(GRAPH_PROJECTION_VERSION),
12844        source_watermark,
12845    )?;
12846    Ok((source_graph, refresh))
12847}
12848
12849pub(crate) fn write_traversal_graph_store(
12850    root: &Path,
12851    path_hint: &Path,
12852    scope: Option<&str>,
12853) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
12854    write_traversal_graph_store_with_options(root, path_hint, scope, false)
12855}
12856
12857fn refresh_traversal_graph_store_with_options(
12858    root: &Path,
12859    path_hint: &Path,
12860    scope: Option<&str>,
12861    session_only: bool,
12862) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
12863    let (source_graph, refresh) =
12864        write_traversal_graph_store_with_options(root, path_hint, scope, session_only)?;
12865    let graph_db = graph_substrate_db_path(root, scope);
12866    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
12867    let mut graph = traversal_graph_from_store(root, &store)?;
12868    graph.warnings = source_graph.warnings;
12869    Ok((graph, refresh))
12870}
12871
12872fn refresh_traversal_graph_store(
12873    root: &Path,
12874    path_hint: &Path,
12875    scope: Option<&str>,
12876) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
12877    refresh_traversal_graph_store_with_options(root, path_hint, scope, false)
12878}
12879
12880pub(crate) fn build_traversal_graph(
12881    root: &Path,
12882    path_hint: &Path,
12883    scope: Option<&str>,
12884) -> Result<TraversalGraphBuild> {
12885    let (graph, _refresh) = refresh_traversal_graph_store(root, path_hint, scope)?;
12886    Ok(graph)
12887}
12888
/// Ranks a node kind for query resolution; lower values win ties.
///
/// Known kinds map to their position in the ordered table below; any other
/// kind gets the value one past the last table entry (9).
fn traversal_query_kind_priority(kind: &str) -> usize {
    const KINDS_BY_PRIORITY: [&str; 9] = [
        "backlog",
        "job_packet",
        "worker_result",
        "symbol",
        "file",
        "route",
        "session",
        "semantic_concept",
        "semantic_entity",
    ];
    KINDS_BY_PRIORITY
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(KINDS_BY_PRIORITY.len())
}
12903
12904fn traversal_node_match_rank(node: &TraversalNode, query: &str) -> Option<(usize, usize, String)> {
12905    let trimmed = query.trim();
12906    if trimmed.is_empty() {
12907        return None;
12908    }
12909    let kind_priority = traversal_query_kind_priority(&node.kind);
12910    if node.handle == trimmed {
12911        return Some((0, kind_priority, node.handle.clone()));
12912    }
12913    if node.path.as_deref() == Some(trimmed) {
12914        let path_priority = if node.kind == "file" {
12915            0
12916        } else {
12917            kind_priority.saturating_add(1)
12918        };
12919        return Some((1, path_priority, node.handle.clone()));
12920    }
12921    let normalized_backlog = trimmed.trim_start_matches('#');
12922    if node.ref_id.as_deref() == Some(trimmed) || node.ref_id.as_deref() == Some(normalized_backlog)
12923    {
12924        return Some((2, kind_priority, node.handle.clone()));
12925    }
12926    if node.label == trimmed || (node.kind == "symbol" && node.label == normalized_backlog) {
12927        return Some((3, kind_priority, node.handle.clone()));
12928    }
12929    None
12930}
12931
12932fn resolve_traversal_node<'a>(
12933    graph: &'a TraversalGraphBuild,
12934    query: &str,
12935) -> Option<&'a TraversalNode> {
12936    graph
12937        .nodes
12938        .values()
12939        .filter_map(|node| traversal_node_match_rank(node, query).map(|rank| (rank, node)))
12940        .min_by(|(left_rank, _), (right_rank, _)| left_rank.cmp(right_rank))
12941        .map(|(_, node)| node)
12942}
12943
12944fn traversal_adjacency(edges: &[TraversalEdge]) -> BTreeMap<String, Vec<String>> {
12945    let mut adj = BTreeMap::<String, BTreeSet<String>>::new();
12946    for edge in edges {
12947        adj.entry(edge.from.clone())
12948            .or_default()
12949            .insert(edge.to.clone());
12950        adj.entry(edge.to.clone())
12951            .or_default()
12952            .insert(edge.from.clone());
12953    }
12954    adj.into_iter()
12955        .map(|(node, neighbors)| (node, neighbors.into_iter().collect()))
12956        .collect()
12957}
12958
12959fn traversal_shortest_handles(
12960    edges: &[TraversalEdge],
12961    from: &str,
12962    to: &str,
12963) -> Option<Vec<String>> {
12964    if from == to {
12965        return Some(vec![from.to_string()]);
12966    }
12967    let adj = traversal_adjacency(edges);
12968    if !adj.contains_key(from) || !adj.contains_key(to) {
12969        return None;
12970    }
12971    let mut visited = BTreeSet::new();
12972    let mut queue = VecDeque::new();
12973    let mut parent = BTreeMap::<String, String>::new();
12974    visited.insert(from.to_string());
12975    queue.push_back(from.to_string());
12976    while let Some(current) = queue.pop_front() {
12977        if let Some(neighbors) = adj.get(&current) {
12978            for neighbor in neighbors {
12979                if visited.insert(neighbor.clone()) {
12980                    parent.insert(neighbor.clone(), current.clone());
12981                    if neighbor == to {
12982                        let mut path = vec![to.to_string()];
12983                        let mut cursor = to.to_string();
12984                        while let Some(prev) = parent.get(&cursor) {
12985                            path.push(prev.clone());
12986                            cursor = prev.clone();
12987                        }
12988                        path.reverse();
12989                        return Some(path);
12990                    }
12991                    queue.push_back(neighbor.clone());
12992                }
12993            }
12994        }
12995    }
12996    None
12997}
12998
12999fn traversal_scored_neighbors(edges: &[TraversalEdge], current: &str) -> Vec<String> {
13000    let mut best_score_by_neighbor = BTreeMap::<String, usize>::new();
13001    for edge in edges {
13002        let neighbor = if edge.from == current {
13003            edge.to.as_str()
13004        } else if edge.to == current {
13005            edge.from.as_str()
13006        } else {
13007            continue;
13008        };
13009        let score = traversal_relation_score(edge, current);
13010        best_score_by_neighbor
13011            .entry(neighbor.to_string())
13012            .and_modify(|best| *best = (*best).max(score))
13013            .or_insert(score);
13014    }
13015    let mut ranked = best_score_by_neighbor.into_iter().collect::<Vec<_>>();
13016    ranked.sort_by(|(left_handle, left_score), (right_handle, right_score)| {
13017        right_score
13018            .cmp(left_score)
13019            .then_with(|| left_handle.cmp(right_handle))
13020    });
13021    ranked.into_iter().map(|(handle, _)| handle).collect()
13022}
13023
13024fn traversal_neighborhood_handles(
13025    edges: &[TraversalEdge],
13026    origin: &str,
13027    depth: usize,
13028    limit: usize,
13029) -> BTreeSet<String> {
13030    let mut seen = BTreeSet::new();
13031    let mut queue = VecDeque::new();
13032    seen.insert(origin.to_string());
13033    queue.push_back((origin.to_string(), 0usize));
13034    while let Some((current, current_depth)) = queue.pop_front() {
13035        if current_depth >= depth {
13036            continue;
13037        }
13038        for neighbor in traversal_scored_neighbors(edges, &current) {
13039            if limit > 0 && seen.len() >= limit {
13040                return seen;
13041            }
13042            if seen.insert(neighbor.clone()) {
13043                queue.push_back((neighbor, current_depth + 1));
13044            }
13045        }
13046    }
13047    seen
13048}
13049
13050fn traversal_edges_between(
13051    handles: &BTreeSet<String>,
13052    edges: &[TraversalEdge],
13053) -> Vec<TraversalEdge> {
13054    edges
13055        .iter()
13056        .filter(|edge| handles.contains(&edge.from) && handles.contains(&edge.to))
13057        .cloned()
13058        .collect()
13059}
13060
13061fn traversal_path_edges(path: &[String], edges: &[TraversalEdge]) -> Vec<TraversalEdge> {
13062    let mut result = Vec::new();
13063    for pair in path.windows(2) {
13064        if let Some(edge) = edges.iter().find(|edge| {
13065            (edge.from == pair[0] && edge.to == pair[1])
13066                || (edge.from == pair[1] && edge.to == pair[0])
13067        }) {
13068            result.push(edge.clone());
13069        }
13070    }
13071    result
13072}
13073
13074fn sorted_traversal_nodes<'a>(
13075    nodes: impl IntoIterator<Item = &'a TraversalNode>,
13076) -> Vec<TraversalNode> {
13077    let mut nodes = nodes.into_iter().cloned().collect::<Vec<_>>();
13078    nodes.sort_by(|left, right| {
13079        left.kind
13080            .cmp(&right.kind)
13081            .then_with(|| left.label.cmp(&right.label))
13082            .then_with(|| left.path.cmp(&right.path))
13083            .then_with(|| left.handle.cmp(&right.handle))
13084    });
13085    nodes
13086}
13087
13088fn traversal_relation_score(edge: &TraversalEdge, origin: &str) -> usize {
13089    let base = match edge.relation.as_str() {
13090        "mentions" => 100,
13091        "contains" => 80,
13092        "calls" => {
13093            if edge.from == origin {
13094                70
13095            } else {
13096                65
13097            }
13098        }
13099        "handled_by" => 68,
13100        "defines_route" => 62,
13101        "mentions_concept" | "mentions_entity" => 66,
13102        "semantic_relation" => 64,
13103        "tagged_concept" | "related_concept" => 58,
13104        "defines" => {
13105            if edge.from == origin {
13106                60
13107            } else {
13108                55
13109            }
13110        }
13111        _ => 10,
13112    };
13113    base + edge.weight
13114}
13115
13116fn traversal_recommendation_reason(edge: &TraversalEdge, origin: &str) -> String {
13117    match edge.relation.as_str() {
13118        "mentions" => "matched from backlog/session text".to_string(),
13119        "contains" => "contained in the selected session artifact".to_string(),
13120        "defines" if edge.from == origin => "symbol defined in selected file".to_string(),
13121        "defines" => "file that defines the selected symbol".to_string(),
13122        "defines_route" if edge.from == origin => "route declared in selected file".to_string(),
13123        "defines_route" => "file that declares the selected route".to_string(),
13124        "handled_by" if edge.from == origin => "handler for the selected route".to_string(),
13125        "handled_by" => "route handled by the selected symbol".to_string(),
13126        "mentions_concept" => "cached summary concept for the selected source".to_string(),
13127        "mentions_entity" => "cached summary entity for the selected source".to_string(),
13128        "semantic_relation" => "LLM-extracted semantic relationship".to_string(),
13129        "tagged_concept" => "concept label attached to the selected entity".to_string(),
13130        "related_concept" => "co-occurring cached summary concept".to_string(),
13131        "calls" if edge.from == origin => "callee from the selected symbol".to_string(),
13132        "calls" => "caller of the selected symbol".to_string(),
13133        other => format!("connected by {other}"),
13134    }
13135}
13136
13137fn traversal_recommendations(
13138    graph: &TraversalGraphBuild,
13139    origin: Option<&str>,
13140    shortest_path: Option<&[String]>,
13141    limit: usize,
13142) -> Vec<TraversalRecommendation> {
13143    let Some(origin) = origin else {
13144        return Vec::new();
13145    };
13146    let mut recommendations = Vec::new();
13147    let mut seen = BTreeSet::new();
13148
13149    if let Some(path) = shortest_path
13150        && path.len() > 1
13151        && path.first().is_some_and(|handle| handle == origin)
13152        && let Some(next) = graph.nodes.get(&path[1])
13153    {
13154        seen.insert(next.handle.clone());
13155        recommendations.push(TraversalRecommendation {
13156            handle: next.handle.clone(),
13157            kind: next.kind.clone(),
13158            label: next.label.clone(),
13159            reason: "next hop on shortest path".to_string(),
13160            score: 1_000,
13161            expand: next.expand.clone(),
13162        });
13163    }
13164
13165    let mut candidates = graph
13166        .edges
13167        .iter()
13168        .filter_map(|edge| {
13169            let neighbor = if edge.from == origin {
13170                edge.to.as_str()
13171            } else if edge.to == origin {
13172                edge.from.as_str()
13173            } else {
13174                return None;
13175            };
13176            let node = graph.nodes.get(neighbor)?;
13177            Some((traversal_relation_score(edge, origin), edge, node))
13178        })
13179        .collect::<Vec<_>>();
13180    candidates.sort_by(|(left_score, _, left), (right_score, _, right)| {
13181        right_score
13182            .cmp(left_score)
13183            .then_with(|| left.kind.cmp(&right.kind))
13184            .then_with(|| left.label.cmp(&right.label))
13185            .then_with(|| left.handle.cmp(&right.handle))
13186    });
13187
13188    let max = if limit == 0 { usize::MAX } else { limit };
13189    for (score, edge, node) in candidates {
13190        if recommendations.len() >= max {
13191            break;
13192        }
13193        if seen.insert(node.handle.clone()) {
13194            recommendations.push(TraversalRecommendation {
13195                handle: node.handle.clone(),
13196                kind: node.kind.clone(),
13197                label: node.label.clone(),
13198                reason: traversal_recommendation_reason(edge, origin),
13199                score,
13200                expand: node.expand.clone(),
13201            });
13202        }
13203    }
13204
13205    recommendations
13206}
13207
13208fn exploration_budget_for_counts(nodes: usize, edges: usize) -> ExplorationBudget {
13209    let scale = nodes.saturating_add(edges);
13210    if scale <= 80 {
13211        ExplorationBudget {
13212            project_size: "small".to_string(),
13213            max_source_windows: 8,
13214            lines_per_window: 96,
13215            relationship_limit: 40,
13216        }
13217    } else if scale <= 800 {
13218        ExplorationBudget {
13219            project_size: "medium".to_string(),
13220            max_source_windows: 6,
13221            lines_per_window: 80,
13222            relationship_limit: 32,
13223        }
13224    } else {
13225        ExplorationBudget {
13226            project_size: "large".to_string(),
13227            max_source_windows: 4,
13228            lines_per_window: 64,
13229            relationship_limit: 24,
13230        }
13231    }
13232}
13233
13234fn exploration_node_label(node: &TraversalNode) -> String {
13235    format!("{}:{}", node.kind, node.label)
13236}
13237
13238fn exploration_source_window_for_node(
13239    root: &Path,
13240    node: &TraversalNode,
13241    budget: &ExplorationBudget,
13242) -> Option<ExplorationSourceWindow> {
13243    let file = node.path.as_ref()?;
13244    let anchor = node
13245        .line
13246        .and_then(|line| usize::try_from(line).ok())
13247        .and_then(|line| line.checked_add(1))
13248        .unwrap_or(1);
13249    let context_before = budget.lines_per_window / 3;
13250    let start = anchor.saturating_sub(context_before).max(1);
13251    let end = start
13252        .saturating_add(budget.lines_per_window)
13253        .saturating_sub(1);
13254    let handle = stable_handle("xwin", &format!("{file}:{start}:{end}:{}", node.handle));
13255    Some(ExplorationSourceWindow {
13256        handle,
13257        file: file.clone(),
13258        start,
13259        end,
13260        reason: format!("cluster around {}", exploration_node_label(node)),
13261        expand: source_read_command(root, file, start, budget.lines_per_window),
13262    })
13263}
13264
13265fn build_exploration_packet(
13266    root: &Path,
13267    totals: &TraversalTotals,
13268    selected_nodes: &[TraversalNode],
13269    selected_edges: &[TraversalEdge],
13270) -> ExplorationPacket {
13271    let budget = exploration_budget_for_counts(totals.nodes, totals.edges);
13272    let node_by_handle = selected_nodes
13273        .iter()
13274        .map(|node| (node.handle.as_str(), node))
13275        .collect::<BTreeMap<_, _>>();
13276    let relationship_map = selected_edges
13277        .iter()
13278        .take(budget.relationship_limit)
13279        .filter_map(|edge| {
13280            let from = node_by_handle.get(edge.from.as_str())?;
13281            let to = node_by_handle.get(edge.to.as_str())?;
13282            Some(ExplorationRelation {
13283                from: exploration_node_label(from),
13284                relation: edge.relation.clone(),
13285                to: exploration_node_label(to),
13286                label: edge.label.clone(),
13287            })
13288        })
13289        .collect::<Vec<_>>();
13290
13291    let mut seen_windows = BTreeSet::new();
13292    let mut source_windows = Vec::new();
13293    for node in selected_nodes {
13294        if source_windows.len() >= budget.max_source_windows {
13295            break;
13296        }
13297        let Some(window) = exploration_source_window_for_node(root, node, &budget) else {
13298            continue;
13299        };
13300        let key = (window.file.clone(), window.start, window.end);
13301        if seen_windows.insert(key) {
13302            source_windows.push(window);
13303        }
13304    }
13305
13306    ExplorationPacket {
13307        budget,
13308        relationship_map,
13309        source_windows,
13310        worker_context: Vec::new(),
13311        no_reread_guidance:
13312            "Use the source_windows expand commands for line-numbered context; avoid whole-file reads unless the needed line is outside every listed window."
13313                .to_string(),
13314    }
13315}
13316
13317pub(crate) fn traversal_report(
13318    root: &Path,
13319    scope: Option<&str>,
13320    graph: TraversalGraphBuild,
13321    query: Option<&str>,
13322    target: Option<&str>,
13323    depth: usize,
13324    limit: usize,
13325) -> Result<TraversalReport> {
13326    let totals = TraversalTotals {
13327        nodes: graph.nodes.len(),
13328        edges: graph.edges.len(),
13329    };
13330    let origin_node = query.and_then(|value| resolve_traversal_node(&graph, value));
13331    let target_node = target.and_then(|value| resolve_traversal_node(&graph, value));
13332    if let Some(query) = query
13333        && origin_node.is_none()
13334    {
13335        bail!("traversal node not found: {}", query);
13336    }
13337    if let Some(target) = target
13338        && target_node.is_none()
13339    {
13340        bail!("traversal target not found: {}", target);
13341    }
13342
13343    let (mode, selected_nodes, selected_edges, shortest_path) =
13344        if let (Some(origin), Some(target)) = (origin_node, target_node) {
13345            if let Some(handles) =
13346                traversal_shortest_handles(&graph.edges, &origin.handle, &target.handle)
13347            {
13348                let handle_set = handles.iter().cloned().collect::<BTreeSet<_>>();
13349                let nodes = handles
13350                    .iter()
13351                    .filter_map(|handle| graph.nodes.get(handle).cloned())
13352                    .collect::<Vec<_>>();
13353                let edges = traversal_path_edges(&handles, &graph.edges);
13354                let path = TraversalPathReport {
13355                    from: origin.clone(),
13356                    to: target.clone(),
13357                    hops: handles.len().saturating_sub(1),
13358                    nodes: nodes.clone(),
13359                    edges: edges.clone(),
13360                };
13361                (
13362                    "path".to_string(),
13363                    nodes,
13364                    traversal_edges_between(&handle_set, &graph.edges),
13365                    Some(path),
13366                )
13367            } else {
13368                (
13369                    "path".to_string(),
13370                    vec![origin.clone(), target.clone()],
13371                    Vec::new(),
13372                    None,
13373                )
13374            }
13375        } else if let Some(origin) = origin_node {
13376            let handles =
13377                traversal_neighborhood_handles(&graph.edges, &origin.handle, depth, limit);
13378            let nodes =
13379                sorted_traversal_nodes(handles.iter().filter_map(|handle| graph.nodes.get(handle)));
13380            let edges = traversal_edges_between(&handles, &graph.edges);
13381            ("neighborhood".to_string(), nodes, edges, None)
13382        } else {
13383            let mut nodes = sorted_traversal_nodes(graph.nodes.values());
13384            let truncated_nodes = limit > 0 && nodes.len() > limit;
13385            if truncated_nodes {
13386                nodes.truncate(limit);
13387            }
13388            let handles = nodes
13389                .iter()
13390                .map(|node| node.handle.clone())
13391                .collect::<BTreeSet<_>>();
13392            let mut edges = traversal_edges_between(&handles, &graph.edges);
13393            let truncated_edges = limit > 0 && edges.len() > limit;
13394            if truncated_edges {
13395                edges.truncate(limit);
13396            }
13397            ("export".to_string(), nodes, edges, None)
13398        };
13399
13400    let shortest_handles = shortest_path.as_ref().map(|path| {
13401        path.nodes
13402            .iter()
13403            .map(|node| node.handle.clone())
13404            .collect::<Vec<_>>()
13405    });
13406    let recommendations = traversal_recommendations(
13407        &graph,
13408        origin_node.map(|node| node.handle.as_str()),
13409        shortest_handles.as_deref(),
13410        if limit == 0 { 10 } else { limit.min(10) },
13411    );
13412    let exploration = build_exploration_packet(root, &totals, &selected_nodes, &selected_edges);
13413    let truncated = selected_nodes.len() < totals.nodes || selected_edges.len() < totals.edges;
13414
13415    Ok(TraversalReport {
13416        root: root.to_string_lossy().to_string(),
13417        scope: scope.map(str::to_string),
13418        mode,
13419        totals,
13420        query: query.map(str::to_string),
13421        target: target.map(str::to_string),
13422        nodes: selected_nodes,
13423        edges: selected_edges,
13424        shortest_path,
13425        recommendations,
13426        exploration,
13427        truncated,
13428        warnings: graph.warnings,
13429    })
13430}
13431
/// Escapes `&`, `<`, `>`, `"` and `'` so `input` can be embedded in HTML text
/// or attribute values. All other characters pass through unchanged.
fn html_escape(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
13440
13441pub(crate) fn traversal_report_html(report: &TraversalReport) -> Result<String> {
13442    let json = serde_json::to_string(report)?.replace("</", "<\\/");
13443    let mut html = String::new();
13444    html.push_str(
13445        "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift traversal graph</title>",
13446    );
13447    html.push_str(
13448        r#"<style>
13449:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#ffffff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e;--semantic:#9a3412}
13450@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf;--semantic:#fb923c}}
13451*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.toolbar{display:flex;gap:8px;align-items:center}.toolbar input{min-width:220px;border:1px solid var(--line);border-radius:6px;background:var(--panel);color:var(--text);padding:8px 10px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 320px;gap:14px;min-height:650px}.graph-panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.graph-panel{position:relative}.legend{position:absolute;left:12px;top:12px;display:flex;flex-wrap:wrap;gap:6px;max-width:calc(100% - 24px)}.legend span{font-size:12px;background:color-mix(in srgb,var(--panel) 86%,transparent);border:1px solid var(--line);border-radius:999px;padding:4px 8px}.side{padding:14px;overflow:auto}.side h2{font-size:15px;margin:0 0 8px}.selected{border-top:1px solid var(--line);margin-top:12px;padding-top:12px}.list{display:grid;gap:8px}.row{border:1px solid 
var(--line);border-radius:6px;padding:8px;cursor:pointer}.row:hover{border-color:var(--accent)}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted)}svg{width:100%;height:650px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.edge.semantic{stroke:var(--semantic);stroke-width:1.8}.node{stroke:var(--panel);stroke-width:2;cursor:pointer}.node.semantic{stroke:var(--semantic);stroke-width:2.5}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text);pointer-events:none}.hidden{display:none}@media(max-width:900px){.top{display:block}.toolbar{margin-top:12px}.layout{grid-template-columns:1fr}.side{max-height:360px}svg{height:560px}}
13452</style>"#,
13453    );
13454    html.push_str("</head><body>");
13455    html.push_str("<div class=\"page\">");
13456    html.push_str(&format!(
13457        "<header class=\"top\"><div><h1>tsift traversal graph</h1><div class=\"meta\">mode <code>{}</code> | nodes <code>{}</code>/<code>{}</code> | edges <code>{}</code>/<code>{}</code></div></div><div class=\"toolbar\"><input id=\"filter\" type=\"search\" placeholder=\"Filter nodes\"></div></header>",
13458        html_escape(&report.mode),
13459        report.nodes.len(),
13460        report.totals.nodes,
13461        report.edges.len(),
13462        report.totals.edges
13463    ));
13464    html.push_str(
13465        r#"<main class="layout"><section class="graph-panel"><div id="legend" class="legend"></div><svg id="graph-canvas" role="img" aria-label="Traversal graph"></svg></section><aside class="side"><h2>Nodes</h2><div id="node-list" class="list"></div><div id="selected" class="selected"></div></aside></main>"#,
13466    );
13467    html.push_str("<script id=\"graph-data\" type=\"application/json\">");
13468    html.push_str(&json);
13469    html.push_str(
13470        r##"</script><script>
13471const report = JSON.parse(document.getElementById("graph-data").textContent);
13472const svg = document.getElementById("graph-canvas");
13473const list = document.getElementById("node-list");
13474const selected = document.getElementById("selected");
13475const filter = document.getElementById("filter");
13476const legend = document.getElementById("legend");
13477const nodes = report.nodes.map((node, index) => ({...node, index}));
13478const nodeByHandle = new Map(nodes.map(node => [node.handle, node]));
13479const edges = report.edges.filter(edge => nodeByHandle.has(edge.from) && nodeByHandle.has(edge.to));
13480const colorByKind = new Map([
13481  ["file", "#2563eb"], ["symbol", "#16a34a"], ["route", "#7c3aed"],
13482  ["session", "#0891b2"], ["backlog", "#dc2626"], ["job_packet", "#ea580c"],
13483  ["semantic_concept", "#9a3412"], ["semantic_entity", "#b45309"],
13484  ["source_handle", "#64748b"], ["worker_context", "#475569"], ["worker_result", "#15803d"]
13485]);
13486function color(kind){ return colorByKind.get(kind) || "#6b7280"; }
13487function isSemantic(edge){ return edge.relation.includes("concept") || edge.relation.includes("entity") || edge.relation.includes("semantic"); }
13488function text(value){ return value == null ? "" : String(value); }
13489function matches(node, query){
13490  if (!query) return true;
13491  const haystack = [node.kind,node.label,node.handle,node.ref_id,node.path,node.detail].map(text).join(" ").toLowerCase();
13492  return haystack.includes(query);
13493}
13494function layout(){
13495  const rect = svg.getBoundingClientRect();
13496  const width = rect.width || 900;
13497  const height = rect.height || 650;
13498  const cx = width / 2;
13499  const cy = height / 2;
13500  const kinds = [...new Set(nodes.map(node => node.kind))].sort();
13501  const counts = new Map();
13502  for (const node of nodes) counts.set(node.kind, (counts.get(node.kind) || 0) + 1);
13503  const offsets = new Map();
13504  for (const node of nodes) {
13505    const group = kinds.indexOf(node.kind);
13506    const index = offsets.get(node.kind) || 0;
13507    offsets.set(node.kind, index + 1);
13508    const groupCount = counts.get(node.kind) || 1;
13509    const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
13510    const angle = (Math.PI * 2 * index / Math.max(groupCount, 1)) + (group * 0.47);
13511    node.x = cx + Math.cos(angle) * ring;
13512    node.y = cy + Math.sin(angle) * ring;
13513  }
13514}
13515function draw(){
13516  const query = filter.value.trim().toLowerCase();
13517  const visible = new Set(nodes.filter(node => matches(node, query)).map(node => node.handle));
13518  svg.innerHTML = "";
13519  for (const edge of edges) {
13520    if (!visible.has(edge.from) || !visible.has(edge.to)) continue;
13521    const from = nodeByHandle.get(edge.from);
13522    const to = nodeByHandle.get(edge.to);
13523    const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
13524    line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
13525    line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
13526    line.setAttribute("class", "edge" + (isSemantic(edge) ? " semantic" : ""));
13527    line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.relation + (edge.label ? ": " + edge.label : "");
13528    svg.appendChild(line);
13529  }
13530  for (const node of nodes) {
13531    if (!visible.has(node.handle)) continue;
13532    const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
13533    circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
13534    circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
13535    circle.setAttribute("fill", color(node.kind));
13536    circle.setAttribute("class", "node" + (node.kind.startsWith("semantic_") ? " semantic" : ""));
13537    circle.addEventListener("click", () => selectNode(node));
13538    circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
13539    svg.appendChild(circle);
13540    const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
13541    label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
13542    label.setAttribute("class", "node-label");
13543    label.textContent = node.label.length > 34 ? node.label.slice(0, 31) + "..." : node.label;
13544    svg.appendChild(label);
13545  }
13546  renderList(query);
13547}
13548function renderLegend(){
13549  const kinds = [...new Set(nodes.map(node => node.kind))].sort();
13550  legend.innerHTML = kinds.map(kind => `<span><b style="color:${color(kind)}">&#9679;</b> ${kind}</span>`).join("");
13551}
13552function renderList(query){
13553  const rows = nodes.filter(node => matches(node, query)).slice(0, 120);
13554  list.innerHTML = rows.map(node => `<div class="row" data-handle="${node.handle}"><div class="kind">${node.kind}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${node.handle}</div></div>`).join("");
13555  for (const row of list.querySelectorAll(".row")) {
13556    row.addEventListener("click", () => selectNode(nodeByHandle.get(row.dataset.handle)));
13557  }
13558}
13559function selectNode(node){
13560  const adjacent = edges.filter(edge => edge.from === node.handle || edge.to === node.handle).slice(0, 20);
13561  selected.innerHTML = `<h2>${escapeHtml(node.label)}</h2><div class="kind">${node.kind}</div><p class="handle">${node.handle}</p>${node.path ? `<p>${escapeHtml(node.path)}${node.line != null ? ":" + node.line : ""}</p>` : ""}${node.detail ? `<p>${escapeHtml(node.detail)}</p>` : ""}<p><code>${escapeHtml(node.expand)}</code></p><h2>Edges</h2><div class="list">${adjacent.map(edge => `<div class="row"><div class="kind">${edge.relation}</div><div>${escapeHtml(edge.from)} -> ${escapeHtml(edge.to)}</div>${edge.label ? `<div>${escapeHtml(edge.label)}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No visible edges.</div>"}</div>`;
13562}
13563function escapeHtml(value){
13564  return text(value).replace(/[&<>"']/g, ch => ({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;","'":"&#39;"}[ch]));
13565}
13566filter.addEventListener("input", draw);
13567window.addEventListener("resize", () => { layout(); draw(); });
13568renderLegend();
13569layout();
13570draw();
13571if (nodes.length) selectNode(nodes[0]);
13572</script></div></body></html>"##,
13573    );
13574    Ok(html)
13575}
13576
13577fn semantic_related_report_from_store(
13578    root: &Path,
13579    scope: Option<&str>,
13580    query: &str,
13581    limit: usize,
13582    kind: SemanticRelatedKind,
13583    store: &impl GraphStore,
13584) -> Result<SemanticRelatedReport> {
13585    if query.trim().is_empty() {
13586        bail!("semantic query cannot be empty");
13587    }
13588
13589    let query_embedding = semantic_embedding(query);
13590    let node_kinds: &[&str] = match kind {
13591        SemanticRelatedKind::Concept => &["semantic_concept"],
13592        SemanticRelatedKind::Entity => &["semantic_entity"],
13593        SemanticRelatedKind::All => &["semantic_concept", "semantic_entity"],
13594    };
13595
13596    let mut items = Vec::new();
13597    for node_kind in node_kinds {
13598        for node in store.nodes_by_kind(node_kind)? {
13599            let Some(embedding) = node
13600                .properties
13601                .get("embedding")
13602                .and_then(|value| parse_semantic_embedding_property(value))
13603            else {
13604                continue;
13605            };
13606            let score = semantic_cosine(&query_embedding, &embedding);
13607            items.push(SemanticRelatedItem {
13608                handle: node
13609                    .properties
13610                    .get("handle")
13611                    .cloned()
13612                    .unwrap_or_else(|| node.id.clone()),
13613                kind: node.kind,
13614                label: node.label,
13615                score,
13616                file_path: node
13617                    .properties
13618                    .get("source_file")
13619                    .or_else(|| node.properties.get("path"))
13620                    .cloned(),
13621                source_symbol: node.properties.get("source_symbol").cloned(),
13622                detail: node
13623                    .properties
13624                    .get("description")
13625                    .or_else(|| node.properties.get("detail"))
13626                    .cloned(),
13627                expand: node
13628                    .properties
13629                    .get("expand")
13630                    .cloned()
13631                    .unwrap_or_else(|| traversal_expand_command(root, &node.id)),
13632            });
13633        }
13634    }
13635
13636    items.sort_by(|left, right| {
13637        right
13638            .score
13639            .partial_cmp(&left.score)
13640            .unwrap_or(Ordering::Equal)
13641            .then_with(|| left.kind.cmp(&right.kind))
13642            .then_with(|| left.label.cmp(&right.label))
13643            .then_with(|| left.handle.cmp(&right.handle))
13644    });
13645    if limit > 0 && items.len() > limit {
13646        items.truncate(limit);
13647    }
13648
13649    let mut warnings = Vec::new();
13650    if items.is_empty() {
13651        warnings.push(
13652            "no semantic graph rows found; run `tsift summarize --extract <path>` first"
13653                .to_string(),
13654        );
13655    }
13656
13657    Ok(SemanticRelatedReport {
13658        root: root.to_string_lossy().to_string(),
13659        scope: scope.map(str::to_string),
13660        query: query.to_string(),
13661        embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
13662        count: items.len(),
13663        items,
13664        warnings,
13665    })
13666}
13667
13668fn graph_db_semantic_edge_scan_cap(limit: usize) -> usize {
13669    if limit == 0 {
13670        return 0;
13671    }
13672    limit.saturating_mul(4).clamp(
13673        GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP,
13674        GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP,
13675    )
13676}
13677
/// Upper bound on how many nodes semantic-seeded expansion may discover.
///
/// A `limit` of 0 returns `usize::MAX`. Otherwise the cap is three times the
/// limit (saturating), and never less than the number of seeds.
fn graph_db_semantic_node_discovery_cap(seed_count: usize, limit: usize) -> usize {
    match limit {
        0 => usize::MAX,
        // For a non-zero limit, `limit * 3` (saturating) is always >= `limit`,
        // so only the seed count can raise the cap further.
        bounded => bounded.saturating_mul(3).max(seed_count),
    }
}
13684
13685fn graph_db_semantic_edge_other_id<'a>(
13686    edge: &'a SubstrateGraphEdge,
13687    current_id: &str,
13688) -> Option<&'a str> {
13689    if edge.from_id == current_id {
13690        Some(edge.to_id.as_str())
13691    } else if edge.to_id == current_id {
13692        Some(edge.from_id.as_str())
13693    } else {
13694        None
13695    }
13696}
13697
13698fn graph_db_semantic_edge_score(edge: &SubstrateGraphEdge, current_id: &str) -> i64 {
13699    let mut score = resolution::edge_kind_rank_score(&edge.kind).saturating_mul(10);
13700    score += if edge.from_id == current_id { 8 } else { 4 };
13701    score += match edge.kind.as_str() {
13702        "mentions_concept" | "mentions_entity" | "tagged_concept" | "tagged_entity"
13703        | "related_concept" => 30,
13704        "semantic_relation" => 28,
13705        "calls" => 24,
13706        "mentions" => 22,
13707        "requests_context" | "scopes_context" | "scopes_source" | "explains_result" => 18,
13708        "defines" | "contains" | "belongs_to" => 12,
13709        _ => 0,
13710    };
13711    score
13712}
13713
13714fn graph_db_semantic_seeded_neighborhood(
13715    store: &impl GraphStore,
13716    seed_ids: &[String],
13717    depth: usize,
13718    limit: usize,
13719) -> Result<GraphDbSemanticSeededSubgraph> {
13720    let seed_rank = seed_ids
13721        .iter()
13722        .enumerate()
13723        .map(|(idx, seed)| (seed.clone(), idx))
13724        .collect::<BTreeMap<_, _>>();
13725    let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
13726    let mut edges = BTreeMap::<String, SubstrateGraphEdge>::new();
13727    let mut node_score_by_id = BTreeMap::<String, i64>::new();
13728    let mut queue = VecDeque::<(String, usize)>::new();
13729    let mut seen_at_depth = BTreeMap::<String, usize>::new();
13730    let edge_scan_cap = graph_db_semantic_edge_scan_cap(limit);
13731    let node_discovery_cap = graph_db_semantic_node_discovery_cap(seed_ids.len(), limit);
13732    let mut skipped_by_edge_cap = 0usize;
13733    let mut skipped_by_node_cap = 0usize;
13734    let mut diagnostics = vec![
13735        "semantic-seeded retrieval uses phrase similarity to pick graph seeds".to_string(),
13736        "seed expansion traverses both outgoing and incident edges so code, markdown, conversation, and memory adapters can link into semantic rows without reversing their edge direction".to_string(),
13737        format!(
13738            "seed expansion ranks incident/outgoing edges before caps; per-node edge scan cap={} node discovery cap={}",
13739            if edge_scan_cap == 0 {
13740                "unbounded".to_string()
13741            } else {
13742                edge_scan_cap.to_string()
13743            },
13744            if node_discovery_cap == usize::MAX {
13745                "unbounded".to_string()
13746            } else {
13747                node_discovery_cap.to_string()
13748            }
13749        ),
13750    ];
13751
13752    for (idx, seed_id) in seed_ids.iter().enumerate() {
13753        if let Some(node) = store.node(seed_id)? {
13754            nodes.entry(seed_id.clone()).or_insert(node);
13755            node_score_by_id
13756                .entry(seed_id.clone())
13757                .or_insert(1_000_000i64.saturating_sub(idx as i64));
13758            queue.push_back((seed_id.clone(), 0));
13759            seen_at_depth.entry(seed_id.clone()).or_insert(0);
13760        } else {
13761            diagnostics.push(format!(
13762                "semantic seed {seed_id} was not present in the graph store"
13763            ));
13764        }
13765    }
13766
13767    while let Some((current_id, current_depth)) = queue.pop_front() {
13768        if current_depth >= depth {
13769            continue;
13770        }
13771
13772        let mut expansion_edges_by_key = BTreeMap::<String, SubstrateGraphEdge>::new();
13773        for edge in store.outgoing_edges(&current_id, None)? {
13774            expansion_edges_by_key
13775                .entry(graph_db_edge_key(&edge))
13776                .or_insert(edge);
13777        }
13778        for edge in store.incident_edges(&current_id, None)? {
13779            expansion_edges_by_key
13780                .entry(graph_db_edge_key(&edge))
13781                .or_insert(edge);
13782        }
13783        let mut expansion_edges = expansion_edges_by_key.into_values().collect::<Vec<_>>();
13784        expansion_edges.sort_by(|left, right| {
13785            graph_db_semantic_edge_score(right, &current_id)
13786                .cmp(&graph_db_semantic_edge_score(left, &current_id))
13787                .then_with(|| graph_db_edge_key(left).cmp(&graph_db_edge_key(right)))
13788        });
13789        if edge_scan_cap > 0 && expansion_edges.len() > edge_scan_cap {
13790            skipped_by_edge_cap += expansion_edges.len() - edge_scan_cap;
13791            expansion_edges.truncate(edge_scan_cap);
13792        }
13793
13794        for edge in expansion_edges {
13795            let Some(other_id) = graph_db_semantic_edge_other_id(&edge, &current_id) else {
13796                continue;
13797            };
13798            let other_known = nodes.contains_key(other_id);
13799            if !other_known && nodes.len() >= node_discovery_cap {
13800                skipped_by_node_cap += 1;
13801                continue;
13802            }
13803            let other_id = other_id.to_string();
13804            let edge_score = graph_db_semantic_edge_score(&edge, &current_id)
13805                .saturating_add((depth.saturating_sub(current_depth) as i64).saturating_mul(5));
13806            node_score_by_id
13807                .entry(other_id.clone())
13808                .and_modify(|score| *score = (*score).max(edge_score))
13809                .or_insert(edge_score);
13810            let edge_key = graph_db_edge_key(&edge);
13811            edges.entry(edge_key).or_insert_with(|| edge.clone());
13812            if let std::collections::btree_map::Entry::Vacant(entry) = nodes.entry(other_id.clone())
13813                && let Some(node) = store.node(&other_id)?
13814            {
13815                entry.insert(node);
13816            }
13817            if !nodes.contains_key(&other_id) {
13818                continue;
13819            }
13820            let next_depth = current_depth + 1;
13821            let should_queue = seen_at_depth
13822                .get(&other_id)
13823                .is_none_or(|seen_depth| next_depth < *seen_depth);
13824            if should_queue {
13825                seen_at_depth.insert(other_id.clone(), next_depth);
13826                queue.push_back((other_id, next_depth));
13827            }
13828        }
13829    }
13830
13831    if skipped_by_edge_cap > 0 {
13832        diagnostics.push(format!(
13833            "semantic-seeded expansion skipped {skipped_by_edge_cap} lower-scoring incident/outgoing edge(s) after per-node caps"
13834        ));
13835    }
13836    if skipped_by_node_cap > 0 {
13837        diagnostics.push(format!(
13838            "semantic-seeded expansion skipped {skipped_by_node_cap} lower-scoring node discovery edge(s) after the discovery cap"
13839        ));
13840    }
13841
13842    let mut nodes = nodes.into_values().collect::<Vec<_>>();
13843    nodes.sort_by(|left, right| {
13844        seed_rank
13845            .get(&left.id)
13846            .copied()
13847            .unwrap_or(usize::MAX)
13848            .cmp(&seed_rank.get(&right.id).copied().unwrap_or(usize::MAX))
13849            .then_with(|| {
13850                node_score_by_id
13851                    .get(&right.id)
13852                    .copied()
13853                    .unwrap_or_default()
13854                    .cmp(&node_score_by_id.get(&left.id).copied().unwrap_or_default())
13855            })
13856            .then(left.id.cmp(&right.id))
13857    });
13858
13859    let before_limit = nodes.len();
13860    let truncated = limit > 0 && nodes.len() > limit;
13861    if truncated {
13862        nodes.truncate(limit);
13863        diagnostics.push(format!(
13864            "semantic-seeded neighborhood truncated from {before_limit} to {limit} node(s)"
13865        ));
13866    }
13867
13868    let node_ids = nodes
13869        .iter()
13870        .map(|node| node.id.as_str())
13871        .collect::<BTreeSet<_>>();
13872    let mut edges = edges
13873        .into_values()
13874        .filter(|edge| {
13875            node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
13876        })
13877        .collect::<Vec<_>>();
13878    edges.sort_by_key(graph_db_edge_key);
13879
13880    Ok(GraphDbSemanticSeededSubgraph {
13881        nodes,
13882        edges,
13883        truncated,
13884        diagnostics,
13885    })
13886}
13887
13888#[allow(clippy::too_many_arguments)]
13889fn cmd_semantic_related(
13890    query: &str,
13891    path: &Path,
13892    scope: Option<&str>,
13893    limit: usize,
13894    kind: SemanticRelatedKind,
13895    json_output: bool,
13896    compact: bool,
13897    pretty: bool,
13898    terse: bool,
13899    schema: bool,
13900) -> Result<()> {
13901    let root = lint::resolve_project_root_or_canonical_path(path)?;
13902    write_traversal_graph_store(&root, path, scope)?;
13903    let graph_db = graph_substrate_db_path(&root, scope);
13904    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
13905    let mut report = semantic_related_report_from_store(&root, scope, query, limit, kind, &store)?;
13906    if let Some(recovery) = store.read_only_recovery() {
13907        report
13908            .warnings
13909            .push(graph_db_read_recovery_diagnostic(recovery));
13910    }
13911
13912    if json_output {
13913        println!("{}", to_json_schema(&report, pretty, terse, schema)?);
13914    } else if compact {
13915        for item in &report.items {
13916            println!(
13917                "{:.3}\t{}\t{}\t{}",
13918                item.score, item.kind, item.label, item.handle
13919            );
13920        }
13921        for warning in &report.warnings {
13922            eprintln!("warning: {warning}");
13923        }
13924    } else {
13925        println!(
13926            "Related semantic graph rows for {:?} ({})",
13927            report.query, report.embedding_model
13928        );
13929        for item in &report.items {
13930            println!(
13931                "  {:.3} [{}] {} ({})",
13932                item.score, item.kind, item.label, item.handle
13933            );
13934            if let Some(detail) = &item.detail {
13935                println!("      {}", detail);
13936            }
13937            if let Some(file_path) = &item.file_path {
13938                println!("      file: {}", file_path);
13939            }
13940            println!("      expand: {}", item.expand);
13941        }
13942        for warning in &report.warnings {
13943            eprintln!("warning: {warning}");
13944        }
13945    }
13946
13947    Ok(())
13948}
13949
/// One line of a source window as emitted by `source-read`.
#[derive(Serialize)]
struct SourceLinePreview {
    /// 1-based line number within the file.
    line: usize,
    /// Line text, shortened with `truncate_for_budget` by the producer.
    text: String,
}
13955
/// Describes which part of the file a source window covers.
#[derive(Serialize)]
struct SourceRangePreview {
    /// 1-based first line of the window.
    start: usize,
    /// Last line of the window, clamped to `total_lines` by the producer.
    end: usize,
    /// Number of lines in the whole file.
    total_lines: usize,
    /// True when the window starts after line 1.
    truncated_before: bool,
    /// True when the window ends before the last line of the file.
    truncated_after: bool,
}
13964
/// Follow-up `tsift source-read` command lines for widening a source window.
#[derive(Serialize)]
struct SourceExpandCommands {
    /// Command for the lines preceding the window; omitted from the
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    before: Option<String>,
    /// Command for the lines following the window; omitted from the
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    after: Option<String>,
    /// Command that reads the file starting from line 1.
    file: String,
}
13973
/// Reference to an indexed symbol that overlaps a source window.
#[derive(Serialize)]
struct SourceSymbolRef {
    /// Handle produced by `stable_handle` with the `ssym` prefix.
    handle: String,
    /// Symbol name (budget-truncated by the producer).
    name: String,
    /// Symbol kind as stored in the index.
    kind: String,
    /// Language as stored in the index.
    language: String,
    /// Display path of the file containing the symbol.
    file: String,
    /// 1-based line on which the symbol starts.
    line: usize,
    /// 1-based line on which the symbol ends, when the index records one.
    #[serde(skip_serializing_if = "Option::is_none")]
    end_line: Option<usize>,
    /// Symbol signature (budget-truncated), when the index records one.
    #[serde(skip_serializing_if = "Option::is_none")]
    signature: Option<String>,
    /// `tsift ... explain` command line for this symbol.
    expand: String,
}
13988
/// Reference to a stored summary associated with the file being read.
#[derive(Serialize)]
struct SourceSummaryRef {
    /// Handle produced by `stable_handle` with the `sum` prefix.
    handle: String,
    /// Name of the summarized symbol (budget-truncated by the producer).
    symbol_name: String,
    /// File path as recorded on the summary row.
    file_path: String,
    /// Summary text (budget-truncated by the producer).
    summary: String,
    /// `tsift summarize` command line for this symbol.
    expand: String,
}
13997
/// Complete payload of the `source-read` command.
#[derive(Serialize)]
struct SourceReadReport {
    /// Handle produced by `stable_handle` with the `swin` prefix.
    handle: String,
    /// Project root the file was resolved against.
    root: String,
    /// Display path of the file (absolute or root-relative, per the caller).
    file: String,
    /// Window bounds and truncation flags.
    range: SourceRangePreview,
    /// The lines inside the window.
    preview: Vec<SourceLinePreview>,
    /// Indexed symbols overlapping the window.
    symbols: Vec<SourceSymbolRef>,
    /// Stored summaries found for the file.
    summaries: Vec<SourceSummaryRef>,
    /// Follow-up commands for widening the window.
    expand: SourceExpandCommands,
    /// Non-fatal diagnostics; omitted from the serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
14011
14012fn resolve_source_file(root: &Path, file: &Path) -> Result<PathBuf> {
14013    let candidate = if file.is_absolute() {
14014        file.to_path_buf()
14015    } else {
14016        root.join(file)
14017    };
14018    let canonical = candidate
14019        .canonicalize()
14020        .with_context(|| format!("canonicalizing source file {}", candidate.display()))?;
14021    if !canonical.is_file() {
14022        bail!("source file is not a regular file: {}", canonical.display());
14023    }
14024    let canonical_root = root
14025        .canonicalize()
14026        .with_context(|| format!("canonicalizing project root {}", root.display()))?;
14027    if !canonical.starts_with(&canonical_root) {
14028        bail!(
14029            "source file {} is outside project root {}",
14030            canonical.display(),
14031            canonical_root.display()
14032        );
14033    }
14034    Ok(canonical)
14035}
14036
14037fn source_read_command(root: &Path, file: &str, start: usize, lines: usize) -> String {
14038    format!(
14039        "tsift source-read {} --path {} --start {} --lines {} --budget normal",
14040        shell_quote(file),
14041        shell_quote(&root.to_string_lossy()),
14042        start,
14043        lines
14044    )
14045}
14046
14047fn source_symbol_expand_command(root: &Path, symbol: &str) -> String {
14048    format!(
14049        "tsift --envelope explain {} --path {} --budget normal",
14050        shell_quote(symbol),
14051        shell_quote(&root.to_string_lossy())
14052    )
14053}
14054
14055fn source_summary_expand_command(root: &Path, symbol: &str) -> String {
14056    format!(
14057        "tsift summarize {} --path {} --json",
14058        shell_quote(symbol),
14059        shell_quote(&root.to_string_lossy())
14060    )
14061}
14062
14063fn source_symbol_line(symbol: &index::StoredSymbol) -> usize {
14064    usize::try_from(symbol.line)
14065        .ok()
14066        .and_then(|line| line.checked_add(1))
14067        .unwrap_or(1)
14068}
14069
14070fn source_symbol_end_line(symbol: &index::StoredSymbol) -> Option<usize> {
14071    symbol
14072        .end_line
14073        .and_then(|line| usize::try_from(line).ok())
14074        .and_then(|line| line.checked_add(1))
14075}
14076
14077fn source_symbol_intersects(symbol: &index::StoredSymbol, start: usize, end: usize) -> bool {
14078    if end == 0 {
14079        return false;
14080    }
14081    let symbol_start = source_symbol_line(symbol);
14082    let symbol_end = source_symbol_end_line(symbol).unwrap_or(symbol_start);
14083    symbol_start <= end && symbol_end >= start
14084}
14085
14086#[allow(clippy::too_many_arguments)]
14087fn load_source_symbols(
14088    root: &Path,
14089    file_abs: &Path,
14090    file_display: &str,
14091    scope: Option<&str>,
14092    start: usize,
14093    end: usize,
14094    limit: usize,
14095    max_bytes: usize,
14096    warnings: &mut Vec<String>,
14097) -> Vec<SourceSymbolRef> {
14098    let db_path = match resolve_query_db_path(root, file_abs, scope) {
14099        Ok(path) => path,
14100        Err(err) => {
14101            warnings.push(format!("index refs unavailable: {err:#}"));
14102            return Vec::new();
14103        }
14104    };
14105    if !db_path.exists() {
14106        warnings.push(format!(
14107            "index refs unavailable: no index found at {}",
14108            db_path.display()
14109        ));
14110        return Vec::new();
14111    }
14112
14113    let db = match index::IndexDb::open_read_only_resilient(&db_path) {
14114        Ok(db) => db,
14115        Err(err) => {
14116            warnings.push(format!("index refs unavailable: {err:#}"));
14117            return Vec::new();
14118        }
14119    };
14120
14121    let file_key = file_abs.to_string_lossy().to_string();
14122    let symbols = match db.symbols_for_file(&file_key) {
14123        Ok(symbols) => symbols,
14124        Err(err) => {
14125            warnings.push(format!("symbol refs unavailable: {err:#}"));
14126            return Vec::new();
14127        }
14128    };
14129
14130    symbols
14131        .into_iter()
14132        .filter(|symbol| source_symbol_intersects(symbol, start, end))
14133        .take(limit)
14134        .map(|symbol| {
14135            let line = source_symbol_line(&symbol);
14136            let end_line = source_symbol_end_line(&symbol);
14137            let handle = stable_handle(
14138                "ssym",
14139                &format!("{}:{}:{}", file_display, symbol.name, line),
14140            );
14141            SourceSymbolRef {
14142                handle,
14143                name: truncate_for_budget(&symbol.name, max_bytes),
14144                kind: symbol.kind,
14145                language: symbol.language,
14146                file: file_display.to_string(),
14147                line,
14148                end_line,
14149                signature: symbol
14150                    .signature
14151                    .map(|signature| truncate_for_budget(&signature, max_bytes)),
14152                expand: source_symbol_expand_command(root, &symbol.name),
14153            }
14154        })
14155        .collect()
14156}
14157
14158fn load_source_summaries(
14159    root: &Path,
14160    file_display: &str,
14161    limit: usize,
14162    max_bytes: usize,
14163    warnings: &mut Vec<String>,
14164) -> Vec<SourceSummaryRef> {
14165    let db_path = root.join(".tsift/summaries.db");
14166    if !db_path.exists() {
14167        return Vec::new();
14168    }
14169    let db = match summarize::SummaryDb::open_read_only_resilient(&db_path) {
14170        Ok(db) => db,
14171        Err(err) => {
14172            warnings.push(format!("summary refs unavailable: {err:#}"));
14173            return Vec::new();
14174        }
14175    };
14176    let summaries = match db.get_by_file(file_display) {
14177        Ok(summaries) => summaries,
14178        Err(err) => {
14179            warnings.push(format!("summary refs unavailable: {err:#}"));
14180            return Vec::new();
14181        }
14182    };
14183
14184    summaries
14185        .into_iter()
14186        .take(limit)
14187        .map(|summary| SourceSummaryRef {
14188            handle: stable_handle(
14189                "sum",
14190                &format!(
14191                    "{}:{}:{}",
14192                    summary.file_path, summary.symbol_name, summary.id
14193                ),
14194            ),
14195            symbol_name: truncate_for_budget(&summary.symbol_name, max_bytes),
14196            file_path: summary.file_path,
14197            summary: truncate_for_budget(&summary.summary, max_bytes),
14198            expand: source_summary_expand_command(root, &summary.symbol_name),
14199        })
14200        .collect()
14201}
14202
14203#[allow(clippy::too_many_arguments)]
14204fn cmd_source_read(
14205    file: &Path,
14206    path: &Path,
14207    start: usize,
14208    lines: usize,
14209    end: Option<usize>,
14210    scope: Option<&str>,
14211    format: OutputFormat,
14212    absolute: bool,
14213    budget: ResponseBudget,
14214) -> Result<()> {
14215    if start == 0 {
14216        bail!("--start is 1-based and must be greater than zero");
14217    }
14218    if lines == 0 {
14219        bail!("--lines must be greater than zero");
14220    }
14221    if let Some(end) = end
14222        && end < start
14223    {
14224        bail!("--end must be greater than or equal to --start");
14225    }
14226
14227    let root = lint::resolve_project_root_or_canonical_path(path)?;
14228    let file_abs = resolve_source_file(&root, file)?;
14229    let file_display = if absolute {
14230        file_abs.to_string_lossy().to_string()
14231    } else {
14232        relativize_pathbuf(&file_abs, &root)
14233            .to_string_lossy()
14234            .to_string()
14235    };
14236
14237    let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
14238    let text = String::from_utf8_lossy(&source);
14239    let all_lines: Vec<&str> = text.lines().collect();
14240    let total_lines = all_lines.len();
14241    if total_lines > 0 && start > total_lines {
14242        bail!(
14243            "--start {} is beyond end of {} ({} lines)",
14244            start,
14245            file_display,
14246            total_lines
14247        );
14248    }
14249    let requested_end = end.unwrap_or_else(|| start.saturating_add(lines).saturating_sub(1));
14250    let end_line = requested_end.min(total_lines);
14251    let max_bytes = budget.preview_bytes();
14252    let preview = if total_lines == 0 {
14253        Vec::new()
14254    } else {
14255        all_lines[(start - 1)..end_line]
14256            .iter()
14257            .enumerate()
14258            .map(|(idx, line)| SourceLinePreview {
14259                line: start + idx,
14260                text: truncate_for_budget(line, max_bytes),
14261            })
14262            .collect()
14263    };
14264
14265    let mut warnings = Vec::new();
14266    let max_items = budget.preview_items();
14267    let symbols = load_source_symbols(
14268        &root,
14269        &file_abs,
14270        &file_display,
14271        scope,
14272        start,
14273        end_line,
14274        max_items,
14275        max_bytes,
14276        &mut warnings,
14277    );
14278    let summaries =
14279        load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
14280
14281    let effective_lines = end_line.saturating_sub(start).saturating_add(1).max(1);
14282    let expand = SourceExpandCommands {
14283        before: (start > 1).then(|| {
14284            let before_start = start.saturating_sub(lines).max(1);
14285            source_read_command(&root, &file_display, before_start, start - before_start)
14286        }),
14287        after: (end_line < total_lines)
14288            .then(|| source_read_command(&root, &file_display, end_line + 1, lines)),
14289        file: source_read_command(&root, &file_display, 1, total_lines.max(effective_lines)),
14290    };
14291
14292    let report = SourceReadReport {
14293        handle: stable_handle("swin", &format!("{file_display}:{start}:{end_line}")),
14294        root: root.to_string_lossy().to_string(),
14295        file: file_display,
14296        range: SourceRangePreview {
14297            start,
14298            end: end_line,
14299            total_lines,
14300            truncated_before: start > 1,
14301            truncated_after: end_line < total_lines,
14302        },
14303        preview,
14304        symbols,
14305        summaries,
14306        expand,
14307        warnings,
14308    };
14309
14310    if format.json_output {
14311        let truncated = report.range.truncated_before || report.range.truncated_after;
14312        let follow_up = [
14313            report.expand.before.clone(),
14314            report.expand.after.clone(),
14315            Some(report.expand.file.clone()),
14316        ]
14317        .into_iter()
14318        .flatten()
14319        .collect::<Vec<_>>();
14320        print_json_or_envelope(
14321            &report,
14322            &format,
14323            "source-read",
14324            "window",
14325            ToolEnvelopeSummary {
14326                text: format!(
14327                    "source window {}:{}-{}",
14328                    report.file, report.range.start, report.range.end
14329                ),
14330                metrics: vec![
14331                    envelope_metric("lines", report.preview.len()),
14332                    envelope_metric("symbols", report.symbols.len()),
14333                    envelope_metric("summaries", report.summaries.len()),
14334                ],
14335            },
14336            truncated,
14337            follow_up,
14338        )?;
14339    } else if format.compact {
14340        println!(
14341            "source {}:{}-{} / {} handle:{}",
14342            report.file,
14343            report.range.start,
14344            report.range.end,
14345            report.range.total_lines,
14346            report.handle
14347        );
14348        for line in &report.preview {
14349            println!("{:>5} {}", line.line, line.text);
14350        }
14351        if !report.symbols.is_empty() {
14352            println!("syms[{}]:", report.symbols.len());
14353            for symbol in &report.symbols {
14354                println!("  {} {}:{}", symbol.name, symbol.file, symbol.line);
14355            }
14356        }
14357        if report.range.truncated_before || report.range.truncated_after {
14358            println!("expand: {}", report.expand.file);
14359        }
14360    } else {
14361        println!(
14362            "Source window `{}` lines {}-{} of {} ({})",
14363            report.file,
14364            report.range.start,
14365            report.range.end,
14366            report.range.total_lines,
14367            report.handle
14368        );
14369        for line in &report.preview {
14370            println!("{:>5} | {}", line.line, line.text);
14371        }
14372        if !report.symbols.is_empty() {
14373            println!();
14374            println!("Symbol refs:");
14375            for symbol in &report.symbols {
14376                println!(
14377                    "  {} `{}` {}:{} — {}",
14378                    symbol.handle, symbol.name, symbol.file, symbol.line, symbol.expand
14379                );
14380            }
14381        }
14382        if !report.summaries.is_empty() {
14383            println!();
14384            println!("Summary refs:");
14385            for summary in &report.summaries {
14386                println!(
14387                    "  {} `{}` — {}",
14388                    summary.handle, summary.symbol_name, summary.expand
14389                );
14390            }
14391        }
14392        if report.range.truncated_before || report.range.truncated_after {
14393            println!();
14394            println!("Expand:");
14395            if let Some(before) = &report.expand.before {
14396                println!("  before: {}", before);
14397            }
14398            if let Some(after) = &report.expand.after {
14399                println!("  after: {}", after);
14400            }
14401            println!("  file:   {}", report.expand.file);
14402        }
14403        for warning in &report.warnings {
14404            eprintln!("warning: {warning}");
14405        }
14406    }
14407
14408    Ok(())
14409}
14410
14411#[allow(clippy::too_many_arguments)]
14412#[derive(Serialize)]
14413struct ExplainBudgetDefinitionPreview {
14414    handle: String,
14415    #[serde(skip_serializing_if = "Option::is_none")]
14416    tag_alias: Option<String>,
14417    kind: String,
14418    name: String,
14419    file: String,
14420    line: i64,
14421    expand: String,
14422}
14423
/// Budget-limited preview of one caller or callee edge of the explained symbol.
#[derive(Serialize)]
struct ExplainBudgetEdgePreview {
    /// Handle taken from the compact symbol reference (`ecall` for callers,
    /// `eces` for callees).
    handle: String,
    /// Tag alias from the compact symbol reference; omitted from the
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Caller or callee name taken from the compact symbol reference.
    name: String,
    /// The edge's caller file (budget-truncated by the producer).
    file: String,
    /// The edge's call-site line, passed through unchanged.
    line: i64,
    /// `tsift explain` command line for the caller or callee.
    expand: String,
}
14434
/// Budget-limited preview of the community attached to the explained symbol.
#[derive(Serialize)]
struct ExplainBudgetCommunityPreview {
    /// Total number of members in the community.
    size: usize,
    /// Names of the first members (item- and byte-capped by the producer).
    members: Vec<String>,
}
14440
/// Budget-limited `explain` report: capped previews plus the totals needed to
/// tell how much was left out.
#[derive(Serialize)]
struct ExplainBudgetReport {
    /// The symbol that was explained.
    symbol: String,
    /// Item cap applied to each preview list.
    max_items: usize,
    /// Byte cap applied to previewed strings.
    max_bytes: usize,
    /// Number of definitions before the item cap.
    definition_total: usize,
    /// Caller total as supplied by the caller of the report builder.
    callers_total: usize,
    /// Passed through from the report builder's caller.
    /// NOTE(review): presumably set when an upstream `--limit` cut the caller
    /// list — confirm.
    callers_truncated_by_limit: bool,
    /// Callee total as supplied by the caller of the report builder.
    callees_total: usize,
    /// Passed through from the report builder's caller.
    /// NOTE(review): presumably set when an upstream `--limit` cut the callee
    /// list — confirm.
    callees_truncated_by_limit: bool,
    /// True when any preview list holds fewer entries than its total.
    truncated: bool,
    /// Capped definition previews.
    definitions: Vec<ExplainBudgetDefinitionPreview>,
    /// Capped caller previews.
    callers: Vec<ExplainBudgetEdgePreview>,
    /// Capped callee previews.
    callees: Vec<ExplainBudgetEdgePreview>,
    /// Community preview; omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    community: Option<ExplainBudgetCommunityPreview>,
}
14458
14459#[allow(clippy::too_many_arguments)]
14460pub(crate) fn build_explain_budget_report(
14461    symbol: &str,
14462    _root: &Path,
14463    symbols: &[index::StoredSymbol],
14464    callers: &[index::StoredEdge],
14465    callers_total: usize,
14466    callers_truncated_by_limit: bool,
14467    callees: &[index::StoredEdge],
14468    callees_total: usize,
14469    callees_truncated_by_limit: bool,
14470    community: Option<&graph::Community>,
14471    budget: ResponseBudget,
14472) -> ExplainBudgetReport {
14473    let max_items = budget.preview_items();
14474    let max_bytes = budget.preview_bytes();
14475    let definitions = symbols
14476        .iter()
14477        .take(max_items)
14478        .map(|entry| {
14479            let symbol_ref = build_compact_symbol_ref(
14480                "edef",
14481                &format!(
14482                    "{}:{}:{}:{}",
14483                    entry.kind, entry.name, entry.file, entry.line
14484                ),
14485                &entry.name,
14486                entry.tags.as_deref(),
14487                max_bytes,
14488            );
14489            ExplainBudgetDefinitionPreview {
14490                handle: symbol_ref.handle,
14491                tag_alias: symbol_ref.tag_alias,
14492                kind: entry.kind.clone(),
14493                name: symbol_ref.name,
14494                file: truncate_for_budget(&entry.file, max_bytes),
14495                line: entry.line,
14496                expand: format!(
14497                    "tsift search {} --exact --path {} --limit 20",
14498                    shell_quote(&entry.name),
14499                    shell_quote(&entry.file)
14500                ),
14501            }
14502        })
14503        .collect();
14504    let callers_preview: Vec<ExplainBudgetEdgePreview> = callers
14505        .iter()
14506        .take(max_items)
14507        .map(|entry| {
14508            let symbol_ref = build_compact_symbol_ref(
14509                "ecall",
14510                &format!(
14511                    "{}:{}:{}:{}",
14512                    entry.caller_name, entry.caller_file, entry.call_site_line, symbol
14513                ),
14514                &entry.caller_name,
14515                None,
14516                max_bytes,
14517            );
14518            ExplainBudgetEdgePreview {
14519                handle: symbol_ref.handle,
14520                tag_alias: symbol_ref.tag_alias,
14521                name: symbol_ref.name,
14522                file: truncate_for_budget(&entry.caller_file, max_bytes),
14523                line: entry.call_site_line,
14524                expand: format!(
14525                    "tsift explain {} --path {} --limit 0",
14526                    shell_quote(&entry.caller_name),
14527                    shell_quote(&entry.caller_file)
14528                ),
14529            }
14530        })
14531        .collect();
14532    let callees_preview: Vec<ExplainBudgetEdgePreview> = callees
14533        .iter()
14534        .take(max_items)
14535        .map(|entry| {
14536            let symbol_ref = build_compact_symbol_ref(
14537                "eces",
14538                &format!(
14539                    "{}:{}:{}:{}",
14540                    entry.callee_name, entry.caller_file, entry.call_site_line, symbol
14541                ),
14542                &entry.callee_name,
14543                None,
14544                max_bytes,
14545            );
14546            ExplainBudgetEdgePreview {
14547                handle: symbol_ref.handle,
14548                tag_alias: symbol_ref.tag_alias,
14549                name: symbol_ref.name,
14550                file: truncate_for_budget(&entry.caller_file, max_bytes),
14551                line: entry.call_site_line,
14552                expand: format!(
14553                    "tsift explain {} --path {} --limit 0",
14554                    shell_quote(&entry.callee_name),
14555                    shell_quote(&entry.caller_file)
14556                ),
14557            }
14558        })
14559        .collect();
14560    let community_preview = community.map(|entry| ExplainBudgetCommunityPreview {
14561        size: entry.members.len(),
14562        members: entry
14563            .members
14564            .iter()
14565            .take(max_items)
14566            .map(|member| truncate_for_budget(&member.name, max_bytes))
14567            .collect(),
14568    });
14569
14570    ExplainBudgetReport {
14571        symbol: symbol.to_string(),
14572        max_items,
14573        max_bytes,
14574        definition_total: symbols.len(),
14575        callers_total,
14576        callers_truncated_by_limit,
14577        callees_total,
14578        callees_truncated_by_limit,
14579        truncated: symbols.len() > max_items
14580            || callers_total > callers_preview.len()
14581            || callees_total > callees_preview.len()
14582            || community
14583                .map(|entry| entry.members.len() > max_items)
14584                .unwrap_or(false),
14585        definitions,
14586        callers: callers_preview,
14587        callees: callees_preview,
14588        community: community_preview,
14589    }
14590}
14591
14592pub(crate) fn print_explain_budget_human(report: &ExplainBudgetReport) {
14593    println!(
14594        "explain-budget sym:{} defs:{}/{} crs:{}/{} ces:{}/{}",
14595        shell_quote(&report.symbol),
14596        report.definitions.len(),
14597        report.definition_total,
14598        report.callers.len(),
14599        report.callers_total,
14600        report.callees.len(),
14601        report.callees_total
14602    );
14603    for entry in &report.definitions {
14604        println!(
14605            "def {} {} {}:{} expand:{}",
14606            format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
14607            entry.kind,
14608            entry.file,
14609            entry.line,
14610            entry.expand
14611        );
14612    }
14613    for entry in &report.callers {
14614        println!(
14615            "caller {} {}:{} expand:{}",
14616            format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
14617            entry.file,
14618            entry.line,
14619            entry.expand
14620        );
14621    }
14622    for entry in &report.callees {
14623        println!(
14624            "callee {} {}:{} expand:{}",
14625            format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
14626            entry.file,
14627            entry.line,
14628            entry.expand
14629        );
14630    }
14631    if let Some(community) = &report.community {
14632        println!(
14633            "community size:{} members:{}",
14634            community.size,
14635            community.members.join(", ")
14636        );
14637    }
14638    if report.truncated {
14639        println!(
14640            "budget truncated items:{} bytes:{}",
14641            report.max_items, report.max_bytes
14642        );
14643    }
14644}
14645
/// Directory names that the tagpath audit flags with a `skip_dir:<name>`
/// policy hint when they appear among a file's parent path components.
/// NOTE(review): presumably mirrors tagpath's own hard-coded `SKIP_DIRS` —
/// confirm the two lists are kept in sync.
///
/// Background on the audit: it reconciles the tsift symbol index against the
/// tagpath `.naming/index.json` source set and reports files covered by one
/// but not the other.
///
/// Today silent recall loss happens when tagpath's `[exclude]` / `extends`
/// chain or its hard-coded `SKIP_DIRS` skip files or languages that tsift
/// still indexes — the tsift symbols in those files cannot resolve a
/// `tagpath_handle` even with a fresh tagpath index. This audit surfaces
/// the diff so operators can decide whether to broaden the tagpath walk,
/// add an `[exclude]` to tsift, or accept the gap.
const TAGPATH_AUDIT_SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    "__pycache__",
    ".venv",
    "vendor",
];
14663
/// Baseline file extensions (lowercase, without the leading dot) that the
/// tagpath audit treats as supported. Extensions declared by grammars in the
/// project's `.naming.toml` are added on top of this list at runtime.
const TAGPATH_AUDIT_SOURCE_EXTENSIONS: &[&str] = &[
    "rs", "py", "ts", "js", "go", "java", "rb", "c", "cpp", "h", "hpp", "cs", "swift", "kt",
    "scala", "zig", "nim", "ex", "exs", "erl", "hs", "ml", "clj", "r", "lua", "php", "pl", "d",
    "cr", "dart", "jl", "v", "odin", "gleam", "rkt", "scm", "lisp", "lsp", "f", "fs", "fsi", "fsx",
    "sh", "bash", "zsh", "sql", "css", "tsx",
];
14670
14671pub(crate) fn tagpath_audit_supported_extensions(root: &Path) -> BTreeSet<String> {
14672    let mut extensions = TAGPATH_AUDIT_SOURCE_EXTENSIONS
14673        .iter()
14674        .map(|ext| (*ext).to_string())
14675        .collect::<BTreeSet<_>>();
14676
14677    let config_path = root.join(".naming.toml");
14678    if !config_path.exists() {
14679        return extensions;
14680    }
14681
14682    match tagpath::config::resolve(&config_path) {
14683        Ok(config) => {
14684            if let Some(grammars) = config.grammars {
14685                for grammar in grammars.languages.values() {
14686                    for ext in &grammar.extensions {
14687                        if let Some(normalized) = normalize_extension(ext) {
14688                            extensions.insert(normalized);
14689                        }
14690                    }
14691                }
14692            }
14693        }
14694        Err(err) => {
14695            eprintln!("tagpath_policy_hint_config_unreadable: {err}");
14696        }
14697    }
14698    extensions
14699}
14700
14701pub(crate) fn tagpath_audit_policy_hints(
14702    rel_path: &str,
14703    supported_extensions: &BTreeSet<String>,
14704) -> Vec<String> {
14705    let path = Path::new(rel_path);
14706    let mut hints = BTreeSet::new();
14707    if let Some(parent) = path.parent() {
14708        for component in parent.components() {
14709            if let std::path::Component::Normal(name) = component {
14710                let name = name.to_string_lossy();
14711                if TAGPATH_AUDIT_SKIP_DIRS.contains(&name.as_ref()) {
14712                    hints.insert(format!("skip_dir:{name}"));
14713                }
14714            }
14715        }
14716    }
14717    if path
14718        .extension()
14719        .and_then(|ext| ext.to_str())
14720        .and_then(normalize_extension)
14721        .is_some_and(|ext| !supported_extensions.contains(&ext))
14722    {
14723        hints.insert("extension_unsupported".to_string());
14724    }
14725    hints.into_iter().collect()
14726}
14727
/// Normalize a file extension: trim surrounding whitespace, strip leading
/// dots, and lowercase ASCII letters.
///
/// Returns `None` when nothing remains after trimming and stripping.
fn normalize_extension(ext: &str) -> Option<String> {
    let bare = ext.trim().trim_start_matches('.');
    // Lowercasing cannot change emptiness, so test before allocating.
    (!bare.is_empty()).then(|| bare.to_ascii_lowercase())
}
14736
14737pub(crate) fn diff_digest_status_label(status: diff_digest::DiffDigestFileStatus) -> &'static str {
14738    match status {
14739        diff_digest::DiffDigestFileStatus::Added => "added",
14740        diff_digest::DiffDigestFileStatus::Modified => "modified",
14741        diff_digest::DiffDigestFileStatus::Deleted => "deleted",
14742    }
14743}
14744
14745pub(crate) fn diff_digest_summary_label(
14746    state: diff_digest::DiffDigestSummaryState,
14747) -> &'static str {
14748    match state {
14749        diff_digest::DiffDigestSummaryState::Current => "current",
14750        diff_digest::DiffDigestSummaryState::Stale => "stale",
14751        diff_digest::DiffDigestSummaryState::Missing => "missing",
14752        diff_digest::DiffDigestSummaryState::Unavailable => "unavailable",
14753    }
14754}
14755
14756fn test_digest_summary_label(state: test_digest::TestDigestSummaryState) -> &'static str {
14757    match state {
14758        test_digest::TestDigestSummaryState::Current => "current",
14759        test_digest::TestDigestSummaryState::Stale => "stale",
14760        test_digest::TestDigestSummaryState::Missing => "missing",
14761        test_digest::TestDigestSummaryState::Unavailable => "unavailable",
14762    }
14763}
14764
14765fn log_digest_summary_label(state: log_digest::LogDigestSummaryState) -> &'static str {
14766    match state {
14767        log_digest::LogDigestSummaryState::Current => "current",
14768        log_digest::LogDigestSummaryState::Stale => "stale",
14769        log_digest::LogDigestSummaryState::Missing => "missing",
14770        log_digest::LogDigestSummaryState::Unavailable => "unavailable",
14771    }
14772}
14773
14774pub(crate) fn diff_digest_mode_label(mode: diff_digest::DiffDigestMode) -> &'static str {
14775    match mode {
14776        diff_digest::DiffDigestMode::WorkingTree => "worktree",
14777        diff_digest::DiffDigestMode::Cached => "cached",
14778        diff_digest::DiffDigestMode::Revision => "revision",
14779    }
14780}
14781
14782pub(crate) fn diff_digest_mode_display(report: &diff_digest::DiffDigestReport) -> String {
14783    match (&report.mode, &report.revision) {
14784        (diff_digest::DiffDigestMode::WorkingTree, _) => "working tree".to_string(),
14785        (diff_digest::DiffDigestMode::Cached, _) => "staged index".to_string(),
14786        (diff_digest::DiffDigestMode::Revision, Some(revision)) => {
14787            format!("revision {revision}")
14788        }
14789        (diff_digest::DiffDigestMode::Revision, None) => "revision".to_string(),
14790    }
14791}
14792
14793pub(crate) fn diff_digest_empty_message(report: &diff_digest::DiffDigestReport) -> String {
14794    match (&report.mode, &report.revision) {
14795        (diff_digest::DiffDigestMode::WorkingTree, _) => "No git changes found.".to_string(),
14796        (diff_digest::DiffDigestMode::Cached, _) => "No staged git changes found.".to_string(),
14797        (diff_digest::DiffDigestMode::Revision, Some(revision)) => {
14798            format!("No diff found for revision {revision}.")
14799        }
14800        (diff_digest::DiffDigestMode::Revision, None) => "No revision diff found.".to_string(),
14801    }
14802}
14803
14804fn cmd_impact(
14805    path: &Path,
14806    cached: bool,
14807    revision: Option<&str>,
14808    scope: Option<&str>,
14809    limit: usize,
14810    format: OutputFormat,
14811) -> Result<()> {
14812    let report = impact::compute(
14813        path,
14814        impact::ImpactOptions {
14815            cached,
14816            revision,
14817            scope,
14818            limit,
14819        },
14820    )?;
14821    if format.json_output {
14822        println!(
14823            "{}",
14824            to_json_schema(&report, format.pretty, format.terse, format.schema)?
14825        );
14826        return Ok(());
14827    }
14828
14829    if format.compact {
14830        println!(
14831            "impact mode:{} changed:{} symbols:{} tests:{}/{}",
14832            diff_digest_mode_label(report.mode),
14833            report.changed_files.len(),
14834            report.changed_symbols.len(),
14835            report.affected_tests.len(),
14836            report.affected_tests_total
14837        );
14838        for target in &report.affected_tests {
14839            println!(
14840                "{} reasons:{} command:{}",
14841                target.path,
14842                target.reasons.len(),
14843                target.commands.join(" && ")
14844            );
14845        }
14846        for warning in &report.warnings {
14847            println!("warning {warning}");
14848        }
14849        return Ok(());
14850    }
14851
14852    println!("Impact ({})", diff_digest_mode_label(report.mode));
14853    println!("  changed files:          {}", report.changed_files.len());
14854    println!("  changed symbols:        {}", report.changed_symbols.len());
14855    println!(
14856        "  affected tests:         {}/{}",
14857        report.affected_tests.len(),
14858        report.affected_tests_total
14859    );
14860    for target in &report.affected_tests {
14861        println!();
14862        println!("{}", target.path);
14863        for reason in &target.reasons {
14864            println!("  - {reason}");
14865        }
14866        if !target.symbols.is_empty() {
14867            println!("  symbols: {}", target.symbols.join(", "));
14868        }
14869        for command in &target.commands {
14870            println!("  run: {}", command);
14871        }
14872    }
14873    for warning in &report.warnings {
14874        println!("warning: {warning}");
14875    }
14876    Ok(())
14877}
14878
14879pub(crate) fn render_test_digest_from_input(
14880    path: &Path,
14881    input: &str,
14882    runner: Option<&str>,
14883    format: OutputFormat,
14884) -> Result<()> {
14885    let report = test_digest::compute(path, input, runner)?;
14886    if format.json_output {
14887        println!(
14888            "{}",
14889            to_json_schema(&report, format.pretty, format.terse, format.schema)?
14890        );
14891        return Ok(());
14892    }
14893
14894    if report.failure_groups.is_empty() {
14895        println!("No failures detected (runner: {}).", report.runner);
14896        for warning in &report.warnings {
14897            println!("warning: {warning}");
14898        }
14899        return Ok(());
14900    }
14901
14902    if format.compact {
14903        println!(
14904            "test runner:{} failures:{} groups:{} passed:{} failed:{} skipped:{}",
14905            report.runner,
14906            report.failures,
14907            report.grouped_failures,
14908            report.counts.passed.unwrap_or(0),
14909            report.counts.failed.unwrap_or(report.grouped_failures),
14910            report.counts.skipped.unwrap_or(0),
14911        );
14912        for failure in &report.failure_groups {
14913            let tests = truncate_for_compact(&failure.tests.join(","), 60);
14914            let location = match (&failure.path, failure.line) {
14915                (Some(path), Some(line)) => format!("{path}:{line}"),
14916                (Some(path), None) => path.clone(),
14917                _ => "-".to_string(),
14918            };
14919            println!(
14920                "{} tests:{} count:{} summaries:{} msg:{}",
14921                location,
14922                tests,
14923                failure.occurrences,
14924                test_digest_summary_label(failure.summary_state),
14925                truncate_for_compact(&failure.message, 80)
14926            );
14927        }
14928        for warning in &report.warnings {
14929            println!("warning: {warning}");
14930        }
14931        return Ok(());
14932    }
14933
14934    println!("Test digest ({})", report.runner);
14935    println!("  failures:        {}", report.failures);
14936    println!("  failure groups:  {}", report.grouped_failures);
14937    if let Some(passed) = report.counts.passed {
14938        println!("  passed:          {}", passed);
14939    }
14940    if let Some(failed) = report.counts.failed {
14941        println!("  failed:          {}", failed);
14942    }
14943    if let Some(skipped) = report.counts.skipped {
14944        println!("  skipped:         {}", skipped);
14945    }
14946
14947    for failure in &report.failure_groups {
14948        println!();
14949        match (&failure.path, failure.line, failure.column) {
14950            (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
14951            (Some(path), Some(line), None) => println!("{path}:{line}"),
14952            (Some(path), None, _) => println!("{path}"),
14953            (None, _, _) => println!("(no file anchor)"),
14954        }
14955        println!("  tests: {}", failure.tests.join(", "));
14956        println!("  occurrences: {}", failure.occurrences);
14957        println!("  message: {}", failure.message);
14958        println!(
14959            "  cached summaries: {}",
14960            test_digest_summary_label(failure.summary_state)
14961        );
14962        for summary in &failure.current_summaries {
14963            println!(
14964                "    - {}: {}",
14965                summary.symbol,
14966                truncate_for_compact(&summary.summary, 160)
14967            );
14968        }
14969    }
14970    for warning in &report.warnings {
14971        println!("warning: {warning}");
14972    }
14973    Ok(())
14974}
14975
/// Risk level attached to conflict-matrix candidates, worker prompt packets, and
/// target pairs.
///
/// The derived `Ord` follows declaration order (`Low < Medium < High < FailClosed`),
/// so reordering variants changes comparisons. Serde serializes the variants in
/// `snake_case` (for example `fail_closed`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
#[serde(rename_all = "snake_case")]
enum ConflictMatrixRisk {
    Low,
    Medium,
    High,
    FailClosed,
}
14984
/// Lists of overlapping items grouped by category.
///
/// Stored on a candidate as `staged_overlap`. `conflict_matrix_staged_overlap` fills
/// `files`, `symbols`, and `config_files` and leaves `tests` empty.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixOverlap {
    files: Vec<String>,
    symbols: Vec<String>,
    tests: Vec<String>,
    config_files: Vec<String>,
}
14992
/// A source range reference built from a graph node's string properties by
/// `conflict_matrix_source_handle`.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixSourceHandle {
    // Falls back to the node id when the node has no `handle` property.
    handle: String,
    file: String,
    // Defaults to 1 when the `start` property is missing or not a number.
    start: usize,
    // Defaults to `start` when the `end` property is missing or not a number.
    end: usize,
    reason: String,
    expand: String,
}
15002
/// A reference to a semantic graph node, built by `conflict_matrix_semantic_ref`.
///
/// `kind` and `label` are copied from the node; `source_file` and `source_symbol`
/// are left out of the serialized output when they are `None`.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixSemanticRef {
    handle: String,
    kind: String,
    label: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_file: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_symbol: Option<String>,
    expand: String,
}
15014
/// Numeric budget figures carried as the `token_budget` field of ownership blocks and
/// worker prompt packets. All counters default to zero.
// NOTE(review): units are implied by the field names only (tokens, lines, bytes);
// confirm against the code that fills this struct.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixTokenBudget {
    prompt_estimated_tokens: usize,
    max_prompt_tokens: usize,
    source_window_count: usize,
    source_window_lines: usize,
    max_context_bytes: usize,
}
15023
/// String lists carried as the `required_context` field of candidates and worker
/// prompt packets. Every list defaults to empty.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixRequiredContext {
    read_only_files: Vec<String>,
    source_handles: Vec<String>,
    worker_context_handles: Vec<String>,
    semantic_handles: Vec<String>,
    expansion_commands: Vec<String>,
}
15032
/// Graph identifiers and handle lists carried as the `graph_handles` field of
/// candidates and worker prompt packets.
///
/// `projection_hash` is left out of the serialized output when it is `None`.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixGraphHandles {
    target_node_id: String,
    evidence_packet_id: String,
    worker_prompt_packet_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    source_handles: Vec<String>,
    worker_context_handles: Vec<String>,
    semantic_handles: Vec<String>,
}
15044
/// Aggregated feedback from earlier worker results for one target.
///
/// `conflict_matrix_worker_feedback` fills the counters, the deduplicated lists, the
/// outcome history, `repeated_blockage`, and `warnings`. It initializes
/// `stale_expected_tests`, `follow_up_debt`, and the `closure_rank_*` fields to
/// empty/zero; they are presumably populated by a later pass — confirm against the caller.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixWorkerFeedback {
    // Number of worker result nodes that were aggregated.
    total: usize,
    // Results whose `status` property is exactly "completed".
    completed: usize,
    // Results whose `status` property is exactly "blocked".
    blocked: usize,
    touched_files: Vec<String>,
    expected_tests: Vec<String>,
    follow_up_ids: Vec<String>,
    outcome_history: Vec<String>,
    // True when more than one result is blocked.
    repeated_blockage: bool,
    stale_expected_tests: Vec<String>,
    follow_up_debt: Vec<String>,
    closure_rank_score: usize,
    closure_rank_reasons: Vec<String>,
    // Omitted from the serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
15062
/// Ownership description attached to a candidate as its `ownership` field.
///
/// Carries a static `contract_version` tag, owned/read-only/forbidden file lists,
/// expected tests, expansion commands, a token budget, and the rendered `prompt` text.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixOwnershipBlock {
    contract_version: &'static str,
    title: String,
    owned_files: Vec<String>,
    owned_symbols: Vec<String>,
    read_only_context: Vec<String>,
    read_only_files: Vec<String>,
    forbidden_files: Vec<String>,
    expected_tests: Vec<String>,
    expansion_commands: Vec<String>,
    token_budget: ConflictMatrixTokenBudget,
    prompt: String,
}
15077
/// Serializable worker prompt packet, listed in `ConflictMatrixReport` as
/// `worker_prompt_packets`.
///
/// Many fields have same-named counterparts on `ConflictMatrixCandidate` and
/// `ConflictMatrixOwnershipBlock`. `projection_hash` is left out of the serialized
/// output when it is `None`.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixWorkerPromptPacket {
    contract_version: &'static str,
    packet_id: String,
    target: String,
    rank: usize,
    risk: ConflictMatrixRisk,
    previously_completed: bool,
    parallel_safe: bool,
    blocks: Vec<String>,
    blocked_by: Vec<String>,
    required_context: ConflictMatrixRequiredContext,
    graph_handles: ConflictMatrixGraphHandles,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    title: String,
    owned_files: Vec<String>,
    owned_symbols: Vec<String>,
    read_only_context: Vec<String>,
    forbidden_files: Vec<String>,
    expected_tests: Vec<String>,
    expansion_commands: Vec<String>,
    token_budget: ConflictMatrixTokenBudget,
    semantic_dispatch_score: usize,
    semantic_dispatch_reasons: Vec<String>,
    worker_feedback: ConflictMatrixWorkerFeedback,
    prompt: String,
}
15106
/// One ranked target in the conflict matrix, listed in `ConflictMatrixReport` as
/// `candidates`.
///
/// Combines target identity, risk, ownership (files/symbols), affected tests,
/// semantic references, worker feedback, source handles, staged overlap, and the
/// ownership block. `projection_hash` is left out of the serialized output when it is
/// `None`.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixCandidate {
    rank: usize,
    target: String,
    evidence_packet_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    target_node_id: String,
    target_kind: String,
    target_label: String,
    risk: ConflictMatrixRisk,
    previously_completed: bool,
    parallel_safe: bool,
    blocks: Vec<String>,
    blocked_by: Vec<String>,
    required_context: ConflictMatrixRequiredContext,
    graph_handles: ConflictMatrixGraphHandles,
    risk_score: usize,
    risk_reasons: Vec<String>,
    owned_files: Vec<String>,
    owned_symbols: Vec<String>,
    config_files: Vec<String>,
    // Compared against `worker_feedback.expected_tests` by
    // `stale_expected_tests_for_candidate`.
    affected_tests: Vec<String>,
    worker_context: Vec<String>,
    semantic_related: Vec<ConflictMatrixSemanticRef>,
    semantic_dispatch_score: usize,
    semantic_dispatch_reasons: Vec<String>,
    worker_feedback: ConflictMatrixWorkerFeedback,
    source_handles: Vec<ConflictMatrixSourceHandle>,
    worker_context_handles: Vec<String>,
    staged_overlap: ConflictMatrixOverlap,
    ownership: ConflictMatrixOwnershipBlock,
}
15140
/// Pairwise comparison between two targets (`left` and `right`), listed in
/// `ConflictMatrixReport` as `conflicts`.
///
/// Records the shared files, symbols, tests, and config files together with a risk
/// level, a numeric score, and a textual verdict.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixPair {
    left: String,
    right: String,
    risk: ConflictMatrixRisk,
    risk_score: usize,
    shared_files: Vec<String>,
    shared_symbols: Vec<String>,
    shared_tests: Vec<String>,
    shared_config_files: Vec<String>,
    verdict: String,
}
15153
/// The `inputs` section of `ConflictMatrixReport`.
///
/// Holds the evidence targets and packets, the shared-preparation and cache
/// summaries, per-phase preparation timings, and three command strings.
#[derive(Serialize)]
struct ConflictMatrixInputSummary {
    graph_db_evidence_targets: Vec<String>,
    evidence_packets: Vec<ConflictMatrixEvidencePacketSummary>,
    shared_preparation: ConflictMatrixSharedPreparationSummary,
    preparation_cache: ConflictMatrixPreparationCacheSummary,
    preparation_timings: Vec<GraphDbBackendEvalPhaseTiming>,
    context_pack_command: String,
    cached_diff_command: String,
    impact_command: String,
}
15165
/// A source window (`file`, `start`, `end`) copied from a context pack's exploration
/// section by `ConflictMatrixPreparedContext::from_context_pack`.
///
/// Derives both `Serialize` and `Deserialize`, so it can round-trip through serde.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedSourceWindow {
    file: String,
    start: usize,
    end: usize,
}
15172
/// Owned subset of a `ContextPackReport`, built by `from_context_pack`.
///
/// `prompt_targets` and `worker_context` are scanned for `#`-prefixed target
/// references by `conflict_targets_from_context_pack`. Derives both `Serialize` and
/// `Deserialize`, so it can round-trip through serde.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedContext {
    target: String,
    target_kind: String,
    status_reminders: Vec<String>,
    prompt_targets: Vec<String>,
    touched_files: Vec<String>,
    touched_symbols: Vec<String>,
    files_changed: usize,
    // One summary string per worker-context entry of the context pack.
    worker_context: Vec<String>,
    source_windows: Vec<ConflictMatrixPreparedSourceWindow>,
}
15185
15186impl ConflictMatrixPreparedContext {
15187    fn from_context_pack(context_pack: &ContextPackReport) -> Self {
15188        Self {
15189            target: context_pack.target.clone(),
15190            target_kind: context_pack.target_kind.clone(),
15191            status_reminders: context_pack.status_reminders.clone(),
15192            prompt_targets: context_pack.next_context.prompt_targets.clone(),
15193            touched_files: context_pack.next_context.touched_files.clone(),
15194            touched_symbols: context_pack.next_context.touched_symbols.clone(),
15195            files_changed: context_pack.diff_digest.files_changed,
15196            worker_context: context_pack
15197                .exploration
15198                .worker_context
15199                .iter()
15200                .map(|worker| worker.summary.clone())
15201                .collect(),
15202            source_windows: context_pack
15203                .exploration
15204                .source_windows
15205                .iter()
15206                .map(|window| ConflictMatrixPreparedSourceWindow {
15207                    file: window.file.clone(),
15208                    start: window.start,
15209                    end: window.end,
15210                })
15211                .collect(),
15212        }
15213    }
15214}
15215
/// Summary of one evidence packet, listed in `ConflictMatrixInputSummary` as
/// `evidence_packets`.
///
/// `projection_hash` is left out of the serialized output when it is `None`.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixEvidencePacketSummary {
    target: String,
    packet_id: String,
    target_node_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    replay_command: String,
}
15225
/// Counters and cache status for the shared preparation step, stored in
/// `ConflictMatrixInputSummary` as `shared_preparation`.
// NOTE(review): what each counter measures is implied by its name only; confirm
// against the code that fills this struct.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixSharedPreparationSummary {
    evidence_cache_status: String,
    graph_nodes: usize,
    graph_edges: usize,
    evidence_packets: usize,
    source_handles: usize,
    worker_context: usize,
    worker_results: usize,
    semantic_rows: usize,
    dispatch_trace_snapshot_nodes: usize,
    dispatch_trace_snapshot_edges: usize,
}
15239
/// Identity and status of the preparation cache entry, stored in
/// `ConflictMatrixInputSummary` as `preparation_cache`.
///
/// All fields are plain strings: a version, a key, a status, and three watermarks.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparationCacheSummary {
    version: String,
    key: String,
    status: String,
    source_watermark: String,
    document_watermark: String,
    staged_diff_watermark: String,
}
15249
/// The `context_pack` section of `ConflictMatrixReport`.
///
/// Has the same field names as `ConflictMatrixPreparedContext`, except that
/// `source_windows` is a list of strings rather than structured windows.
#[derive(Serialize)]
struct ConflictMatrixContextSummary {
    target: String,
    target_kind: String,
    prompt_targets: Vec<String>,
    touched_files: Vec<String>,
    touched_symbols: Vec<String>,
    files_changed: usize,
    worker_context: Vec<String>,
    source_windows: Vec<String>,
    status_reminders: Vec<String>,
}
15262
/// Per-target entry of `ConflictMatrixReport::per_target_fail_closed`.
///
/// Records the target, whether it was previously completed, the risk reasons, its
/// owned files, and how many source handles it has.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixPerTargetFailClosed {
    target: String,
    previously_completed: bool,
    risk_reasons: Vec<String>,
    owned_files: Vec<String>,
    source_handle_count: usize,
}
15271
/// The `orchestration` section of `ConflictMatrixReport`.
///
/// Carries a static `contract_version` tag, the projection freshness report, and
/// several string lists (hashes, packet ids, decisions, ownership blocks, commands).
#[derive(Serialize)]
struct ConflictMatrixOrchestrationObservability {
    contract_version: &'static str,
    projection_freshness: GraphDbFreshnessReport,
    projection_hashes: Vec<String>,
    evidence_packet_ids: Vec<String>,
    conflict_matrix_decisions: Vec<String>,
    worker_ownership_blocks: Vec<String>,
    follow_up_commands: Vec<String>,
}
15282
/// Top-level serializable conflict-matrix report.
///
/// Field order is the serialized order. `scope` is left out when `None` and
/// `warnings` is left out when empty.
#[derive(Serialize)]
struct ConflictMatrixReport {
    contract_version: &'static str,
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    targets: Vec<String>,
    can_parallel: bool,
    fail_closed: bool,
    cross_target_parallel_safe: bool,
    per_target_fail_closed: Vec<ConflictMatrixPerTargetFailClosed>,
    inputs: ConflictMatrixInputSummary,
    context_pack: ConflictMatrixContextSummary,
    cached_diff: diff_digest::DiffDigestReport,
    impact: impact::ImpactReport,
    candidates: Vec<ConflictMatrixCandidate>,
    worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
    conflicts: Vec<ConflictMatrixPair>,
    orchestration: ConflictMatrixOrchestrationObservability,
    next_commands: Vec<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
15306
/// Returns the fixed lowercase label for a conflict-matrix risk level.
///
/// The labels match the `snake_case` names serde uses for the same variants
/// (`FailClosed` becomes `"fail_closed"`).
fn conflict_risk_label(risk: ConflictMatrixRisk) -> &'static str {
    match risk {
        ConflictMatrixRisk::Low => "low",
        ConflictMatrixRisk::Medium => "medium",
        ConflictMatrixRisk::High => "high",
        ConflictMatrixRisk::FailClosed => "fail_closed",
    }
}
15315
/// Copies a set into a `Vec`, keeping the set's ascending order.
fn sorted_set(values: &BTreeSet<String>) -> Vec<String> {
    let mut ordered = Vec::with_capacity(values.len());
    ordered.extend(values.iter().cloned());
    ordered
}
15319
/// Returns the values present in both sets, in ascending order.
fn sorted_intersection(left: &BTreeSet<String>, right: &BTreeSet<String>) -> Vec<String> {
    // Walking `left` in order and keeping what `right` also holds gives a sorted result.
    left.iter()
        .filter(|value| right.contains(*value))
        .cloned()
        .collect()
}
15323
/// Normalizes a raw target reference (backlog id, job handle, or graph node id) into
/// a bare identifier.
///
/// Steps:
/// 1. trim whitespace and surrounding `` ` ``, `,`, `;`, `.` punctuation;
/// 2. unwrap a balanced `[#id]` form;
/// 3. trim again, then strip any leading run of `#`, `[`, `]` and any trailing
///    `[` / `]`.
///
/// The leading `#` and brackets are stripped together so that unbalanced or doubly
/// prefixed input such as `[#id` or `#[#id]` cannot leave a stray `#` at the front of
/// the identifier.
///
/// Returns `None` when nothing is left after stripping.
fn normalize_conflict_target(raw: &str) -> Option<String> {
    let trimmed = raw
        .trim()
        .trim_matches(|ch: char| matches!(ch, '`' | ',' | ';' | '.'));
    // Unwrap the balanced form first so inner padding (`[# id ]`) can be trimmed below.
    let bracketed = trimmed
        .strip_prefix("[#")
        .and_then(|value| value.strip_suffix(']'))
        .unwrap_or(trimmed);
    let normalized = bracketed
        .trim()
        .trim_start_matches(|ch: char| matches!(ch, '#' | '[' | ']'))
        .trim_end_matches(|ch: char| matches!(ch, '[' | ']'));
    (!normalized.is_empty()).then(|| normalized.to_string())
}
15338
15339fn extract_conflict_target_refs(input: &str) -> Vec<String> {
15340    input
15341        .split(|ch: char| {
15342            !(ch.is_ascii_alphanumeric()
15343                || ch == '#'
15344                || ch == '_'
15345                || ch == '-'
15346                || ch == '['
15347                || ch == ']')
15348        })
15349        .filter_map(|token| {
15350            let hash = token.find('#')?;
15351            normalize_conflict_target(&token[hash..])
15352        })
15353        .collect()
15354}
15355
15356fn conflict_targets_from_context_pack(
15357    store: &impl GraphStore,
15358    context_pack: &ConflictMatrixPreparedContext,
15359) -> Result<Vec<String>> {
15360    let mut candidates = Vec::new();
15361    for prompt in &context_pack.prompt_targets {
15362        candidates.extend(extract_conflict_target_refs(prompt));
15363    }
15364    for worker in &context_pack.worker_context {
15365        candidates.extend(extract_conflict_target_refs(worker));
15366    }
15367
15368    let mut targets = Vec::new();
15369    let mut seen = BTreeSet::new();
15370    for candidate in candidates {
15371        if !seen.insert(candidate.clone()) {
15372            continue;
15373        }
15374        if graph_db_resolve_evidence_target(store, &candidate)?.is_some() {
15375            targets.push(candidate);
15376        }
15377    }
15378    Ok(targets)
15379}
15380
15381fn resolve_conflict_matrix_targets(
15382    store: &impl GraphStore,
15383    raw_targets: &[String],
15384    context_pack: &ConflictMatrixPreparedContext,
15385) -> Result<Vec<String>> {
15386    let mut targets = raw_targets
15387        .iter()
15388        .filter_map(|target| normalize_conflict_target(target))
15389        .collect::<Vec<_>>();
15390    if targets.is_empty() {
15391        targets = conflict_targets_from_context_pack(store, context_pack)?;
15392    }
15393
15394    let mut seen = BTreeSet::new();
15395    targets.retain(|target| seen.insert(target.clone()));
15396    if targets.is_empty() {
15397        bail!(
15398            "conflict-matrix needs at least one resolvable backlog id, job handle, or graph node id"
15399        );
15400    }
15401    Ok(targets)
15402}
15403
/// Thin local alias for `resolution::is_planner_config_path`.
///
/// Used by `conflict_matrix_staged_overlap` to pick the config files out of the
/// overlapping file list.
fn is_planner_config_path(path: &str) -> bool {
    resolution::is_planner_config_path(path)
}
15407
15408fn conflict_matrix_source_handle(node: &SubstrateGraphNode) -> Option<ConflictMatrixSourceHandle> {
15409    let file = node.properties.get("file")?.clone();
15410    let start = node
15411        .properties
15412        .get("start")
15413        .and_then(|value| value.parse::<usize>().ok())
15414        .unwrap_or(1);
15415    let end = node
15416        .properties
15417        .get("end")
15418        .and_then(|value| value.parse::<usize>().ok())
15419        .unwrap_or(start);
15420    Some(ConflictMatrixSourceHandle {
15421        handle: node
15422            .properties
15423            .get("handle")
15424            .cloned()
15425            .unwrap_or_else(|| node.id.clone()),
15426        file,
15427        start,
15428        end,
15429        reason: node.properties.get("reason").cloned().unwrap_or_default(),
15430        expand: node.properties.get("expand").cloned().unwrap_or_default(),
15431    })
15432}
15433
15434fn conflict_matrix_semantic_ref(
15435    root: &Path,
15436    node: &SubstrateGraphNode,
15437) -> ConflictMatrixSemanticRef {
15438    ConflictMatrixSemanticRef {
15439        handle: node
15440            .properties
15441            .get("handle")
15442            .cloned()
15443            .unwrap_or_else(|| node.id.clone()),
15444        kind: node.kind.clone(),
15445        label: node.label.clone(),
15446        source_file: node
15447            .properties
15448            .get("source_file")
15449            .or_else(|| node.properties.get("path"))
15450            .cloned(),
15451        source_symbol: node.properties.get("source_symbol").cloned(),
15452        expand: node
15453            .properties
15454            .get("expand")
15455            .cloned()
15456            .unwrap_or_else(|| traversal_expand_command(root, &node.id)),
15457    }
15458}
15459
/// Lookup index over the graph's symbol nodes, built by `conflict_matrix_graph_index`.
#[derive(Clone)]
struct ConflictMatrixGraphIndex {
    // File path -> sorted, deduplicated labels of the `symbol` nodes at that path.
    symbols_by_file: BTreeMap<String, Vec<String>>,
}
15464
15465fn conflict_matrix_graph_index(graph_nodes: &[SubstrateGraphNode]) -> ConflictMatrixGraphIndex {
15466    let mut symbols_by_file = BTreeMap::<String, Vec<String>>::new();
15467    for node in graph_nodes {
15468        if node.kind != "symbol" {
15469            continue;
15470        }
15471        if let Some(path) = node.properties.get("path") {
15472            symbols_by_file
15473                .entry(path.clone())
15474                .or_default()
15475                .push(node.label.clone());
15476        }
15477    }
15478    for symbols in symbols_by_file.values_mut() {
15479        symbols.sort();
15480        symbols.dedup();
15481    }
15482    ConflictMatrixGraphIndex { symbols_by_file }
15483}
15484
15485fn conflict_matrix_symbols_for_files(
15486    graph_index: &ConflictMatrixGraphIndex,
15487    files: &BTreeSet<String>,
15488    target_node: &SubstrateGraphNode,
15489) -> BTreeSet<String> {
15490    let mut symbols = BTreeSet::new();
15491    if target_node.kind == "symbol" {
15492        symbols.insert(target_node.label.clone());
15493    }
15494    for file in files {
15495        if let Some(file_symbols) = graph_index.symbols_by_file.get(file) {
15496            symbols.extend(file_symbols.iter().cloned());
15497        }
15498    }
15499    symbols
15500}
15501
15502fn conflict_matrix_test_commands(target: &impact::ImpactTestTarget) -> Vec<String> {
15503    if target.commands.is_empty() {
15504        vec![target.path.clone()]
15505    } else {
15506        target.commands.clone()
15507    }
15508}
15509
15510fn conflict_matrix_affected_tests(
15511    impact_report: &impact::ImpactReport,
15512    files: &BTreeSet<String>,
15513    symbols: &BTreeSet<String>,
15514    staged_overlap: &ConflictMatrixOverlap,
15515) -> Vec<String> {
15516    let mut tests = BTreeSet::new();
15517    for target in &impact_report.affected_tests {
15518        let path_match = files.contains(&target.path);
15519        let symbol_match = target.symbols.iter().any(|symbol| symbols.contains(symbol));
15520        if path_match || symbol_match {
15521            tests.extend(conflict_matrix_test_commands(target));
15522        }
15523    }
15524
15525    if tests.is_empty()
15526        && (!staged_overlap.files.is_empty()
15527            || !staged_overlap.symbols.is_empty()
15528            || !staged_overlap.config_files.is_empty())
15529    {
15530        for target in &impact_report.affected_tests {
15531            tests.extend(conflict_matrix_test_commands(target));
15532        }
15533    }
15534    tests.into_iter().collect()
15535}
15536
15537fn conflict_matrix_semantic_dispatch_score(
15538    semantic_related: &[ConflictMatrixSemanticRef],
15539    files: &BTreeSet<String>,
15540    symbols: &BTreeSet<String>,
15541) -> (usize, Vec<String>) {
15542    let mut score = 0usize;
15543    let mut reasons = Vec::new();
15544    for semantic in semantic_related {
15545        let base = match semantic.kind.as_str() {
15546            "semantic_concept" => 8,
15547            "semantic_entity" => 6,
15548            _ => 3,
15549        };
15550        let mut points = base;
15551        let mut detail = vec![format!("{} {}", semantic.kind, semantic.label)];
15552        if semantic
15553            .source_file
15554            .as_ref()
15555            .is_some_and(|file| files.contains(file))
15556        {
15557            points += 4;
15558            detail.push("owned file".to_string());
15559        }
15560        if semantic
15561            .source_symbol
15562            .as_ref()
15563            .is_some_and(|symbol| symbols.contains(symbol))
15564        {
15565            points += 2;
15566            detail.push("owned symbol".to_string());
15567        }
15568        score += points;
15569        reasons.push(format!("+{points} {}", detail.join(" / ")));
15570    }
15571    (score, reasons)
15572}
15573
15574fn conflict_matrix_staged_overlap(
15575    files: &BTreeSet<String>,
15576    symbols: &BTreeSet<String>,
15577    cached_diff: &diff_digest::DiffDigestReport,
15578) -> ConflictMatrixOverlap {
15579    let staged_files = cached_diff
15580        .files
15581        .iter()
15582        .map(|file| file.path.clone())
15583        .collect::<BTreeSet<_>>();
15584    let staged_symbols = cached_diff
15585        .files
15586        .iter()
15587        .flat_map(|file| file.touched_symbols.iter().cloned())
15588        .collect::<BTreeSet<_>>();
15589    let file_overlap = sorted_intersection(files, &staged_files);
15590    let symbol_overlap = sorted_intersection(symbols, &staged_symbols);
15591    let config_files = file_overlap
15592        .iter()
15593        .filter(|file| is_planner_config_path(file))
15594        .cloned()
15595        .collect::<Vec<_>>();
15596    ConflictMatrixOverlap {
15597        files: file_overlap,
15598        symbols: symbol_overlap,
15599        tests: Vec::new(),
15600        config_files,
15601    }
15602}
15603
15604fn graph_node_list_property(node: &SubstrateGraphNode, key: &str) -> Vec<String> {
15605    node.properties
15606        .get(key)
15607        .map(|value| {
15608            value
15609                .split([',', ';'])
15610                .flat_map(|part| part.split("&&"))
15611                .map(str::trim)
15612                .filter(|part| !part.is_empty())
15613                .map(str::to_string)
15614                .collect()
15615        })
15616        .unwrap_or_default()
15617}
15618
15619fn conflict_matrix_worker_feedback(
15620    worker_results: &[SubstrateGraphNode],
15621) -> ConflictMatrixWorkerFeedback {
15622    let mut touched_files = BTreeSet::new();
15623    let mut expected_tests = BTreeSet::new();
15624    let mut follow_up_ids = BTreeSet::new();
15625    let mut outcome_history = Vec::new();
15626    let mut completed = 0usize;
15627    let mut blocked = 0usize;
15628
15629    let mut results = worker_results.iter().collect::<Vec<_>>();
15630    results.sort_by(|left, right| {
15631        left.properties
15632            .get("line")
15633            .and_then(|value| value.parse::<i64>().ok())
15634            .cmp(
15635                &right
15636                    .properties
15637                    .get("line")
15638                    .and_then(|value| value.parse::<i64>().ok()),
15639            )
15640            .then(left.id.cmp(&right.id))
15641    });
15642
15643    for node in results {
15644        let status = node
15645            .properties
15646            .get("status")
15647            .map(String::as_str)
15648            .unwrap_or("unknown");
15649        match status {
15650            "completed" => completed += 1,
15651            "blocked" => blocked += 1,
15652            _ => {}
15653        }
15654        touched_files.extend(graph_node_list_property(node, "touched_files"));
15655        expected_tests.extend(graph_node_list_property(node, "expected_tests"));
15656        follow_up_ids.extend(graph_node_list_property(node, "follow_up_ids"));
15657        let location = match (node.properties.get("path"), node.properties.get("line")) {
15658            (Some(path), Some(line)) => format!("{path}:{line}"),
15659            (Some(path), None) => path.clone(),
15660            _ => node.id.clone(),
15661        };
15662        let detail = node
15663            .properties
15664            .get("detail")
15665            .cloned()
15666            .unwrap_or_else(|| node.label.clone());
15667        outcome_history.push(format!("{status} at {location}: {detail}"));
15668    }
15669
15670    let repeated_blockage = blocked > 1;
15671    let warnings = if repeated_blockage {
15672        vec![format!(
15673            "repeated blockage observed in {blocked} worker_result rows; inspect outcome_history before redispatch"
15674        )]
15675    } else {
15676        Vec::new()
15677    };
15678
15679    ConflictMatrixWorkerFeedback {
15680        total: worker_results.len(),
15681        completed,
15682        blocked,
15683        touched_files: touched_files.into_iter().collect(),
15684        expected_tests: expected_tests.into_iter().collect(),
15685        follow_up_ids: follow_up_ids.into_iter().collect(),
15686        outcome_history,
15687        repeated_blockage,
15688        stale_expected_tests: Vec::new(),
15689        follow_up_debt: Vec::new(),
15690        closure_rank_score: 0,
15691        closure_rank_reasons: Vec::new(),
15692        warnings,
15693    }
15694}
15695
/// Renders a list of feedback references as one comma-separated string.
///
/// Returns the literal `"none"` for an empty slice; otherwise the values are joined with `,`
/// and no surrounding whitespace.
fn feedback_ref_list(values: &[String]) -> String {
    match values {
        [] => String::from("none"),
        refs => refs.join(","),
    }
}
15703
15704fn stale_expected_tests_for_candidate(candidate: &ConflictMatrixCandidate) -> Vec<String> {
15705    if candidate.worker_feedback.expected_tests.is_empty() {
15706        return Vec::new();
15707    }
15708    let current_tests = candidate
15709        .affected_tests
15710        .iter()
15711        .cloned()
15712        .collect::<BTreeSet<_>>();
15713    if current_tests.is_empty() {
15714        return candidate.worker_feedback.expected_tests.clone();
15715    }
15716    candidate
15717        .worker_feedback
15718        .expected_tests
15719        .iter()
15720        .filter(|test| !current_tests.contains(*test))
15721        .cloned()
15722        .collect()
15723}
15724
15725fn apply_conflict_matrix_worker_feedback_controls(candidates: &mut [ConflictMatrixCandidate]) {
15726    for candidate in candidates.iter_mut() {
15727        let stale_expected_tests = stale_expected_tests_for_candidate(candidate);
15728        let follow_up_debt = candidate.worker_feedback.follow_up_ids.clone();
15729        let mut score = 0usize;
15730        let mut reasons = Vec::new();
15731
15732        if candidate.worker_feedback.repeated_blockage {
15733            score += candidate.worker_feedback.blocked.saturating_mul(40);
15734            reasons.push(format!(
15735                "repeated blockage: {} blocked worker_result rows",
15736                candidate.worker_feedback.blocked
15737            ));
15738        }
15739        if !stale_expected_tests.is_empty() {
15740            score += stale_expected_tests.len().saturating_mul(25);
15741            let reason = if candidate.affected_tests.is_empty() {
15742                format!(
15743                    "stale expected tests: {} no longer match current impact output",
15744                    feedback_ref_list(&stale_expected_tests)
15745                )
15746            } else {
15747                format!(
15748                    "stale expected tests: {} not in current impacted tests {}",
15749                    feedback_ref_list(&stale_expected_tests),
15750                    feedback_ref_list(&candidate.affected_tests)
15751                )
15752            };
15753            reasons.push(reason.clone());
15754            candidate.worker_feedback.warnings.push(format!(
15755                "{reason}; refresh impact or rerun the listed tests before redispatch"
15756            ));
15757        }
15758        if !follow_up_debt.is_empty() {
15759            score += follow_up_debt.len().saturating_mul(10);
15760            let reason = format!("follow-up debt: {}", feedback_ref_list(&follow_up_debt));
15761            reasons.push(reason.clone());
15762            candidate.worker_feedback.warnings.push(format!(
15763                "{reason}; include or resolve the referenced backlog ids before closing dispatch"
15764            ));
15765        }
15766
15767        candidate.worker_feedback.stale_expected_tests = stale_expected_tests;
15768        candidate.worker_feedback.follow_up_debt = follow_up_debt;
15769        candidate.worker_feedback.closure_rank_score = score;
15770        candidate.worker_feedback.closure_rank_reasons = reasons;
15771        candidate.worker_feedback.warnings =
15772            dedupe_preserve_order(std::mem::take(&mut candidate.worker_feedback.warnings));
15773    }
15774}
15775
15776fn empty_conflict_matrix_ownership(target: &str) -> ConflictMatrixOwnershipBlock {
15777    ConflictMatrixOwnershipBlock {
15778        contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
15779        title: format!("Worker ownership for {target}"),
15780        owned_files: Vec::new(),
15781        owned_symbols: Vec::new(),
15782        read_only_context: Vec::new(),
15783        read_only_files: Vec::new(),
15784        forbidden_files: Vec::new(),
15785        expected_tests: Vec::new(),
15786        expansion_commands: Vec::new(),
15787        token_budget: ConflictMatrixTokenBudget::default(),
15788        prompt: String::new(),
15789    }
15790}
15791
15792fn conflict_matrix_candidate_from_evidence(
15793    root: &Path,
15794    evidence: &GraphDbEvidenceReport,
15795    graph_index: &ConflictMatrixGraphIndex,
15796    cached_diff: &diff_digest::DiffDigestReport,
15797    impact_report: &impact::ImpactReport,
15798) -> ConflictMatrixCandidate {
15799    let mut files = BTreeSet::new();
15800    let source_handles = evidence
15801        .source_handles
15802        .iter()
15803        .filter_map(|node| {
15804            let handle = conflict_matrix_source_handle(node)?;
15805            files.insert(handle.file.clone());
15806            Some(handle)
15807        })
15808        .collect::<Vec<_>>();
15809    if matches!(
15810        evidence.target_node.kind.as_str(),
15811        "file" | "symbol" | "route"
15812    ) && let Some(path) = evidence.target_node.properties.get("path")
15813    {
15814        files.insert(path.clone());
15815    }
15816
15817    let symbols = conflict_matrix_symbols_for_files(graph_index, &files, &evidence.target_node);
15818    let config_files = files
15819        .iter()
15820        .filter(|file| is_planner_config_path(file))
15821        .cloned()
15822        .collect::<BTreeSet<_>>();
15823    let mut staged_overlap = conflict_matrix_staged_overlap(&files, &symbols, cached_diff);
15824    let affected_tests =
15825        conflict_matrix_affected_tests(impact_report, &files, &symbols, &staged_overlap);
15826    staged_overlap.tests = affected_tests.clone();
15827    let mut worker_feedback = conflict_matrix_worker_feedback(&evidence.worker_results);
15828    let previously_completed = worker_feedback.completed > 0;
15829
15830    let mut risk_score = 0usize;
15831    let mut risk_reasons = Vec::new();
15832    if files.is_empty() && previously_completed {
15833        worker_feedback.warnings.push(format!(
15834            "previously completed: {} completed worker_result row(s) exist without source ownership evidence; treating no-owned-files as informational instead of per-target fail-closed",
15835            worker_feedback.completed
15836        ));
15837    } else if files.is_empty() {
15838        risk_score += 120;
15839        risk_reasons.push("no source ownership evidence; fail closed before dispatch".to_string());
15840    }
15841    if !config_files.is_empty() {
15842        risk_score += 80 * config_files.len();
15843        risk_reasons.push("candidate owns config or workflow files".to_string());
15844    }
15845    if !staged_overlap.config_files.is_empty() {
15846        risk_score += 100 * staged_overlap.config_files.len();
15847        risk_reasons.push("staged diff already touches candidate config files".to_string());
15848    }
15849    if !staged_overlap.files.is_empty() {
15850        risk_score += 70 * staged_overlap.files.len();
15851        risk_reasons.push("staged diff already touches candidate files".to_string());
15852    }
15853    if !staged_overlap.symbols.is_empty() {
15854        risk_score += 35 * staged_overlap.symbols.len();
15855        risk_reasons.push("staged diff already touches candidate symbols".to_string());
15856    }
15857    if affected_tests.len() > 1 {
15858        risk_score += affected_tests.len() * 5;
15859        risk_reasons.push("candidate fans into multiple affected test commands".to_string());
15860    }
15861    let risk = if (files.is_empty() && !previously_completed)
15862        || !staged_overlap.config_files.is_empty()
15863        || !staged_overlap.files.is_empty()
15864    {
15865        ConflictMatrixRisk::FailClosed
15866    } else if !config_files.is_empty() || !staged_overlap.symbols.is_empty() {
15867        ConflictMatrixRisk::High
15868    } else if affected_tests.len() > 1 {
15869        ConflictMatrixRisk::Medium
15870    } else {
15871        ConflictMatrixRisk::Low
15872    };
15873
15874    let worker_context = evidence
15875        .worker_context
15876        .iter()
15877        .map(|node| {
15878            node.properties
15879                .get("summary")
15880                .cloned()
15881                .unwrap_or_else(|| node.label.clone())
15882        })
15883        .collect::<Vec<_>>();
15884    let worker_context_handles = evidence
15885        .worker_context
15886        .iter()
15887        .map(|node| {
15888            node.properties
15889                .get("handle")
15890                .cloned()
15891                .unwrap_or_else(|| node.id.clone())
15892        })
15893        .collect::<Vec<_>>();
15894    let semantic_related = evidence
15895        .semantic_related
15896        .iter()
15897        .map(|node| conflict_matrix_semantic_ref(root, node))
15898        .collect::<Vec<_>>();
15899    let (semantic_dispatch_score, semantic_dispatch_reasons) =
15900        conflict_matrix_semantic_dispatch_score(&semantic_related, &files, &symbols);
15901
15902    ConflictMatrixCandidate {
15903        rank: 0,
15904        target: evidence.target.clone(),
15905        evidence_packet_id: evidence.packet_id.clone(),
15906        projection_hash: evidence.projection_hash.clone(),
15907        target_node_id: evidence.target_node.id.clone(),
15908        target_kind: evidence.target_node.kind.clone(),
15909        target_label: evidence.target_node.label.clone(),
15910        risk,
15911        previously_completed,
15912        parallel_safe: false,
15913        blocks: Vec::new(),
15914        blocked_by: Vec::new(),
15915        required_context: ConflictMatrixRequiredContext::default(),
15916        graph_handles: ConflictMatrixGraphHandles::default(),
15917        risk_score,
15918        risk_reasons,
15919        owned_files: sorted_set(&files),
15920        owned_symbols: sorted_set(&symbols),
15921        config_files: sorted_set(&config_files),
15922        affected_tests,
15923        worker_context,
15924        semantic_related,
15925        semantic_dispatch_score,
15926        semantic_dispatch_reasons,
15927        worker_feedback,
15928        source_handles,
15929        worker_context_handles,
15930        staged_overlap,
15931        ownership: empty_conflict_matrix_ownership(&evidence.target),
15932    }
15933}
15934
/// Copies the strings of `values` into an ordered set, dropping duplicates.
fn set_from_vec(values: &[String]) -> BTreeSet<String> {
    let mut set = BTreeSet::new();
    set.extend(values.iter().cloned());
    set
}
15938
15939fn conflict_pair_risk(
15940    shared_files: &[String],
15941    shared_symbols: &[String],
15942    shared_tests: &[String],
15943    shared_config_files: &[String],
15944) -> (ConflictMatrixRisk, usize, String) {
15945    let score = shared_files.len() * 100
15946        + shared_config_files.len() * 100
15947        + shared_symbols.len() * 40
15948        + shared_tests.len() * 10;
15949    if !shared_files.is_empty() || !shared_config_files.is_empty() {
15950        (
15951            ConflictMatrixRisk::FailClosed,
15952            score,
15953            "serialize or assign one worker as the sole owner of the shared files".to_string(),
15954        )
15955    } else if !shared_symbols.is_empty() {
15956        (
15957            ConflictMatrixRisk::High,
15958            score,
15959            "split by file or serialize; shared symbols are not safe parallel ownership"
15960                .to_string(),
15961        )
15962    } else if !shared_tests.is_empty() {
15963        (
15964            ConflictMatrixRisk::Medium,
15965            score,
15966            "parallel work is possible, but keep a shared test gate after merge".to_string(),
15967        )
15968    } else {
15969        (
15970            ConflictMatrixRisk::Low,
15971            score,
15972            "no direct file, symbol, config, or test overlap found".to_string(),
15973        )
15974    }
15975}
15976
15977fn build_conflict_matrix_pairs(candidates: &[ConflictMatrixCandidate]) -> Vec<ConflictMatrixPair> {
15978    let mut pairs = Vec::new();
15979    for left_idx in 0..candidates.len() {
15980        for right_idx in (left_idx + 1)..candidates.len() {
15981            let left = &candidates[left_idx];
15982            let right = &candidates[right_idx];
15983            let left_files = set_from_vec(&left.owned_files);
15984            let right_files = set_from_vec(&right.owned_files);
15985            let left_symbols = set_from_vec(&left.owned_symbols);
15986            let right_symbols = set_from_vec(&right.owned_symbols);
15987            let left_tests = set_from_vec(&left.affected_tests);
15988            let right_tests = set_from_vec(&right.affected_tests);
15989            let left_config = set_from_vec(&left.config_files);
15990            let right_config = set_from_vec(&right.config_files);
15991            let shared_files = sorted_intersection(&left_files, &right_files);
15992            let shared_symbols = sorted_intersection(&left_symbols, &right_symbols);
15993            let shared_tests = sorted_intersection(&left_tests, &right_tests);
15994            let shared_config_files = sorted_intersection(&left_config, &right_config);
15995            let (risk, risk_score, verdict) = conflict_pair_risk(
15996                &shared_files,
15997                &shared_symbols,
15998                &shared_tests,
15999                &shared_config_files,
16000            );
16001            pairs.push(ConflictMatrixPair {
16002                left: left.target.clone(),
16003                right: right.target.clone(),
16004                risk,
16005                risk_score,
16006                shared_files,
16007                shared_symbols,
16008                shared_tests,
16009                shared_config_files,
16010                verdict,
16011            });
16012        }
16013    }
16014    pairs.sort_by(|left, right| {
16015        right
16016            .risk
16017            .cmp(&left.risk)
16018            .then_with(|| right.risk_score.cmp(&left.risk_score))
16019            .then_with(|| left.left.cmp(&right.left))
16020            .then_with(|| left.right.cmp(&right.right))
16021    });
16022    pairs
16023}
16024
16025fn conflict_matrix_per_target_fail_closed(
16026    candidates: &[ConflictMatrixCandidate],
16027) -> Vec<ConflictMatrixPerTargetFailClosed> {
16028    candidates
16029        .iter()
16030        .filter(|candidate| candidate.risk == ConflictMatrixRisk::FailClosed)
16031        .map(|candidate| ConflictMatrixPerTargetFailClosed {
16032            target: candidate.target.clone(),
16033            previously_completed: candidate.previously_completed,
16034            risk_reasons: candidate.risk_reasons.clone(),
16035            owned_files: candidate.owned_files.clone(),
16036            source_handle_count: candidate.source_handles.len(),
16037        })
16038        .collect()
16039}
16040
/// Renders `values` as a Markdown bullet list, one `- item` per line.
///
/// An empty slice renders as the single line `- none`. The output has no trailing newline.
fn markdown_list(values: &[String]) -> String {
    if values.is_empty() {
        return String::from("- none");
    }
    let mut rendered = String::new();
    for (index, value) in values.iter().enumerate() {
        if index > 0 {
            rendered.push('\n');
        }
        rendered.push_str("- ");
        rendered.push_str(value);
    }
    rendered
}
16051
16052fn conflict_matrix_expansion_commands(candidate: &ConflictMatrixCandidate) -> Vec<String> {
16053    let mut commands = candidate
16054        .source_handles
16055        .iter()
16056        .filter(|handle| !handle.expand.trim().is_empty())
16057        .map(|handle| handle.expand.clone())
16058        .chain(
16059            candidate
16060                .semantic_related
16061                .iter()
16062                .map(|semantic| semantic.expand.clone()),
16063        )
16064        .chain(candidate.affected_tests.iter().cloned())
16065        .collect::<Vec<_>>();
16066    if commands.is_empty() {
16067        commands.push(format!(
16068            "tsift graph-db evidence {} --depth 3 --limit 8 --json",
16069            shell_quote(&candidate.target)
16070        ));
16071    }
16072    dedupe_preserve_order(commands)
16073}
16074
16075fn conflict_matrix_token_budget(
16076    prompt: &str,
16077    source_handles: &[ConflictMatrixSourceHandle],
16078) -> ConflictMatrixTokenBudget {
16079    let source_window_lines = source_handles
16080        .iter()
16081        .map(|handle| handle.end.saturating_sub(handle.start).saturating_add(1))
16082        .sum::<usize>();
16083    let max_context_bytes = source_window_lines.saturating_mul(120).max(prompt.len());
16084    ConflictMatrixTokenBudget {
16085        prompt_estimated_tokens: estimated_tokens_from_bytes(prompt.len()),
16086        max_prompt_tokens: estimated_tokens_from_bytes(max_context_bytes),
16087        source_window_count: source_handles.len(),
16088        source_window_lines,
16089        max_context_bytes,
16090    }
16091}
16092
16093fn conflict_matrix_worker_prompt_packet_id(candidate: &ConflictMatrixCandidate) -> String {
16094    stable_handle(
16095        "wpp",
16096        &format!(
16097            "{}:{}:{}:{}",
16098            WORKER_PROMPT_PACKET_CONTRACT_VERSION,
16099            candidate.target,
16100            candidate.target_node_id,
16101            candidate.projection_hash.as_deref().unwrap_or("no-hash")
16102        ),
16103    )
16104}
16105
16106fn conflict_matrix_required_context(
16107    candidate: &ConflictMatrixCandidate,
16108) -> ConflictMatrixRequiredContext {
16109    ConflictMatrixRequiredContext {
16110        read_only_files: candidate.ownership.read_only_files.clone(),
16111        source_handles: candidate
16112            .source_handles
16113            .iter()
16114            .map(|handle| handle.handle.clone())
16115            .collect(),
16116        worker_context_handles: candidate.worker_context_handles.clone(),
16117        semantic_handles: candidate
16118            .semantic_related
16119            .iter()
16120            .map(|semantic| semantic.handle.clone())
16121            .collect(),
16122        expansion_commands: candidate.ownership.expansion_commands.clone(),
16123    }
16124}
16125
16126fn conflict_matrix_graph_handles(
16127    candidate: &ConflictMatrixCandidate,
16128) -> ConflictMatrixGraphHandles {
16129    ConflictMatrixGraphHandles {
16130        target_node_id: candidate.target_node_id.clone(),
16131        evidence_packet_id: candidate.evidence_packet_id.clone(),
16132        worker_prompt_packet_id: conflict_matrix_worker_prompt_packet_id(candidate),
16133        projection_hash: candidate.projection_hash.clone(),
16134        source_handles: candidate
16135            .source_handles
16136            .iter()
16137            .map(|handle| handle.handle.clone())
16138            .collect(),
16139        worker_context_handles: candidate.worker_context_handles.clone(),
16140        semantic_handles: candidate
16141            .semantic_related
16142            .iter()
16143            .map(|semantic| semantic.handle.clone())
16144            .collect(),
16145    }
16146}
16147
16148fn apply_conflict_matrix_ownership_blocks(candidates: &mut [ConflictMatrixCandidate]) {
16149    let all_files_by_target = candidates
16150        .iter()
16151        .map(|candidate| {
16152            (
16153                candidate.target.clone(),
16154                candidate
16155                    .owned_files
16156                    .iter()
16157                    .cloned()
16158                    .collect::<BTreeSet<_>>(),
16159            )
16160        })
16161        .collect::<Vec<_>>();
16162
16163    for candidate in candidates.iter_mut() {
16164        let mut read_only = BTreeSet::new();
16165        for (target, files) in &all_files_by_target {
16166            if target != &candidate.target {
16167                read_only.extend(files.iter().cloned());
16168            }
16169        }
16170        let mut forbidden = read_only.clone();
16171        forbidden.extend(candidate.staged_overlap.files.iter().cloned());
16172        forbidden.extend(candidate.staged_overlap.config_files.iter().cloned());
16173        let read_only_files = sorted_set(&read_only);
16174        let forbidden_files = sorted_set(&forbidden);
16175        let expected_tests = candidate.affected_tests.clone();
16176        let mut read_only_context = read_only_files.clone();
16177        read_only_context.extend(
16178            candidate
16179                .worker_context
16180                .iter()
16181                .map(|summary| format!("worker_context: {summary}")),
16182        );
16183        read_only_context.extend(candidate.semantic_related.iter().map(|semantic| {
16184            format!(
16185                "semantic:{}:{}{}",
16186                semantic.kind,
16187                semantic.label,
16188                semantic
16189                    .source_file
16190                    .as_ref()
16191                    .map(|file| format!(" ({file})"))
16192                    .unwrap_or_default()
16193            )
16194        }));
16195        read_only_context.extend(
16196            candidate
16197                .semantic_dispatch_reasons
16198                .iter()
16199                .map(|reason| format!("semantic_rank: {reason}")),
16200        );
16201        if candidate.worker_feedback.total > 0 {
16202            read_only_context.push(format!(
16203                "worker_feedback: completed={} blocked={} touched_files={} expected_tests={} follow_up_ids={}",
16204                candidate.worker_feedback.completed,
16205                candidate.worker_feedback.blocked,
16206                feedback_ref_list(&candidate.worker_feedback.touched_files),
16207                feedback_ref_list(&candidate.worker_feedback.expected_tests),
16208                feedback_ref_list(&candidate.worker_feedback.follow_up_ids),
16209            ));
16210        }
16211        if candidate.worker_feedback.closure_rank_score > 0 {
16212            read_only_context.push(format!(
16213                "worker_feedback_closure: score={} stale_expected_tests={} follow_up_debt={}",
16214                candidate.worker_feedback.closure_rank_score,
16215                feedback_ref_list(&candidate.worker_feedback.stale_expected_tests),
16216                feedback_ref_list(&candidate.worker_feedback.follow_up_debt),
16217            ));
16218        }
16219        read_only_context.extend(
16220            candidate
16221                .worker_feedback
16222                .warnings
16223                .iter()
16224                .map(|warning| format!("worker_feedback_warning: {warning}")),
16225        );
16226        read_only_context = dedupe_preserve_order(read_only_context);
16227        let expansion_commands = conflict_matrix_expansion_commands(candidate);
16228        let title = format!(
16229            "Worker {} owns {} ({})",
16230            candidate.rank, candidate.target, candidate.target_label
16231        );
16232        let prompt_body = format!(
16233            "{title}\n\nOwned files:\n{}\n\nOwned symbols:\n{}\n\nRead-only context:\n{}\n\nForbidden files:\n{}\n\nExpected tests:\n{}\n\nExpansion commands:\n{}\n\nSemantic dispatch score: {}\n{}\n\nFail closed if the task requires a forbidden/shared file, an unowned config file, or a public contract change outside this ownership block.",
16234            markdown_list(&candidate.owned_files),
16235            markdown_list(&candidate.owned_symbols),
16236            markdown_list(&read_only_context),
16237            markdown_list(&forbidden_files),
16238            markdown_list(&expected_tests),
16239            markdown_list(&expansion_commands),
16240            candidate.semantic_dispatch_score,
16241            markdown_list(&candidate.semantic_dispatch_reasons),
16242        );
16243        let token_budget = conflict_matrix_token_budget(&prompt_body, &candidate.source_handles);
16244        let prompt = format!(
16245            "{prompt_body}\n\nToken budget: prompt_estimated_tokens={} max_prompt_tokens={} source_windows={} source_window_lines={} max_context_bytes={}",
16246            token_budget.prompt_estimated_tokens,
16247            token_budget.max_prompt_tokens,
16248            token_budget.source_window_count,
16249            token_budget.source_window_lines,
16250            token_budget.max_context_bytes,
16251        );
16252        candidate.ownership = ConflictMatrixOwnershipBlock {
16253            contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
16254            title,
16255            owned_files: candidate.owned_files.clone(),
16256            owned_symbols: candidate.owned_symbols.clone(),
16257            read_only_context,
16258            read_only_files,
16259            forbidden_files,
16260            expected_tests,
16261            expansion_commands,
16262            token_budget,
16263            prompt,
16264        };
16265    }
16266}
16267
16268fn conflict_matrix_pair_requires_serial(pair: &ConflictMatrixPair) -> bool {
16269    matches!(
16270        pair.risk,
16271        ConflictMatrixRisk::High | ConflictMatrixRisk::FailClosed
16272    )
16273}
16274
16275fn apply_conflict_matrix_scheduler_fields(
16276    candidates: &mut [ConflictMatrixCandidate],
16277    conflicts: &[ConflictMatrixPair],
16278) {
16279    let rank_by_target = candidates
16280        .iter()
16281        .map(|candidate| (candidate.target.clone(), candidate.rank))
16282        .collect::<BTreeMap<_, _>>();
16283    let mut blocks = BTreeMap::<String, BTreeSet<String>>::new();
16284    let mut blocked_by = BTreeMap::<String, BTreeSet<String>>::new();
16285
16286    for pair in conflicts {
16287        if !conflict_matrix_pair_requires_serial(pair) {
16288            continue;
16289        }
16290        let left_rank = rank_by_target
16291            .get(&pair.left)
16292            .copied()
16293            .unwrap_or(usize::MAX);
16294        let right_rank = rank_by_target
16295            .get(&pair.right)
16296            .copied()
16297            .unwrap_or(usize::MAX);
16298        let (blocker, blocked) = if left_rank <= right_rank {
16299            (&pair.left, &pair.right)
16300        } else {
16301            (&pair.right, &pair.left)
16302        };
16303        blocks
16304            .entry(blocker.clone())
16305            .or_default()
16306            .insert(blocked.clone());
16307        blocked_by
16308            .entry(blocked.clone())
16309            .or_default()
16310            .insert(blocker.clone());
16311    }
16312
16313    for candidate in candidates.iter() {
16314        for follow_up in &candidate.worker_feedback.follow_up_debt {
16315            blocks
16316                .entry(candidate.target.clone())
16317                .or_default()
16318                .insert(follow_up.clone());
16319            if rank_by_target.contains_key(follow_up) {
16320                blocked_by
16321                    .entry(follow_up.clone())
16322                    .or_default()
16323                    .insert(candidate.target.clone());
16324            }
16325        }
16326    }
16327
16328    for candidate in candidates.iter_mut() {
16329        let candidate_blocks: Vec<String> = blocks
16330            .remove(&candidate.target)
16331            .map(|values| values.into_iter().collect())
16332            .unwrap_or_default();
16333        let candidate_blocked_by: Vec<String> = blocked_by
16334            .remove(&candidate.target)
16335            .map(|values| values.into_iter().collect())
16336            .unwrap_or_default();
16337        let has_serial_edges = !candidate_blocks.is_empty() || !candidate_blocked_by.is_empty();
16338        candidate.parallel_safe =
16339            candidate.risk != ConflictMatrixRisk::FailClosed && !has_serial_edges;
16340        candidate.blocks = candidate_blocks;
16341        candidate.blocked_by = candidate_blocked_by;
16342        candidate.required_context = conflict_matrix_required_context(candidate);
16343        candidate.graph_handles = conflict_matrix_graph_handles(candidate);
16344    }
16345}
16346
16347fn conflict_matrix_worker_prompt_packets(
16348    candidates: &[ConflictMatrixCandidate],
16349) -> Vec<ConflictMatrixWorkerPromptPacket> {
16350    candidates
16351        .iter()
16352        .map(|candidate| ConflictMatrixWorkerPromptPacket {
16353            contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
16354            packet_id: conflict_matrix_worker_prompt_packet_id(candidate),
16355            target: candidate.target.clone(),
16356            rank: candidate.rank,
16357            risk: candidate.risk,
16358            previously_completed: candidate.previously_completed,
16359            parallel_safe: candidate.parallel_safe,
16360            blocks: candidate.blocks.clone(),
16361            blocked_by: candidate.blocked_by.clone(),
16362            required_context: candidate.required_context.clone(),
16363            graph_handles: candidate.graph_handles.clone(),
16364            projection_hash: candidate.projection_hash.clone(),
16365            title: candidate.ownership.title.clone(),
16366            owned_files: candidate.ownership.owned_files.clone(),
16367            owned_symbols: candidate.ownership.owned_symbols.clone(),
16368            read_only_context: candidate.ownership.read_only_context.clone(),
16369            forbidden_files: candidate.ownership.forbidden_files.clone(),
16370            expected_tests: candidate.ownership.expected_tests.clone(),
16371            expansion_commands: candidate.ownership.expansion_commands.clone(),
16372            token_budget: candidate.ownership.token_budget.clone(),
16373            semantic_dispatch_score: candidate.semantic_dispatch_score,
16374            semantic_dispatch_reasons: candidate.semantic_dispatch_reasons.clone(),
16375            worker_feedback: candidate.worker_feedback.clone(),
16376            prompt: candidate.ownership.prompt.clone(),
16377        })
16378        .collect()
16379}
16380
16381fn conflict_matrix_orchestration_observability(
16382    freshness: &GraphDbFreshnessReport,
16383    candidates: &[ConflictMatrixCandidate],
16384    conflicts: &[ConflictMatrixPair],
16385    next_commands: &[String],
16386) -> ConflictMatrixOrchestrationObservability {
16387    let evidence_packet_ids = candidates
16388        .iter()
16389        .map(|candidate| candidate.evidence_packet_id.clone())
16390        .collect::<Vec<_>>();
16391    let projection_hashes = candidates
16392        .iter()
16393        .filter_map(|candidate| candidate.projection_hash.clone())
16394        .collect::<BTreeSet<_>>()
16395        .into_iter()
16396        .collect::<Vec<_>>();
16397    let mut conflict_matrix_decisions = candidates
16398        .iter()
16399        .map(|candidate| {
16400            format!(
16401                "candidate #{} {} risk={} previously_completed={} closure_score={} semantic_score={} owned_files={} forbidden_files={}",
16402                candidate.rank,
16403                candidate.target,
16404                conflict_risk_label(candidate.risk),
16405                candidate.previously_completed,
16406                candidate.worker_feedback.closure_rank_score,
16407                candidate.semantic_dispatch_score,
16408                candidate.ownership.owned_files.len(),
16409                candidate.ownership.forbidden_files.len()
16410            )
16411        })
16412        .collect::<Vec<_>>();
16413    conflict_matrix_decisions.extend(conflicts.iter().map(|pair| {
16414        format!(
16415            "pair {}<->{} risk={} verdict={}",
16416            pair.left,
16417            pair.right,
16418            conflict_risk_label(pair.risk),
16419            pair.verdict
16420        )
16421    }));
16422    let worker_ownership_blocks = candidates
16423        .iter()
16424        .map(|candidate| candidate.ownership.title.clone())
16425        .collect::<Vec<_>>();
16426    ConflictMatrixOrchestrationObservability {
16427        contract_version: CONFLICT_MATRIX_CONTRACT_VERSION,
16428        projection_freshness: freshness.clone(),
16429        projection_hashes,
16430        evidence_packet_ids,
16431        conflict_matrix_decisions,
16432        worker_ownership_blocks,
16433        follow_up_commands: next_commands.to_vec(),
16434    }
16435}
16436
16437fn conflict_matrix_context_summary(
16438    context_pack: &ConflictMatrixPreparedContext,
16439) -> ConflictMatrixContextSummary {
16440    ConflictMatrixContextSummary {
16441        target: context_pack.target.clone(),
16442        target_kind: context_pack.target_kind.clone(),
16443        prompt_targets: context_pack.prompt_targets.clone(),
16444        touched_files: context_pack.touched_files.clone(),
16445        touched_symbols: context_pack.touched_symbols.clone(),
16446        files_changed: context_pack.files_changed,
16447        worker_context: context_pack.worker_context.clone(),
16448        source_windows: context_pack
16449            .source_windows
16450            .iter()
16451            .map(|window| format!("{}:{}-{}", window.file, window.start, window.end))
16452            .collect(),
16453        status_reminders: context_pack.status_reminders.clone(),
16454    }
16455}
16456
16457fn conflict_matrix_next_commands(
16458    root: &Path,
16459    path: &Path,
16460    scope: Option<&str>,
16461    targets: &[String],
16462    depth: usize,
16463    limit: usize,
16464    impact_limit: usize,
16465) -> Vec<String> {
16466    let mut commands = Vec::new();
16467    for target in targets {
16468        commands.push(format!(
16469            "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
16470            shell_quote(root.to_string_lossy().as_ref()),
16471            graph_db_scope_arg(scope),
16472            shell_quote(target),
16473            depth,
16474            limit
16475        ));
16476    }
16477    commands.push(format!(
16478        "tsift --envelope context-pack {} --budget normal",
16479        shell_quote(path.to_string_lossy().as_ref())
16480    ));
16481    commands.push(format!(
16482        "tsift diff-digest --cached {} --json",
16483        shell_quote(root.to_string_lossy().as_ref())
16484    ));
16485    commands.push(format!(
16486        "tsift impact {} --cached{} --limit {} --json",
16487        shell_quote(root.to_string_lossy().as_ref()),
16488        scope
16489            .map(|scope| format!(" --scope {}", shell_quote(scope)))
16490            .unwrap_or_default(),
16491        impact_limit
16492    ));
16493    dedupe_preserve_order(commands)
16494}
16495
16496fn print_conflict_matrix_human(report: &ConflictMatrixReport, compact: bool) {
16497    if compact {
16498        println!(
16499            "conflict-matrix targets:{} candidates:{} conflicts:{} can_parallel:{} fail_closed:{} cross_safe:{} per_target_fail_closed:{}",
16500            report.targets.len(),
16501            report.candidates.len(),
16502            report.conflicts.len(),
16503            report.can_parallel,
16504            report.fail_closed,
16505            report.cross_target_parallel_safe,
16506            report.per_target_fail_closed.len()
16507        );
16508    } else {
16509        println!("Conflict matrix");
16510        println!("  targets:      {}", report.targets.join(", "));
16511        println!("  can parallel: {}", report.can_parallel);
16512        println!("  fail closed:  {}", report.fail_closed);
16513        println!(
16514            "  cross target parallel safe: {}",
16515            report.cross_target_parallel_safe
16516        );
16517        println!(
16518            "  per target fail closed: {}",
16519            report.per_target_fail_closed.len()
16520        );
16521    }
16522    for candidate in &report.candidates {
16523        println!(
16524            "candidate #{} {} risk:{} score:{} semantic:{} files:{} symbols:{} tests:{}",
16525            candidate.rank,
16526            candidate.target,
16527            conflict_risk_label(candidate.risk),
16528            candidate.risk_score,
16529            candidate.semantic_dispatch_score,
16530            candidate.owned_files.len(),
16531            candidate.owned_symbols.len(),
16532            candidate.affected_tests.len()
16533        );
16534        if candidate.previously_completed {
16535            println!("  previously completed: true");
16536        }
16537        for reason in &candidate.risk_reasons {
16538            println!("  reason: {reason}");
16539        }
16540        if candidate.worker_feedback.total > 0 {
16541            println!(
16542                "  worker feedback: completed:{} blocked:{} files:{} tests:{} follow-ups:{} closure:{}",
16543                candidate.worker_feedback.completed,
16544                candidate.worker_feedback.blocked,
16545                candidate.worker_feedback.touched_files.len(),
16546                candidate.worker_feedback.expected_tests.len(),
16547                candidate.worker_feedback.follow_up_ids.len(),
16548                candidate.worker_feedback.closure_rank_score
16549            );
16550            for reason in &candidate.worker_feedback.closure_rank_reasons {
16551                println!("  closure: {reason}");
16552            }
16553            for warning in &candidate.worker_feedback.warnings {
16554                println!("  warning: {warning}");
16555            }
16556        }
16557    }
16558    for pair in &report.conflicts {
16559        println!(
16560            "conflict {} <-> {} risk:{} score:{} verdict:{}",
16561            pair.left,
16562            pair.right,
16563            conflict_risk_label(pair.risk),
16564            pair.risk_score,
16565            pair.verdict
16566        );
16567        for file in &pair.shared_files {
16568            println!("  shared file: {file}");
16569        }
16570        for symbol in &pair.shared_symbols {
16571            println!("  shared symbol: {symbol}");
16572        }
16573    }
16574    for command in &report.next_commands {
16575        println!("next: {command}");
16576    }
16577    for packet in &report.worker_prompt_packets {
16578        println!("worker-prompt #{} {}", packet.rank, packet.title);
16579    }
16580    for warning in &report.warnings {
16581        println!("warning: {warning}");
16582    }
16583    if !report.per_target_fail_closed.is_empty() {
16584        println!(
16585            "per-target fail closed: {} target(s)",
16586            report.per_target_fail_closed.len()
16587        );
16588        for target in &report.per_target_fail_closed {
16589            println!(
16590                "  {} source_handles:{} owned_files:{} reasons:{}",
16591                target.target,
16592                target.source_handle_count,
16593                target.owned_files.len(),
16594                target.risk_reasons.join("; ")
16595            );
16596        }
16597    }
16598}
16599
16600#[derive(Clone, Serialize, Deserialize)]
16601struct ConflictMatrixPreparedInputs {
16602    context_pack: ConflictMatrixPreparedContext,
16603    cached_diff: diff_digest::DiffDigestReport,
16604    impact_report: impact::ImpactReport,
16605    preparation_cache: ConflictMatrixPreparationCacheSummary,
16606    preparation_timings: Vec<GraphDbBackendEvalPhaseTiming>,
16607}
16608
16609struct ConflictMatrixGraphSnapshot {
16610    nodes: Vec<SubstrateGraphNode>,
16611    edges: Vec<SubstrateGraphEdge>,
16612    index: ConflictMatrixGraphIndex,
16613}
16614
16615#[derive(Clone, Serialize, Deserialize)]
16616struct ConflictMatrixPreparedEvidence {
16617    report: GraphDbEvidenceReport,
16618    summary: ConflictMatrixEvidencePacketSummary,
16619}
16620
16621struct ConflictMatrixGraphPreparedInputs {
16622    targets: Vec<String>,
16623    graph: ConflictMatrixGraphSnapshot,
16624    evidence: Vec<ConflictMatrixPreparedEvidence>,
16625    shared_preparation: ConflictMatrixSharedPreparationSummary,
16626}
16627
16628#[derive(Clone, Serialize, Deserialize)]
16629struct ConflictMatrixGraphPreparedCache {
16630    version: String,
16631    key: String,
16632    targets: Vec<String>,
16633    nodes: Vec<SubstrateGraphNode>,
16634    edges: Vec<SubstrateGraphEdge>,
16635    evidence: Vec<ConflictMatrixPreparedEvidence>,
16636    shared_preparation: ConflictMatrixSharedPreparationSummary,
16637}
16638
16639static CONFLICT_MATRIX_PREPARATION_CACHE: OnceLock<
16640    Mutex<BTreeMap<String, ConflictMatrixPreparedInputs>>,
16641> = OnceLock::new();
16642
16643fn conflict_matrix_preparation_cache()
16644-> &'static Mutex<BTreeMap<String, ConflictMatrixPreparedInputs>> {
16645    CONFLICT_MATRIX_PREPARATION_CACHE.get_or_init(|| Mutex::new(BTreeMap::new()))
16646}
16647
16648fn hash_bytes_hex(bytes: &[u8]) -> String {
16649    blake3::hash(bytes).to_hex().to_string()
16650}
16651
/// Returns the on-disk conflict-matrix cache directory under `root`
/// (`<root>/.tsift/conflict-matrix-cache`).
fn conflict_matrix_disk_cache_dir(root: &Path) -> PathBuf {
    let mut cache_dir = root.to_path_buf();
    cache_dir.push(".tsift/conflict-matrix-cache");
    cache_dir
}
16655
16656fn conflict_matrix_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
16657    conflict_matrix_disk_cache_dir(root)
16658        .join(kind)
16659        .join(format!("{key}.json"))
16660}
16661
16662fn conflict_matrix_read_disk_cache<T: for<'de> Deserialize<'de>>(
16663    root: &Path,
16664    kind: &str,
16665    key: &str,
16666) -> Option<T> {
16667    let path = conflict_matrix_disk_cache_path(root, kind, key);
16668    let bytes = fs::read(path).ok()?;
16669    serde_json::from_slice(&bytes).ok()
16670}
16671
16672fn conflict_matrix_write_disk_cache<T: Serialize>(root: &Path, kind: &str, key: &str, value: &T) {
16673    let path = conflict_matrix_disk_cache_path(root, kind, key);
16674    let Some(parent) = path.parent() else {
16675        return;
16676    };
16677    if fs::create_dir_all(parent).is_err() {
16678        return;
16679    }
16680    if let Ok(bytes) = serde_json::to_vec(value) {
16681        let _ = fs::write(path, bytes);
16682    }
16683}
16684
16685fn conflict_matrix_document_watermark(path: &Path) -> Result<String> {
16686    if path.is_dir() {
16687        let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
16688        return Ok(hash_bytes_hex(
16689            format!("directory:{}", canonical.display()).as_bytes(),
16690        ));
16691    }
16692    let bytes = fs::read(path)
16693        .with_context(|| format!("reading conflict-matrix document {}", path.display()))?;
16694    Ok(hash_bytes_hex(&bytes))
16695}
16696
16697fn conflict_matrix_staged_diff_watermark(root: &Path) -> String {
16698    match Command::new("git")
16699        .arg("-C")
16700        .arg(root)
16701        .args(["diff", "--cached", "--raw", "--no-ext-diff"])
16702        .output()
16703    {
16704        Ok(output) => {
16705            let mut bytes = Vec::new();
16706            bytes.extend_from_slice(output.status.to_string().as_bytes());
16707            bytes.extend_from_slice(&output.stdout);
16708            bytes.extend_from_slice(&output.stderr);
16709            hash_bytes_hex(&bytes)
16710        }
16711        Err(err) => hash_bytes_hex(format!("git-diff-cached-unavailable:{err:#}").as_bytes()),
16712    }
16713}
16714
16715fn conflict_matrix_preparation_cache_summary(
16716    root: &Path,
16717    path: &Path,
16718    scope: Option<&str>,
16719) -> Result<ConflictMatrixPreparationCacheSummary> {
16720    let source_watermark = traversal_source_watermark(root, path, scope, false)?
16721        .unwrap_or_else(|| "unavailable".to_string());
16722    let document_watermark = conflict_matrix_document_watermark(path)?;
16723    let staged_diff_watermark = conflict_matrix_staged_diff_watermark(root);
16724    let key = content_hash(&vec![
16725        format!("version:{CONFLICT_MATRIX_PREPARATION_CACHE_VERSION}"),
16726        format!("root:{}", root.display()),
16727        format!("path:{}", path.display()),
16728        format!("scope:{}", scope.unwrap_or("root")),
16729        format!("source:{source_watermark}"),
16730        format!("document:{document_watermark}"),
16731        format!("staged_diff:{staged_diff_watermark}"),
16732    ])?;
16733    Ok(ConflictMatrixPreparationCacheSummary {
16734        version: CONFLICT_MATRIX_PREPARATION_CACHE_VERSION.to_string(),
16735        key,
16736        status: "memory_miss".to_string(),
16737        source_watermark,
16738        document_watermark,
16739        staged_diff_watermark,
16740    })
16741}
16742
16743fn conflict_matrix_prepared_inputs_cache_hit(
16744    mut cached: ConflictMatrixPreparedInputs,
16745    status: &str,
16746    duration_micros: u128,
16747    detail: &str,
16748) -> ConflictMatrixPreparedInputs {
16749    cached.preparation_cache.status = status.to_string();
16750    let cached_detail = format!(
16751        "reused from {status} conflict-matrix preparation cache by source/document/staged-diff watermark; cost accounted in preparation_cache_lookup"
16752    );
16753    cached.preparation_timings = vec![
16754        graph_db_backend_eval_phase_timing("preparation_cache_lookup", duration_micros, detail),
16755        graph_db_backend_eval_phase_timing("session_review_compute", 0, &cached_detail),
16756        graph_db_backend_eval_phase_timing(
16757            "session_review_compute.target_context_build",
16758            0,
16759            &cached_detail,
16760        ),
16761        graph_db_backend_eval_phase_timing(
16762            "session_review_compute.session_discovery",
16763            0,
16764            &cached_detail,
16765        ),
16766        graph_db_backend_eval_phase_timing(
16767            "session_review_compute.session_digest_total",
16768            0,
16769            &cached_detail,
16770        ),
16771        graph_db_backend_eval_phase_timing(
16772            "session_review_compute.session_cost_total",
16773            0,
16774            &cached_detail,
16775        ),
16776        graph_db_backend_eval_phase_timing(
16777            "session_review_compute.session_aggregation",
16778            0,
16779            &cached_detail,
16780        ),
16781        graph_db_backend_eval_phase_timing(
16782            "session_review_compute.report_assembly",
16783            0,
16784            &cached_detail,
16785        ),
16786        graph_db_backend_eval_phase_timing("status_index_gate", 0, &cached_detail),
16787        graph_db_backend_eval_phase_timing(
16788            "status_index_gate.prepare_agent_doc_index_gate",
16789            0,
16790            &cached_detail,
16791        ),
16792        graph_db_backend_eval_phase_timing(
16793            "status_index_gate.context_pack_status_reminders",
16794            0,
16795            &cached_detail,
16796        ),
16797        graph_db_backend_eval_phase_timing(
16798            "status_index_gate.load_tag_ontology_preview_context",
16799            0,
16800            &cached_detail,
16801        ),
16802        graph_db_backend_eval_phase_timing("context_pack_diff", 0, &cached_detail),
16803        graph_db_backend_eval_phase_timing("exploration_materialization", 0, &cached_detail),
16804        graph_db_backend_eval_phase_timing("graph_orchestration", 0, &cached_detail),
16805        graph_db_backend_eval_phase_timing("staged_diff", 0, &cached_detail),
16806        graph_db_backend_eval_phase_timing("impact", 0, &cached_detail),
16807        graph_db_backend_eval_phase_timing("impact.context_resolution", 0, &cached_detail),
16808        graph_db_backend_eval_phase_timing("impact.diff_digest", 0, &cached_detail),
16809        graph_db_backend_eval_phase_timing("impact.test_path_scan", 0, &cached_detail),
16810        graph_db_backend_eval_phase_timing("impact.index_open", 0, &cached_detail),
16811        graph_db_backend_eval_phase_timing("impact.call_edge_impacts", 0, &cached_detail),
16812        graph_db_backend_eval_phase_timing("impact.route_handler_impacts", 0, &cached_detail),
16813        graph_db_backend_eval_phase_timing("impact.import_impacts", 0, &cached_detail),
16814        graph_db_backend_eval_phase_timing("impact.report_assembly", 0, &cached_detail),
16815    ];
16816    cached
16817}
16818
16819pub(crate) fn prepare_conflict_matrix_inputs(
16820    root: &Path,
16821    path: &Path,
16822    scope: Option<&str>,
16823    impact_limit: usize,
16824) -> Result<ConflictMatrixPreparedInputs> {
16825    let cache_lookup_started = Instant::now();
16826    let mut cache_summary = conflict_matrix_preparation_cache_summary(root, path, scope)?;
16827    if let Some(cached) = conflict_matrix_preparation_cache()
16828        .lock()
16829        .map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
16830        .get(&cache_summary.key)
16831        .cloned()
16832    {
16833        return Ok(conflict_matrix_prepared_inputs_cache_hit(
16834            cached,
16835            "memory_hit",
16836            cache_lookup_started.elapsed().as_micros(),
16837            "reused prepared context-pack, staged diff, and impact packet from memory by source/document/staged-diff watermark",
16838        ));
16839    }
16840    if let Some(cached) = conflict_matrix_read_disk_cache::<ConflictMatrixPreparedInputs>(
16841        root,
16842        "inputs",
16843        &cache_summary.key,
16844    ) {
16845        let cached = conflict_matrix_prepared_inputs_cache_hit(
16846            cached,
16847            "disk_hit",
16848            cache_lookup_started.elapsed().as_micros(),
16849            "reused prepared context-pack, staged diff, and impact packet from .tsift/conflict-matrix-cache by source/document/staged-diff watermark",
16850        );
16851        conflict_matrix_preparation_cache()
16852            .lock()
16853            .map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
16854            .insert(cached.preparation_cache.key.clone(), cached.clone());
16855        return Ok(cached);
16856    }
16857
16858    let mut preparation_timings = vec![graph_db_backend_eval_phase_timing(
16859        "preparation_cache_lookup",
16860        cache_lookup_started.elapsed().as_micros(),
16861        "no prepared packet matched the source/document/staged-diff watermark",
16862    )];
16863    cache_summary.status = "computed".to_string();
16864    let (context_pack_report, context_pack_timings) = build_context_pack_report_with_profile(
16865        path,
16866        None,
16867        None,
16868        None,
16869        ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Normal), false),
16870    )?;
16871    preparation_timings.extend(context_pack_timings);
16872    let context_pack = ConflictMatrixPreparedContext::from_context_pack(&context_pack_report);
16873    let cached_diff = graph_db_backend_eval_timed_phase(
16874        &mut preparation_timings,
16875        "staged_diff",
16876        "cached/staged diff digest used for ownership overlap checks",
16877        || {
16878            diff_digest::compute(
16879                root,
16880                diff_digest::DiffDigestOptions {
16881                    cached: true,
16882                    revision: None,
16883                    max_parsed_files: None,
16884                },
16885            )
16886            .with_context(|| format!("computing cached diff digest for {}", root.display()))
16887        },
16888    )?;
16889    let impact_started = Instant::now();
16890    let (impact_report, impact_sub_phases) = impact::compute_with_phases(
16891        root,
16892        impact::ImpactOptions {
16893            cached: true,
16894            revision: None,
16895            scope,
16896            limit: impact_limit,
16897        },
16898    )
16899    .with_context(|| format!("computing cached impact report for {}", root.display()))?;
16900    let impact_total_micros = impact_started.elapsed().as_micros();
16901    preparation_timings.push(graph_db_backend_eval_phase_timing(
16902        "impact",
16903        impact_total_micros,
16904        "cached impact analysis used for affected-test ownership checks",
16905    ));
16906    for sub in &impact_sub_phases {
16907        preparation_timings.push(graph_db_backend_eval_phase_timing(
16908            &format!("impact.{}", sub.name),
16909            sub.duration_micros,
16910            &sub.detail,
16911        ));
16912    }
16913    let prepared = ConflictMatrixPreparedInputs {
16914        context_pack,
16915        cached_diff,
16916        impact_report,
16917        preparation_cache: cache_summary,
16918        preparation_timings,
16919    };
16920    conflict_matrix_preparation_cache()
16921        .lock()
16922        .map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
16923        .insert(prepared.preparation_cache.key.clone(), prepared.clone());
16924    conflict_matrix_write_disk_cache(root, "inputs", &prepared.preparation_cache.key, &prepared);
16925    Ok(prepared)
16926}
16927
16928fn conflict_matrix_evidence_packet_summary(
16929    root: &Path,
16930    scope: Option<&str>,
16931    target: &str,
16932    depth: usize,
16933    limit: usize,
16934    evidence: &GraphDbEvidenceReport,
16935) -> ConflictMatrixEvidencePacketSummary {
16936    ConflictMatrixEvidencePacketSummary {
16937        target: evidence.target.clone(),
16938        packet_id: evidence.packet_id.clone(),
16939        target_node_id: evidence.target_node.id.clone(),
16940        projection_hash: evidence.projection_hash.clone(),
16941        replay_command: evidence
16942            .replay_commands
16943            .first()
16944            .cloned()
16945            .unwrap_or_else(|| {
16946                format!(
16947                    "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
16948                    shell_quote(root.to_string_lossy().as_ref()),
16949                    graph_db_scope_arg(scope),
16950                    shell_quote(target),
16951                    depth,
16952                    limit
16953                )
16954            }),
16955    }
16956}
16957
16958fn conflict_matrix_shared_preparation_summary(
16959    graph: &ConflictMatrixGraphSnapshot,
16960    evidence: &[ConflictMatrixPreparedEvidence],
16961    evidence_cache_status: &str,
16962) -> ConflictMatrixSharedPreparationSummary {
16963    ConflictMatrixSharedPreparationSummary {
16964        evidence_cache_status: evidence_cache_status.to_string(),
16965        graph_nodes: graph.nodes.len(),
16966        graph_edges: graph.edges.len(),
16967        evidence_packets: evidence.len(),
16968        source_handles: evidence
16969            .iter()
16970            .map(|entry| entry.report.source_handles.len())
16971            .sum(),
16972        worker_context: evidence
16973            .iter()
16974            .map(|entry| entry.report.worker_context.len())
16975            .sum(),
16976        worker_results: evidence
16977            .iter()
16978            .map(|entry| entry.report.worker_results.len())
16979            .sum(),
16980        semantic_rows: evidence
16981            .iter()
16982            .map(|entry| entry.report.semantic_related.len())
16983            .sum(),
16984        dispatch_trace_snapshot_nodes: graph.nodes.len(),
16985        dispatch_trace_snapshot_edges: graph.edges.len(),
16986    }
16987}
16988
16989#[allow(dead_code)]
16990fn conflict_matrix_graph_snapshot(store: &impl GraphStore) -> Result<ConflictMatrixGraphSnapshot> {
16991    let nodes = store.all_nodes()?;
16992    let edges = store.all_edges()?;
16993    let index = conflict_matrix_graph_index(&nodes);
16994    Ok(ConflictMatrixGraphSnapshot {
16995        nodes,
16996        edges,
16997        index,
16998    })
16999}
17000
17001fn insert_conflict_graph_node(
17002    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17003    node: SubstrateGraphNode,
17004) {
17005    nodes.entry(node.id.clone()).or_insert(node);
17006}
17007
17008fn insert_conflict_graph_edge(
17009    edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
17010    edge: SubstrateGraphEdge,
17011) {
17012    edges
17013        .entry((edge.from_id.clone(), edge.kind.clone(), edge.to_id.clone()))
17014        .or_insert(edge);
17015}
17016
17017fn conflict_matrix_files_from_evidence(evidence: &GraphDbEvidenceReport) -> BTreeSet<String> {
17018    let mut files = BTreeSet::new();
17019    if matches!(
17020        evidence.target_node.kind.as_str(),
17021        "file" | "symbol" | "route"
17022    ) && let Some(path) = evidence.target_node.properties.get("path")
17023    {
17024        files.insert(path.clone());
17025    }
17026    for node in &evidence.source_handles {
17027        if let Some(handle) = conflict_matrix_source_handle(node) {
17028            files.insert(handle.file);
17029        }
17030    }
17031    files
17032}
17033
17034fn conflict_matrix_add_path_nodes<S: GraphStore>(
17035    store: &S,
17036    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17037    evidence: &GraphDbEvidenceReport,
17038) -> Result<()> {
17039    for path in &evidence.shortest_paths {
17040        let Some(graph_path) = &path.path else {
17041            continue;
17042        };
17043        for id in &graph_path.nodes {
17044            if nodes.contains_key(id) {
17045                continue;
17046            }
17047            if let Some(node) = store.node(id)? {
17048                insert_conflict_graph_node(nodes, node);
17049            }
17050        }
17051    }
17052    Ok(())
17053}
17054
17055fn conflict_matrix_add_file_symbol_nodes<S: GraphStore>(
17056    store: &S,
17057    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17058    files: &BTreeSet<String>,
17059) -> Result<()> {
17060    for file in files {
17061        for kind in ["file", "route", "symbol"] {
17062            let page = store.paged_nodes_by_kind(
17063                kind,
17064                GraphQueryOptions {
17065                    property_filters: vec![GraphPropertyFilter {
17066                        key: "path".to_string(),
17067                        value: file.clone(),
17068                    }],
17069                    ..GraphQueryOptions::default()
17070                },
17071            )?;
17072            for node in page.nodes {
17073                insert_conflict_graph_node(nodes, node);
17074            }
17075        }
17076    }
17077    Ok(())
17078}
17079
17080fn conflict_matrix_add_target_ref_nodes<S: GraphStore>(
17081    store: &S,
17082    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17083    target_node: &SubstrateGraphNode,
17084) -> Result<()> {
17085    let Some(ref_id) = target_node.properties.get("ref_id") else {
17086        return Ok(());
17087    };
17088    for kind in ["backlog", "job_packet", "worker_result"] {
17089        let page = store.paged_nodes_by_kind(
17090            kind,
17091            GraphQueryOptions {
17092                property_filters: vec![GraphPropertyFilter {
17093                    key: "ref_id".to_string(),
17094                    value: ref_id.clone(),
17095                }],
17096                ..GraphQueryOptions::default()
17097            },
17098        )?;
17099        for node in page.nodes {
17100            insert_conflict_graph_node(nodes, node);
17101        }
17102    }
17103    Ok(())
17104}
17105
17106fn conflict_matrix_add_target_neighborhood<S: GraphStore>(
17107    store: &S,
17108    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
17109    edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
17110    target_node: &SubstrateGraphNode,
17111    depth: usize,
17112    limit: usize,
17113) -> Result<()> {
17114    let node_limit = if limit == 0 {
17115        None
17116    } else {
17117        Some(limit.saturating_mul(depth.max(1)).saturating_mul(8).max(64))
17118    };
17119    if let Some(page) = store.paged_neighborhood(
17120        &target_node.id,
17121        depth,
17122        None,
17123        GraphQueryOptions {
17124            limit: node_limit,
17125            ..GraphQueryOptions::default()
17126        },
17127    )? {
17128        for node in page.nodes {
17129            insert_conflict_graph_node(nodes, node);
17130        }
17131        for edge in page.edges {
17132            insert_conflict_graph_edge(edges, edge);
17133        }
17134    }
17135    Ok(())
17136}
17137
17138fn conflict_matrix_add_scoped_edges<S: GraphStore>(
17139    store: &S,
17140    nodes: &BTreeMap<String, SubstrateGraphNode>,
17141    edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
17142) -> Result<()> {
17143    let node_ids = nodes.keys().cloned().collect::<BTreeSet<_>>();
17144    for edge in store.edges_between_nodes(&node_ids)? {
17145        insert_conflict_graph_edge(edges, edge);
17146    }
17147    Ok(())
17148}
17149
17150fn conflict_matrix_target_scoped_graph_snapshot<S: GraphStore>(
17151    store: &S,
17152    evidence: &[ConflictMatrixPreparedEvidence],
17153    depth: usize,
17154    limit: usize,
17155) -> Result<ConflictMatrixGraphSnapshot> {
17156    let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
17157    let mut edges = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
17158    let mut files = BTreeSet::new();
17159
17160    for prepared in evidence {
17161        let report = &prepared.report;
17162        insert_conflict_graph_node(&mut nodes, report.target_node.clone());
17163        for node in report
17164            .worker_context
17165            .iter()
17166            .chain(report.source_handles.iter())
17167            .chain(report.worker_results.iter())
17168            .chain(report.semantic_related.iter())
17169        {
17170            insert_conflict_graph_node(&mut nodes, node.clone());
17171        }
17172        files.extend(conflict_matrix_files_from_evidence(report));
17173        conflict_matrix_add_target_ref_nodes(store, &mut nodes, &report.target_node)?;
17174        conflict_matrix_add_path_nodes(store, &mut nodes, report)?;
17175        conflict_matrix_add_target_neighborhood(
17176            store,
17177            &mut nodes,
17178            &mut edges,
17179            &report.target_node,
17180            depth,
17181            limit,
17182        )?;
17183    }
17184
17185    conflict_matrix_add_file_symbol_nodes(store, &mut nodes, &files)?;
17186    conflict_matrix_add_scoped_edges(store, &nodes, &mut edges)?;
17187
17188    let nodes = nodes.into_values().collect::<Vec<_>>();
17189    let edges = edges.into_values().collect::<Vec<_>>();
17190    let index = conflict_matrix_graph_index(&nodes);
17191    Ok(ConflictMatrixGraphSnapshot {
17192        nodes,
17193        edges,
17194        index,
17195    })
17196}
17197
17198#[allow(clippy::too_many_arguments)]
17199fn collect_conflict_matrix_evidence_packets<S: GraphStore>(
17200    root: &Path,
17201    scope: Option<&str>,
17202    backend: &str,
17203    targets: &[String],
17204    depth: usize,
17205    limit: usize,
17206    store: &S,
17207    freshness: GraphDbFreshnessReport,
17208) -> Result<Vec<ConflictMatrixPreparedEvidence>> {
17209    let mut evidence = Vec::new();
17210    for target in targets {
17211        let report = graph_db_evidence_report_from_store(GraphDbEvidenceInput {
17212            root,
17213            scope,
17214            backend,
17215            target,
17216            depth,
17217            limit,
17218            store,
17219            freshness: freshness.clone(),
17220            warnings: Vec::new(),
17221        })
17222        .with_context(|| format!("collecting graph-db evidence for {target}"))?;
17223        let summary =
17224            conflict_matrix_evidence_packet_summary(root, scope, target, depth, limit, &report);
17225        evidence.push(ConflictMatrixPreparedEvidence { report, summary });
17226    }
17227    Ok(evidence)
17228}
17229
17230fn conflict_matrix_graph_preparation_cache_key(
17231    prepared: &ConflictMatrixPreparedInputs,
17232    scope: Option<&str>,
17233    backend: &str,
17234    targets: &[String],
17235    depth: usize,
17236    limit: usize,
17237    freshness: &GraphDbFreshnessReport,
17238) -> Result<String> {
17239    content_hash(&serde_json::json!({
17240        "version": CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION,
17241        "prepared_inputs_key": prepared.preparation_cache.key.as_str(),
17242        "scope": scope.unwrap_or("root"),
17243        "backend": backend,
17244        "targets": targets,
17245        "depth": depth,
17246        "limit": limit,
17247        "projection_version": freshness.projection_version.as_deref(),
17248        "projection_hash": freshness.content_hash.as_deref(),
17249        "source_watermark": freshness.source_watermark.as_deref(),
17250    }))
17251}
17252
17253fn conflict_matrix_graph_prepared_cache_hit(
17254    cached: ConflictMatrixGraphPreparedCache,
17255    status: &str,
17256) -> ConflictMatrixGraphPreparedInputs {
17257    let mut shared_preparation = cached.shared_preparation;
17258    shared_preparation.evidence_cache_status = status.to_string();
17259    let index = conflict_matrix_graph_index(&cached.nodes);
17260    ConflictMatrixGraphPreparedInputs {
17261        targets: cached.targets,
17262        graph: ConflictMatrixGraphSnapshot {
17263            nodes: cached.nodes,
17264            edges: cached.edges,
17265            index,
17266        },
17267        evidence: cached.evidence,
17268        shared_preparation,
17269    }
17270}
17271
17272fn conflict_matrix_graph_prepared_cache_from_inputs(
17273    key: &str,
17274    prepared: &ConflictMatrixGraphPreparedInputs,
17275) -> ConflictMatrixGraphPreparedCache {
17276    ConflictMatrixGraphPreparedCache {
17277        version: CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION.to_string(),
17278        key: key.to_string(),
17279        targets: prepared.targets.clone(),
17280        nodes: prepared.graph.nodes.clone(),
17281        edges: prepared.graph.edges.clone(),
17282        evidence: prepared.evidence.clone(),
17283        shared_preparation: prepared.shared_preparation.clone(),
17284    }
17285}
17286
17287#[allow(clippy::too_many_arguments)]
17288fn prepare_conflict_matrix_graph_orchestration<S: GraphStore>(
17289    root: &Path,
17290    scope: Option<&str>,
17291    backend: &str,
17292    raw_targets: &[String],
17293    prepared: &ConflictMatrixPreparedInputs,
17294    depth: usize,
17295    limit: usize,
17296    store: &S,
17297    freshness: GraphDbFreshnessReport,
17298) -> Result<ConflictMatrixGraphPreparedInputs> {
17299    let targets = resolve_conflict_matrix_targets(store, raw_targets, &prepared.context_pack)?;
17300    let graph_cache_key = conflict_matrix_graph_preparation_cache_key(
17301        prepared, scope, backend, &targets, depth, limit, &freshness,
17302    )?;
17303    if let Some(cached) = conflict_matrix_read_disk_cache::<ConflictMatrixGraphPreparedCache>(
17304        root,
17305        "graph",
17306        &graph_cache_key,
17307    ) && cached.version == CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION
17308        && cached.key == graph_cache_key
17309        && cached.targets == targets
17310    {
17311        return Ok(conflict_matrix_graph_prepared_cache_hit(cached, "disk_hit"));
17312    }
17313    let evidence = collect_conflict_matrix_evidence_packets(
17314        root, scope, backend, &targets, depth, limit, store, freshness,
17315    )?;
17316    let graph = conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
17317    let shared_preparation =
17318        conflict_matrix_shared_preparation_summary(&graph, &evidence, "computed");
17319
17320    let prepared_graph = ConflictMatrixGraphPreparedInputs {
17321        targets,
17322        graph,
17323        evidence,
17324        shared_preparation,
17325    };
17326    let cache = conflict_matrix_graph_prepared_cache_from_inputs(&graph_cache_key, &prepared_graph);
17327    conflict_matrix_write_disk_cache(root, "graph", &graph_cache_key, &cache);
17328    Ok(prepared_graph)
17329}
17330
17331#[allow(clippy::too_many_arguments)]
17332fn build_conflict_matrix_report_from_prepared_graph(
17333    root: &Path,
17334    path: &Path,
17335    scope: Option<&str>,
17336    depth: usize,
17337    limit: usize,
17338    impact_limit: usize,
17339    freshness: GraphDbFreshnessReport,
17340    extra_warnings: Vec<String>,
17341    prepared: &ConflictMatrixPreparedInputs,
17342    graph_prepared: &ConflictMatrixGraphPreparedInputs,
17343) -> Result<ConflictMatrixReport> {
17344    let context_pack = &prepared.context_pack;
17345    let targets = graph_prepared.targets.clone();
17346    let graph_index = &graph_prepared.graph.index;
17347
17348    let mut warnings = context_pack.status_reminders.clone();
17349    warnings.extend(extra_warnings);
17350    let mut candidates = Vec::new();
17351    let mut evidence_packets = Vec::new();
17352    for prepared_evidence in &graph_prepared.evidence {
17353        let evidence = &prepared_evidence.report;
17354        warnings.extend(evidence.warnings.clone());
17355        evidence_packets.push(prepared_evidence.summary.clone());
17356        candidates.push(conflict_matrix_candidate_from_evidence(
17357            root,
17358            evidence,
17359            graph_index,
17360            &prepared.cached_diff,
17361            &prepared.impact_report,
17362        ));
17363    }
17364
17365    apply_conflict_matrix_worker_feedback_controls(&mut candidates);
17366    candidates.sort_by(|left, right| {
17367        left.risk
17368            .cmp(&right.risk)
17369            .then_with(|| left.risk_score.cmp(&right.risk_score))
17370            .then_with(|| {
17371                right
17372                    .worker_feedback
17373                    .closure_rank_score
17374                    .cmp(&left.worker_feedback.closure_rank_score)
17375            })
17376            .then_with(|| {
17377                right
17378                    .semantic_dispatch_score
17379                    .cmp(&left.semantic_dispatch_score)
17380            })
17381            .then_with(|| left.target.cmp(&right.target))
17382    });
17383    for (idx, candidate) in candidates.iter_mut().enumerate() {
17384        candidate.rank = idx + 1;
17385    }
17386    warnings.extend(candidates.iter().flat_map(|candidate| {
17387        candidate
17388            .worker_feedback
17389            .warnings
17390            .iter()
17391            .map(|warning| format!("{}: {warning}", candidate.target))
17392    }));
17393    let conflicts = build_conflict_matrix_pairs(&candidates);
17394    apply_conflict_matrix_ownership_blocks(&mut candidates);
17395    apply_conflict_matrix_scheduler_fields(&mut candidates, &conflicts);
17396    let worker_prompt_packets = conflict_matrix_worker_prompt_packets(&candidates);
17397
17398    let per_target_fail_closed = conflict_matrix_per_target_fail_closed(&candidates);
17399    let cross_target_parallel_safe = conflicts
17400        .iter()
17401        .all(|pair| pair.risk <= ConflictMatrixRisk::Medium);
17402    let fail_closed = !per_target_fail_closed.is_empty()
17403        || conflicts
17404            .iter()
17405            .any(|pair| pair.risk == ConflictMatrixRisk::FailClosed);
17406    let can_parallel = !fail_closed && cross_target_parallel_safe;
17407    let next_commands =
17408        conflict_matrix_next_commands(root, path, scope, &targets, depth, limit, impact_limit);
17409    let orchestration = conflict_matrix_orchestration_observability(
17410        &freshness,
17411        &candidates,
17412        &conflicts,
17413        &next_commands,
17414    );
17415    let inputs = ConflictMatrixInputSummary {
17416        graph_db_evidence_targets: targets.clone(),
17417        evidence_packets,
17418        shared_preparation: graph_prepared.shared_preparation.clone(),
17419        preparation_cache: prepared.preparation_cache.clone(),
17420        preparation_timings: prepared.preparation_timings.clone(),
17421        context_pack_command: format!(
17422            "tsift --envelope context-pack {} --budget normal",
17423            shell_quote(path.to_string_lossy().as_ref())
17424        ),
17425        cached_diff_command: format!(
17426            "tsift diff-digest --cached {} --json",
17427            shell_quote(root.to_string_lossy().as_ref())
17428        ),
17429        impact_command: format!(
17430            "tsift impact {} --cached{} --limit {} --json",
17431            shell_quote(root.to_string_lossy().as_ref()),
17432            scope
17433                .map(|scope| format!(" --scope {}", shell_quote(scope)))
17434                .unwrap_or_default(),
17435            impact_limit
17436        ),
17437    };
17438    let context_summary = conflict_matrix_context_summary(context_pack);
17439    Ok(ConflictMatrixReport {
17440        contract_version: CONFLICT_MATRIX_CONTRACT_VERSION,
17441        root: root.to_string_lossy().to_string(),
17442        scope: scope.map(str::to_string),
17443        targets,
17444        can_parallel,
17445        fail_closed,
17446        cross_target_parallel_safe,
17447        per_target_fail_closed,
17448        inputs,
17449        context_pack: context_summary,
17450        cached_diff: prepared.cached_diff.clone(),
17451        impact: prepared.impact_report.clone(),
17452        candidates,
17453        worker_prompt_packets,
17454        conflicts,
17455        orchestration,
17456        next_commands,
17457        warnings,
17458    })
17459}
17460
17461#[allow(clippy::too_many_arguments)]
17462fn build_conflict_matrix_report_with_prepared<S: GraphStore>(
17463    root: &Path,
17464    path: &Path,
17465    scope: Option<&str>,
17466    raw_targets: &[String],
17467    depth: usize,
17468    limit: usize,
17469    impact_limit: usize,
17470    store: &S,
17471    freshness: GraphDbFreshnessReport,
17472    extra_warnings: Vec<String>,
17473    prepared: &ConflictMatrixPreparedInputs,
17474) -> Result<ConflictMatrixReport> {
17475    let graph_prepared = prepare_conflict_matrix_graph_orchestration(
17476        root,
17477        scope,
17478        "sqlite",
17479        raw_targets,
17480        prepared,
17481        depth,
17482        limit,
17483        store,
17484        freshness.clone(),
17485    )?;
17486    build_conflict_matrix_report_from_prepared_graph(
17487        root,
17488        path,
17489        scope,
17490        depth,
17491        limit,
17492        impact_limit,
17493        freshness,
17494        extra_warnings,
17495        prepared,
17496        &graph_prepared,
17497    )
17498}
17499
17500#[allow(clippy::too_many_arguments)]
17501fn build_conflict_matrix_report_with_store<S: GraphStore>(
17502    root: &Path,
17503    path: &Path,
17504    scope: Option<&str>,
17505    raw_targets: &[String],
17506    depth: usize,
17507    limit: usize,
17508    impact_limit: usize,
17509    store: &S,
17510    freshness: GraphDbFreshnessReport,
17511    extra_warnings: Vec<String>,
17512) -> Result<ConflictMatrixReport> {
17513    let prepared = prepare_conflict_matrix_inputs(root, path, scope, impact_limit)?;
17514    build_conflict_matrix_report_with_prepared(
17515        root,
17516        path,
17517        scope,
17518        raw_targets,
17519        depth,
17520        limit,
17521        impact_limit,
17522        store,
17523        freshness,
17524        extra_warnings,
17525        &prepared,
17526    )
17527}
17528
17529fn build_conflict_matrix_report(
17530    path: &Path,
17531    scope: Option<&str>,
17532    raw_targets: &[String],
17533    depth: usize,
17534    limit: usize,
17535    impact_limit: usize,
17536) -> Result<ConflictMatrixReport> {
17537    let root = lint::resolve_project_root_or_canonical_path(path)?;
17538    let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
17539    if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
17540        write_traversal_graph_store(&root, path, scope)
17541            .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
17542    }
17543    let graph_db = graph_substrate_db_path(&root, scope);
17544    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
17545        .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
17546    let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
17547    let mut warnings = Vec::new();
17548    if let Some(recovery) = store.read_only_recovery() {
17549        warnings.push(graph_db_read_recovery_diagnostic(recovery));
17550    }
17551    build_conflict_matrix_report_with_store(
17552        &root,
17553        path,
17554        scope,
17555        raw_targets,
17556        depth,
17557        limit,
17558        impact_limit,
17559        &store,
17560        freshness,
17561        warnings,
17562    )
17563}
17564
17565fn cmd_conflict_matrix(
17566    path: &Path,
17567    scope: Option<&str>,
17568    raw_targets: &[String],
17569    depth: usize,
17570    limit: usize,
17571    impact_limit: usize,
17572    format: OutputFormat,
17573) -> Result<()> {
17574    let report =
17575        build_conflict_matrix_report(path, scope, raw_targets, depth, limit, impact_limit)?;
17576    if format.json_output {
17577        print_json_or_envelope(
17578            &report,
17579            &format,
17580            "conflict-matrix",
17581            "parallel-planning",
17582            ToolEnvelopeSummary {
17583                text: format!(
17584                    "Conflict matrix for {} target(s): can_parallel={} fail_closed={} cross_target_parallel_safe={} per_target_fail_closed={}",
17585                    report.targets.len(),
17586                    report.can_parallel,
17587                    report.fail_closed,
17588                    report.cross_target_parallel_safe,
17589                    report.per_target_fail_closed.len()
17590                ),
17591                metrics: vec![
17592                    envelope_metric("targets", report.targets.len()),
17593                    envelope_metric("candidates", report.candidates.len()),
17594                    envelope_metric("conflicts", report.conflicts.len()),
17595                    envelope_metric("can_parallel", report.can_parallel),
17596                    envelope_metric("fail_closed", report.fail_closed),
17597                    envelope_metric(
17598                        "cross_target_parallel_safe",
17599                        report.cross_target_parallel_safe,
17600                    ),
17601                    envelope_metric(
17602                        "per_target_fail_closed",
17603                        report.per_target_fail_closed.len(),
17604                    ),
17605                ],
17606            },
17607            report.fail_closed,
17608            report.next_commands.clone(),
17609        )
17610    } else {
17611        print_conflict_matrix_human(&report, format.compact);
17612        Ok(())
17613    }
17614}
17615
/// Per-kind node counts for a dispatch trace, as filled in by `dispatch_trace_summary`.
#[derive(Serialize)]
struct DispatchTraceSummary {
    /// Number of trace nodes whose kind is `backlog`.
    backlog: usize,
    /// Number of trace nodes whose kind is `job_packet`.
    job_packet: usize,
    /// Number of trace nodes whose kind is `worker_result`.
    worker_result: usize,
    /// Number of trace nodes whose kind is `worker_context`.
    worker_context: usize,
    /// Number of trace nodes whose kind is `source_handle`.
    source_handle: usize,
    /// Number of trace nodes whose kind is `semantic_concept` or `semantic_entity`.
    semantic_rows: usize,
}
17625
/// Serializable dispatch-trace report, derived from a conflict-matrix report
/// plus the graph snapshot it was prepared from.
#[derive(Serialize)]
struct DispatchTraceReport {
    /// Contract version tag; the builder sets it to `DISPATCH_TRACE_CONTRACT_VERSION`.
    contract_version: &'static str,
    /// Root string carried over from the conflict-matrix report.
    root: String,
    /// Scope carried over from the conflict-matrix report; left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Targets carried over from the conflict-matrix report.
    targets: Vec<String>,
    /// Projection freshness taken from the conflict report's orchestration section.
    projection_freshness: GraphDbFreshnessReport,
    /// Projection hashes taken from the conflict report's orchestration section.
    projection_hashes: Vec<String>,
    /// Evidence packet ids taken from the conflict report's orchestration section.
    evidence_packet_ids: Vec<String>,
    /// Shared-preparation summary computed for the trace by
    /// `dispatch_trace_shared_preparation_summary`.
    shared_preparation: ConflictMatrixSharedPreparationSummary,
    /// Worker prompt packets carried over from the conflict-matrix report.
    worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
    /// Worker feedback, one entry per conflict-matrix candidate, in candidate order.
    worker_feedback: Vec<ConflictMatrixWorkerFeedback>,
    /// Per-kind counts over `nodes`.
    summary: DispatchTraceSummary,
    /// Graph nodes selected for the trace, sorted by kind rank and then by id.
    nodes: Vec<SubstrateGraphNode>,
    /// Graph edges whose two endpoints are both in `nodes`, sorted by
    /// source id, then kind, then destination id.
    edges: Vec<SubstrateGraphEdge>,
    /// Decisions taken from the conflict report's orchestration section.
    conflict_matrix_decisions: Vec<String>,
    /// The conflict report's `next_commands`.
    replay_commands: Vec<String>,
    /// Commands returned by `graph_db_repair_commands` for the root and scope.
    repair_commands: Vec<String>,
    /// Truncation flag returned by `dispatch_trace_collect_ids`.
    truncated: bool,
    /// Warnings for the trace; left out of the output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
17649
/// Returns `true` when `kind` is one of the node kinds a dispatch trace may contain.
///
/// The comparison is exact (case-sensitive); any other string yields `false`.
fn dispatch_trace_allowed_node_kind(kind: &str) -> bool {
    const TRACEABLE_KINDS: [&str; 11] = [
        "session",
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "semantic_concept",
        "semantic_entity",
        "file",
        "symbol",
        "route",
    ];
    TRACEABLE_KINDS.contains(&kind)
}
17666
/// Returns the sort rank of a node kind within a dispatch trace.
///
/// Known kinds rank from 0 (`backlog`) to 10 (`session`), in the order listed
/// below; any other string ranks 99.
fn dispatch_trace_kind_rank(kind: &str) -> usize {
    // A kind's rank is its position in this list.
    const RANKED_KINDS: [&str; 11] = [
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "file",
        "symbol",
        "route",
        "semantic_concept",
        "semantic_entity",
        "session",
    ];
    RANKED_KINDS
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(99)
}
17683
17684fn dispatch_trace_summary(nodes: &[SubstrateGraphNode]) -> DispatchTraceSummary {
17685    DispatchTraceSummary {
17686        backlog: nodes.iter().filter(|node| node.kind == "backlog").count(),
17687        job_packet: nodes
17688            .iter()
17689            .filter(|node| node.kind == "job_packet")
17690            .count(),
17691        worker_result: nodes
17692            .iter()
17693            .filter(|node| node.kind == "worker_result")
17694            .count(),
17695        worker_context: nodes
17696            .iter()
17697            .filter(|node| node.kind == "worker_context")
17698            .count(),
17699        source_handle: nodes
17700            .iter()
17701            .filter(|node| node.kind == "source_handle")
17702            .count(),
17703        semantic_rows: nodes
17704            .iter()
17705            .filter(|node| matches!(node.kind.as_str(), "semantic_concept" | "semantic_entity"))
17706            .count(),
17707    }
17708}
17709
17710fn dispatch_trace_shared_preparation_summary(
17711    graph_nodes: &[SubstrateGraphNode],
17712    graph_edges: &[SubstrateGraphEdge],
17713    conflict: &ConflictMatrixReport,
17714) -> ConflictMatrixSharedPreparationSummary {
17715    ConflictMatrixSharedPreparationSummary {
17716        evidence_cache_status: conflict
17717            .inputs
17718            .shared_preparation
17719            .evidence_cache_status
17720            .clone(),
17721        graph_nodes: graph_nodes.len(),
17722        graph_edges: graph_edges.len(),
17723        evidence_packets: conflict.orchestration.evidence_packet_ids.len(),
17724        source_handles: conflict
17725            .candidates
17726            .iter()
17727            .map(|candidate| candidate.source_handles.len())
17728            .sum(),
17729        worker_context: conflict
17730            .candidates
17731            .iter()
17732            .map(|candidate| candidate.worker_context_handles.len())
17733            .sum(),
17734        worker_results: conflict
17735            .candidates
17736            .iter()
17737            .map(|candidate| candidate.worker_feedback.total)
17738            .sum(),
17739        semantic_rows: conflict
17740            .candidates
17741            .iter()
17742            .map(|candidate| candidate.semantic_related.len())
17743            .sum(),
17744        dispatch_trace_snapshot_nodes: graph_nodes.len(),
17745        dispatch_trace_snapshot_edges: graph_edges.len(),
17746    }
17747}
17748
17749fn dispatch_trace_collect_ids(
17750    targets: &[String],
17751    candidates: &[ConflictMatrixCandidate],
17752    graph_nodes: &[SubstrateGraphNode],
17753    graph_edges: &[SubstrateGraphEdge],
17754    depth: usize,
17755    limit: usize,
17756) -> (BTreeSet<String>, bool) {
17757    let target_refs = targets
17758        .iter()
17759        .map(|target| target.trim_start_matches('#').to_string())
17760        .collect::<BTreeSet<_>>();
17761    let mut ids = BTreeSet::new();
17762    for candidate in candidates {
17763        ids.insert(candidate.target_node_id.clone());
17764        for source in &candidate.source_handles {
17765            ids.insert(source.handle.clone());
17766        }
17767        for handle in &candidate.worker_context_handles {
17768            ids.insert(handle.clone());
17769        }
17770        for semantic in &candidate.semantic_related {
17771            ids.insert(semantic.handle.clone());
17772        }
17773    }
17774    for node in graph_nodes {
17775        if !dispatch_trace_allowed_node_kind(&node.kind) {
17776            continue;
17777        }
17778        if node
17779            .properties
17780            .get("ref_id")
17781            .is_some_and(|ref_id| target_refs.contains(ref_id))
17782        {
17783            ids.insert(node.id.clone());
17784        }
17785    }
17786
17787    let node_by_id = graph_nodes
17788        .iter()
17789        .map(|node| (node.id.as_str(), node))
17790        .collect::<BTreeMap<_, _>>();
17791    let max_nodes = if limit == 0 {
17792        usize::MAX
17793    } else {
17794        limit
17795            .saturating_mul(targets.len().max(1))
17796            .saturating_mul(12)
17797            .max(64)
17798    };
17799    let mut truncated = false;
17800    for _ in 0..depth.max(1) {
17801        let before = ids.len();
17802        let current_ids = ids.clone();
17803        for edge in graph_edges {
17804            if ids.len() >= max_nodes {
17805                truncated = true;
17806                break;
17807            }
17808            let touches = current_ids.contains(&edge.from_id) || current_ids.contains(&edge.to_id);
17809            if !touches {
17810                continue;
17811            }
17812            for endpoint in [&edge.from_id, &edge.to_id] {
17813                let Some(node) = node_by_id.get(endpoint.as_str()) else {
17814                    continue;
17815                };
17816                if dispatch_trace_allowed_node_kind(&node.kind) {
17817                    ids.insert(endpoint.clone());
17818                }
17819            }
17820        }
17821        if ids.len() == before || truncated {
17822            break;
17823        }
17824    }
17825    (ids, truncated)
17826}
17827
17828#[allow(clippy::too_many_arguments)]
17829fn build_dispatch_trace_report_from_conflict_snapshot(
17830    root: &Path,
17831    scope: Option<&str>,
17832    conflict: ConflictMatrixReport,
17833    graph_nodes: Vec<SubstrateGraphNode>,
17834    graph_edges: Vec<SubstrateGraphEdge>,
17835    depth: usize,
17836    limit: usize,
17837    extra_warnings: Vec<String>,
17838) -> Result<DispatchTraceReport> {
17839    let shared_preparation =
17840        dispatch_trace_shared_preparation_summary(&graph_nodes, &graph_edges, &conflict);
17841    let (ids, truncated) = dispatch_trace_collect_ids(
17842        &conflict.targets,
17843        &conflict.candidates,
17844        &graph_nodes,
17845        &graph_edges,
17846        depth,
17847        limit,
17848    );
17849    let mut nodes = graph_nodes
17850        .into_iter()
17851        .filter(|node| ids.contains(&node.id))
17852        .collect::<Vec<_>>();
17853    nodes.sort_by(|left, right| {
17854        dispatch_trace_kind_rank(&left.kind)
17855            .cmp(&dispatch_trace_kind_rank(&right.kind))
17856            .then(left.id.cmp(&right.id))
17857    });
17858    let node_ids = nodes
17859        .iter()
17860        .map(|node| node.id.as_str())
17861        .collect::<BTreeSet<_>>();
17862    let mut edges = graph_edges
17863        .into_iter()
17864        .filter(|edge| {
17865            node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
17866        })
17867        .collect::<Vec<_>>();
17868    edges.sort_by(|left, right| {
17869        left.from_id
17870            .cmp(&right.from_id)
17871            .then(left.kind.cmp(&right.kind))
17872            .then(left.to_id.cmp(&right.to_id))
17873    });
17874    let mut warnings = conflict.warnings;
17875    warnings.extend(extra_warnings);
17876
17877    Ok(DispatchTraceReport {
17878        contract_version: DISPATCH_TRACE_CONTRACT_VERSION,
17879        root: conflict.root,
17880        scope: conflict.scope,
17881        targets: conflict.targets,
17882        projection_freshness: conflict.orchestration.projection_freshness,
17883        projection_hashes: conflict.orchestration.projection_hashes,
17884        evidence_packet_ids: conflict.orchestration.evidence_packet_ids,
17885        shared_preparation,
17886        worker_prompt_packets: conflict.worker_prompt_packets,
17887        worker_feedback: conflict
17888            .candidates
17889            .iter()
17890            .map(|candidate| candidate.worker_feedback.clone())
17891            .collect(),
17892        summary: dispatch_trace_summary(&nodes),
17893        nodes,
17894        edges,
17895        conflict_matrix_decisions: conflict.orchestration.conflict_matrix_decisions,
17896        replay_commands: conflict.next_commands,
17897        repair_commands: graph_db_repair_commands(root, scope),
17898        truncated,
17899        warnings,
17900    })
17901}
17902
17903fn build_dispatch_trace_report(
17904    path: &Path,
17905    scope: Option<&str>,
17906    raw_targets: &[String],
17907    depth: usize,
17908    limit: usize,
17909    impact_limit: usize,
17910) -> Result<DispatchTraceReport> {
17911    let root = lint::resolve_project_root_or_canonical_path(path)?;
17912    let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
17913    if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
17914        write_traversal_graph_store(&root, path, scope)
17915            .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
17916    }
17917    let graph_db = graph_substrate_db_path(&root, scope);
17918    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
17919        .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
17920    let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
17921    let extra_warnings = store
17922        .read_only_recovery()
17923        .map(graph_db_read_recovery_diagnostic)
17924        .into_iter()
17925        .collect::<Vec<_>>();
17926    let prepared = prepare_conflict_matrix_inputs(&root, path, scope, impact_limit)?;
17927    let graph_prepared = prepare_conflict_matrix_graph_orchestration(
17928        &root,
17929        scope,
17930        "sqlite",
17931        raw_targets,
17932        &prepared,
17933        depth,
17934        limit,
17935        &store,
17936        freshness.clone(),
17937    )?;
17938    let conflict = build_conflict_matrix_report_from_prepared_graph(
17939        &root,
17940        path,
17941        scope,
17942        depth,
17943        limit,
17944        impact_limit,
17945        freshness,
17946        extra_warnings.clone(),
17947        &prepared,
17948        &graph_prepared,
17949    )?;
17950    build_dispatch_trace_report_from_conflict_snapshot(
17951        &root,
17952        scope,
17953        conflict,
17954        graph_prepared.graph.nodes,
17955        graph_prepared.graph.edges,
17956        depth,
17957        limit,
17958        extra_warnings,
17959    )
17960}
17961
17962fn dispatch_trace_html(report: &DispatchTraceReport) -> Result<String> {
17963    let json = serde_json::to_string(report)?.replace("</", "<\\/");
17964    let mut html = String::new();
17965    html.push_str(
17966        "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift dispatch trace</title>",
17967    );
17968    html.push_str(
17969        r#"<style>
17970:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#fff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e}
17971@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf}}
17972*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 360px;gap:14px}.panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.side{padding:14px;overflow:auto;max-height:720px}.side h2{font-size:15px;margin:12px 0 8px}.side h2:first-child{margin-top:0}.list{display:grid;gap:8px}.row{border:1px solid var(--line);border-radius:6px;padding:8px}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted);overflow-wrap:anywhere}svg{width:100%;height:680px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.node{stroke:var(--panel);stroke-width:2}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text)}@media(max-width:900px){.top{display:block}.layout{grid-template-columns:1fr}.side{max-height:none}svg{height:560px}}
17973</style>"#,
17974    );
17975    html.push_str("</head><body><div class=\"page\">");
17976    html.push_str(&format!(
17977        "<header class=\"top\"><div><h1>tsift dispatch trace</h1><div class=\"meta\">targets <code>{}</code> | evidence <code>{}</code> | nodes <code>{}</code> | worker_prompt_packets <code>{}</code></div></div><div class=\"meta\"><code>{}</code></div></header>",
17978        html_escape(&report.targets.join(", ")),
17979        report.evidence_packet_ids.len(),
17980        report.nodes.len(),
17981        report.worker_prompt_packets.len(),
17982        html_escape(report.contract_version)
17983    ));
17984    html.push_str(
17985        r#"<main class="layout"><section class="panel"><svg id="graph-canvas" role="img" aria-label="Dispatch trace graph"></svg></section><aside class="side"><h2>Worker Prompt Packets</h2><div id="packets" class="list"></div><h2>Worker Feedback</h2><div id="feedback" class="list"></div><h2>Nodes</h2><div id="nodes" class="list"></div></aside></main>"#,
17986    );
17987    html.push_str("<script id=\"trace-data\" type=\"application/json\">");
17988    html.push_str(&json);
17989    html.push_str(
17990        r##"</script><script>
17991const report = JSON.parse(document.getElementById("trace-data").textContent);
17992const svg = document.getElementById("graph-canvas");
17993const nodeList = document.getElementById("nodes");
17994const packets = document.getElementById("packets");
17995const feedback = document.getElementById("feedback");
17996const nodes = report.nodes.map((node, index) => ({...node, index}));
17997const nodeById = new Map(nodes.map(node => [node.id, node]));
17998const edges = report.edges.filter(edge => nodeById.has(edge.from_id) && nodeById.has(edge.to_id));
17999const colorByKind = new Map([["backlog","#dc2626"],["job_packet","#ea580c"],["worker_result","#15803d"],["worker_context","#475569"],["source_handle","#64748b"],["semantic_concept","#9a3412"],["semantic_entity","#b45309"],["file","#2563eb"],["symbol","#16a34a"],["route","#7c3aed"],["session","#0891b2"]]);
18000function color(kind){return colorByKind.get(kind)||"#6b7280";}
18001function text(value){return value == null ? "" : String(value);}
18002function escapeHtml(value){return text(value).replace(/[&<>"']/g, ch => ({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;","'":"&#39;"}[ch]));}
18003function layout(){
18004  const rect = svg.getBoundingClientRect();
18005  const width = rect.width || 900, height = rect.height || 680, cx = width / 2, cy = height / 2;
18006  const kinds = [...new Set(nodes.map(node => node.kind))].sort();
18007  const counts = new Map();
18008  for (const node of nodes) counts.set(node.kind, (counts.get(node.kind)||0)+1);
18009  const offsets = new Map();
18010  for (const node of nodes) {
18011    const group = kinds.indexOf(node.kind);
18012    const index = offsets.get(node.kind) || 0;
18013    offsets.set(node.kind, index + 1);
18014    const total = counts.get(node.kind) || 1;
18015    const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
18016    const angle = Math.PI * 2 * index / Math.max(total, 1) + group * 0.53;
18017    node.x = cx + Math.cos(angle) * ring;
18018    node.y = cy + Math.sin(angle) * ring;
18019  }
18020}
18021function draw(){
18022  svg.innerHTML = "";
18023  for (const edge of edges) {
18024    const from = nodeById.get(edge.from_id), to = nodeById.get(edge.to_id);
18025    const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
18026    line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
18027    line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
18028    line.setAttribute("class", "edge");
18029    line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.kind;
18030    svg.appendChild(line);
18031  }
18032  for (const node of nodes) {
18033    const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
18034    circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
18035    circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
18036    circle.setAttribute("fill", color(node.kind));
18037    circle.setAttribute("class", "node");
18038    circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
18039    svg.appendChild(circle);
18040    const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
18041    label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
18042    label.setAttribute("class", "node-label");
18043    label.textContent = node.label.length > 34 ? node.label.slice(0,31) + "..." : node.label;
18044    svg.appendChild(label);
18045  }
18046}
18047packets.innerHTML = report.worker_prompt_packets.map(packet => `<div class="row"><div class="kind">${escapeHtml(packet.contract_version)} - ${escapeHtml(packet.risk)} - parallel_safe ${packet.parallel_safe ? "true" : "false"} - closure ${packet.worker_feedback ? packet.worker_feedback.closure_rank_score : 0}</div><div class="label">${escapeHtml(packet.title)}</div><div class="handle">${escapeHtml(packet.packet_id)}</div><div class="handle">blocks ${escapeHtml((packet.blocks||[]).join(", ") || "none")} | blocked_by ${escapeHtml((packet.blocked_by||[]).join(", ") || "none")}</div></div>`).join("") || "<div class=\"meta\">No packets.</div>";
18048feedback.innerHTML = report.worker_feedback.map(item => `<div class="row"><div class="kind">completed ${item.completed} - blocked ${item.blocked} - closure ${item.closure_rank_score}</div><div>files ${escapeHtml((item.touched_files||[]).join(", ") || "none")}</div><div>tests ${escapeHtml((item.expected_tests||[]).join(" && ") || "none")}</div>${item.repeated_blockage ? "<div class=\"label\">Repeated blockage</div>" : ""}${(item.stale_expected_tests||[]).length ? `<div class="label">Stale tests: ${escapeHtml(item.stale_expected_tests.join(", "))}</div>` : ""}${(item.follow_up_debt||[]).length ? `<div class="label">Follow-up debt: ${escapeHtml(item.follow_up_debt.join(", "))}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No worker results.</div>";
18049nodeList.innerHTML = nodes.map(node => `<div class="row"><div class="kind">${escapeHtml(node.kind)}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${escapeHtml(node.id)}</div></div>`).join("");
18050window.addEventListener("resize", () => { layout(); draw(); });
18051layout(); draw();
18052</script></div></body></html>"##,
18053    );
18054    Ok(html)
18055}
18056
18057struct DispatchTraceOptions<'a> {
18058    path: &'a Path,
18059    scope: Option<&'a str>,
18060    raw_targets: &'a [String],
18061    depth: usize,
18062    limit: usize,
18063    impact_limit: usize,
18064    trace_format: DispatchTraceFormat,
18065}
18066
18067fn cmd_dispatch_trace(
18068    options: DispatchTraceOptions<'_>,
18069    output_format: OutputFormat,
18070) -> Result<()> {
18071    let report = build_dispatch_trace_report(
18072        options.path,
18073        options.scope,
18074        options.raw_targets,
18075        options.depth,
18076        options.limit,
18077        options.impact_limit,
18078    )?;
18079    match options.trace_format {
18080        DispatchTraceFormat::Json => {
18081            if output_format.envelope {
18082                print_json_or_envelope(
18083                    &report,
18084                    &output_format,
18085                    "dispatch-trace",
18086                    "operator-review",
18087                    ToolEnvelopeSummary {
18088                        text: format!(
18089                            "Dispatch trace for {} target(s): {} graph node(s), {} worker prompt packet(s)",
18090                            report.targets.len(),
18091                            report.nodes.len(),
18092                            report.worker_prompt_packets.len()
18093                        ),
18094                        metrics: vec![
18095                            envelope_metric("targets", report.targets.len()),
18096                            envelope_metric("nodes", report.nodes.len()),
18097                            envelope_metric("edges", report.edges.len()),
18098                            envelope_metric(
18099                                "worker_prompt_packets",
18100                                report.worker_prompt_packets.len(),
18101                            ),
18102                        ],
18103                    },
18104                    report.truncated,
18105                    report.replay_commands.clone(),
18106                )
18107            } else {
18108                println!(
18109                    "{}",
18110                    to_json_schema(
18111                        &report,
18112                        output_format.pretty,
18113                        output_format.terse,
18114                        output_format.schema
18115                    )?
18116                );
18117                Ok(())
18118            }
18119        }
18120        DispatchTraceFormat::Html => {
18121            println!("{}", dispatch_trace_html(&report)?);
18122            Ok(())
18123        }
18124    }
18125}
18126
18127#[derive(Clone, Debug)]
18128struct DependencyDagProfile {
18129    id: String,
18130    graph_node_id: String,
18131    label: String,
18132    path: Option<String>,
18133    line: Option<i64>,
18134    detail: Option<String>,
18135    source_files: BTreeSet<String>,
18136    source_symbols: BTreeSet<String>,
18137    config_files: BTreeSet<String>,
18138    expected_tests: BTreeSet<String>,
18139    semantic_refs: BTreeMap<String, ConflictMatrixSemanticRef>,
18140    worker_feedback: ConflictMatrixWorkerFeedback,
18141}
18142
18143#[derive(Clone, Debug, Serialize)]
18144struct DependencyDagNode {
18145    id: String,
18146    graph_node_id: String,
18147    label: String,
18148    #[serde(skip_serializing_if = "Option::is_none")]
18149    path: Option<String>,
18150    #[serde(skip_serializing_if = "Option::is_none")]
18151    line: Option<i64>,
18152    #[serde(skip_serializing_if = "Option::is_none")]
18153    detail: Option<String>,
18154    source_files: Vec<String>,
18155    source_symbols: Vec<String>,
18156    config_files: Vec<String>,
18157    expected_tests: Vec<String>,
18158    semantic_refs: Vec<ConflictMatrixSemanticRef>,
18159    worker_feedback: ConflictMatrixWorkerFeedback,
18160}
18161
18162#[derive(Clone, Debug, Serialize)]
18163struct DependencyDagEdge {
18164    from: String,
18165    to: String,
18166    kind: String,
18167    weight: usize,
18168    reasons: Vec<String>,
18169    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18170    shared_files: Vec<String>,
18171    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18172    shared_symbols: Vec<String>,
18173    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18174    shared_tests: Vec<String>,
18175    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18176    shared_config_files: Vec<String>,
18177    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18178    shared_semantic_refs: Vec<String>,
18179}
18180
18181#[derive(Clone, Debug, Serialize)]
18182struct DependencyDagTopoBatch {
18183    batch: usize,
18184    targets: Vec<String>,
18185}
18186
18187#[derive(Clone, Debug, Serialize)]
18188struct DependencyDagCycleDiagnostics {
18189    has_cycles: bool,
18190    blocked_nodes: Vec<String>,
18191    cycle_edges: Vec<DependencyDagEdge>,
18192}
18193
18194#[derive(Serialize)]
18195struct DependencyDagSummary {
18196    nodes: usize,
18197    edges: usize,
18198    topo_batches: usize,
18199    has_cycles: bool,
18200}
18201
18202#[derive(Serialize)]
18203struct DependencyDagReport {
18204    contract_version: &'static str,
18205    root: String,
18206    #[serde(skip_serializing_if = "Option::is_none")]
18207    scope: Option<String>,
18208    path: String,
18209    targets: Vec<String>,
18210    projection_freshness: GraphDbFreshnessReport,
18211    projection_hashes: Vec<String>,
18212    nodes: Vec<DependencyDagNode>,
18213    edges: Vec<DependencyDagEdge>,
18214    topo_batches: Vec<DependencyDagTopoBatch>,
18215    cycle_diagnostics: DependencyDagCycleDiagnostics,
18216    summary: DependencyDagSummary,
18217    replay_commands: Vec<String>,
18218    repair_commands: Vec<String>,
18219    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18220    warnings: Vec<String>,
18221}
18222
18223fn dependency_dag_backlog_node_for_target(
18224    store: &impl GraphStore,
18225    target: &str,
18226) -> Result<SubstrateGraphNode> {
18227    let resolved = graph_db_resolve_evidence_target(store, target)?
18228        .with_context(|| format!("dependency-dag target not found: {target}"))?;
18229    if resolved.kind == "backlog" {
18230        return Ok(resolved);
18231    }
18232    let Some(ref_id) = resolved.properties.get("ref_id").cloned() else {
18233        bail!(
18234            "dependency-dag target {} resolved to {} without a backlog ref_id",
18235            target,
18236            resolved.kind
18237        );
18238    };
18239    store
18240        .nodes_by_kind("backlog")?
18241        .into_iter()
18242        .filter(|node| node.properties.get("ref_id") == Some(&ref_id))
18243        .min_by(|left, right| {
18244            left.properties
18245                .get("line")
18246                .and_then(|value| value.parse::<i64>().ok())
18247                .cmp(
18248                    &right
18249                        .properties
18250                        .get("line")
18251                        .and_then(|value| value.parse::<i64>().ok()),
18252                )
18253                .then(left.id.cmp(&right.id))
18254        })
18255        .with_context(|| format!("dependency-dag backlog node not found for #{ref_id}"))
18256}
18257
18258fn dependency_dag_resolve_backlog_nodes(
18259    root: &Path,
18260    path: &Path,
18261    store: &impl GraphStore,
18262    raw_targets: &[String],
18263) -> Result<Vec<SubstrateGraphNode>> {
18264    let mut nodes = Vec::new();
18265    let mut seen = BTreeSet::new();
18266    if raw_targets.is_empty() {
18267        let hinted_path = if path.is_absolute() {
18268            path.to_path_buf()
18269        } else {
18270            root.join(path)
18271        };
18272        let hinted_markdown = hinted_path
18273            .extension()
18274            .and_then(|ext| ext.to_str())
18275            .is_some_and(|ext| ext.eq_ignore_ascii_case("md"));
18276        let hinted_rel = hinted_markdown.then(|| {
18277            relativize_pathbuf(&hinted_path, root)
18278                .to_string_lossy()
18279                .replace('\\', "/")
18280        });
18281        for node in store.nodes_by_kind("backlog")? {
18282            if let Some(expected_path) = &hinted_rel
18283                && node.properties.get("path") != Some(expected_path)
18284            {
18285                continue;
18286            }
18287            if seen.insert(node.id.clone()) {
18288                nodes.push(node);
18289            }
18290        }
18291        if nodes.is_empty() && hinted_rel.is_some() {
18292            for node in store.nodes_by_kind("backlog")? {
18293                if seen.insert(node.id.clone()) {
18294                    nodes.push(node);
18295                }
18296            }
18297        }
18298    } else {
18299        for target in raw_targets {
18300            let normalized = normalize_conflict_target(target).unwrap_or_else(|| target.clone());
18301            let node = dependency_dag_backlog_node_for_target(store, &normalized)?;
18302            if seen.insert(node.id.clone()) {
18303                nodes.push(node);
18304            }
18305        }
18306    }
18307    if nodes.is_empty() {
18308        bail!("dependency-dag needs at least one resolvable backlog id");
18309    }
18310    nodes.sort_by(|left, right| {
18311        left.properties
18312            .get("line")
18313            .and_then(|value| value.parse::<i64>().ok())
18314            .cmp(
18315                &right
18316                    .properties
18317                    .get("line")
18318                    .and_then(|value| value.parse::<i64>().ok()),
18319            )
18320            .then(left.id.cmp(&right.id))
18321    });
18322    Ok(nodes)
18323}
18324
18325fn dependency_dag_node_id(node: &SubstrateGraphNode) -> String {
18326    node.properties
18327        .get("ref_id")
18328        .cloned()
18329        .unwrap_or_else(|| node.label.trim_start_matches('#').to_string())
18330}
18331
18332fn dependency_dag_node_profile(
18333    root: &Path,
18334    store: &impl GraphStore,
18335    node: &SubstrateGraphNode,
18336    graph_nodes_by_id: &BTreeMap<String, SubstrateGraphNode>,
18337    graph_edges: &[SubstrateGraphEdge],
18338    depth: usize,
18339    limit: usize,
18340) -> Result<DependencyDagProfile> {
18341    let id = dependency_dag_node_id(node);
18342    let mut source_files = BTreeSet::new();
18343    let mut source_symbols = BTreeSet::new();
18344    for edge in graph_edges
18345        .iter()
18346        .filter(|edge| edge.from_id == node.id && edge.kind == "mentions")
18347    {
18348        let Some(target) = graph_nodes_by_id.get(&edge.to_id) else {
18349            continue;
18350        };
18351        match target.kind.as_str() {
18352            "file" | "route" => {
18353                if let Some(path) = target.properties.get("path") {
18354                    source_files.insert(path.clone());
18355                }
18356            }
18357            "symbol" => {
18358                source_symbols.insert(target.label.clone());
18359                if let Some(path) = target.properties.get("path") {
18360                    source_files.insert(path.clone());
18361                }
18362            }
18363            _ => {}
18364        }
18365    }
18366
18367    let max_rows = if limit == 0 { usize::MAX } else { limit };
18368    for (source, _) in
18369        graph_db_reachable_nodes_by_kind(store, &node.id, "source_handle", depth, max_rows)?
18370    {
18371        if let Some(handle) = conflict_matrix_source_handle(&source) {
18372            source_files.insert(handle.file);
18373        }
18374    }
18375
18376    let worker_results = graph_nodes_by_id
18377        .values()
18378        .filter(|candidate| {
18379            candidate.kind == "worker_result"
18380                && candidate.properties.get("ref_id").map(String::as_str) == Some(id.as_str())
18381        })
18382        .cloned()
18383        .collect::<Vec<_>>();
18384    let worker_feedback = conflict_matrix_worker_feedback(&worker_results);
18385    let expected_tests = worker_feedback.expected_tests.iter().cloned().collect();
18386    let config_files = source_files
18387        .iter()
18388        .filter(|file| is_planner_config_path(file))
18389        .cloned()
18390        .collect();
18391
18392    let mut semantic_refs = BTreeMap::new();
18393    for kind in ["semantic_concept", "semantic_entity"] {
18394        for (semantic, _) in
18395            graph_db_reachable_nodes_by_kind(store, &node.id, kind, depth, max_rows)?
18396        {
18397            let item = conflict_matrix_semantic_ref(root, &semantic);
18398            semantic_refs
18399                .entry(format!("{}:{}", item.kind, item.label))
18400                .or_insert(item);
18401        }
18402    }
18403
18404    Ok(DependencyDagProfile {
18405        id,
18406        graph_node_id: node.id.clone(),
18407        label: node.label.clone(),
18408        path: node.properties.get("path").cloned(),
18409        line: node
18410            .properties
18411            .get("line")
18412            .and_then(|value| value.parse::<i64>().ok()),
18413        detail: node.properties.get("detail").cloned(),
18414        source_files,
18415        source_symbols,
18416        config_files,
18417        expected_tests,
18418        semantic_refs,
18419        worker_feedback,
18420    })
18421}
18422
18423fn dependency_dag_marker_refs(text: &str, markers: &[&str]) -> Vec<String> {
18424    let lower = text.to_ascii_lowercase();
18425    let mut refs = Vec::new();
18426    for marker in markers {
18427        let mut offset = 0usize;
18428        while let Some(pos) = lower[offset..].find(marker) {
18429            let start = offset + pos + marker.len();
18430            let segment = text[start..]
18431                .split(['\n', '.'])
18432                .next()
18433                .unwrap_or(&text[start..]);
18434            refs.extend(extract_conflict_target_refs(segment));
18435            offset = start;
18436        }
18437    }
18438    dedupe_preserve_order(refs)
18439}
18440
18441fn dependency_dag_push_edge(
18442    edges: &mut Vec<DependencyDagEdge>,
18443    seen: &mut BTreeSet<(String, String, String)>,
18444    edge: DependencyDagEdge,
18445) {
18446    if edge.from == edge.to {
18447        return;
18448    }
18449    if seen.insert((edge.from.clone(), edge.to.clone(), edge.kind.clone())) {
18450        edges.push(edge);
18451    }
18452}
18453
18454fn dependency_dag_explicit_edges(
18455    profiles: &[DependencyDagProfile],
18456    target_ids: &BTreeSet<String>,
18457    edges: &mut Vec<DependencyDagEdge>,
18458    seen: &mut BTreeSet<(String, String, String)>,
18459) {
18460    for profile in profiles {
18461        let detail = profile.detail.as_deref().unwrap_or_default();
18462        for dep in dependency_dag_marker_refs(
18463            detail,
18464            &[
18465                "depends on",
18466                "depends-on",
18467                "deps:",
18468                "after",
18469                "blocked by",
18470                "requires",
18471            ],
18472        ) {
18473            if target_ids.contains(&dep) {
18474                dependency_dag_push_edge(
18475                    edges,
18476                    seen,
18477                    DependencyDagEdge {
18478                        from: dep.clone(),
18479                        to: profile.id.clone(),
18480                        kind: "explicit_depends_on".to_string(),
18481                        weight: 1000,
18482                        reasons: vec![format!("{} declares dependency on #{dep}", profile.id)],
18483                        shared_files: Vec::new(),
18484                        shared_symbols: Vec::new(),
18485                        shared_tests: Vec::new(),
18486                        shared_config_files: Vec::new(),
18487                        shared_semantic_refs: Vec::new(),
18488                    },
18489                );
18490            }
18491        }
18492        for downstream in dependency_dag_marker_refs(detail, &["before", "unblocks"]) {
18493            if target_ids.contains(&downstream) {
18494                dependency_dag_push_edge(
18495                    edges,
18496                    seen,
18497                    DependencyDagEdge {
18498                        from: profile.id.clone(),
18499                        to: downstream.clone(),
18500                        kind: "explicit_before".to_string(),
18501                        weight: 900,
18502                        reasons: vec![format!(
18503                            "{} declares it should run before #{downstream}",
18504                            profile.id
18505                        )],
18506                        shared_files: Vec::new(),
18507                        shared_symbols: Vec::new(),
18508                        shared_tests: Vec::new(),
18509                        shared_config_files: Vec::new(),
18510                        shared_semantic_refs: Vec::new(),
18511                    },
18512                );
18513            }
18514        }
18515    }
18516}
18517
18518fn dependency_dag_worker_follow_up_edges(
18519    profiles: &[DependencyDagProfile],
18520    target_ids: &BTreeSet<String>,
18521    edges: &mut Vec<DependencyDagEdge>,
18522    seen: &mut BTreeSet<(String, String, String)>,
18523) {
18524    for profile in profiles {
18525        for follow_up in &profile.worker_feedback.follow_up_ids {
18526            if target_ids.contains(follow_up) {
18527                dependency_dag_push_edge(
18528                    edges,
18529                    seen,
18530                    DependencyDagEdge {
18531                        from: profile.id.clone(),
18532                        to: follow_up.clone(),
18533                        kind: "worker_result_follow_up".to_string(),
18534                        weight: 700,
18535                        reasons: vec![format!(
18536                            "worker_result for #{} references follow-up #{}",
18537                            profile.id, follow_up
18538                        )],
18539                        shared_files: Vec::new(),
18540                        shared_symbols: Vec::new(),
18541                        shared_tests: Vec::new(),
18542                        shared_config_files: Vec::new(),
18543                        shared_semantic_refs: Vec::new(),
18544                    },
18545                );
18546            }
18547        }
18548    }
18549}
18550
18551fn dependency_dag_overlap_edges(
18552    profiles: &[DependencyDagProfile],
18553    edges: &mut Vec<DependencyDagEdge>,
18554    seen: &mut BTreeSet<(String, String, String)>,
18555) {
18556    for left_idx in 0..profiles.len() {
18557        for right_idx in (left_idx + 1)..profiles.len() {
18558            let left = &profiles[left_idx];
18559            let right = &profiles[right_idx];
18560            let shared_files = sorted_intersection(&left.source_files, &right.source_files);
18561            let shared_symbols = sorted_intersection(&left.source_symbols, &right.source_symbols);
18562            let shared_tests = sorted_intersection(&left.expected_tests, &right.expected_tests);
18563            let shared_config_files = sorted_intersection(&left.config_files, &right.config_files);
18564            let left_semantic = left.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
18565            let right_semantic = right.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
18566            let shared_semantic_refs = sorted_intersection(&left_semantic, &right_semantic);
18567            if shared_files.is_empty()
18568                && shared_symbols.is_empty()
18569                && shared_tests.is_empty()
18570                && shared_config_files.is_empty()
18571                && shared_semantic_refs.is_empty()
18572            {
18573                continue;
18574            }
18575            let kind = if shared_files.is_empty()
18576                && shared_symbols.is_empty()
18577                && shared_tests.is_empty()
18578                && shared_config_files.is_empty()
18579            {
18580                "semantic_relation"
18581            } else {
18582                "shared_resource"
18583            };
18584            let mut reasons = Vec::new();
18585            if !shared_files.is_empty() {
18586                reasons.push(format!("shared files: {}", shared_files.join(", ")));
18587            }
18588            if !shared_symbols.is_empty() {
18589                reasons.push(format!("shared symbols: {}", shared_symbols.join(", ")));
18590            }
18591            if !shared_tests.is_empty() {
18592                reasons.push(format!("shared tests: {}", shared_tests.join(" && ")));
18593            }
18594            if !shared_config_files.is_empty() {
18595                reasons.push(format!(
18596                    "shared config files: {}",
18597                    shared_config_files.join(", ")
18598                ));
18599            }
18600            if !shared_semantic_refs.is_empty() {
18601                reasons.push(format!(
18602                    "shared semantic refs: {}",
18603                    shared_semantic_refs.join(", ")
18604                ));
18605            }
18606            let weight = shared_files.len() * 100
18607                + shared_config_files.len() * 100
18608                + shared_symbols.len() * 40
18609                + shared_tests.len() * 10
18610                + shared_semantic_refs.len() * 5;
18611            dependency_dag_push_edge(
18612                edges,
18613                seen,
18614                DependencyDagEdge {
18615                    from: left.id.clone(),
18616                    to: right.id.clone(),
18617                    kind: kind.to_string(),
18618                    weight,
18619                    reasons,
18620                    shared_files,
18621                    shared_symbols,
18622                    shared_tests,
18623                    shared_config_files,
18624                    shared_semantic_refs,
18625                },
18626            );
18627        }
18628    }
18629}
18630
18631fn dependency_dag_topo_batches(
18632    targets: &[String],
18633    edges: &[DependencyDagEdge],
18634) -> (Vec<DependencyDagTopoBatch>, DependencyDagCycleDiagnostics) {
18635    let target_set = targets.iter().cloned().collect::<BTreeSet<_>>();
18636    let order = targets
18637        .iter()
18638        .enumerate()
18639        .map(|(idx, id)| (id.clone(), idx))
18640        .collect::<BTreeMap<_, _>>();
18641    let mut indegree = targets
18642        .iter()
18643        .map(|id| (id.clone(), 0usize))
18644        .collect::<BTreeMap<_, _>>();
18645    let mut outgoing = BTreeMap::<String, Vec<String>>::new();
18646    let mut seen_pairs = BTreeSet::<(String, String)>::new();
18647    for edge in edges {
18648        if !target_set.contains(&edge.from) || !target_set.contains(&edge.to) {
18649            continue;
18650        }
18651        if !seen_pairs.insert((edge.from.clone(), edge.to.clone())) {
18652            continue;
18653        }
18654        *indegree.entry(edge.to.clone()).or_default() += 1;
18655        outgoing
18656            .entry(edge.from.clone())
18657            .or_default()
18658            .push(edge.to.clone());
18659    }
18660    for values in outgoing.values_mut() {
18661        values.sort_by_key(|id| order.get(id).copied().unwrap_or(usize::MAX));
18662        values.dedup();
18663    }
18664
18665    let mut processed = BTreeSet::new();
18666    let mut batches = Vec::new();
18667    loop {
18668        let mut ready = targets
18669            .iter()
18670            .filter(|id| !processed.contains(*id))
18671            .filter(|id| indegree.get(*id).copied().unwrap_or(0) == 0)
18672            .cloned()
18673            .collect::<Vec<_>>();
18674        ready.sort_by_key(|id| order.get(id).copied().unwrap_or(usize::MAX));
18675        if ready.is_empty() {
18676            break;
18677        }
18678        for id in &ready {
18679            processed.insert(id.clone());
18680            for next in outgoing.get(id).into_iter().flatten() {
18681                if let Some(value) = indegree.get_mut(next) {
18682                    *value = value.saturating_sub(1);
18683                }
18684            }
18685        }
18686        batches.push(DependencyDagTopoBatch {
18687            batch: batches.len() + 1,
18688            targets: ready,
18689        });
18690    }
18691
18692    let blocked_nodes = targets
18693        .iter()
18694        .filter(|id| !processed.contains(*id))
18695        .cloned()
18696        .collect::<Vec<_>>();
18697    let blocked_set = blocked_nodes.iter().cloned().collect::<BTreeSet<_>>();
18698    let cycle_edges = edges
18699        .iter()
18700        .filter(|edge| blocked_set.contains(&edge.from) && blocked_set.contains(&edge.to))
18701        .cloned()
18702        .collect::<Vec<_>>();
18703    (
18704        batches,
18705        DependencyDagCycleDiagnostics {
18706            has_cycles: !blocked_nodes.is_empty(),
18707            blocked_nodes,
18708            cycle_edges,
18709        },
18710    )
18711}
18712
18713fn dependency_dag_replay_commands(
18714    path: &Path,
18715    scope: Option<&str>,
18716    targets: &[String],
18717    depth: usize,
18718    limit: usize,
18719) -> Vec<String> {
18720    let target_args = targets
18721        .iter()
18722        .map(|target| shell_quote(target))
18723        .collect::<Vec<_>>()
18724        .join(" ");
18725    let mut command = format!(
18726        "tsift dependency-dag --path {}{} --depth {} --limit {} --json",
18727        shell_quote(path.to_string_lossy().as_ref()),
18728        scope
18729            .map(|scope| format!(" --scope {}", shell_quote(scope)))
18730            .unwrap_or_default(),
18731        depth,
18732        limit
18733    );
18734    if !target_args.is_empty() {
18735        command.push(' ');
18736        command.push_str(&target_args);
18737    }
18738    vec![command]
18739}
18740
18741fn build_dependency_dag_report(
18742    path: &Path,
18743    scope: Option<&str>,
18744    raw_targets: &[String],
18745    depth: usize,
18746    limit: usize,
18747) -> Result<DependencyDagReport> {
18748    let root = lint::resolve_project_root_or_canonical_path(path)?;
18749    write_traversal_graph_store(&root, path, scope)
18750        .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
18751    let graph_db = graph_substrate_db_path(&root, scope);
18752    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
18753        .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
18754    let mut warnings = Vec::new();
18755    if let Some(recovery) = store.read_only_recovery() {
18756        warnings.push(graph_db_read_recovery_diagnostic(recovery));
18757    }
18758    let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
18759    if freshness.fail_closed {
18760        bail!(
18761            "dependency-dag graph projection failed closed: {}; repair: {}",
18762            freshness.diagnostics.join("; "),
18763            graph_db_repair_commands(&root, scope).join("; ")
18764        );
18765    }
18766
18767    let target_nodes = dependency_dag_resolve_backlog_nodes(&root, path, &store, raw_targets)?;
18768    let graph_nodes = store.all_nodes()?;
18769    let graph_edges = store.all_edges()?;
18770    let graph_nodes_by_id = graph_nodes
18771        .into_iter()
18772        .map(|node| (node.id.clone(), node))
18773        .collect::<BTreeMap<_, _>>();
18774    let profiles = target_nodes
18775        .iter()
18776        .map(|node| {
18777            dependency_dag_node_profile(
18778                &root,
18779                &store,
18780                node,
18781                &graph_nodes_by_id,
18782                &graph_edges,
18783                depth,
18784                limit,
18785            )
18786        })
18787        .collect::<Result<Vec<_>>>()?;
18788    let targets = profiles
18789        .iter()
18790        .map(|profile| profile.id.clone())
18791        .collect::<Vec<_>>();
18792    let target_ids = targets.iter().cloned().collect::<BTreeSet<_>>();
18793
18794    let mut edges = Vec::new();
18795    let mut seen_edges = BTreeSet::new();
18796    dependency_dag_explicit_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
18797    dependency_dag_worker_follow_up_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
18798    dependency_dag_overlap_edges(&profiles, &mut edges, &mut seen_edges);
18799    edges.sort_by(|left, right| {
18800        left.from
18801            .cmp(&right.from)
18802            .then(left.to.cmp(&right.to))
18803            .then(left.kind.cmp(&right.kind))
18804    });
18805    let (topo_batches, cycle_diagnostics) = dependency_dag_topo_batches(&targets, &edges);
18806
18807    let nodes = profiles
18808        .into_iter()
18809        .map(|profile| DependencyDagNode {
18810            id: profile.id,
18811            graph_node_id: profile.graph_node_id,
18812            label: profile.label,
18813            path: profile.path,
18814            line: profile.line,
18815            detail: profile.detail,
18816            source_files: sorted_set(&profile.source_files),
18817            source_symbols: sorted_set(&profile.source_symbols),
18818            config_files: sorted_set(&profile.config_files),
18819            expected_tests: sorted_set(&profile.expected_tests),
18820            semantic_refs: profile.semantic_refs.into_values().collect(),
18821            worker_feedback: profile.worker_feedback,
18822        })
18823        .collect::<Vec<_>>();
18824    let projection_hashes = freshness
18825        .content_hash
18826        .clone()
18827        .into_iter()
18828        .collect::<Vec<_>>();
18829    let replay_commands = dependency_dag_replay_commands(path, scope, &targets, depth, limit);
18830    let repair_commands = graph_db_repair_commands(&root, scope);
18831    let summary = DependencyDagSummary {
18832        nodes: nodes.len(),
18833        edges: edges.len(),
18834        topo_batches: topo_batches.len(),
18835        has_cycles: cycle_diagnostics.has_cycles,
18836    };
18837
18838    Ok(DependencyDagReport {
18839        contract_version: DEPENDENCY_DAG_CONTRACT_VERSION,
18840        root: root.to_string_lossy().to_string(),
18841        scope: scope.map(str::to_string),
18842        path: path.to_string_lossy().to_string(),
18843        targets,
18844        projection_freshness: freshness,
18845        projection_hashes,
18846        nodes,
18847        edges,
18848        topo_batches,
18849        cycle_diagnostics,
18850        summary,
18851        replay_commands,
18852        repair_commands,
18853        warnings,
18854    })
18855}
18856
18857fn print_dependency_dag_human(report: &DependencyDagReport, compact: bool) {
18858    if compact {
18859        println!(
18860            "dependency-dag targets:{} edges:{} batches:{} cycles:{}",
18861            report.targets.len(),
18862            report.edges.len(),
18863            report.topo_batches.len(),
18864            report.cycle_diagnostics.has_cycles
18865        );
18866    } else {
18867        println!("Dependency DAG");
18868        println!("  targets: {}", report.targets.join(", "));
18869        println!("  edges:   {}", report.edges.len());
18870        println!("  cycles:  {}", report.cycle_diagnostics.has_cycles);
18871    }
18872    for batch in &report.topo_batches {
18873        println!("batch #{}: {}", batch.batch, batch.targets.join(", "));
18874    }
18875    for edge in &report.edges {
18876        println!(
18877            "edge {} -> {} kind:{} weight:{}",
18878            edge.from, edge.to, edge.kind, edge.weight
18879        );
18880        for reason in &edge.reasons {
18881            println!("  reason: {reason}");
18882        }
18883    }
18884    if report.cycle_diagnostics.has_cycles {
18885        println!(
18886            "cycle blocked nodes: {}",
18887            report.cycle_diagnostics.blocked_nodes.join(", ")
18888        );
18889    }
18890    for command in &report.replay_commands {
18891        println!("replay: {command}");
18892    }
18893    for command in &report.repair_commands {
18894        println!("repair: {command}");
18895    }
18896    for warning in &report.warnings {
18897        println!("warning: {warning}");
18898    }
18899}
18900
18901fn cmd_dependency_dag(
18902    path: &Path,
18903    scope: Option<&str>,
18904    raw_targets: &[String],
18905    depth: usize,
18906    limit: usize,
18907    format: OutputFormat,
18908) -> Result<()> {
18909    let report = build_dependency_dag_report(path, scope, raw_targets, depth, limit)?;
18910    if format.json_output {
18911        print_json_or_envelope(
18912            &report,
18913            &format,
18914            "dependency-dag",
18915            "topological-planning",
18916            ToolEnvelopeSummary {
18917                text: format!(
18918                    "Dependency DAG for {} target(s): edges={} batches={} cycles={}",
18919                    report.targets.len(),
18920                    report.edges.len(),
18921                    report.topo_batches.len(),
18922                    report.cycle_diagnostics.has_cycles
18923                ),
18924                metrics: vec![
18925                    envelope_metric("targets", report.targets.len()),
18926                    envelope_metric("edges", report.edges.len()),
18927                    envelope_metric("topo_batches", report.topo_batches.len()),
18928                    envelope_metric("has_cycles", report.cycle_diagnostics.has_cycles),
18929                ],
18930            },
18931            report.cycle_diagnostics.has_cycles,
18932            report.replay_commands.clone(),
18933        )
18934    } else {
18935        print_dependency_dag_human(&report, format.compact);
18936        Ok(())
18937    }
18938}
18939
18940pub(crate) fn render_log_digest_from_input(
18941    path: &Path,
18942    input: &str,
18943    format: OutputFormat,
18944) -> Result<()> {
18945    let report = log_digest::compute(path, input)?;
18946    if format.json_output {
18947        println!(
18948            "{}",
18949            to_json_schema(&report, format.pretty, format.terse, format.schema)?
18950        );
18951        return Ok(());
18952    }
18953
18954    if format.compact {
18955        println!(
18956            "log lines:{} signals:{} repeats:{} files:{} syms:{} stacks:{}",
18957            report.non_empty_lines,
18958            report.signal_groups,
18959            report.repeated_line_groups,
18960            report.file_ref_groups,
18961            report.symbol_ref_groups,
18962            report.stack_groups
18963        );
18964        for signal in &report.signals {
18965            let location = match (&signal.path, signal.line) {
18966                (Some(path), Some(line)) => format!("{path}:{line}"),
18967                (Some(path), None) => path.clone(),
18968                _ => "-".to_string(),
18969            };
18970            println!(
18971                "{} sev:{} count:{} sums:{} msg:{}",
18972                location,
18973                signal.severity,
18974                signal.occurrences,
18975                log_digest_summary_label(signal.summary_state),
18976                truncate_for_compact(&signal.message, 80)
18977            );
18978        }
18979        for repeated in &report.repeated_lines {
18980            println!(
18981                "repeat count:{} line:{}",
18982                repeated.occurrences,
18983                truncate_for_compact(&repeated.line, 80)
18984            );
18985        }
18986        for symbol in &report.symbol_refs {
18987            println!(
18988                "sym:{} count:{} sums:{}",
18989                symbol.symbol,
18990                symbol.occurrences,
18991                log_digest_summary_label(symbol.summary_state)
18992            );
18993        }
18994        for warning in &report.warnings {
18995            println!("warning: {warning}");
18996        }
18997        return Ok(());
18998    }
18999
19000    println!("Log digest");
19001    println!("  lines:                    {}", report.total_lines);
19002    println!("  non-empty lines:          {}", report.non_empty_lines);
19003    println!("  signal groups:            {}", report.signal_groups);
19004    println!(
19005        "  repeated lines:           {}",
19006        report.repeated_line_groups
19007    );
19008    println!(
19009        "  repeated line instances:  {}",
19010        report.repeated_line_occurrences
19011    );
19012    println!("  file refs:                {}", report.file_ref_groups);
19013    println!("  symbol refs:              {}", report.symbol_ref_groups);
19014    println!("  stack groups:             {}", report.stack_groups);
19015
19016    if !report.signals.is_empty() {
19017        println!();
19018        println!("Signals:");
19019        for signal in &report.signals {
19020            match (&signal.path, signal.line, signal.column) {
19021                (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
19022                (Some(path), Some(line), None) => println!("{path}:{line}"),
19023                (Some(path), None, _) => println!("{path}"),
19024                (None, _, _) => println!("(no file anchor)"),
19025            }
19026            println!("  severity: {}", signal.severity);
19027            println!("  occurrences: {}", signal.occurrences);
19028            println!("  message: {}", signal.message);
19029            println!(
19030                "  cached summaries: {}",
19031                log_digest_summary_label(signal.summary_state)
19032            );
19033            for summary in &signal.current_summaries {
19034                println!(
19035                    "    - {}: {}",
19036                    summary.symbol,
19037                    truncate_for_compact(&summary.summary, 160)
19038                );
19039            }
19040        }
19041    }
19042
19043    if !report.repeated_lines.is_empty() {
19044        println!();
19045        println!("Repeated lines:");
19046        for repeated in &report.repeated_lines {
19047            println!(
19048                "  {}x {}",
19049                repeated.occurrences,
19050                truncate_for_compact(&repeated.line, 180)
19051            );
19052        }
19053    }
19054
19055    if !report.file_refs.is_empty() {
19056        println!();
19057        println!("Anchored files:");
19058        for file_ref in &report.file_refs {
19059            match (file_ref.line, file_ref.column) {
19060                (Some(line), Some(column)) => println!("{}:{}:{}", file_ref.path, line, column),
19061                (Some(line), None) => println!("{}:{}", file_ref.path, line),
19062                (None, _) => println!("{}", file_ref.path),
19063            }
19064            println!("  occurrences: {}", file_ref.occurrences);
19065            println!(
19066                "  cached summaries: {}",
19067                log_digest_summary_label(file_ref.summary_state)
19068            );
19069            for summary in &file_ref.current_summaries {
19070                println!(
19071                    "    - {}: {}",
19072                    summary.symbol,
19073                    truncate_for_compact(&summary.summary, 160)
19074                );
19075            }
19076        }
19077    }
19078
19079    if !report.symbol_refs.is_empty() {
19080        println!();
19081        println!("Symbol candidates:");
19082        for symbol in &report.symbol_refs {
19083            println!("{}", symbol.symbol);
19084            println!("  occurrences: {}", symbol.occurrences);
19085            println!(
19086                "  cached summaries: {}",
19087                log_digest_summary_label(symbol.summary_state)
19088            );
19089            for summary in &symbol.current_summaries {
19090                println!(
19091                    "    - {}: {}",
19092                    summary.symbol,
19093                    truncate_for_compact(&summary.summary, 160)
19094                );
19095            }
19096        }
19097    }
19098
19099    if !report.stack_traces.is_empty() {
19100        println!();
19101        println!("Stack groups:");
19102        for stack in &report.stack_traces {
19103            println!("  occurrences: {}", stack.occurrences);
19104            for frame in &stack.frames {
19105                println!("    - {}", frame);
19106            }
19107        }
19108    }
19109
19110    for warning in &report.warnings {
19111        println!("warning: {warning}");
19112    }
19113    Ok(())
19114}
19115
19116pub(crate) fn metric_digest_trend_label(trend: metric_digest::MetricDigestTrend) -> &'static str {
19117    match trend {
19118        metric_digest::MetricDigestTrend::Improved => "improved",
19119        metric_digest::MetricDigestTrend::Regressed => "regressed",
19120        metric_digest::MetricDigestTrend::Flat => "flat",
19121        metric_digest::MetricDigestTrend::Unknown => "changed",
19122    }
19123}
19124
19125pub(crate) fn metric_digest_gate_label(
19126    decision: metric_digest::CommunitySearchGateDecision,
19127) -> &'static str {
19128    match decision {
19129        metric_digest::CommunitySearchGateDecision::Pass => "pass",
19130        metric_digest::CommunitySearchGateDecision::Block => "block",
19131    }
19132}
19133
19134fn cmd_dci_benchmark(fixture_path: &Path, format: OutputFormat) -> Result<()> {
19135    let input = fs::read_to_string(fixture_path)
19136        .with_context(|| format!("reading dci-benchmark fixture: {}", fixture_path.display()))?;
19137    let report = dci_benchmark::compute(&input)?;
19138
19139    if format.json_output {
19140        println!(
19141            "{}",
19142            to_json_schema(&report, format.pretty, format.terse, format.schema)?
19143        );
19144        return Ok(());
19145    }
19146
19147    if format.compact {
19148        println!(
19149            "dci tasks:{} strategies:{} warnings:{}",
19150            report.tasks_loaded,
19151            report.strategies_compared,
19152            report.warnings.len()
19153        );
19154        for summary in &report.strategy_summaries {
19155            println!(
19156                "{} rank:{} loc:{}/{} rate:{} calls:{} latency_ms:{} tokens:{}",
19157                summary.strategy,
19158                summary.rank,
19159                summary.localized,
19160                summary.task_runs,
19161                dci_benchmark::format_number(summary.localization_rate * 100.0),
19162                dci_benchmark::format_number(summary.avg_tool_calls),
19163                dci_benchmark::format_number(summary.avg_latency_ms),
19164                dci_benchmark::format_number(summary.avg_estimated_tokens)
19165            );
19166        }
19167        for warning in &report.warnings {
19168            println!("warning: {warning}");
19169        }
19170        return Ok(());
19171    }
19172
19173    println!("DCI benchmark");
19174    if let Some(description) = &report.description {
19175        println!("  description: {}", description);
19176    }
19177    println!("  tasks loaded:        {}", report.tasks_loaded);
19178    println!("  strategies compared: {}", report.strategies_compared);
19179
19180    println!();
19181    println!("Strategy summary:");
19182    for summary in &report.strategy_summaries {
19183        println!(
19184            "  #{} {}: localization {}/{} ({:.1}%), avg calls {}, avg latency {}ms, avg tokens {}",
19185            summary.rank,
19186            summary.strategy,
19187            summary.localized,
19188            summary.task_runs,
19189            summary.localization_rate * 100.0,
19190            dci_benchmark::format_number(summary.avg_tool_calls),
19191            dci_benchmark::format_number(summary.avg_latency_ms),
19192            dci_benchmark::format_number(summary.avg_estimated_tokens)
19193        );
19194    }
19195
19196    println!();
19197    println!("Task winners:");
19198    for row in &report.task_rows {
19199        let label = row
19200            .label
19201            .as_ref()
19202            .map(|value| format!(" ({value})"))
19203            .unwrap_or_default();
19204        println!("  {}{}", row.task_id, label);
19205        println!("    localized: {}", row.best_localization.join(", "));
19206        println!(
19207            "    lowest calls: {}, lowest latency: {}, lowest tokens: {}",
19208            row.lowest_tool_calls.as_deref().unwrap_or("-"),
19209            row.lowest_latency.as_deref().unwrap_or("-"),
19210            row.lowest_token_budget.as_deref().unwrap_or("-")
19211        );
19212    }
19213
19214    for warning in &report.warnings {
19215        println!("warning: {warning}");
19216    }
19217    Ok(())
19218}
19219
19220#[derive(Clone, Serialize)]
19221struct SessionReviewBudgetSessionPreview {
19222    handle: String,
19223    source: String,
19224    path: String,
19225    matched_by: Vec<String>,
19226    total_tokens: u64,
19227    largest_turn_total_tokens: u64,
19228    prompt_targets: usize,
19229    failures: usize,
19230    expand: String,
19231}
19232
19233#[derive(Clone, Serialize)]
19234struct SessionReviewBudgetPromptPreview {
19235    handle: String,
19236    text: String,
19237    occurrences: usize,
19238    expand: String,
19239}
19240
19241#[derive(Clone, Serialize)]
19242struct SessionReviewBudgetFailurePreview {
19243    handle: String,
19244    kind: String,
19245    message: String,
19246    occurrences: usize,
19247    #[serde(skip_serializing_if = "Option::is_none")]
19248    command: Option<String>,
19249    #[serde(skip_serializing_if = "Option::is_none")]
19250    session_path: Option<String>,
19251    expand: String,
19252}
19253
19254#[derive(Clone, Serialize)]
19255struct SessionReviewBudgetReport {
19256    target: String,
19257    target_kind: String,
19258    max_items: usize,
19259    max_bytes: usize,
19260    sessions_matched: usize,
19261    prompt_tokens: u64,
19262    cached_input_tokens: u64,
19263    total_tokens: u64,
19264    #[serde(skip_serializing_if = "Option::is_none")]
19265    latest_session_total_tokens: Option<u64>,
19266    #[serde(skip_serializing_if = "Option::is_none")]
19267    latest_session_largest_turn_total_tokens: Option<u64>,
19268    truncated: bool,
19269    sessions: Vec<SessionReviewBudgetSessionPreview>,
19270    prompt_targets: Vec<SessionReviewBudgetPromptPreview>,
19271    failures: Vec<SessionReviewBudgetFailurePreview>,
19272    guardrails: Vec<String>,
19273    warnings: Vec<String>,
19274}
19275
19276#[derive(Clone, Serialize)]
19277struct SessionReviewNextTokenAction {
19278    priority: usize,
19279    kind: String,
19280    severity: String,
19281    message: String,
19282    guidance: String,
19283    #[serde(skip_serializing_if = "Option::is_none")]
19284    compact_command: Option<String>,
19285    #[serde(skip_serializing_if = "Option::is_none")]
19286    restart_command: Option<String>,
19287    digest_commands: Vec<String>,
19288}
19289
19290#[derive(Clone, Serialize)]
19291struct SessionReviewNextContextBudgetReport {
19292    contract_version: &'static str,
19293    target: String,
19294    max_items: usize,
19295    max_bytes: usize,
19296    prompt_target_total: usize,
19297    touched_file_total: usize,
19298    touched_symbol_total: usize,
19299    unresolved_failure_total: usize,
19300    truncated: bool,
19301    prompt_targets: Vec<String>,
19302    touched_files: Vec<String>,
19303    touched_symbols: Vec<String>,
19304    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19305    touched_symbol_refs: Vec<CompactSymbolRefPreview>,
19306    unresolved_failures: Vec<SessionReviewBudgetFailurePreview>,
19307    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19308    next_token_actions: Vec<SessionReviewNextTokenAction>,
19309    next_digest_commands: Vec<String>,
19310}
19311
19312#[derive(Clone, Serialize)]
19313struct ContextPackGraphOrchestration {
19314    contract_version: &'static str,
19315    graph_db_command: String,
19316    projection_freshness: GraphDbFreshnessReport,
19317    projection_hashes: Vec<String>,
19318    evidence_packet_ids: Vec<String>,
19319    conflict_matrix_decisions: Vec<String>,
19320    worker_ownership_blocks: Vec<String>,
19321    follow_up_commands: Vec<String>,
19322    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19323    warnings: Vec<String>,
19324}
19325
19326#[derive(Clone, Serialize)]
19327struct ContextPackReport {
19328    root: String,
19329    target: String,
19330    target_kind: String,
19331    max_items: usize,
19332    max_bytes: usize,
19333    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19334    status_reminders: Vec<String>,
19335    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19336    ontology_refs: Vec<CompactOntologyRefPreview>,
19337    next_context: SessionReviewNextContextBudgetReport,
19338    diff_digest: ContextPackDiffPreview,
19339    test_digest: ContextPackOptionalSection<ContextPackTestPreview>,
19340    log_digest: ContextPackOptionalSection<ContextPackLogPreview>,
19341    exploration: ExplorationPacket,
19342    graph_orchestration: ContextPackGraphOrchestration,
19343    resume_commands: Vec<String>,
19344}
19345
19346#[derive(Clone, Serialize)]
19347struct ContextPackOptionalSection<T> {
19348    status: String,
19349    command: String,
19350    #[serde(skip_serializing_if = "Option::is_none")]
19351    source: Option<String>,
19352    #[serde(skip_serializing_if = "Option::is_none")]
19353    report: Option<T>,
19354}
19355
19356#[derive(Clone, Serialize)]
19357struct ContextPackDiffPreview {
19358    mode: String,
19359    files_changed: usize,
19360    files_with_current_summaries: usize,
19361    symbols_touched: usize,
19362    call_edges_added: usize,
19363    call_edges_removed: usize,
19364    truncated: bool,
19365    files: Vec<ContextPackDiffFilePreview>,
19366}
19367
19368#[derive(Clone, Serialize)]
19369struct ContextPackDiffFilePreview {
19370    path: String,
19371    status: String,
19372    touched_symbols: Vec<String>,
19373    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19374    touched_symbol_refs: Vec<CompactSymbolRefPreview>,
19375    summary_state: String,
19376    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19377    summary_refs: Vec<ContextPackSummaryRefPreview>,
19378    added_call_edges: usize,
19379    removed_call_edges: usize,
19380    warnings: Vec<String>,
19381}
19382
19383#[derive(Clone, Serialize)]
19384struct ContextPackSummaryRefPreview {
19385    handle: String,
19386    symbol: String,
19387    #[serde(skip_serializing_if = "Option::is_none")]
19388    tag_alias: Option<String>,
19389    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19390    ontology_refs: Vec<CompactOntologyRefPreview>,
19391    summary: String,
19392    expand: String,
19393}
19394
19395#[derive(Clone, Serialize)]
19396struct ContextPackTestPreview {
19397    runner: String,
19398    failures: usize,
19399    grouped_failures: usize,
19400    counts: ContextPackTestCounts,
19401    truncated: bool,
19402    failure_groups: Vec<ContextPackTestFailurePreview>,
19403    warnings: Vec<String>,
19404}
19405
19406#[derive(Clone, Serialize)]
19407struct ContextPackTestCounts {
19408    #[serde(skip_serializing_if = "Option::is_none")]
19409    passed: Option<usize>,
19410    #[serde(skip_serializing_if = "Option::is_none")]
19411    failed: Option<usize>,
19412    #[serde(skip_serializing_if = "Option::is_none")]
19413    skipped: Option<usize>,
19414}
19415
19416#[derive(Clone, Serialize)]
19417struct ContextPackTestFailurePreview {
19418    tests: Vec<String>,
19419    message: String,
19420    #[serde(skip_serializing_if = "Option::is_none")]
19421    path: Option<String>,
19422    #[serde(skip_serializing_if = "Option::is_none")]
19423    line: Option<usize>,
19424    occurrences: usize,
19425    summary_state: String,
19426    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19427    summary_refs: Vec<ContextPackSummaryRefPreview>,
19428}
19429
19430#[derive(Clone, Serialize)]
19431struct ContextPackLogPreview {
19432    total_lines: usize,
19433    non_empty_lines: usize,
19434    signal_groups: usize,
19435    repeated_line_groups: usize,
19436    file_ref_groups: usize,
19437    symbol_ref_groups: usize,
19438    stack_groups: usize,
19439    truncated: bool,
19440    signals: Vec<ContextPackLogSignalPreview>,
19441    repeated_lines: Vec<ContextPackLogRepeatedLinePreview>,
19442    file_refs: Vec<ContextPackLogFileRefPreview>,
19443    symbol_refs: Vec<ContextPackLogSymbolRefPreview>,
19444    warnings: Vec<String>,
19445}
19446
19447#[derive(Clone, Serialize)]
19448struct ContextPackLogSignalPreview {
19449    severity: String,
19450    message: String,
19451    #[serde(skip_serializing_if = "Option::is_none")]
19452    path: Option<String>,
19453    #[serde(skip_serializing_if = "Option::is_none")]
19454    line: Option<usize>,
19455    occurrences: usize,
19456    summary_state: String,
19457    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19458    summary_refs: Vec<ContextPackSummaryRefPreview>,
19459}
19460
19461#[derive(Clone, Serialize)]
19462struct ContextPackLogRepeatedLinePreview {
19463    line: String,
19464    occurrences: usize,
19465}
19466
19467#[derive(Clone, Serialize)]
19468struct ContextPackLogFileRefPreview {
19469    path: String,
19470    #[serde(skip_serializing_if = "Option::is_none")]
19471    line: Option<usize>,
19472    occurrences: usize,
19473    summary_state: String,
19474    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19475    summary_refs: Vec<ContextPackSummaryRefPreview>,
19476}
19477
19478#[derive(Clone, Serialize)]
19479struct ContextPackLogSymbolRefPreview {
19480    handle: String,
19481    symbol: String,
19482    #[serde(skip_serializing_if = "Option::is_none")]
19483    tag_alias: Option<String>,
19484    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19485    ontology_refs: Vec<CompactOntologyRefPreview>,
19486    occurrences: usize,
19487    summary_state: String,
19488    #[serde(skip_serializing_if = "Vec::is_empty", default)]
19489    summary_refs: Vec<ContextPackSummaryRefPreview>,
19490}
19491
/// Translates a session source identifier into the value used for the
/// `--source` command-line flag.
///
/// The three known underscore-separated identifiers map to their hyphenated
/// forms; every other input yields `"markdown"`.
fn session_review_source_flag(source: &str) -> &'static str {
    const KNOWN_FLAGS: [(&str, &str); 3] = [
        ("claude_jsonl", "claude-jsonl"),
        ("codex_jsonl", "codex-jsonl"),
        ("agent_doc_log", "agent-doc-log"),
    ];

    KNOWN_FLAGS
        .iter()
        .find(|(identifier, _)| *identifier == source)
        .map(|(_, flag)| *flag)
        .unwrap_or("markdown")
}
19500
19501pub(crate) fn build_session_review_budget_report(
19502    report: &session_review::SessionReviewReport,
19503    budget: ResponseBudget,
19504) -> SessionReviewBudgetReport {
19505    let max_items = budget.preview_items();
19506    let max_bytes = budget.preview_bytes();
19507    let review_expand = format!(
19508        "tsift session-review {} --json",
19509        shell_quote(&report.target)
19510    );
19511    let sessions = report
19512        .sessions
19513        .iter()
19514        .take(max_items)
19515        .map(|entry| SessionReviewBudgetSessionPreview {
19516            handle: stable_handle(
19517                "srev",
19518                &format!("{}:{}:{}", entry.source, entry.path, entry.total_tokens),
19519            ),
19520            source: entry.source.clone(),
19521            path: truncate_for_budget(&entry.path, max_bytes),
19522            matched_by: entry
19523                .matched_by
19524                .iter()
19525                .take(max_items)
19526                .map(|value| truncate_for_budget(value, max_bytes))
19527                .collect(),
19528            total_tokens: entry.total_tokens,
19529            largest_turn_total_tokens: entry.largest_turn_total_tokens,
19530            prompt_targets: entry.prompt_target_count,
19531            failures: entry.failure_groups,
19532            expand: format!(
19533                "tsift session-digest --path {} --input {} --source {}",
19534                shell_quote(&report.root),
19535                shell_quote(&entry.path),
19536                session_review_source_flag(&entry.source)
19537            ),
19538        })
19539        .collect();
19540    let prompt_targets = report
19541        .prompt_targets
19542        .iter()
19543        .take(max_items)
19544        .map(|entry| SessionReviewBudgetPromptPreview {
19545            handle: stable_handle("spt", &entry.text),
19546            text: truncate_for_budget(&entry.text, max_bytes),
19547            occurrences: entry.occurrences,
19548            expand: review_expand.clone(),
19549        })
19550        .collect();
19551    let failures = report
19552        .failures
19553        .iter()
19554        .take(max_items)
19555        .map(|entry| SessionReviewBudgetFailurePreview {
19556            handle: stable_handle("sfl", &format!("{}:{}", entry.kind, entry.message)),
19557            kind: entry.kind.clone(),
19558            message: truncate_for_budget(&entry.message, max_bytes),
19559            occurrences: entry.occurrences,
19560            command: entry
19561                .command
19562                .as_ref()
19563                .map(|command| truncate_for_budget(command, max_bytes)),
19564            session_path: entry
19565                .session_path
19566                .as_ref()
19567                .map(|path| truncate_for_budget(path, max_bytes)),
19568            expand: review_expand.clone(),
19569        })
19570        .collect();
19571    let guardrails = report
19572        .guardrails
19573        .iter()
19574        .take(max_items)
19575        .map(|entry| truncate_for_budget(&entry.message, max_bytes))
19576        .collect();
19577    let warnings = report
19578        .warnings
19579        .iter()
19580        .take(max_items)
19581        .map(|entry| truncate_for_budget(entry, max_bytes))
19582        .collect();
19583
19584    SessionReviewBudgetReport {
19585        target: report.target.clone(),
19586        target_kind: report.target_kind.clone(),
19587        max_items,
19588        max_bytes,
19589        sessions_matched: report.sessions_matched,
19590        prompt_tokens: report.prompt_tokens,
19591        cached_input_tokens: report.cached_input_tokens,
19592        total_tokens: report.total_tokens,
19593        latest_session_total_tokens: report
19594            .latest_session_cost
19595            .as_ref()
19596            .map(|cost| cost.total_tokens),
19597        latest_session_largest_turn_total_tokens: report
19598            .latest_session_cost
19599            .as_ref()
19600            .map(|cost| cost.largest_turn_total_tokens),
19601        truncated: report.sessions.len() > max_items
19602            || report.prompt_targets.len() > max_items
19603            || report.failures.len() > max_items
19604            || report.guardrails.len() > max_items
19605            || report.warnings.len() > max_items,
19606        sessions,
19607        prompt_targets,
19608        failures,
19609        guardrails,
19610        warnings,
19611    }
19612}
19613
19614pub(crate) fn build_session_review_next_context_budget_report(
19615    report: &session_review::SessionReviewReport,
19616    budget: ResponseBudget,
19617    ontology: Option<&TagOntologyPreviewContext>,
19618) -> SessionReviewNextContextBudgetReport {
19619    let max_items = budget.preview_items();
19620    let max_bytes = budget.preview_bytes();
19621    let follow_up_items = budget.follow_up_items();
19622    let next_token_actions = build_next_token_actions(report, max_items, max_bytes);
19623    let actionable_guardrail_failures = next_token_actions
19624        .iter()
19625        .map(|action| format!("guardrail:{}", action.kind))
19626        .collect::<BTreeSet<_>>();
19627    let unresolved_failures = report
19628        .next_context
19629        .unresolved_failures
19630        .iter()
19631        .filter(|entry| !actionable_guardrail_failures.contains(&entry.kind))
19632        .collect::<Vec<_>>();
19633    let unresolved_failure_total = unresolved_failures.len();
19634    SessionReviewNextContextBudgetReport {
19635        contract_version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
19636        target: report.next_context.target.clone(),
19637        max_items,
19638        max_bytes,
19639        prompt_target_total: report.next_context.active_prompt_targets.len(),
19640        touched_file_total: report.next_context.touched_files.len(),
19641        touched_symbol_total: report.next_context.touched_symbols.len(),
19642        unresolved_failure_total,
19643        truncated: report.next_context.active_prompt_targets.len() > max_items
19644            || report.next_context.touched_files.len() > max_items
19645            || report.next_context.touched_symbols.len() > max_items
19646            || unresolved_failure_total > max_items
19647            || report.next_context.next_digest_commands.len() > follow_up_items,
19648        prompt_targets: report
19649            .next_context
19650            .active_prompt_targets
19651            .iter()
19652            .take(max_items)
19653            .map(|entry| truncate_for_budget(entry, max_bytes))
19654            .collect(),
19655        touched_files: report
19656            .next_context
19657            .touched_files
19658            .iter()
19659            .take(max_items)
19660            .map(|entry| truncate_for_budget(entry, max_bytes))
19661            .collect(),
19662        touched_symbols: report
19663            .next_context
19664            .touched_symbols
19665            .iter()
19666            .take(max_items)
19667            .map(|entry| truncate_for_budget(entry, max_bytes))
19668            .collect(),
19669        touched_symbol_refs: report
19670            .next_context
19671            .touched_symbols
19672            .iter()
19673            .take(max_items)
19674            .map(|entry| {
19675                build_compact_symbol_ref_with_ontology(
19676                    "ncsym",
19677                    &format!("{}:{}", report.next_context.target, entry),
19678                    entry,
19679                    None,
19680                    max_bytes,
19681                    ontology,
19682                )
19683            })
19684            .collect(),
19685        unresolved_failures: unresolved_failures
19686            .iter()
19687            .take(max_items)
19688            .map(|entry| SessionReviewBudgetFailurePreview {
19689                handle: stable_handle("snf", &format!("{}:{}", entry.kind, entry.message)),
19690                kind: entry.kind.clone(),
19691                message: truncate_for_budget(&entry.message, max_bytes),
19692                occurrences: entry.occurrences,
19693                command: entry
19694                    .command
19695                    .as_ref()
19696                    .map(|command| truncate_for_budget(command, max_bytes)),
19697                session_path: entry
19698                    .session_path
19699                    .as_ref()
19700                    .map(|path| truncate_for_budget(path, max_bytes)),
19701                expand: format!(
19702                    "tsift session-review {} --next-context --json",
19703                    shell_quote(&report.target)
19704                ),
19705            })
19706            .collect(),
19707        next_token_actions,
19708        next_digest_commands: report
19709            .next_context
19710            .next_digest_commands
19711            .iter()
19712            .take(follow_up_items)
19713            .cloned()
19714            .collect(),
19715    }
19716}
19717
19718fn build_next_token_actions(
19719    report: &session_review::SessionReviewReport,
19720    max_items: usize,
19721    max_bytes: usize,
19722) -> Vec<SessionReviewNextTokenAction> {
19723    let target = shell_quote(&report.target);
19724    let doc_command_target =
19725        (report.target_kind == "file" && report.target.ends_with(".md")).then_some(target.clone());
19726    let mut actions = report
19727        .guardrails
19728        .iter()
19729        .filter_map(|guardrail| {
19730            let priority = token_action_priority(&guardrail.kind)?;
19731            let compact_command = doc_command_target
19732                .as_ref()
19733                .map(|target| format!("agent-doc compact {target} --commit"));
19734            let restart_command = doc_command_target
19735                .as_ref()
19736                .map(|target| format!("agent-doc start {target}"));
19737            Some(SessionReviewNextTokenAction {
19738                priority,
19739                kind: guardrail.kind.clone(),
19740                severity: guardrail.severity.clone(),
19741                message: truncate_for_budget(&guardrail.message, max_bytes),
19742                guidance: truncate_for_budget(&guardrail.guidance, max_bytes),
19743                compact_command,
19744                restart_command,
19745                digest_commands: vec![
19746                    format!(
19747                        "tsift --envelope session-review {target} --next-context --budget normal"
19748                    ),
19749                    format!("tsift --envelope context-pack {target} --budget normal"),
19750                ],
19751            })
19752        })
19753        .collect::<Vec<_>>();
19754    actions.sort_by(|left, right| {
19755        left.priority
19756            .cmp(&right.priority)
19757            .then(left.kind.cmp(&right.kind))
19758    });
19759    actions.dedup_by(|left, right| left.kind == right.kind);
19760    actions.truncate(max_items);
19761    actions
19762}
19763
/// Returns the 1-based priority of a guardrail kind that maps to a next-token
/// action, or `None` for any other kind. Lower numbers sort first.
fn token_action_priority(kind: &str) -> Option<usize> {
    // Ordered from highest to lowest priority; the position determines the rank.
    const RANKED_KINDS: [&str; 4] = [
        "prompt_budget",
        "cache_resend",
        "restart_loop",
        "noop_closeout",
    ];
    RANKED_KINDS
        .iter()
        .position(|candidate| *candidate == kind)
        .map(|index| index + 1)
}
19773
19774pub(crate) fn print_session_review_budget_human(report: &SessionReviewBudgetReport) {
19775    let latest_total = report
19776        .latest_session_total_tokens
19777        .map(format_compact_count)
19778        .unwrap_or_else(|| "-".to_string());
19779    let latest_largest_turn = report
19780        .latest_session_largest_turn_total_tokens
19781        .map(format_compact_count)
19782        .unwrap_or_else(|| "-".to_string());
19783    println!(
19784        "session-review-budget target:{} kind:{} sessions:{}/{} aggregate_prompt:{} aggregate_cached:{} aggregate_total:{} latest_total:{} latest_largest_turn:{}",
19785        shell_quote(&report.target),
19786        report.target_kind,
19787        report.sessions.len(),
19788        report.sessions_matched,
19789        format_compact_count(report.prompt_tokens),
19790        format_compact_count(report.cached_input_tokens),
19791        format_compact_count(report.total_tokens),
19792        latest_total,
19793        latest_largest_turn
19794    );
19795    for session in &report.sessions {
19796        println!(
19797            "session {} {} total:{} largest_turn:{} prompts:{} fails:{} expand:{}",
19798            session.handle,
19799            session.path,
19800            format_compact_count(session.total_tokens),
19801            format_compact_count(session.largest_turn_total_tokens),
19802            session.prompt_targets,
19803            session.failures,
19804            session.expand
19805        );
19806    }
19807    for prompt in &report.prompt_targets {
19808        println!(
19809            "prompt {} count:{} {} expand:{}",
19810            prompt.handle, prompt.occurrences, prompt.text, prompt.expand
19811        );
19812    }
19813    for failure in &report.failures {
19814        println!(
19815            "fail {} {} count:{} {}{}{} expand:{}",
19816            failure.handle,
19817            failure.kind,
19818            failure.occurrences,
19819            failure.message,
19820            failure
19821                .command
19822                .as_ref()
19823                .map(|command| format!(" command:{command}"))
19824                .unwrap_or_default(),
19825            failure
19826                .session_path
19827                .as_ref()
19828                .map(|path| format!(" session:{path}"))
19829                .unwrap_or_default(),
19830            failure.expand
19831        );
19832    }
19833    for guardrail in &report.guardrails {
19834        println!("guardrail {guardrail}");
19835    }
19836    for warning in &report.warnings {
19837        println!("warning {warning}");
19838    }
19839    if report.truncated {
19840        println!(
19841            "budget truncated items:{} bytes:{}",
19842            report.max_items, report.max_bytes
19843        );
19844    }
19845}
19846
19847pub(crate) fn print_session_review_next_context_budget_human(
19848    report: &SessionReviewNextContextBudgetReport,
19849) {
19850    println!(
19851        "next-context-budget target:{} prompts:{}/{} files:{}/{} symbols:{}/{} failures:{}/{}",
19852        shell_quote(&report.target),
19853        report.prompt_targets.len(),
19854        report.prompt_target_total,
19855        report.touched_files.len(),
19856        report.touched_file_total,
19857        report.touched_symbols.len(),
19858        report.touched_symbol_total,
19859        report.unresolved_failures.len(),
19860        report.unresolved_failure_total
19861    );
19862    for prompt in &report.prompt_targets {
19863        println!("prompt {prompt}");
19864    }
19865    for file in &report.touched_files {
19866        println!("file {file}");
19867    }
19868    for symbol in &report.touched_symbols {
19869        if let Some(symbol_ref) = report
19870            .touched_symbol_refs
19871            .iter()
19872            .find(|entry| entry.name == *symbol)
19873        {
19874            println!(
19875                "symbol {}",
19876                format_symbol_preview_line(
19877                    &symbol_ref.handle,
19878                    &symbol_ref.name,
19879                    symbol_ref.tag_alias.as_deref()
19880                )
19881            );
19882        } else {
19883            println!("symbol {symbol}");
19884        }
19885    }
19886    for failure in &report.unresolved_failures {
19887        println!(
19888            "fail {} {} count:{} {}{}{} expand:{}",
19889            failure.handle,
19890            failure.kind,
19891            failure.occurrences,
19892            failure.message,
19893            failure
19894                .command
19895                .as_ref()
19896                .map(|command| format!(" command:{command}"))
19897                .unwrap_or_default(),
19898            failure
19899                .session_path
19900                .as_ref()
19901                .map(|path| format!(" session:{path}"))
19902                .unwrap_or_default(),
19903            failure.expand
19904        );
19905    }
19906    for action in &report.next_token_actions {
19907        println!(
19908            "token-action {} {} severity:{} {} guidance:{}",
19909            action.priority, action.kind, action.severity, action.message, action.guidance
19910        );
19911        if let Some(command) = &action.compact_command {
19912            println!("token-action-command {} compact {}", action.kind, command);
19913        }
19914        if let Some(command) = &action.restart_command {
19915            println!("token-action-command {} restart {}", action.kind, command);
19916        }
19917        for command in &action.digest_commands {
19918            println!("token-action-command {} digest {}", action.kind, command);
19919        }
19920    }
19921    for command in &report.next_digest_commands {
19922        println!("next {command}");
19923    }
19924    if report.truncated {
19925        println!(
19926            "budget truncated items:{} bytes:{}",
19927            report.max_items, report.max_bytes
19928        );
19929    }
19930}
19931
19932fn effective_context_budget(budget: ResponseBudget) -> ResponseBudget {
19933    ResponseBudget::new(Some(budget.preview_items()), Some(budget.preview_bytes()))
19934}
19935
19936fn build_context_summary_refs<'a>(
19937    prefix: &str,
19938    key_scope: &str,
19939    file_path: Option<&str>,
19940    snippets: impl Iterator<Item = (&'a str, &'a str)>,
19941    budget: ResponseBudget,
19942    ontology: Option<&TagOntologyPreviewContext>,
19943) -> Vec<ContextPackSummaryRefPreview> {
19944    let max_items = budget.preview_items();
19945    let max_bytes = budget.preview_bytes();
19946    snippets
19947        .take(max_items)
19948        .map(|(symbol, summary)| {
19949            let tag_alias = tag_alias_from_name(symbol);
19950            let ontology_refs = tag_alias
19951                .as_deref()
19952                .map(|alias| ontology_refs_for_alias(ontology, alias))
19953                .unwrap_or_default();
19954            let expand = match file_path {
19955                Some(path) => format!("tsift summarize --file {}", shell_quote(path)),
19956                None => format!("tsift summarize {}", shell_quote(symbol)),
19957            };
19958            ContextPackSummaryRefPreview {
19959                handle: stable_handle(prefix, &format!("{key_scope}:{symbol}:{summary}")),
19960                symbol: truncate_for_budget(symbol, max_bytes),
19961                tag_alias: tag_alias.map(|alias| truncate_for_budget(&alias, max_bytes)),
19962                ontology_refs,
19963                summary: truncate_for_budget(summary, max_bytes),
19964                expand,
19965            }
19966        })
19967        .collect()
19968}
19969
19970fn build_context_pack_diff_preview(
19971    report: &diff_digest::DiffDigestReport,
19972    budget: ResponseBudget,
19973    ontology: Option<&TagOntologyPreviewContext>,
19974) -> ContextPackDiffPreview {
19975    let max_items = budget.preview_items();
19976    let max_bytes = budget.preview_bytes();
19977    ContextPackDiffPreview {
19978        mode: diff_digest_mode_label(report.mode).to_string(),
19979        files_changed: report.files_changed,
19980        files_with_current_summaries: report.files_with_current_summaries,
19981        symbols_touched: report.symbols_touched,
19982        call_edges_added: report.call_edges_added,
19983        call_edges_removed: report.call_edges_removed,
19984        truncated: report.files.len() > max_items,
19985        files: report
19986            .files
19987            .iter()
19988            .take(max_items)
19989            .map(|file| ContextPackDiffFilePreview {
19990                path: truncate_for_budget(&file.path, max_bytes),
19991                status: diff_digest_status_label(file.status).to_string(),
19992                touched_symbols: file
19993                    .touched_symbols
19994                    .iter()
19995                    .take(max_items)
19996                    .map(|symbol| truncate_for_budget(symbol, max_bytes))
19997                    .collect(),
19998                touched_symbol_refs: file
19999                    .touched_symbols
20000                    .iter()
20001                    .take(max_items)
20002                    .map(|symbol| {
20003                        build_compact_symbol_ref_with_ontology(
20004                            "cdsym",
20005                            &format!("{}:{}", file.path, symbol),
20006                            symbol,
20007                            None,
20008                            max_bytes,
20009                            ontology,
20010                        )
20011                    })
20012                    .collect(),
20013                summary_state: diff_digest_summary_label(file.summary_state).to_string(),
20014                summary_refs: build_context_summary_refs(
20015                    "cdsum",
20016                    &file.path,
20017                    Some(&file.path),
20018                    file.current_summaries
20019                        .iter()
20020                        .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20021                    budget,
20022                    ontology,
20023                ),
20024                added_call_edges: file.added_call_edges.len(),
20025                removed_call_edges: file.removed_call_edges.len(),
20026                warnings: file
20027                    .warnings
20028                    .iter()
20029                    .take(max_items)
20030                    .map(|warning| truncate_for_budget(warning, max_bytes))
20031                    .collect(),
20032            })
20033            .collect(),
20034    }
20035}
20036
20037fn enrich_next_context_with_diff_symbols(
20038    next_context: &mut SessionReviewNextContextBudgetReport,
20039    diff_digest: &ContextPackDiffPreview,
20040    ontology: Option<&TagOntologyPreviewContext>,
20041) {
20042    let mut symbols = next_context.touched_symbols.clone();
20043    for file in &diff_digest.files {
20044        for symbol in &file.touched_symbol_refs {
20045            if !symbols.iter().any(|existing| existing == &symbol.name) {
20046                symbols.push(symbol.name.clone());
20047            }
20048        }
20049    }
20050
20051    if symbols.is_empty() {
20052        return;
20053    }
20054
20055    let max_items = next_context.max_items;
20056    let max_bytes = next_context.max_bytes;
20057    next_context.touched_symbol_total = next_context.touched_symbol_total.max(symbols.len());
20058    next_context.truncated |= symbols.len() > max_items;
20059    next_context.touched_symbols = symbols
20060        .iter()
20061        .take(max_items)
20062        .map(|entry| truncate_for_budget(entry, max_bytes))
20063        .collect();
20064    next_context.touched_symbol_refs = symbols
20065        .iter()
20066        .take(max_items)
20067        .map(|entry| {
20068            build_compact_symbol_ref_with_ontology(
20069                "ncsym",
20070                &format!("{}:{}", next_context.target, entry),
20071                entry,
20072                None,
20073                max_bytes,
20074                ontology,
20075            )
20076        })
20077        .collect();
20078}
20079
20080fn context_exploration_source_window(
20081    root: &Path,
20082    file: &str,
20083    reason: String,
20084    budget: &ExplorationBudget,
20085) -> ExplorationSourceWindow {
20086    let start = 1;
20087    let end = budget.lines_per_window;
20088    ExplorationSourceWindow {
20089        handle: stable_handle("xwin", &format!("context:{file}:{start}:{end}:{reason}")),
20090        file: file.to_string(),
20091        start,
20092        end,
20093        reason,
20094        expand: source_read_command(root, file, start, budget.lines_per_window),
20095    }
20096}
20097
20098fn build_context_pack_exploration_packet(
20099    root: &Path,
20100    next_context: &SessionReviewNextContextBudgetReport,
20101    diff_digest: &ContextPackDiffPreview,
20102) -> ExplorationPacket {
20103    let node_count = diff_digest
20104        .files_changed
20105        .saturating_add(next_context.touched_file_total)
20106        .saturating_add(next_context.touched_symbol_total);
20107    let edge_count = diff_digest
20108        .call_edges_added
20109        .saturating_add(diff_digest.call_edges_removed)
20110        .saturating_add(
20111            diff_digest
20112                .files
20113                .iter()
20114                .map(|file| file.touched_symbol_refs.len())
20115                .sum::<usize>(),
20116        );
20117    let budget = exploration_budget_for_counts(node_count, edge_count);
20118
20119    let mut relationship_map = Vec::new();
20120    for file in &diff_digest.files {
20121        for symbol in &file.touched_symbol_refs {
20122            if relationship_map.len() >= budget.relationship_limit {
20123                break;
20124            }
20125            relationship_map.push(ExplorationRelation {
20126                from: format!("file:{}", file.path),
20127                relation: "touches_symbol".to_string(),
20128                to: format!("symbol:{}", symbol.name),
20129                label: Some(format!("{} diff", file.status)),
20130            });
20131        }
20132    }
20133    for symbol in &next_context.touched_symbol_refs {
20134        if relationship_map.len() >= budget.relationship_limit {
20135            break;
20136        }
20137        relationship_map.push(ExplorationRelation {
20138            from: format!("context:{}", next_context.target),
20139            relation: "mentions_symbol".to_string(),
20140            to: format!("symbol:{}", symbol.name),
20141            label: Some("session next-context symbol".to_string()),
20142        });
20143    }
20144
20145    let mut source_windows = Vec::new();
20146    let mut seen_files = BTreeSet::new();
20147    for file in &diff_digest.files {
20148        if source_windows.len() >= budget.max_source_windows {
20149            break;
20150        }
20151        if seen_files.insert(file.path.clone()) {
20152            source_windows.push(context_exploration_source_window(
20153                root,
20154                &file.path,
20155                format!("changed file ({})", file.status),
20156                &budget,
20157            ));
20158        }
20159    }
20160    for file in &next_context.touched_files {
20161        if source_windows.len() >= budget.max_source_windows {
20162            break;
20163        }
20164        if seen_files.insert(file.clone()) {
20165            source_windows.push(context_exploration_source_window(
20166                root,
20167                file,
20168                "session touched file".to_string(),
20169                &budget,
20170            ));
20171        }
20172    }
20173
20174    let worker_seeds = if next_context.prompt_targets.is_empty() {
20175        next_context.next_digest_commands.clone()
20176    } else {
20177        next_context.prompt_targets.clone()
20178    };
20179    let mut worker_context = Vec::new();
20180    for (idx, prompt) in worker_seeds
20181        .iter()
20182        .take(budget.relationship_limit)
20183        .enumerate()
20184    {
20185        let summary = truncate_for_budget(prompt, next_context.max_bytes);
20186        worker_context.push(ExplorationWorkerContext {
20187            handle: stable_handle(
20188                "xwrk",
20189                &format!("{}:{}:{}", next_context.target, idx, prompt),
20190            ),
20191            target: next_context.target.clone(),
20192            summary,
20193            expand: format!(
20194                "tsift --envelope context-pack {} --budget normal",
20195                shell_quote(&next_context.target)
20196            ),
20197        });
20198    }
20199
20200    ExplorationPacket {
20201        budget,
20202        relationship_map,
20203        source_windows,
20204        worker_context,
20205        no_reread_guidance:
20206            "Use worker_context for bounded handoff scope, then source_windows expand commands before broad file reads; relationship_map explains why each window is in the handoff."
20207                .to_string(),
20208    }
20209}
20210
20211fn exploration_ref_id(label: &str) -> String {
20212    stable_handle("xref", label)
20213}
20214
20215fn context_pack_exploration_projection(packet: &ExplorationPacket) -> Result<GraphProjection> {
20216    let provenance = GraphProvenance::new("tsift.context-pack", "exploration");
20217    let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
20218    let mut edges = Vec::new();
20219
20220    for relation in &packet.relationship_map {
20221        for label in [&relation.from, &relation.to] {
20222            let id = exploration_ref_id(label);
20223            nodes.entry(id.clone()).or_insert_with(|| {
20224                SubstrateGraphNode::new(id, "exploration_ref", label.clone())
20225                    .with_property("label", label.clone())
20226                    .with_provenance(provenance.clone())
20227            });
20228        }
20229        let mut edge = SubstrateGraphEdge::new(
20230            exploration_ref_id(&relation.from),
20231            exploration_ref_id(&relation.to),
20232            relation.relation.clone(),
20233        )
20234        .with_provenance(provenance.clone());
20235        if let Some(label) = &relation.label {
20236            edge = edge.with_property("label", label.clone());
20237        }
20238        edges.push(edge_with_content_freshness(edge)?);
20239    }
20240
20241    for window in &packet.source_windows {
20242        let label = format!("{}:{}-{}", window.file, window.start, window.end);
20243        let node = SubstrateGraphNode::new(window.handle.clone(), "source_handle", label)
20244            .with_property("handle", window.handle.clone())
20245            .with_property("file", window.file.clone())
20246            .with_property("start", window.start.to_string())
20247            .with_property("end", window.end.to_string())
20248            .with_property("reason", window.reason.clone())
20249            .with_property("expand", window.expand.clone())
20250            .with_provenance(provenance.clone());
20251        nodes.insert(window.handle.clone(), node_with_content_freshness(node)?);
20252
20253        let file_ref = format!("file:{}", window.file);
20254        let file_ref_id = exploration_ref_id(&file_ref);
20255        nodes.entry(file_ref_id.clone()).or_insert_with(|| {
20256            SubstrateGraphNode::new(file_ref_id.clone(), "exploration_ref", file_ref.clone())
20257                .with_property("label", file_ref.clone())
20258                .with_provenance(provenance.clone())
20259        });
20260        let edge = SubstrateGraphEdge::new(window.handle.clone(), file_ref_id, "expands_source")
20261            .with_property("label", window.reason.clone())
20262            .with_provenance(provenance.clone());
20263        edges.push(edge_with_content_freshness(edge)?);
20264    }
20265
20266    for worker in &packet.worker_context {
20267        let node = SubstrateGraphNode::new(
20268            worker.handle.clone(),
20269            "worker_context",
20270            worker.summary.clone(),
20271        )
20272        .with_property("handle", worker.handle.clone())
20273        .with_property("target", worker.target.clone())
20274        .with_property("summary", worker.summary.clone())
20275        .with_property("expand", worker.expand.clone())
20276        .with_provenance(provenance.clone());
20277        nodes.insert(worker.handle.clone(), node_with_content_freshness(node)?);
20278
20279        let target_ref = format!("context:{}", worker.target);
20280        let target_ref_id = exploration_ref_id(&target_ref);
20281        nodes.entry(target_ref_id.clone()).or_insert_with(|| {
20282            SubstrateGraphNode::new(target_ref_id.clone(), "exploration_ref", target_ref.clone())
20283                .with_property("label", target_ref.clone())
20284                .with_provenance(provenance.clone())
20285        });
20286        edges.push(edge_with_content_freshness(
20287            SubstrateGraphEdge::new(worker.handle.clone(), target_ref_id, "scopes_context")
20288                .with_property("label", "bounded worker context".to_string())
20289                .with_provenance(provenance.clone()),
20290        )?);
20291
20292        for window in &packet.source_windows {
20293            edges.push(edge_with_content_freshness(
20294                SubstrateGraphEdge::new(
20295                    worker.handle.clone(),
20296                    window.handle.clone(),
20297                    "scopes_source",
20298                )
20299                .with_property("label", window.reason.clone())
20300                .with_provenance(provenance.clone()),
20301            )?);
20302        }
20303    }
20304
20305    let mut nodes = nodes.into_values().collect::<Vec<_>>();
20306    for node in &mut nodes {
20307        if node.freshness.is_none() {
20308            let fresh = node_with_content_freshness(node.clone())?;
20309            *node = fresh;
20310        }
20311    }
20312
20313    Ok(GraphProjection { nodes, edges })
20314}
20315
20316fn source_window_from_graph_node(node: SubstrateGraphNode) -> Result<ExplorationSourceWindow> {
20317    let file = node
20318        .properties
20319        .get("file")
20320        .cloned()
20321        .with_context(|| format!("source handle {} missing file property", node.id))?;
20322    let start = node
20323        .properties
20324        .get("start")
20325        .with_context(|| format!("source handle {} missing start property", node.id))?
20326        .parse::<usize>()
20327        .with_context(|| format!("source handle {} has invalid start", node.id))?;
20328    let end = node
20329        .properties
20330        .get("end")
20331        .with_context(|| format!("source handle {} missing end property", node.id))?
20332        .parse::<usize>()
20333        .with_context(|| format!("source handle {} has invalid end", node.id))?;
20334    Ok(ExplorationSourceWindow {
20335        handle: node
20336            .properties
20337            .get("handle")
20338            .cloned()
20339            .unwrap_or_else(|| node.id.clone()),
20340        file,
20341        start,
20342        end,
20343        reason: node
20344            .properties
20345            .get("reason")
20346            .cloned()
20347            .unwrap_or_else(|| "source context".to_string()),
20348        expand: node.properties.get("expand").cloned().unwrap_or_default(),
20349    })
20350}
20351
20352fn materialize_context_pack_exploration_packet(
20353    root: &Path,
20354    packet: ExplorationPacket,
20355) -> Result<ExplorationPacket> {
20356    let projection = context_pack_exploration_projection(&packet)?;
20357    let graph_db = graph_substrate_db_path(root, None);
20358    let mut store = SqliteGraphStore::open(&graph_db)?;
20359    store.upsert_projection(&projection)?;
20360
20361    let mut source_windows = Vec::new();
20362    for window in &packet.source_windows {
20363        let node = store
20364            .node(&window.handle)?
20365            .with_context(|| format!("source handle {} was not materialized", window.handle))?;
20366        source_windows.push(source_window_from_graph_node(node)?);
20367    }
20368
20369    let mut relationship_map = Vec::new();
20370    for relation in &packet.relationship_map {
20371        let from_id = exploration_ref_id(&relation.from);
20372        let to_id = exploration_ref_id(&relation.to);
20373        let from = store
20374            .node(&from_id)?
20375            .with_context(|| format!("exploration ref {} was not materialized", relation.from))?;
20376        let to = store
20377            .node(&to_id)?
20378            .with_context(|| format!("exploration ref {} was not materialized", relation.to))?;
20379        let edge = store
20380            .outgoing_edges(&from_id, Some(&relation.relation))?
20381            .into_iter()
20382            .find(|edge| edge.to_id == to_id)
20383            .with_context(|| {
20384                format!(
20385                    "exploration relation {} -> {} ({}) was not materialized",
20386                    relation.from, relation.to, relation.relation
20387                )
20388            })?;
20389        relationship_map.push(ExplorationRelation {
20390            from: from.label,
20391            relation: edge.kind,
20392            to: to.label,
20393            label: edge.properties.get("label").cloned(),
20394        });
20395    }
20396
20397    Ok(ExplorationPacket {
20398        budget: packet.budget,
20399        relationship_map,
20400        source_windows,
20401        worker_context: packet.worker_context,
20402        no_reread_guidance: packet.no_reread_guidance,
20403    })
20404}
20405
20406fn build_context_pack_test_preview(
20407    report: &test_digest::TestDigestReport,
20408    budget: ResponseBudget,
20409    ontology: Option<&TagOntologyPreviewContext>,
20410) -> ContextPackTestPreview {
20411    let max_items = budget.preview_items();
20412    let max_bytes = budget.preview_bytes();
20413    ContextPackTestPreview {
20414        runner: report.runner.clone(),
20415        failures: report.failures,
20416        grouped_failures: report.grouped_failures,
20417        counts: ContextPackTestCounts {
20418            passed: report.counts.passed,
20419            failed: report.counts.failed,
20420            skipped: report.counts.skipped,
20421        },
20422        truncated: report.failure_groups.len() > max_items || report.warnings.len() > max_items,
20423        failure_groups: report
20424            .failure_groups
20425            .iter()
20426            .take(max_items)
20427            .map(|failure| ContextPackTestFailurePreview {
20428                tests: failure
20429                    .tests
20430                    .iter()
20431                    .take(max_items)
20432                    .map(|test| truncate_for_budget(test, max_bytes))
20433                    .collect(),
20434                message: truncate_for_budget(&failure.message, max_bytes),
20435                path: failure
20436                    .path
20437                    .as_ref()
20438                    .map(|path| truncate_for_budget(path, max_bytes)),
20439                line: failure.line,
20440                occurrences: failure.occurrences,
20441                summary_state: test_digest_summary_label(failure.summary_state).to_string(),
20442                summary_refs: build_context_summary_refs(
20443                    "ctsum",
20444                    failure.path.as_deref().unwrap_or("test-failure"),
20445                    failure.path.as_deref(),
20446                    failure
20447                        .current_summaries
20448                        .iter()
20449                        .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20450                    budget,
20451                    ontology,
20452                ),
20453            })
20454            .collect(),
20455        warnings: report
20456            .warnings
20457            .iter()
20458            .take(max_items)
20459            .map(|warning| truncate_for_budget(warning, max_bytes))
20460            .collect(),
20461    }
20462}
20463
20464fn build_context_pack_log_preview(
20465    report: &log_digest::LogDigestReport,
20466    budget: ResponseBudget,
20467    ontology: Option<&TagOntologyPreviewContext>,
20468) -> ContextPackLogPreview {
20469    let max_items = budget.preview_items();
20470    let max_bytes = budget.preview_bytes();
20471    ContextPackLogPreview {
20472        total_lines: report.total_lines,
20473        non_empty_lines: report.non_empty_lines,
20474        signal_groups: report.signal_groups,
20475        repeated_line_groups: report.repeated_line_groups,
20476        file_ref_groups: report.file_ref_groups,
20477        symbol_ref_groups: report.symbol_ref_groups,
20478        stack_groups: report.stack_groups,
20479        truncated: report.signals.len() > max_items
20480            || report.repeated_lines.len() > max_items
20481            || report.file_refs.len() > max_items
20482            || report.symbol_refs.len() > max_items
20483            || report.warnings.len() > max_items,
20484        signals: report
20485            .signals
20486            .iter()
20487            .take(max_items)
20488            .map(|signal| ContextPackLogSignalPreview {
20489                severity: signal.severity.clone(),
20490                message: truncate_for_budget(&signal.message, max_bytes),
20491                path: signal
20492                    .path
20493                    .as_ref()
20494                    .map(|path| truncate_for_budget(path, max_bytes)),
20495                line: signal.line,
20496                occurrences: signal.occurrences,
20497                summary_state: log_digest_summary_label(signal.summary_state).to_string(),
20498                summary_refs: build_context_summary_refs(
20499                    "clsum",
20500                    signal.path.as_deref().unwrap_or("log-signal"),
20501                    signal.path.as_deref(),
20502                    signal
20503                        .current_summaries
20504                        .iter()
20505                        .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20506                    budget,
20507                    ontology,
20508                ),
20509            })
20510            .collect(),
20511        repeated_lines: report
20512            .repeated_lines
20513            .iter()
20514            .take(max_items)
20515            .map(|line| ContextPackLogRepeatedLinePreview {
20516                line: truncate_for_budget(&line.line, max_bytes),
20517                occurrences: line.occurrences,
20518            })
20519            .collect(),
20520        file_refs: report
20521            .file_refs
20522            .iter()
20523            .take(max_items)
20524            .map(|file| ContextPackLogFileRefPreview {
20525                path: truncate_for_budget(&file.path, max_bytes),
20526                line: file.line,
20527                occurrences: file.occurrences,
20528                summary_state: log_digest_summary_label(file.summary_state).to_string(),
20529                summary_refs: build_context_summary_refs(
20530                    "clfsum",
20531                    &file.path,
20532                    Some(&file.path),
20533                    file.current_summaries
20534                        .iter()
20535                        .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20536                    budget,
20537                    ontology,
20538                ),
20539            })
20540            .collect(),
20541        symbol_refs: report
20542            .symbol_refs
20543            .iter()
20544            .take(max_items)
20545            .map(|symbol| ContextPackLogSymbolRefPreview {
20546                handle: stable_handle("clsym", &symbol.symbol),
20547                symbol: truncate_for_budget(&symbol.symbol, max_bytes),
20548                tag_alias: tag_alias_from_name(&symbol.symbol)
20549                    .map(|alias| truncate_for_budget(&alias, max_bytes)),
20550                ontology_refs: tag_alias_from_name(&symbol.symbol)
20551                    .as_deref()
20552                    .map(|alias| ontology_refs_for_alias(ontology, alias))
20553                    .unwrap_or_default(),
20554                occurrences: symbol.occurrences,
20555                summary_state: log_digest_summary_label(symbol.summary_state).to_string(),
20556                summary_refs: build_context_summary_refs(
20557                    "clssum",
20558                    &symbol.symbol,
20559                    None,
20560                    symbol
20561                        .current_summaries
20562                        .iter()
20563                        .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str())),
20564                    budget,
20565                    ontology,
20566                ),
20567            })
20568            .collect(),
20569        warnings: report
20570            .warnings
20571            .iter()
20572            .take(max_items)
20573            .map(|warning| truncate_for_budget(warning, max_bytes))
20574            .collect(),
20575    }
20576}
20577
20578fn enrich_log_preview_with_diff_symbols(
20579    log_preview: &mut ContextPackLogPreview,
20580    diff_digest: &ContextPackDiffPreview,
20581    ontology: Option<&TagOntologyPreviewContext>,
20582) {
20583    if !log_preview.symbol_refs.is_empty() {
20584        return;
20585    }
20586
20587    let mut symbols = Vec::new();
20588    for file in &diff_digest.files {
20589        for symbol in &file.touched_symbol_refs {
20590            if !symbols
20591                .iter()
20592                .any(|existing: &String| existing == &symbol.name)
20593            {
20594                symbols.push(symbol.name.clone());
20595            }
20596        }
20597    }
20598
20599    if symbols.is_empty() {
20600        return;
20601    }
20602
20603    log_preview.symbol_ref_groups = log_preview.symbol_ref_groups.max(symbols.len());
20604    log_preview.symbol_refs = symbols
20605        .into_iter()
20606        .map(|symbol| ContextPackLogSymbolRefPreview {
20607            handle: stable_handle("clsym", &symbol),
20608            symbol: symbol.clone(),
20609            tag_alias: tag_alias_from_name(&symbol),
20610            ontology_refs: tag_alias_from_name(&symbol)
20611                .as_deref()
20612                .map(|alias| ontology_refs_for_alias(ontology, alias))
20613                .unwrap_or_default(),
20614            occurrences: 1,
20615            summary_state: "unavailable".to_string(),
20616            summary_refs: Vec::new(),
20617        })
20618        .collect();
20619}
20620
20621fn insert_ontology_refs(
20622    refs: &mut BTreeMap<String, CompactOntologyRefPreview>,
20623    candidates: &[CompactOntologyRefPreview],
20624) {
20625    for candidate in candidates {
20626        refs.entry(candidate.handle.clone())
20627            .or_insert_with(|| candidate.clone());
20628    }
20629}
20630
20631fn collect_context_pack_ontology_refs(
20632    next_context: &SessionReviewNextContextBudgetReport,
20633    diff_digest: &ContextPackDiffPreview,
20634    test_digest: &ContextPackOptionalSection<ContextPackTestPreview>,
20635    log_digest: &ContextPackOptionalSection<ContextPackLogPreview>,
20636) -> Vec<CompactOntologyRefPreview> {
20637    let mut refs = BTreeMap::new();
20638    for symbol in &next_context.touched_symbol_refs {
20639        insert_ontology_refs(&mut refs, &symbol.ontology_refs);
20640    }
20641    for file in &diff_digest.files {
20642        for symbol in &file.touched_symbol_refs {
20643            insert_ontology_refs(&mut refs, &symbol.ontology_refs);
20644        }
20645        for summary in &file.summary_refs {
20646            insert_ontology_refs(&mut refs, &summary.ontology_refs);
20647        }
20648    }
20649    if let Some(test) = &test_digest.report {
20650        for failure in &test.failure_groups {
20651            for summary in &failure.summary_refs {
20652                insert_ontology_refs(&mut refs, &summary.ontology_refs);
20653            }
20654        }
20655    }
20656    if let Some(log) = &log_digest.report {
20657        for signal in &log.signals {
20658            for summary in &signal.summary_refs {
20659                insert_ontology_refs(&mut refs, &summary.ontology_refs);
20660            }
20661        }
20662        for file in &log.file_refs {
20663            for summary in &file.summary_refs {
20664                insert_ontology_refs(&mut refs, &summary.ontology_refs);
20665            }
20666        }
20667        for symbol in &log.symbol_refs {
20668            insert_ontology_refs(&mut refs, &symbol.ontology_refs);
20669            for summary in &symbol.summary_refs {
20670                insert_ontology_refs(&mut refs, &summary.ontology_refs);
20671            }
20672        }
20673    }
20674    refs.into_values().collect()
20675}
20676
20677pub(crate) fn build_context_pack_report(
20678    path: &Path,
20679    test_input: Option<&Path>,
20680    runner: Option<&str>,
20681    log_input: Option<&Path>,
20682    budget: ResponseBudget,
20683) -> Result<ContextPackReport> {
20684    Ok(build_context_pack_report_with_profile(path, test_input, runner, log_input, budget)?.0)
20685}
20686
20687fn build_context_pack_report_with_profile(
20688    path: &Path,
20689    test_input: Option<&Path>,
20690    runner: Option<&str>,
20691    log_input: Option<&Path>,
20692    budget: ResponseBudget,
20693) -> Result<(ContextPackReport, Vec<GraphDbBackendEvalPhaseTiming>)> {
20694    // #gdbgatecold: trusted scope share — `prepare_agent_doc_index_gate_cached`
20695    // and `context_pack_status_reminders` both call `IndexDb::inspect_read_only`
20696    // on the same `(root, .tsift/index.db)` cold path. While this guard is
20697    // alive, the second call reuses the cached inspection on the same thread
20698    // instead of paying the disk/SQLite walk a second time. Search runs
20699    // entirely outside this scope, so freshness re-checks after a file
20700    // mutation are unaffected.
20701    let _inspect_scope = index::InspectScopeGuard::new();
20702    let budget = effective_context_budget(budget);
20703    let mut phases = Vec::new();
20704    let session_review_started = Instant::now();
20705    let (review, session_review_sub_phases) = session_review::compute_with_phases(path)?;
20706    let session_review_total_micros = session_review_started.elapsed().as_micros();
20707    phases.push(graph_db_backend_eval_phase_timing(
20708        "session_review_compute",
20709        session_review_total_micros,
20710        "session-review prompt/touched-file/touched-symbol/failure aggregation for the context-pack handoff",
20711    ));
20712    for sub_phase in &session_review_sub_phases {
20713        phases.push(graph_db_backend_eval_phase_timing(
20714            &format!("session_review_compute.{}", sub_phase.name),
20715            sub_phase.duration_micros,
20716            &sub_phase.detail,
20717        ));
20718    }
20719    let root = PathBuf::from(&review.root);
20720    let status_index_gate_started = Instant::now();
20721    let mut status_index_gate_sub_phases: Vec<(String, u128, String)> = Vec::with_capacity(3);
20722    let index_gate_started = Instant::now();
20723    let (gate, gate_cache_detail) =
20724        prepare_agent_doc_index_gate_cached(&root, path, None, "context-pack handoff");
20725    let index_gate_micros = index_gate_started.elapsed().as_micros();
20726    status_index_gate_sub_phases.push((
20727        "prepare_agent_doc_index_gate".to_string(),
20728        index_gate_micros,
20729        gate_cache_detail,
20730    ));
20731
20732    let reminders_started = Instant::now();
20733    let mut status_reminders = gate.diagnostics.clone();
20734    status_reminders.extend(context_pack_status_reminders(&root));
20735    let reminders_micros = reminders_started.elapsed().as_micros();
20736    status_index_gate_sub_phases.push((
20737        "context_pack_status_reminders".to_string(),
20738        reminders_micros,
20739        "tsift status reminders for the cached preparation context".to_string(),
20740    ));
20741
20742    let ontology_started = Instant::now();
20743    let ontology = load_tag_ontology_preview_context(&root);
20744    let ontology_micros = ontology_started.elapsed().as_micros();
20745    status_index_gate_sub_phases.push((
20746        "load_tag_ontology_preview_context".to_string(),
20747        ontology_micros,
20748        "tag ontology preview context load".to_string(),
20749    ));
20750
20751    let status_index_gate_total_micros = status_index_gate_started.elapsed().as_micros();
20752    phases.push(graph_db_backend_eval_phase_timing(
20753        "status_index_gate",
20754        status_index_gate_total_micros,
20755        "agent-doc index gate, tsift status reminders, and ontology preview loading",
20756    ));
20757    for (name, micros, detail) in &status_index_gate_sub_phases {
20758        phases.push(graph_db_backend_eval_phase_timing(
20759            &format!("status_index_gate.{name}"),
20760            *micros,
20761            detail,
20762        ));
20763    }
20764    let ontology_ref = ontology.as_ref();
20765    let mut next_context =
20766        build_session_review_next_context_budget_report(&review, budget, ontology_ref);
20767    // #gdbprephot: cap working-tree diff_digest parsing to the preview budget.
20768    // build_context_pack_diff_preview only emits files.take(preview_items),
20769    // and enrich_next_context_with_diff_symbols / build_context_pack_exploration_packet
20770    // only iterate diff_digest.files (the preview window). The full-fat parse
20771    // of every working-tree changed file dominated context_pack_diff cost on
20772    // repos with many unstaged edits.
20773    let diff_parse_budget = budget.preview_items();
20774    let diff_digest = graph_db_backend_eval_timed_phase(
20775        &mut phases,
20776        "context_pack_diff",
20777        "working-tree diff digest preview used to enrich next-context symbols",
20778        || {
20779            Ok(build_context_pack_diff_preview(
20780                &diff_digest::compute(
20781                    &root,
20782                    diff_digest::DiffDigestOptions {
20783                        cached: false,
20784                        revision: None,
20785                        max_parsed_files: Some(diff_parse_budget),
20786                    },
20787                )
20788                .with_context(|| {
20789                    format!("computing context-pack diff digest for {}", root.display())
20790                })?,
20791                budget,
20792                ontology_ref,
20793            ))
20794        },
20795    )?;
20796    enrich_next_context_with_diff_symbols(&mut next_context, &diff_digest, ontology_ref);
20797    let test_digest = match test_input {
20798        Some(file_path) => {
20799            let input = fs::read_to_string(file_path)
20800                .with_context(|| format!("reading test output: {}", file_path.display()))?;
20801            if input.trim().is_empty() {
20802                bail!("no test output provided in {}", file_path.display());
20803            }
20804            let report = test_digest::compute(&root, &input, runner)?;
20805            ContextPackOptionalSection {
20806                status: "included".to_string(),
20807                command: format!(
20808                    "tsift test-digest --path . --input {}{}",
20809                    shell_quote(file_path.to_str().unwrap_or_default()),
20810                    runner
20811                        .map(|value| format!(" --runner {}", shell_quote(value)))
20812                        .unwrap_or_default()
20813                ),
20814                source: Some(file_path.display().to_string()),
20815                report: Some(build_context_pack_test_preview(
20816                    &report,
20817                    budget,
20818                    ontology_ref,
20819                )),
20820            }
20821        }
20822        None => ContextPackOptionalSection {
20823            status: "not_provided".to_string(),
20824            command: "tsift test-digest --path . < test.log".to_string(),
20825            source: None,
20826            report: None,
20827        },
20828    };
20829    let log_digest = match log_input {
20830        Some(file_path) => {
20831            let input = fs::read_to_string(file_path)
20832                .with_context(|| format!("reading log output: {}", file_path.display()))?;
20833            if input.trim().is_empty() {
20834                bail!("no log output provided in {}", file_path.display());
20835            }
20836            let report = log_digest::compute(&root, &input)?;
20837            let mut preview = build_context_pack_log_preview(&report, budget, ontology_ref);
20838            enrich_log_preview_with_diff_symbols(&mut preview, &diff_digest, ontology_ref);
20839            ContextPackOptionalSection {
20840                status: "included".to_string(),
20841                command: format!(
20842                    "tsift log-digest --path . --input {}",
20843                    shell_quote(file_path.to_str().unwrap_or_default())
20844                ),
20845                source: Some(file_path.display().to_string()),
20846                report: Some(preview),
20847            }
20848        }
20849        None => ContextPackOptionalSection {
20850            status: "not_provided".to_string(),
20851            command: "tsift log-digest --path . < build.log".to_string(),
20852            source: None,
20853            report: None,
20854        },
20855    };
20856
20857    let ontology_refs =
20858        collect_context_pack_ontology_refs(&next_context, &diff_digest, &test_digest, &log_digest);
20859    let exploration = graph_db_backend_eval_timed_phase(
20860        &mut phases,
20861        "exploration_materialization",
20862        "context-pack source-window and worker-context exploration packet projection",
20863        || {
20864            materialize_context_pack_exploration_packet(
20865                &root,
20866                build_context_pack_exploration_packet(&root, &next_context, &diff_digest),
20867            )
20868        },
20869    )?;
20870    let graph_orchestration = graph_db_backend_eval_timed_phase(
20871        &mut phases,
20872        "graph_orchestration",
20873        "context-pack graph freshness, evidence packet ids, and conflict-matrix follow-up commands",
20874        || context_pack_graph_orchestration(&root, path, &next_context, &exploration),
20875    )?;
20876
20877    Ok((
20878        ContextPackReport {
20879            root: review.root,
20880            target: review.target,
20881            target_kind: review.target_kind,
20882            max_items: budget.preview_items(),
20883            max_bytes: budget.preview_bytes(),
20884            status_reminders,
20885            ontology_refs,
20886            next_context,
20887            diff_digest,
20888            test_digest,
20889            log_digest,
20890            exploration,
20891            graph_orchestration,
20892            resume_commands: review.next_context.next_digest_commands,
20893        },
20894        phases,
20895    ))
20896}
20897
20898fn context_pack_status_reminders(root: &Path) -> Vec<String> {
20899    status::check_status(root)
20900        .map(|report| report.reminders)
20901        .unwrap_or_default()
20902}
20903
20904fn context_pack_graph_orchestration(
20905    root: &Path,
20906    path: &Path,
20907    next_context: &SessionReviewNextContextBudgetReport,
20908    exploration: &ExplorationPacket,
20909) -> Result<ContextPackGraphOrchestration> {
20910    let graph_db = graph_substrate_db_path(root, None);
20911    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
20912        .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
20913    let projection_freshness = sqlite_graph_freshness(&store, "root")?;
20914    let mut warnings = projection_freshness.diagnostics.clone();
20915    if let Some(recovery) = store.read_only_recovery() {
20916        warnings.push(graph_db_read_recovery_diagnostic(recovery));
20917    }
20918    let mut targets = next_context
20919        .prompt_targets
20920        .iter()
20921        .flat_map(|prompt| extract_conflict_target_refs(prompt))
20922        .collect::<Vec<_>>();
20923    if targets.is_empty() {
20924        targets.extend(
20925            exploration
20926                .worker_context
20927                .iter()
20928                .flat_map(|worker| extract_conflict_target_refs(&worker.summary)),
20929        );
20930    }
20931    targets = dedupe_preserve_order(targets);
20932
20933    let mut evidence_packet_ids = Vec::new();
20934    let mut resolvable_targets = Vec::new();
20935    for target in &targets {
20936        match graph_db_resolve_evidence_target(&store, target)? {
20937            Some(node) => {
20938                evidence_packet_ids.push(graph_db_evidence_packet_id(
20939                    target,
20940                    &node,
20941                    &projection_freshness,
20942                ));
20943                resolvable_targets.push(target.clone());
20944            }
20945            None => warnings.push(format!("graph evidence target not found: {target}")),
20946        }
20947    }
20948
20949    let mut follow_up_commands = vec![format!(
20950        "tsift graph-db --path {} status --json",
20951        shell_quote(root.to_string_lossy().as_ref())
20952    )];
20953    for target in &resolvable_targets {
20954        follow_up_commands.push(format!(
20955            "tsift graph-db --path {} evidence {} --depth 3 --limit 8 --json",
20956            shell_quote(root.to_string_lossy().as_ref()),
20957            shell_quote(target)
20958        ));
20959    }
20960    if !resolvable_targets.is_empty() {
20961        follow_up_commands.push(format!(
20962            "tsift conflict-matrix --path {} {} --json",
20963            shell_quote(path.to_string_lossy().as_ref()),
20964            resolvable_targets
20965                .iter()
20966                .map(|target| shell_quote(target))
20967                .collect::<Vec<_>>()
20968                .join(" ")
20969        ));
20970    }
20971
20972    let conflict_matrix_decisions = if resolvable_targets.is_empty() {
20973        vec!["no resolvable backlog/job targets found for conflict-matrix".to_string()]
20974    } else {
20975        vec![format!(
20976            "run conflict-matrix before parallel dispatch for {} target(s)",
20977            resolvable_targets.len()
20978        )]
20979    };
20980    let worker_ownership_blocks = exploration
20981        .worker_context
20982        .iter()
20983        .map(|worker| format!("{} scopes {}", worker.handle, worker.summary))
20984        .collect::<Vec<_>>();
20985    let projection_hashes = projection_freshness
20986        .content_hash
20987        .clone()
20988        .into_iter()
20989        .collect();
20990
20991    Ok(ContextPackGraphOrchestration {
20992        contract_version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
20993        graph_db_command: format!(
20994            "tsift graph-db --path {} status --json",
20995            shell_quote(root.to_string_lossy().as_ref())
20996        ),
20997        projection_freshness,
20998        projection_hashes,
20999        evidence_packet_ids,
21000        conflict_matrix_decisions,
21001        worker_ownership_blocks,
21002        follow_up_commands: dedupe_preserve_order(follow_up_commands),
21003        warnings,
21004    })
21005}
21006
21007pub(crate) fn print_context_pack_human(report: &ContextPackReport, compact: bool) {
21008    if compact {
21009        println!(
21010            "context-pack target:{} prompts:{}/{} diff:{}/{} test:{} log:{}",
21011            shell_quote(&report.target),
21012            report.next_context.prompt_targets.len(),
21013            report.next_context.prompt_target_total,
21014            report.diff_digest.files.len(),
21015            report.diff_digest.files_changed,
21016            report.test_digest.status,
21017            report.log_digest.status
21018        );
21019        for reminder in &report.status_reminders {
21020            println!("reminder {reminder}");
21021        }
21022        for prompt in &report.next_context.prompt_targets {
21023            println!("prompt {prompt}");
21024        }
21025        for action in &report.next_context.next_token_actions {
21026            println!(
21027                "token-action {} {} commands:{}",
21028                action.priority,
21029                action.kind,
21030                action.digest_commands.len()
21031                    + usize::from(action.compact_command.is_some())
21032                    + usize::from(action.restart_command.is_some())
21033            );
21034        }
21035        for file in &report.diff_digest.files {
21036            println!(
21037                "diff {} status:{} syms:{} sums:{}",
21038                file.path,
21039                file.status,
21040                if file.touched_symbol_refs.is_empty() {
21041                    "-".to_string()
21042                } else {
21043                    file.touched_symbol_refs
21044                        .iter()
21045                        .map(compact_symbol_ref_token)
21046                        .collect::<Vec<_>>()
21047                        .join(",")
21048                },
21049                if file.summary_refs.is_empty() {
21050                    "-".to_string()
21051                } else {
21052                    file.summary_refs
21053                        .iter()
21054                        .map(|summary| summary.handle.as_str())
21055                        .collect::<Vec<_>>()
21056                        .join(",")
21057                }
21058            );
21059        }
21060        if let Some(test) = &report.test_digest.report {
21061            println!(
21062                "test runner:{} failures:{} groups:{}",
21063                test.runner, test.failures, test.grouped_failures
21064            );
21065        } else {
21066            println!("test {}", report.test_digest.command);
21067        }
21068        if let Some(log) = &report.log_digest.report {
21069            println!(
21070                "log lines:{} signals:{} files:{} syms:{}",
21071                log.non_empty_lines, log.signal_groups, log.file_ref_groups, log.symbol_ref_groups
21072            );
21073        } else {
21074            println!("log {}", report.log_digest.command);
21075        }
21076        println!(
21077            "explore windows:{} relations:{} budget:{}",
21078            report.exploration.source_windows.len(),
21079            report.exploration.relationship_map.len(),
21080            report.exploration.budget.project_size
21081        );
21082        println!(
21083            "graph-orchestration freshness:{} evidence:{} ownership:{}",
21084            report.graph_orchestration.projection_freshness.status,
21085            report.graph_orchestration.evidence_packet_ids.len(),
21086            report.graph_orchestration.worker_ownership_blocks.len()
21087        );
21088        return;
21089    }
21090
21091    println!("Context pack");
21092    println!("  target:                 {}", report.target);
21093    println!("  target kind:            {}", report.target_kind);
21094    println!("  root:                   {}", report.root);
21095    println!(
21096        "  preview budget:         {} items / {} bytes",
21097        report.max_items, report.max_bytes
21098    );
21099    if !report.status_reminders.is_empty() {
21100        println!("  status reminders:");
21101        for reminder in &report.status_reminders {
21102            println!("  - {reminder}");
21103        }
21104    }
21105    println!();
21106    println!("Next context");
21107    println!(
21108        "  prompt targets:         {}/{}",
21109        report.next_context.prompt_targets.len(),
21110        report.next_context.prompt_target_total
21111    );
21112    println!(
21113        "  touched files:          {}/{}",
21114        report.next_context.touched_files.len(),
21115        report.next_context.touched_file_total
21116    );
21117    println!(
21118        "  touched symbols:        {}/{}",
21119        report.next_context.touched_symbols.len(),
21120        report.next_context.touched_symbol_total
21121    );
21122    println!(
21123        "  unresolved failures:    {}/{}",
21124        report.next_context.unresolved_failures.len(),
21125        report.next_context.unresolved_failure_total
21126    );
21127    if !report.next_context.prompt_targets.is_empty() {
21128        for prompt in &report.next_context.prompt_targets {
21129            println!("  - prompt: {prompt}");
21130        }
21131    }
21132    if !report.next_context.touched_files.is_empty() {
21133        for path in &report.next_context.touched_files {
21134            println!("  - file: {path}");
21135        }
21136    }
21137    if !report.next_context.touched_symbols.is_empty() {
21138        for symbol in &report.next_context.touched_symbol_refs {
21139            println!(
21140                "  - symbol: {}",
21141                format_symbol_preview_line(
21142                    &symbol.handle,
21143                    &symbol.name,
21144                    symbol.tag_alias.as_deref()
21145                )
21146            );
21147        }
21148    }
21149    if !report.next_context.next_token_actions.is_empty() {
21150        println!("  token actions:");
21151        for action in &report.next_context.next_token_actions {
21152            println!(
21153                "  - [{}:{}] {} | guidance: {}",
21154                action.priority, action.kind, action.message, action.guidance
21155            );
21156            if let Some(command) = &action.compact_command {
21157                println!("    compact: {command}");
21158            }
21159            if let Some(command) = &action.restart_command {
21160                println!("    restart: {command}");
21161            }
21162            for command in &action.digest_commands {
21163                println!("    digest: {command}");
21164            }
21165        }
21166    }
21167
21168    println!();
21169    println!("Diff digest");
21170    println!("  mode:                   {}", report.diff_digest.mode);
21171    println!(
21172        "  files changed:          {}/{}",
21173        report.diff_digest.files.len(),
21174        report.diff_digest.files_changed
21175    );
21176    println!(
21177        "  touched symbols:        {}",
21178        report.diff_digest.symbols_touched
21179    );
21180    println!(
21181        "  call edges:             +{} / -{}",
21182        report.diff_digest.call_edges_added, report.diff_digest.call_edges_removed
21183    );
21184    for file in &report.diff_digest.files {
21185        println!("  - {} [{}]", file.path, file.status);
21186        if !file.touched_symbol_refs.is_empty() {
21187            println!(
21188                "    symbols: {}",
21189                file.touched_symbol_refs
21190                    .iter()
21191                    .map(|symbol| format_symbol_preview_line(
21192                        &symbol.handle,
21193                        &symbol.name,
21194                        symbol.tag_alias.as_deref()
21195                    ))
21196                    .collect::<Vec<_>>()
21197                    .join(" | ")
21198            );
21199        }
21200        if !file.warnings.is_empty() {
21201            println!("    warnings: {}", file.warnings.join(" | "));
21202        }
21203        if !file.summary_refs.is_empty() {
21204            println!(
21205                "    summaries: {}",
21206                file.summary_refs
21207                    .iter()
21208                    .map(format_summary_ref_line)
21209                    .collect::<Vec<_>>()
21210                    .join(" | ")
21211            );
21212        }
21213    }
21214
21215    println!();
21216    println!("Test digest");
21217    println!("  status:                 {}", report.test_digest.status);
21218    match &report.test_digest.report {
21219        Some(test) => {
21220            println!("  runner:                 {}", test.runner);
21221            println!("  failures:               {}", test.failures);
21222            println!("  failure groups:         {}", test.grouped_failures);
21223            for failure in &test.failure_groups {
21224                let location = match (&failure.path, failure.line) {
21225                    (Some(path), Some(line)) => format!("{path}:{line}"),
21226                    (Some(path), None) => path.clone(),
21227                    _ => "(no file anchor)".to_string(),
21228                };
21229                println!(
21230                    "  - {} count:{} msg:{}",
21231                    location, failure.occurrences, failure.message
21232                );
21233                if !failure.summary_refs.is_empty() {
21234                    println!(
21235                        "    summaries: {}",
21236                        failure
21237                            .summary_refs
21238                            .iter()
21239                            .map(format_summary_ref_line)
21240                            .collect::<Vec<_>>()
21241                            .join(" | ")
21242                    );
21243                }
21244            }
21245        }
21246        None => println!("  capture:                {}", report.test_digest.command),
21247    }
21248
21249    println!();
21250    println!("Log digest");
21251    println!("  status:                 {}", report.log_digest.status);
21252    match &report.log_digest.report {
21253        Some(log) => {
21254            println!("  non-empty lines:        {}", log.non_empty_lines);
21255            println!("  signal groups:          {}", log.signal_groups);
21256            println!("  file refs:              {}", log.file_ref_groups);
21257            println!("  symbol refs:            {}", log.symbol_ref_groups);
21258            for signal in &log.signals {
21259                let location = match (&signal.path, signal.line) {
21260                    (Some(path), Some(line)) => format!("{path}:{line}"),
21261                    (Some(path), None) => path.clone(),
21262                    _ => "(no file anchor)".to_string(),
21263                };
21264                println!(
21265                    "  - {} {} count:{} msg:{}",
21266                    location, signal.severity, signal.occurrences, signal.message
21267                );
21268                if !signal.summary_refs.is_empty() {
21269                    println!(
21270                        "    summaries: {}",
21271                        signal
21272                            .summary_refs
21273                            .iter()
21274                            .map(format_summary_ref_line)
21275                            .collect::<Vec<_>>()
21276                            .join(" | ")
21277                    );
21278                }
21279            }
21280            for symbol in &log.symbol_refs {
21281                println!(
21282                    "  - symbol: {} count:{} state:{}",
21283                    format_symbol_preview_line(
21284                        &symbol.handle,
21285                        &symbol.symbol,
21286                        symbol.tag_alias.as_deref()
21287                    ),
21288                    symbol.occurrences,
21289                    symbol.summary_state
21290                );
21291                if !symbol.summary_refs.is_empty() {
21292                    println!(
21293                        "    summaries: {}",
21294                        symbol
21295                            .summary_refs
21296                            .iter()
21297                            .map(format_summary_ref_line)
21298                            .collect::<Vec<_>>()
21299                            .join(" | ")
21300                    );
21301                }
21302            }
21303        }
21304        None => println!("  capture:                {}", report.log_digest.command),
21305    }
21306
21307    println!();
21308    println!("Exploration packet");
21309    println!(
21310        "  budget:                 {} ({} windows x {} lines)",
21311        report.exploration.budget.project_size,
21312        report.exploration.budget.max_source_windows,
21313        report.exploration.budget.lines_per_window
21314    );
21315    for window in &report.exploration.source_windows {
21316        println!(
21317            "  - window {}:{}-{} ({})",
21318            window.file, window.start, window.end, window.reason
21319        );
21320        println!("    expand: {}", window.expand);
21321    }
21322    for relation in &report.exploration.relationship_map {
21323        println!(
21324            "  - relation {} -{}-> {}",
21325            relation.from, relation.relation, relation.to
21326        );
21327    }
21328
21329    println!();
21330    println!("Graph orchestration");
21331    println!(
21332        "  projection freshness:   {}",
21333        report.graph_orchestration.projection_freshness.status
21334    );
21335    for evidence in &report.graph_orchestration.evidence_packet_ids {
21336        println!("  - evidence: {evidence}");
21337    }
21338    for decision in &report.graph_orchestration.conflict_matrix_decisions {
21339        println!("  - decision: {decision}");
21340    }
21341    for block in &report.graph_orchestration.worker_ownership_blocks {
21342        println!("  - ownership: {block}");
21343    }
21344    for command in &report.graph_orchestration.follow_up_commands {
21345        println!("  - next: {command}");
21346    }
21347
21348    println!();
21349    println!("Resume commands:");
21350    for command in &report.resume_commands {
21351        println!("  - {}", command);
21352    }
21353}
21354
/// Formats `value` as a short human-readable count.
///
/// Values below 1,000 are printed verbatim. Larger values are scaled to one
/// decimal place and suffixed with `K` (thousands) or `M` (millions).
pub(crate) fn format_compact_count(value: u64) -> String {
    if value >= 1_000_000 {
        return format!("{:.1}M", value as f64 / 1_000_000.0);
    }
    if value < 1_000 {
        return value.to_string();
    }

    let thousands = format!("{:.1}K", value as f64 / 1_000.0);
    // Values just below one million round up to "1000.0K" at one decimal
    // place; promote them to the next unit so the output stays compact.
    if thousands == "1000.0K" {
        "1.0M".to_string()
    } else {
        thousands
    }
}
21364
21365fn cmd_digest_runner(
21366    kind: &str,
21367    path: &Path,
21368    runner: Option<&str>,
21369    shell_command: &str,
21370    format: OutputFormat,
21371) -> Result<()> {
21372    let digest_kind = DigestRunnerKind::parse(kind)?;
21373    let root = transcript_artifact_root(path)?;
21374    let execution = run_digest_runner_command(shell_command)?;
21375    let output = &execution.output;
21376    let captured = String::from_utf8_lossy(&output.stdout).into_owned();
21377    let exit_code = output.status.code().unwrap_or(-1);
21378    if format.json_output && format.envelope {
21379        let artifact_key = format!(
21380            "{}:{}:{}:{}",
21381            digest_kind.as_str(),
21382            shell_command,
21383            execution.executed_command,
21384            captured
21385        );
21386        let artifact = if captured.trim().is_empty() {
21387            None
21388        } else {
21389            let (suffix, expand) = match digest_kind {
21390                DigestRunnerKind::Test => (
21391                    "test.log",
21392                    format!(
21393                        "tsift test-digest --path {} --input {}{} --json",
21394                        shell_quote(root.to_string_lossy().as_ref()),
21395                        shell_quote(
21396                            root.join(".tsift/artifacts")
21397                                .join(format!("{}.test.log", stable_handle("tart", &artifact_key)))
21398                                .to_string_lossy()
21399                                .as_ref()
21400                        ),
21401                        runner
21402                            .map(|value| format!(" --runner {}", shell_quote(value)))
21403                            .unwrap_or_default()
21404                    ),
21405                ),
21406                DigestRunnerKind::Log => (
21407                    "log",
21408                    format!(
21409                        "tsift log-digest --path {} --input {} --json",
21410                        shell_quote(root.to_string_lossy().as_ref()),
21411                        shell_quote(
21412                            root.join(".tsift/artifacts")
21413                                .join(format!("{}.log", stable_handle("tart", &artifact_key)))
21414                                .to_string_lossy()
21415                                .as_ref()
21416                        )
21417                    ),
21418                ),
21419            };
21420            Some(persist_transcript_artifact(
21421                &root,
21422                "tart",
21423                suffix,
21424                &artifact_key,
21425                &captured,
21426                expand,
21427            )?)
21428        };
21429        let filter_report = execution.filter.as_ref().map(DigestRunnerFilter::to_json);
21430
21431        match digest_kind {
21432            DigestRunnerKind::Test => {
21433                let digest_report = test_digest::compute(path, &captured, runner)?;
21434                let report = serde_json::json!({
21435                    "kind": digest_kind.as_str(),
21436                    "command": shell_command,
21437                    "executed_command": execution.executed_command,
21438                    "exit_code": exit_code,
21439                    "success": output.status.success(),
21440                    "filter": filter_report,
21441                    "artifact": artifact,
21442                    "digest": digest_report,
21443                });
21444                let mut follow_up = artifact
21445                    .as_ref()
21446                    .map(|entry| vec![entry.expand.clone()])
21447                    .unwrap_or_default();
21448                follow_up.push(format!(
21449                    "tsift rewrite --run {}",
21450                    shell_quote(shell_command)
21451                ));
21452                let summary_text = if output.status.success() && digest_report.failures == 0 {
21453                    format!("test run passed for {}", runner.unwrap_or("auto"))
21454                } else {
21455                    format!("test run captured {} failure(s)", digest_report.failures)
21456                };
21457                print_json_or_envelope(
21458                    &report,
21459                    &format,
21460                    "digest-runner",
21461                    "test-run",
21462                    ToolEnvelopeSummary {
21463                        text: summary_text,
21464                        metrics: vec![
21465                            envelope_metric("runner", &digest_report.runner),
21466                            envelope_metric("exit_code", exit_code),
21467                            envelope_metric("filter", execution.filter_label()),
21468                            envelope_metric("failures", digest_report.failures),
21469                            envelope_metric("groups", digest_report.grouped_failures),
21470                            envelope_metric(
21471                                "artifact",
21472                                artifact
21473                                    .as_ref()
21474                                    .map(|entry| entry.handle.as_str())
21475                                    .unwrap_or("-"),
21476                            ),
21477                        ],
21478                    },
21479                    false,
21480                    follow_up,
21481                )?;
21482            }
21483            DigestRunnerKind::Log => {
21484                let digest_report = log_digest::compute(path, &captured)?;
21485                let report = serde_json::json!({
21486                    "kind": digest_kind.as_str(),
21487                    "command": shell_command,
21488                    "executed_command": execution.executed_command,
21489                    "exit_code": exit_code,
21490                    "success": output.status.success(),
21491                    "filter": filter_report,
21492                    "artifact": artifact,
21493                    "digest": digest_report,
21494                });
21495                let mut follow_up = artifact
21496                    .as_ref()
21497                    .map(|entry| vec![entry.expand.clone()])
21498                    .unwrap_or_default();
21499                follow_up.push(format!(
21500                    "tsift rewrite --run {}",
21501                    shell_quote(shell_command)
21502                ));
21503                let summary_text = if output.status.success() && digest_report.signal_groups == 0 {
21504                    "command finished without log signals".to_string()
21505                } else {
21506                    format!(
21507                        "command emitted {} log signal group(s)",
21508                        digest_report.signal_groups
21509                    )
21510                };
21511                print_json_or_envelope(
21512                    &report,
21513                    &format,
21514                    "digest-runner",
21515                    "command-run",
21516                    ToolEnvelopeSummary {
21517                        text: summary_text,
21518                        metrics: vec![
21519                            envelope_metric("exit_code", exit_code),
21520                            envelope_metric("filter", execution.filter_label()),
21521                            envelope_metric("signals", digest_report.signal_groups),
21522                            envelope_metric("file_refs", digest_report.file_ref_groups),
21523                            envelope_metric(
21524                                "artifact",
21525                                artifact
21526                                    .as_ref()
21527                                    .map(|entry| entry.handle.as_str())
21528                                    .unwrap_or("-"),
21529                            ),
21530                        ],
21531                    },
21532                    false,
21533                    follow_up,
21534                )?;
21535            }
21536        }
21537
21538        if output.status.success() {
21539            return Ok(());
21540        }
21541        if let Some(code) = output.status.code() {
21542            std::process::exit(code);
21543        }
21544        bail!("digest-wrapped command terminated by signal: {shell_command}");
21545    }
21546
21547    if captured.trim().is_empty() {
21548        let label = match digest_kind {
21549            DigestRunnerKind::Test => "test",
21550            DigestRunnerKind::Log => "log",
21551        };
21552        println!("No {label} output captured.");
21553    } else {
21554        match digest_kind {
21555            DigestRunnerKind::Test => {
21556                render_test_digest_from_input(path, &captured, runner, format)?
21557            }
21558            DigestRunnerKind::Log => render_log_digest_from_input(path, &captured, format)?,
21559        }
21560    }
21561
21562    if output.status.success() {
21563        return Ok(());
21564    }
21565    if let Some(code) = output.status.code() {
21566        std::process::exit(code);
21567    }
21568    bail!("digest-wrapped command terminated by signal: {shell_command}");
21569}
21570
21571struct DigestRunnerExecution {
21572    output: std::process::Output,
21573    executed_command: String,
21574    filter: Option<DigestRunnerFilter>,
21575}
21576
21577impl DigestRunnerExecution {
21578    fn filter_label(&self) -> &'static str {
21579        self.filter
21580            .as_ref()
21581            .map(|filter| filter.tool)
21582            .unwrap_or("none")
21583    }
21584}
21585
21586struct DigestRunnerFilter {
21587    tool: &'static str,
21588    command: String,
21589}
21590
21591impl DigestRunnerFilter {
21592    fn to_json(&self) -> serde_json::Value {
21593        serde_json::json!({
21594            "tool": self.tool,
21595            "command": self.command,
21596        })
21597    }
21598}
21599
21600fn run_digest_runner_command(shell_command: &str) -> Result<DigestRunnerExecution> {
21601    let filter = rtk_rewrite_for_digest_runner(shell_command);
21602    let executed_command = filter
21603        .as_ref()
21604        .map(|filter| filter.command.as_str())
21605        .unwrap_or(shell_command);
21606    let output = Command::new("sh")
21607        .arg("-lc")
21608        .arg(format!("({executed_command}) 2>&1"))
21609        .stdout(Stdio::piped())
21610        .output()
21611        .with_context(|| format!("running digest-wrapped command: {executed_command}"))?;
21612
21613    Ok(DigestRunnerExecution {
21614        output,
21615        executed_command: executed_command.to_string(),
21616        filter,
21617    })
21618}
21619
21620fn rtk_rewrite_for_digest_runner(shell_command: &str) -> Option<DigestRunnerFilter> {
21621    if shell_command.trim_start().starts_with("rtk ") || find_command_on_path("rtk").is_none() {
21622        return None;
21623    }
21624    let output = Command::new("rtk")
21625        .arg("rewrite")
21626        .arg(shell_command)
21627        .output()
21628        .ok()?;
21629    if !output.status.success() {
21630        return None;
21631    }
21632    let rewritten = String::from_utf8_lossy(&output.stdout).trim().to_string();
21633    if rewritten.is_empty() || rewritten == shell_command {
21634        return None;
21635    }
21636    Some(DigestRunnerFilter {
21637        tool: "rtk",
21638        command: rewritten,
21639    })
21640}
21641
/// Searches the directories listed in `PATH` for `command`.
///
/// Returns the first candidate that is a regular file and, on Unix, has at
/// least one execute permission bit set. A non-executable file that merely
/// shares the command's name is skipped, because it cannot be run as a
/// command. Returns `None` when `PATH` is unset or no candidate qualifies.
fn find_command_on_path(command: &str) -> Option<PathBuf> {
    /// Reports whether any execute bit (user, group, or other) is set.
    #[cfg(unix)]
    fn has_execute_permission(metadata: &std::fs::Metadata) -> bool {
        std::os::unix::fs::PermissionsExt::mode(&metadata.permissions()) & 0o111 != 0
    }

    /// Non-Unix platforms expose no execute bit here; accept any regular file.
    #[cfg(not(unix))]
    fn has_execute_permission(_metadata: &std::fs::Metadata) -> bool {
        true
    }

    let path_var = std::env::var_os("PATH")?;
    std::env::split_paths(&path_var)
        .map(|dir| dir.join(command))
        .find(|candidate| {
            // `metadata` follows symlinks, matching the previous `is_file()` check.
            std::fs::metadata(candidate)
                .map(|metadata| metadata.is_file() && has_execute_permission(&metadata))
                .unwrap_or(false)
        })
}
21648
21649pub(crate) fn open_existing_summary_db_read_only(db_path: &Path) -> Result<summarize::SummaryDb> {
21650    if !db_path.exists() {
21651        bail!("no summaries.db found — run `tsift summarize --extract <path>` first");
21652    }
21653    summarize::SummaryDb::open_read_only_resilient(db_path)
21654}
21655
21656fn status_index_needs_fix(report: &status::StatusReport) -> bool {
21657    !matches!(report.index, status::IndexStatus::Fresh { .. })
21658}
21659
21660fn status_instructions_need_fix(report: &status::StatusReport) -> bool {
21661    !matches!(report.instructions, init::InstructionStatus::Current { .. })
21662}
21663
21664pub(crate) fn apply_status_fixes(root: &Path, report: &status::StatusReport) -> Result<()> {
21665    if status_instructions_need_fix(report) {
21666        eprintln!("status fix: refreshing tsift instructions");
21667        init::init(root, false, false)?;
21668    }
21669
21670    if !status_index_needs_fix(report) {
21671        return Ok(());
21672    }
21673
21674    let scopes = config::Config::submodule_dirs(root)?;
21675    if scopes.is_empty() {
21676        eprintln!("status fix: refreshing index");
21677        run_index_update(
21678            &root.join(".tsift/index.db"),
21679            root,
21680            "status --fix refreshing index".to_string(),
21681            root,
21682            None,
21683            false,
21684            false,
21685        )?;
21686        return Ok(());
21687    }
21688
21689    let cfg = config::Config::load(root)?;
21690    for scope in scopes {
21691        if !scope.source_root.exists() {
21692            eprintln!(
21693                "status fix: skipping missing submodule `{}` ({})",
21694                scope.id,
21695                scope.source_root.display()
21696            );
21697            continue;
21698        }
21699        eprintln!("status fix: refreshing submodule `{}` index", scope.id);
21700        run_index_update(
21701            &cfg.db_path_for(root, &scope.id),
21702            &scope.source_root,
21703            format!("status --fix refreshing submodule `{}` index", scope.id),
21704            root,
21705            Some(scope.id.as_str()),
21706            false,
21707            false,
21708        )?;
21709    }
21710
21711    Ok(())
21712}
21713
21714pub(crate) fn status_missing_workspace_scopes(report: &status::StatusReport) -> bool {
21715    match &report.index {
21716        status::IndexStatus::Fresh { missing_scopes, .. }
21717        | status::IndexStatus::Stale { missing_scopes, .. }
21718        | status::IndexStatus::Missing { missing_scopes } => !missing_scopes.is_empty(),
21719    }
21720}
21721
21722pub(crate) fn autoindex_missing_workspace_scopes(
21723    root: &Path,
21724    report: &status::StatusReport,
21725) -> Result<()> {
21726    let missing_scopes = match &report.index {
21727        status::IndexStatus::Fresh { missing_scopes, .. }
21728        | status::IndexStatus::Stale { missing_scopes, .. }
21729        | status::IndexStatus::Missing { missing_scopes } => missing_scopes,
21730    };
21731    if missing_scopes.is_empty() {
21732        return Ok(());
21733    }
21734
21735    let missing_scope_ids = missing_scopes
21736        .iter()
21737        .map(|scope| scope.scope.as_str())
21738        .collect::<std::collections::HashSet<_>>();
21739    let cfg = config::Config::load(root)?;
21740    for scope in config::Config::submodule_dirs(root)? {
21741        if !missing_scope_ids.contains(scope.id.as_str()) || !scope.source_root.exists() {
21742            continue;
21743        }
21744        let db_path = cfg.db_path_for(root, &scope.id);
21745        run_index_update(
21746            &db_path,
21747            &scope.source_root,
21748            format!(
21749                "autoindexing missing submodule `{}` during status",
21750                scope.id
21751            ),
21752            root,
21753            Some(scope.id.as_str()),
21754            false,
21755            false,
21756        )?;
21757    }
21758    Ok(())
21759}
21760
21761pub(crate) fn emit_summary_stats_warnings(stats: &summarize::SummaryStats, root: &Path) {
21762    for warning in &stats.warnings {
21763        let rel_path = relativize_pathbuf(&warning.path, root);
21764        eprintln!(
21765            "warning: summarize stats {}: {}",
21766            rel_path.display(),
21767            warning.message
21768        );
21769    }
21770}
21771
21772fn contextualize_error(err: anyhow::Error, context: String) -> anyhow::Error {
21773    Result::<(), anyhow::Error>::Err(err)
21774        .context(context)
21775        .unwrap_err()
21776}
21777
21778fn should_attach_lock_diagnostics(err: &anyhow::Error) -> bool {
21779    let message = err.to_string();
21780    message.contains("another tsift index writer is already active")
21781        || substrate::error_mentions_locked_db(err)
21782}
21783
21784fn add_write_lock_context(
21785    err: anyhow::Error,
21786    action: String,
21787    root: &std::path::Path,
21788    scope: Option<&str>,
21789) -> anyhow::Error {
21790    if !should_attach_lock_diagnostics(&err) {
21791        return contextualize_error(err, action);
21792    }
21793
21794    let Ok(report) = status::check_locks(root, None, scope) else {
21795        return contextualize_error(err, action);
21796    };
21797
21798    contextualize_error(
21799        err,
21800        format!(
21801            "{}\n\nlock diagnostics:\n{}",
21802            action,
21803            status::format_locks_human(&report, false).trim_end()
21804        ),
21805    )
21806}
21807
21808pub(crate) fn run_index_update(
21809    db_path: &std::path::Path,
21810    source_root: &std::path::Path,
21811    action: String,
21812    root: &std::path::Path,
21813    scope: Option<&str>,
21814    rebuild: bool,
21815    prune: bool,
21816) -> Result<index::IndexSummary> {
21817    let result = (|| {
21818        let db = index::IndexDb::open(db_path)?;
21819        if rebuild {
21820            db.rebuild(source_root)
21821        } else if prune {
21822            db.apply_changes_pruned(source_root)
21823        } else {
21824            db.apply_changes(source_root)
21825        }
21826    })();
21827
21828    let summary = result.map_err(|err| add_write_lock_context(err, action, root, scope))?;
21829    emit_index_warnings(&summary, source_root, scope);
21830    Ok(summary)
21831}
21832
21833pub(crate) fn relativize_index_summary(summary: &mut index::IndexSummary, root: &Path) {
21834    for change in &mut summary.changes {
21835        change.path = relativize_pathbuf(&change.path, root);
21836    }
21837    for warning in &mut summary.warnings {
21838        warning.path = relativize_pathbuf(&warning.path, root);
21839    }
21840}
21841
21842fn emit_index_warnings(summary: &index::IndexSummary, root: &Path, scope: Option<&str>) {
21843    for warning in &summary.warnings {
21844        let rel_path = relativize_pathbuf(&warning.path, root);
21845        let stage = match warning.stage {
21846            index::IndexWarningStage::ReadSource => "read failed",
21847            index::IndexWarningStage::ExtractSymbols => "symbol extraction failed",
21848            index::IndexWarningStage::ExtractCallSites => "call extraction failed",
21849            index::IndexWarningStage::ExtractRoutes => "route extraction failed",
21850        };
21851        let scope_prefix = scope.map(|name| format!("[{}] ", name)).unwrap_or_default();
21852        let lang_suffix = warning
21853            .language
21854            .as_deref()
21855            .map(|lang| format!(" [{}]", lang))
21856            .unwrap_or_default();
21857        eprintln!(
21858            "warning: {}{}{}: {}: {}",
21859            scope_prefix,
21860            rel_path.display(),
21861            lang_suffix,
21862            stage,
21863            warning.message
21864        );
21865    }
21866}
21867
21868pub(crate) fn load_summarize_config(root: &std::path::Path) -> summarize::SummarizeConfig {
21869    let config_path = root.join(".tsift/config.toml");
21870    if !config_path.exists() {
21871        return summarize::SummarizeConfig::default();
21872    }
21873    #[derive(serde::Deserialize, Default)]
21874    struct RawConfig {
21875        #[serde(default)]
21876        summarize: Option<RawSummarize>,
21877    }
21878    #[derive(serde::Deserialize)]
21879    struct RawSummarize {
21880        model: Option<String>,
21881        max_file_tokens: Option<usize>,
21882        api_key_env: Option<String>,
21883    }
21884    let content = std::fs::read_to_string(&config_path).unwrap_or_default();
21885    let raw: RawConfig = toml::from_str(&content).unwrap_or_default();
21886    let defaults = summarize::SummarizeConfig::default();
21887    match raw.summarize {
21888        Some(s) => summarize::SummarizeConfig {
21889            model: s.model.unwrap_or(defaults.model),
21890            max_file_tokens: s.max_file_tokens.unwrap_or(defaults.max_file_tokens),
21891            api_key_env: s.api_key_env.unwrap_or(defaults.api_key_env),
21892        },
21893        None => defaults,
21894    }
21895}
21896
/// The index database selected for a file, as returned by
/// `find_symbols_db_for_file`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExtractSymbolContext {
    /// Location of the index database file.
    db_path: PathBuf,
    /// Root directory the database belongs to: a submodule's source root, or the
    /// workspace root for the shared index.
    source_root: PathBuf,
}
21902
21903pub(crate) fn find_symbols_db_for_file(
21904    root: &Path,
21905    file_path: &Path,
21906) -> Result<Option<ExtractSymbolContext>> {
21907    let cfg = config::Config::load(root)?;
21908    let mut submodules = config::Config::submodule_dirs(root)?;
21909    submodules.sort_by(|left, right| {
21910        right
21911            .source_root
21912            .components()
21913            .count()
21914            .cmp(&left.source_root.components().count())
21915    });
21916
21917    for scope in submodules {
21918        if !file_path.starts_with(&scope.source_root) {
21919            continue;
21920        }
21921        let db_path = cfg.db_path_for(root, &scope.id);
21922        if db_path.exists() {
21923            return Ok(Some(ExtractSymbolContext {
21924                db_path,
21925                source_root: scope.source_root,
21926            }));
21927        }
21928    }
21929
21930    let single = root.join(".tsift/index.db");
21931    if single.exists() && file_path.starts_with(root) {
21932        return Ok(Some(ExtractSymbolContext {
21933            db_path: single,
21934            source_root: root.to_path_buf(),
21935        }));
21936    }
21937
21938    Ok(None)
21939}
21940
21941pub(crate) fn resolve_extract_base(path: &Path) -> Result<PathBuf> {
21942    let canonical = path
21943        .canonicalize()
21944        .with_context(|| format!("canonicalizing {}", path.display()))?;
21945
21946    Ok(if canonical.is_dir() {
21947        canonical
21948    } else {
21949        canonical
21950            .parent()
21951            .map(Path::to_path_buf)
21952            .unwrap_or(canonical)
21953    })
21954}
21955
21956fn normalize_extract_scope_path(path: &Path) -> Result<PathBuf> {
21957    if path.exists() {
21958        return path
21959            .canonicalize()
21960            .with_context(|| format!("canonicalizing extract scope {}", path.display()));
21961    }
21962
21963    Ok(summarize::normalize_lexical_path(path))
21964}
21965
21966pub(crate) fn resolve_extract_scope(root: &Path, extract_path: &Path) -> Result<PathBuf> {
21967    let scope = if extract_path.is_absolute() {
21968        extract_path.to_path_buf()
21969    } else {
21970        root.join(extract_path)
21971    };
21972    normalize_extract_scope_path(&scope)
21973}
21974
21975pub(crate) fn summarize_diff_matches_scope(changed_path: &Path, extract_scope: &Path) -> bool {
21976    normalize_extract_scope_path(changed_path)
21977        .unwrap_or_else(|_| summarize::normalize_lexical_path(changed_path))
21978        .starts_with(extract_scope)
21979}
21980
21981pub(crate) fn summarize_relative_file_path(root: &Path, file_path: &Path) -> String {
21982    summarize::normalize_summary_file_key(file_path.strip_prefix(root).unwrap_or(file_path))
21983}
21984
21985pub(crate) fn summarize_full_extract_deleted_summary_paths(
21986    summary_db: &summarize::SummaryDb,
21987    root: &Path,
21988    extract_scope: &Path,
21989    files_to_extract: &[PathBuf],
21990) -> Result<BTreeSet<String>> {
21991    let live_paths = files_to_extract
21992        .iter()
21993        .map(|file_path| summarize_relative_file_path(root, file_path))
21994        .collect::<BTreeSet<_>>();
21995    let mut deleted = BTreeSet::new();
21996
21997    for cached_path in summary_db.cached_file_paths()? {
21998        if !summarize_diff_matches_scope(&root.join(&cached_path), extract_scope) {
21999            continue;
22000        }
22001        if !live_paths.contains(&cached_path) {
22002            deleted.insert(cached_path);
22003        }
22004    }
22005
22006    Ok(deleted)
22007}
22008
/// One index database a search may read, plus the text needed to talk about it.
#[derive(Debug, Clone)]
struct SearchIndexTarget {
    /// Human-readable name used in messages, e.g. `index` or ``submodule `x` index``.
    label: String,
    /// Location of the index database file.
    db_path: PathBuf,
    /// Directory whose sources the database covers.
    source_root: PathBuf,
    /// Submodule scope id, or `None` for the shared root index.
    scope_name: Option<String>,
    /// Command line suggested to the user for rebuilding this index.
    reindex_cmd: String,
}
22017
/// Result of inspecting a [`SearchIndexTarget`] (see `inspect_search_index`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchIndexState {
    /// The source root or the database file does not exist.
    Missing,
    /// The inspection reported no new, modified or deleted files.
    Fresh,
    /// The index is out of date.
    Stale {
        /// Sum of new, modified and deleted files reported by the inspection.
        stale_files: usize,
    },
}
22024
22025fn resolve_search_index_targets(
22026    root: &Path,
22027    path_hint: &Path,
22028    scope: Option<&str>,
22029    federated: bool,
22030) -> Result<Vec<SearchIndexTarget>> {
22031    if let Some(scope_name) = scope {
22032        let scope = config::Config::resolve_submodule(root, scope_name)?;
22033        let cfg = config::Config::load(root)?;
22034        return Ok(vec![SearchIndexTarget {
22035            label: format!("submodule `{}` index", scope.id),
22036            db_path: cfg.db_path_for(root, &scope.id),
22037            source_root: scope.source_root.clone(),
22038            scope_name: Some(scope.id.clone()),
22039            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
22040        }]);
22041    }
22042
22043    if federated {
22044        let cfg = config::Config::load(root)?;
22045        let mut targets = Vec::new();
22046        for scope in config::Config::submodule_dirs(root)? {
22047            if !cfg.federation_for_scope(&scope) {
22048                continue;
22049            }
22050            targets.push(SearchIndexTarget {
22051                label: format!("submodule `{}` index", scope.id),
22052                db_path: cfg.db_path_for(root, &scope.id),
22053                source_root: scope.source_root.clone(),
22054                scope_name: Some(scope.id.clone()),
22055                reindex_cmd: format!("tsift index --workspace {}", root.display()),
22056            });
22057        }
22058        return Ok(targets);
22059    }
22060
22061    if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
22062        let cfg = config::Config::load(root)?;
22063        return Ok(vec![SearchIndexTarget {
22064            label: format!("submodule `{}` index", scope.id),
22065            db_path: cfg.db_path_for(root, &scope.id),
22066            source_root: scope.source_root.clone(),
22067            scope_name: Some(scope.id.clone()),
22068            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
22069        }]);
22070    }
22071
22072    if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
22073        let cfg = config::Config::load(root)?;
22074        return Ok(vec![SearchIndexTarget {
22075            label: format!("submodule `{}` index", scope.id),
22076            db_path: cfg.db_path_for(root, &scope.id),
22077            source_root: scope.source_root.clone(),
22078            scope_name: Some(scope.id.clone()),
22079            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
22080        }]);
22081    }
22082
22083    let scopes = config::Config::submodule_dirs(root)?;
22084    if !scopes.is_empty() {
22085        let root_db = root.join(".tsift/index.db");
22086        if !root_db.exists() {
22087            let available_scopes = scopes
22088                .iter()
22089                .map(|scope| scope.id.as_str())
22090                .collect::<Vec<_>>()
22091                .join(", ");
22092            let cfg = config::Config::load(root)?;
22093            let indexed_scopes = scopes
22094                .iter()
22095                .filter(|scope| cfg.db_path_for(root, &scope.id).exists())
22096                .map(|scope| scope.id.as_str())
22097                .collect::<Vec<_>>();
22098            let indexed_label = if indexed_scopes.is_empty() {
22099                "none".to_string()
22100            } else {
22101                indexed_scopes.join(", ")
22102            };
22103            bail!(
22104                "workspace root {} has no shared root index at {}. Default search requires `--scope <scope>` or `--federated` when the workspace uses scoped `.tsift/indexes/*/index.db` files. Available scopes: {}. Indexed scopes: {}.",
22105                root.display(),
22106                root_db.display(),
22107                available_scopes,
22108                indexed_label,
22109            );
22110        }
22111    }
22112
22113    Ok(vec![SearchIndexTarget {
22114        label: "index".to_string(),
22115        db_path: root.join(".tsift/index.db"),
22116        source_root: root.to_path_buf(),
22117        scope_name: None,
22118        reindex_cmd: format!("tsift index {}", root.display()),
22119    }])
22120}
22121
22122fn inspect_search_index(target: &SearchIndexTarget) -> Result<SearchIndexState> {
22123    if !target.source_root.exists() || !target.db_path.exists() {
22124        return Ok(SearchIndexState::Missing);
22125    }
22126
22127    let inspection =
22128        index::IndexDb::inspect_read_only(&target.db_path, &target.source_root, false)?;
22129    let stale_files =
22130        inspection.summary.new + inspection.summary.modified + inspection.summary.deleted;
22131    if stale_files == 0 {
22132        Ok(SearchIndexState::Fresh)
22133    } else {
22134        Ok(SearchIndexState::Stale { stale_files })
22135    }
22136}
22137
22138#[derive(Debug, Clone, PartialEq, Eq)]
22139struct RebuildSearchTarget {
22140    label: String,
22141    reason: RebuildSearchReason,
22142    reindex_cmd: String,
22143}
22144
/// Why an index is considered unusable as-is.
#[derive(Debug, Clone, PartialEq, Eq)]
enum RebuildSearchReason {
    /// The index was classified as missing.
    Missing,
    /// The index exists but is out of date.
    Stale {
        /// Number of files that differ from the indexed state.
        stale_files: usize,
    },
}
22150
22151#[derive(Debug, Clone, PartialEq, Eq)]
22152struct DegradedSearchTarget {
22153    label: String,
22154    reason: RebuildSearchReason,
22155    reindex_cmd: String,
22156}
22157
/// How a search proceeds when some indexes could not be rebuilt
/// (see `degraded_search_mode`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DegradedSearchMode {
    /// At least one degraded index is stale: search the existing index snapshot.
    ReadOnly,
    /// Every degraded index is missing: fall back to exact live-file search.
    Exact,
}
22163
22164#[derive(Debug)]
22165struct SearchPrecheck {
22166    targets: Vec<SearchIndexTarget>,
22167    degraded_targets: Vec<DegradedSearchTarget>,
22168}
22169
22170fn is_active_writer_lock_error(err: &anyhow::Error) -> bool {
22171    err.chain().any(|cause| {
22172        cause
22173            .to_string()
22174            .contains("another tsift index writer is already active")
22175    })
22176}
22177
22178fn infer_agent_doc_task_submodule(
22179    root: &Path,
22180    path_hint: &Path,
22181) -> Result<Option<config::WorkspaceScope>> {
22182    let hinted_path = if path_hint.is_absolute() {
22183        path_hint.to_path_buf()
22184    } else {
22185        root.join(path_hint)
22186    };
22187    let Ok(relative) = hinted_path.strip_prefix(root) else {
22188        return Ok(None);
22189    };
22190    let mut components = relative.components();
22191    let Some(std::path::Component::Normal(first)) = components.next() else {
22192        return Ok(None);
22193    };
22194    if first != "tasks" {
22195        return Ok(None);
22196    }
22197    let Some(file_stem) = relative.file_stem().and_then(|stem| stem.to_str()) else {
22198        return Ok(None);
22199    };
22200    config::Config::find_submodule(root, file_stem)
22201}
22202
22203fn degraded_search_target(
22204    target: &SearchIndexTarget,
22205    reason: RebuildSearchReason,
22206) -> DegradedSearchTarget {
22207    DegradedSearchTarget {
22208        label: target.label.clone(),
22209        reason,
22210        reindex_cmd: target.reindex_cmd.clone(),
22211    }
22212}
22213
22214fn apply_search_index_update(
22215    root: &Path,
22216    target: &SearchIndexTarget,
22217) -> Result<index::IndexSummary> {
22218    run_index_update(
22219        &target.db_path,
22220        &target.source_root,
22221        format!("autoindexing {}", target.label),
22222        root,
22223        target.scope_name.as_deref(),
22224        false,
22225        false,
22226    )
22227}
22228
22229fn collect_rebuild_search_targets(
22230    targets: &[SearchIndexTarget],
22231) -> Result<Vec<RebuildSearchTarget>> {
22232    let mut rebuild_targets = Vec::new();
22233    for target in targets {
22234        let reason = match inspect_search_index(target)? {
22235            SearchIndexState::Missing => RebuildSearchReason::Missing,
22236            SearchIndexState::Fresh => continue,
22237            SearchIndexState::Stale { stale_files } => RebuildSearchReason::Stale { stale_files },
22238        };
22239        rebuild_targets.push(RebuildSearchTarget {
22240            label: target.label.clone(),
22241            reason,
22242            reindex_cmd: target.reindex_cmd.clone(),
22243        });
22244    }
22245    Ok(rebuild_targets)
22246}
22247
22248fn rebuild_search_target_detail(target: &RebuildSearchTarget) -> String {
22249    match target.reason {
22250        RebuildSearchReason::Missing => format!("{} is missing", target.label),
22251        RebuildSearchReason::Stale { stale_files } => {
22252            let file_suffix = if stale_files == 1 { "" } else { "s" };
22253            format!(
22254                "{} is stale ({} file{})",
22255                target.label, stale_files, file_suffix
22256            )
22257        }
22258    }
22259}
22260
22261fn rebuild_search_targets_message(rebuild_targets: &[RebuildSearchTarget]) -> String {
22262    if rebuild_targets.len() == 1 {
22263        let target = &rebuild_targets[0];
22264        return format!(
22265            "{}. Run `{}` to rebuild before retrying.",
22266            rebuild_search_target_detail(target),
22267            target.reindex_cmd
22268        );
22269    }
22270
22271    let summary: Vec<String> = rebuild_targets
22272        .iter()
22273        .take(3)
22274        .map(rebuild_search_target_detail)
22275        .collect();
22276    let overflow = rebuild_targets.len().saturating_sub(summary.len());
22277    let mut details = summary.join(", ");
22278    if overflow > 0 {
22279        details.push_str(&format!(", +{} more", overflow));
22280    }
22281    let reindex_cmd = rebuild_targets[0].reindex_cmd.clone();
22282    format!(
22283        "{} indexes need rebuild: {}. Run `{}` to rebuild before retrying.",
22284        rebuild_targets.len(),
22285        details,
22286        reindex_cmd
22287    )
22288}
22289
22290pub(crate) fn precheck_search_indexes(
22291    root: &Path,
22292    path_hint: &Path,
22293    scope: Option<&str>,
22294    federated: bool,
22295    autoindex: bool,
22296) -> Result<SearchPrecheck> {
22297    let targets = resolve_search_index_targets(root, path_hint, scope, federated)?;
22298    let mut stale_targets = Vec::new();
22299    let mut degraded_targets = Vec::new();
22300
22301    for target in &targets {
22302        match inspect_search_index(target)? {
22303            SearchIndexState::Missing => {
22304                if autoindex && let Err(err) = apply_search_index_update(root, target) {
22305                    if is_active_writer_lock_error(&err) {
22306                        degraded_targets
22307                            .push(degraded_search_target(target, RebuildSearchReason::Missing));
22308                    } else {
22309                        return Err(err);
22310                    }
22311                }
22312            }
22313            SearchIndexState::Fresh => {}
22314            SearchIndexState::Stale { stale_files } => {
22315                if autoindex {
22316                    if let Err(err) = apply_search_index_update(root, target) {
22317                        if is_active_writer_lock_error(&err) {
22318                            degraded_targets.push(degraded_search_target(
22319                                target,
22320                                RebuildSearchReason::Stale { stale_files },
22321                            ));
22322                        } else {
22323                            return Err(err);
22324                        }
22325                    }
22326                } else {
22327                    stale_targets.push(RebuildSearchTarget {
22328                        label: target.label.clone(),
22329                        reason: RebuildSearchReason::Stale { stale_files },
22330                        reindex_cmd: target.reindex_cmd.clone(),
22331                    });
22332                }
22333            }
22334        }
22335    }
22336
22337    if stale_targets.is_empty() {
22338        return Ok(SearchPrecheck {
22339            targets,
22340            degraded_targets,
22341        });
22342    }
22343
22344    bail!(
22345        "tsift search aborted: {} \
22346         or re-run without `--no-autoindex`.",
22347        rebuild_search_targets_message(&stale_targets),
22348    );
22349}
22350
22351pub(crate) fn degraded_search_mode(targets: &[DegradedSearchTarget]) -> Option<DegradedSearchMode> {
22352    if targets.is_empty() {
22353        return None;
22354    }
22355
22356    if targets
22357        .iter()
22358        .all(|target| matches!(target.reason, RebuildSearchReason::Missing))
22359    {
22360        Some(DegradedSearchMode::Exact)
22361    } else {
22362        Some(DegradedSearchMode::ReadOnly)
22363    }
22364}
22365
22366fn degraded_search_targets_summary(targets: &[DegradedSearchTarget]) -> String {
22367    if targets.len() == 1 {
22368        let target = &targets[0];
22369        return match target.reason {
22370            RebuildSearchReason::Missing => format!("{} is missing", target.label),
22371            RebuildSearchReason::Stale { stale_files } => {
22372                let file_suffix = if stale_files == 1 { "" } else { "s" };
22373                format!(
22374                    "{} is stale ({} file{})",
22375                    target.label, stale_files, file_suffix
22376                )
22377            }
22378        };
22379    }
22380
22381    let missing = targets
22382        .iter()
22383        .filter(|target| matches!(target.reason, RebuildSearchReason::Missing))
22384        .count();
22385    let stale = targets.len().saturating_sub(missing);
22386    let mut parts = Vec::new();
22387    if stale > 0 {
22388        let suffix = if stale == 1 { "" } else { "es" };
22389        parts.push(format!("{stale} stale index{suffix}"));
22390    }
22391    if missing > 0 {
22392        let suffix = if missing == 1 { "" } else { "es" };
22393        parts.push(format!("{missing} missing index{suffix}"));
22394    }
22395    parts.join(", ")
22396}
22397
22398pub(crate) fn emit_degraded_search_note(
22399    targets: &[DegradedSearchTarget],
22400    mode: DegradedSearchMode,
22401) {
22402    let summary = degraded_search_targets_summary(targets);
22403    let reindex_cmd = &targets[0].reindex_cmd;
22404    match mode {
22405        DegradedSearchMode::ReadOnly => eprintln!(
22406            "note: active tsift writer detected; skipping autoindex because {}. \
22407             Continuing with read-only search and the current index snapshot; symbol hits may lag. \
22408             Retry `{}` after the active writer finishes for fresh index results.",
22409            summary, reindex_cmd
22410        ),
22411        DegradedSearchMode::Exact => eprintln!(
22412            "note: active tsift writer detected; skipping autoindex because {}. \
22413             Continuing with exact live-file search. Retry `{}` after the active writer finishes \
22414             for indexed symbol hits.",
22415            summary, reindex_cmd
22416        ),
22417    }
22418}
22419
22420fn search_timeout_message(
22421    timeout_secs: u64,
22422    strategy: &str,
22423    targets: &[SearchIndexTarget],
22424) -> Result<String> {
22425    let rebuild_targets = collect_rebuild_search_targets(targets)?;
22426    if rebuild_targets.is_empty() {
22427        return Ok(format!(
22428            "tsift search timed out after {}s (strategy: {}). \
22429             The search root looks fresh, so reindexing is unlikely to help. \
22430             Re-run with `--timeout 0` to disable the timeout, narrow `--path` / `--scope`, \
22431             or try a different strategy.",
22432            timeout_secs, strategy,
22433        ));
22434    }
22435
22436    Ok(format!(
22437        "tsift search timed out after {}s (strategy: {}). {}",
22438        timeout_secs,
22439        strategy,
22440        rebuild_search_targets_message(&rebuild_targets),
22441    ))
22442}
22443
/// Returns `true` for the punctuation characters that mark a query as
/// identifier- or path-like: `-`, `_`, `/`, `\`, `.`, `:`, `#` and `@`.
fn is_exact_preferring_query_char(ch: char) -> bool {
    const EXACT_HINT_CHARS: [char; 8] = ['-', '_', '/', '\\', '.', ':', '#', '@'];
    EXACT_HINT_CHARS.contains(&ch)
}
22447
22448fn query_prefers_exact_search(query: &str) -> bool {
22449    let trimmed = query.trim();
22450    !trimmed.is_empty()
22451        && !trimmed.chars().any(char::is_whitespace)
22452        && trimmed.chars().any(|ch| ch.is_alphanumeric())
22453        && trimmed.chars().any(is_exact_preferring_query_char)
22454        && trimmed
22455            .chars()
22456            .all(|ch| ch.is_alphanumeric() || is_exact_preferring_query_char(ch))
22457}
22458
22459pub(crate) fn resolve_search_strategy(query: &str, strategy: Option<String>) -> String {
22460    strategy.unwrap_or_else(|| {
22461        if query_prefers_exact_search(query) {
22462            "exact".to_string()
22463        } else {
22464            "lexical".to_string()
22465        }
22466    })
22467}
22468
22469#[derive(Serialize)]
22470struct SearchBudgetSymbolPreview {
22471    handle: String,
22472    #[serde(skip_serializing_if = "Option::is_none")]
22473    tag_alias: Option<String>,
22474    match_type: String,
22475    kind: String,
22476    name: String,
22477    file: String,
22478    line: i64,
22479    score: f64,
22480    match_count: usize,
22481    surface_count: usize,
22482    file_count: usize,
22483    #[serde(skip_serializing_if = "Vec::is_empty", default)]
22484    surface_examples: Vec<String>,
22485    expand: String,
22486}
22487
22488#[derive(Serialize)]
22489struct SearchBudgetHitPreview {
22490    handle: String,
22491    rank: usize,
22492    path: String,
22493    confidence: String,
22494    score: f64,
22495    preview: String,
22496    expand: String,
22497}
22498
22499#[derive(Serialize)]
22500struct SearchScaleSignals {
22501    preview_symbols: usize,
22502    symbol_families: usize,
22503    raw_symbol_matches: usize,
22504    preview_hits: usize,
22505    returned_hits: usize,
22506    indexed_artifacts: usize,
22507    skipped_artifacts: usize,
22508    max_items: usize,
22509    max_bytes: usize,
22510}
22511
22512#[derive(Serialize)]
22513struct SearchScaleGuard {
22514    level: String,
22515    warning: String,
22516    signals: SearchScaleSignals,
22517    narrow_commands: Vec<String>,
22518}
22519
22520#[derive(Serialize)]
22521struct SearchBudgetReport {
22522    query: String,
22523    strategy: String,
22524    indexed_artifacts: usize,
22525    skipped_artifacts: usize,
22526    max_items: usize,
22527    max_bytes: usize,
22528    symbol_total: usize,
22529    raw_symbol_total: usize,
22530    hit_total: usize,
22531    truncated: bool,
22532    #[serde(skip_serializing_if = "Option::is_none")]
22533    scale_guard: Option<SearchScaleGuard>,
22534    symbols: Vec<SearchBudgetSymbolPreview>,
22535    hits: Vec<SearchBudgetHitPreview>,
22536}
22537
/// Maximum number of example symbol names kept per symbol family in a budgeted
/// search report.
const SEARCH_BUDGET_SURFACE_PREVIEW_LIMIT: usize = 3;
22539
/// Working accumulator for one symbol family while a budgeted search report is
/// being built. The `representative_*` fields come from the first match seen for
/// the family.
struct SearchBudgetSymbolFamily {
    /// Canonical family name, when one could be derived for the symbol.
    canonical_family: Option<String>,
    /// Tag alias of the canonical family, when one could be derived.
    canonical_tag_alias: Option<String>,
    /// Name of the first matching symbol.
    representative_name: String,
    /// Kind of the first matching symbol.
    representative_kind: String,
    /// Match type of the first matching symbol.
    representative_match_type: String,
    /// Display path of the file holding the first matching symbol.
    representative_file: String,
    /// Line of the first matching symbol.
    representative_line: i64,
    /// Score of the first matching symbol.
    representative_score: f64,
    /// Distinct symbol names seen in this family.
    seen_surfaces: HashSet<String>,
    /// Distinct display paths seen in this family.
    seen_files: HashSet<String>,
    /// Up to `SEARCH_BUDGET_SURFACE_PREVIEW_LIMIT` example names, in first-seen order.
    surface_examples: Vec<String>,
    /// Number of raw matches folded into this family.
    match_count: usize,
}
22554
22555fn search_budget_family_query(tag_alias: Option<&str>, fallback_name: &str) -> String {
22556    if let Some(alias) = tag_alias
22557        && let Some(query) = family_query_from_tag_alias(alias)
22558    {
22559        return query;
22560    }
22561    fallback_name.to_string()
22562}
22563
22564fn build_search_budget_family_expand(
22565    strategy: &str,
22566    path: &str,
22567    tag_alias: Option<&str>,
22568    fallback_name: &str,
22569) -> String {
22570    let query = search_budget_family_query(tag_alias, fallback_name);
22571    let effective_strategy = if strategy == "exact" {
22572        "lexical"
22573    } else {
22574        strategy
22575    };
22576    build_search_budget_follow_up(&query, effective_strategy, path)
22577}
22578
22579fn format_search_budget_symbol_name(name: &str, surface_count: usize, max_bytes: usize) -> String {
22580    let preview = if surface_count > 1 {
22581        let extra = surface_count - 1;
22582        let label = if extra == 1 { "variant" } else { "variants" };
22583        format!("{name} (+{extra} {label})")
22584    } else {
22585        name.to_string()
22586    };
22587    truncate_for_budget(&preview, max_bytes)
22588}
22589
22590fn format_search_budget_symbol_file(file: &str, file_count: usize, max_bytes: usize) -> String {
22591    let preview = if file_count > 1 {
22592        let extra = file_count - 1;
22593        let label = if extra == 1 { "file" } else { "files" };
22594        format!("{file} (+{extra} {label})")
22595    } else {
22596        file.to_string()
22597    };
22598    truncate_for_budget(&preview, max_bytes)
22599}
22600
22601pub(crate) fn build_search_budget_follow_up(query: &str, strategy: &str, path: &str) -> String {
22602    let mut command = format!(
22603        "tsift search {} --path {} --limit 20",
22604        shell_quote(query),
22605        shell_quote(path)
22606    );
22607    if strategy == "exact" {
22608        command.push_str(" --exact");
22609    } else if strategy != "lexical" {
22610        command.push_str(&format!(" --strategy {}", shell_quote(strategy)));
22611    }
22612    command
22613}
22614
22615fn build_search_exact_narrow_command(query: &str, path: &str, max_items: usize) -> String {
22616    format!(
22617        "tsift search {} --path {} --limit {} --exact",
22618        shell_quote(query),
22619        shell_quote(path),
22620        max_items.max(1)
22621    )
22622}
22623
22624fn build_search_path_narrow_command(query: &str, strategy: &str, path: &str) -> String {
22625    let mut command = format!(
22626        "tsift search {} --path {} --limit 20",
22627        shell_quote(query),
22628        shell_quote(path)
22629    );
22630    if strategy == "exact" {
22631        command.push_str(" --exact");
22632    } else if strategy != "lexical" {
22633        command.push_str(&format!(" --strategy {}", shell_quote(strategy)));
22634    }
22635    command
22636}
22637
22638#[allow(clippy::too_many_arguments)]
22639fn build_search_scale_guard(
22640    query: &str,
22641    strategy: &str,
22642    root: &Path,
22643    response: &sift::SearchResponse,
22644    symbol_total: usize,
22645    raw_symbol_total: usize,
22646    hit_total: usize,
22647    max_items: usize,
22648    max_bytes: usize,
22649    symbols: &[SearchBudgetSymbolPreview],
22650    hits: &[SearchBudgetHitPreview],
22651) -> Option<SearchScaleGuard> {
22652    let broad_symbols = symbol_total > max_items || raw_symbol_total > max_items;
22653    let broad_hits = hit_total > max_items;
22654    let broad_corpus = response
22655        .indexed_artifacts
22656        .saturating_add(response.skipped_artifacts)
22657        >= 250;
22658    if !broad_symbols && !broad_hits && !broad_corpus {
22659        return None;
22660    }
22661
22662    let mut narrow_commands = Vec::new();
22663    let root_path = root.to_string_lossy();
22664    if strategy != "exact" {
22665        narrow_commands.push(build_search_exact_narrow_command(
22666            query,
22667            root_path.as_ref(),
22668            max_items,
22669        ));
22670    }
22671    if let Some(symbol) = symbols.first() {
22672        narrow_commands.push(symbol.expand.clone());
22673    }
22674    if let Some(hit) = hits.first() {
22675        narrow_commands.push(build_search_path_narrow_command(query, strategy, &hit.path));
22676    }
22677    narrow_commands.push(
22678        "tsift workflow search --json # preserve handles, expand only cited parents".to_string(),
22679    );
22680
22681    Some(SearchScaleGuard {
22682        level: if broad_hits || broad_symbols {
22683            "high-hit".to_string()
22684        } else {
22685            "corpus-size".to_string()
22686        },
22687        warning: "Broad search surface: inspect the preview first and run a narrowing command before dispatching parallel agents."
22688            .to_string(),
22689        signals: SearchScaleSignals {
22690            preview_symbols: symbols.len(),
22691            symbol_families: symbol_total,
22692            raw_symbol_matches: raw_symbol_total,
22693            preview_hits: hits.len(),
22694            returned_hits: hit_total,
22695            indexed_artifacts: response.indexed_artifacts,
22696            skipped_artifacts: response.skipped_artifacts,
22697            max_items,
22698            max_bytes,
22699        },
22700        narrow_commands: dedupe_preserve_order(narrow_commands),
22701    })
22702}
22703
22704pub(crate) fn build_search_budget_report(
22705    query: &str,
22706    strategy: &str,
22707    root: &Path,
22708    response: &sift::SearchResponse,
22709    symbol_hits: &[index::SymbolHit],
22710    absolute: bool,
22711    budget: ResponseBudget,
22712) -> SearchBudgetReport {
22713    let max_items = budget.preview_items();
22714    let max_bytes = budget.preview_bytes();
22715    let raw_symbol_total = symbol_hits.len();
22716    let hit_total = response.hits.len();
22717    let mut family_positions = HashMap::new();
22718    let mut families = Vec::new();
22719
22720    for hit in symbol_hits {
22721        let display_file = if absolute {
22722            hit.file.clone()
22723        } else {
22724            relativize(&hit.file, root)
22725        };
22726        let canonical_family = canonical_tag_family_from_symbol(&hit.name, hit.tags.as_deref());
22727        let family_key = canonical_family
22728            .as_ref()
22729            .map(|family| family.canonical.clone())
22730            .unwrap_or_else(|| hit.name.clone());
22731        let position = *family_positions.entry(family_key).or_insert_with(|| {
22732            families.push(SearchBudgetSymbolFamily {
22733                canonical_family: canonical_family
22734                    .as_ref()
22735                    .map(|family| family.canonical.clone()),
22736                canonical_tag_alias: canonical_family
22737                    .as_ref()
22738                    .map(|family| family.tag_alias.clone()),
22739                representative_name: hit.name.clone(),
22740                representative_kind: hit.kind.clone(),
22741                representative_match_type: hit.match_type.clone(),
22742                representative_file: display_file.clone(),
22743                representative_line: hit.line,
22744                representative_score: hit.score,
22745                seen_surfaces: HashSet::new(),
22746                seen_files: HashSet::new(),
22747                surface_examples: Vec::new(),
22748                match_count: 0,
22749            });
22750            families.len() - 1
22751        });
22752
22753        let family = &mut families[position];
22754        family.match_count += 1;
22755        if family.seen_surfaces.insert(hit.name.clone())
22756            && family.surface_examples.len() < SEARCH_BUDGET_SURFACE_PREVIEW_LIMIT
22757        {
22758            family
22759                .surface_examples
22760                .push(truncate_for_budget(&hit.name, max_bytes));
22761        }
22762        family.seen_files.insert(display_file);
22763    }
22764
22765    let symbol_total = families.len();
22766    let symbols: Vec<SearchBudgetSymbolPreview> = families
22767        .into_iter()
22768        .take(max_items)
22769        .map(|family| {
22770            let file_count = family.seen_files.len();
22771            let surface_count = family.seen_surfaces.len();
22772            let key = format!(
22773                "{}:{}:{}:{}:{}:{}:{}",
22774                family
22775                    .canonical_family
22776                    .as_deref()
22777                    .or(family.canonical_tag_alias.as_deref())
22778                    .unwrap_or(&family.representative_name),
22779                family.canonical_tag_alias.as_deref().unwrap_or(""),
22780                family.representative_kind,
22781                family.representative_file,
22782                family.representative_line,
22783                query,
22784                strategy
22785            );
22786            SearchBudgetSymbolPreview {
22787                handle: stable_handle("sfam", &key),
22788                tag_alias: family
22789                    .canonical_tag_alias
22790                    .as_deref()
22791                    .map(|alias| truncate_for_budget(alias, max_bytes)),
22792                match_type: family.representative_match_type,
22793                kind: family.representative_kind,
22794                name: format_search_budget_symbol_name(
22795                    &family.representative_name,
22796                    surface_count,
22797                    max_bytes,
22798                ),
22799                file: format_search_budget_symbol_file(
22800                    &family.representative_file,
22801                    file_count,
22802                    max_bytes,
22803                ),
22804                line: family.representative_line,
22805                score: family.representative_score,
22806                match_count: family.match_count,
22807                surface_count,
22808                file_count,
22809                surface_examples: family.surface_examples,
22810                expand: build_search_budget_family_expand(
22811                    strategy,
22812                    root.to_string_lossy().as_ref(),
22813                    family.canonical_tag_alias.as_deref(),
22814                    &family.representative_name,
22815                ),
22816            }
22817        })
22818        .collect();
22819
22820    let hits: Vec<SearchBudgetHitPreview> = response
22821        .hits
22822        .iter()
22823        .take(max_items)
22824        .map(|hit| {
22825            let display_path = if absolute {
22826                hit.path.clone()
22827            } else {
22828                relativize(&hit.path, root)
22829            };
22830            let key = format!("{}:{}:{}:{}", display_path, hit.rank, hit.score, query);
22831            let preview = compact_snippet(&hit.snippet)
22832                .map(|snippet| truncate_for_budget(&snippet, max_bytes))
22833                .unwrap_or_default();
22834            SearchBudgetHitPreview {
22835                handle: stable_handle("shit", &key),
22836                rank: hit.rank,
22837                path: truncate_for_budget(&display_path, max_bytes),
22838                confidence: format!("{:?}", hit.confidence),
22839                score: hit.score,
22840                preview,
22841                expand: build_search_budget_follow_up(query, strategy, &display_path),
22842            }
22843        })
22844        .collect();
22845
22846    let scale_guard = build_search_scale_guard(
22847        query,
22848        strategy,
22849        root,
22850        response,
22851        symbol_total,
22852        raw_symbol_total,
22853        hit_total,
22854        max_items,
22855        max_bytes,
22856        &symbols,
22857        &hits,
22858    );
22859
22860    SearchBudgetReport {
22861        query: query.to_string(),
22862        strategy: strategy.to_string(),
22863        indexed_artifacts: response.indexed_artifacts,
22864        skipped_artifacts: response.skipped_artifacts,
22865        max_items,
22866        max_bytes,
22867        symbol_total,
22868        raw_symbol_total,
22869        hit_total,
22870        truncated: symbol_total > max_items || hit_total > max_items,
22871        scale_guard,
22872        symbols,
22873        hits,
22874    }
22875}
22876
22877pub(crate) fn print_search_budget_human(report: &SearchBudgetReport) {
22878    println!(
22879        "search-budget q:{} strategy:{} symbols:{}/{} raw-symbols:{} hits:{}/{} indexed:{} skipped:{}",
22880        shell_quote(&report.query),
22881        report.strategy,
22882        report.symbols.len(),
22883        report.symbol_total,
22884        report.raw_symbol_total,
22885        report.hits.len(),
22886        report.hit_total,
22887        report.indexed_artifacts,
22888        report.skipped_artifacts
22889    );
22890    for symbol in &report.symbols {
22891        let variants = if symbol.surface_examples.is_empty() {
22892            String::new()
22893        } else {
22894            format!(" variants:{}", symbol.surface_examples.join(", "))
22895        };
22896        println!(
22897            "sym {} [{}] {} {}:{} sc:{} matches:{} files:{}{} expand:{}",
22898            format_symbol_preview_line(&symbol.handle, &symbol.name, symbol.tag_alias.as_deref()),
22899            symbol.match_type,
22900            symbol.kind,
22901            symbol.file,
22902            symbol.line,
22903            format_score(symbol.score, true),
22904            symbol.match_count,
22905            symbol.file_count,
22906            variants,
22907            symbol.expand
22908        );
22909    }
22910    for hit in &report.hits {
22911        if hit.preview.is_empty() {
22912            println!(
22913                "hit {} #{} {} [{} {}] expand:{}",
22914                hit.handle,
22915                hit.rank,
22916                hit.path,
22917                hit.confidence,
22918                format_score(hit.score, true),
22919                hit.expand
22920            );
22921        } else {
22922            println!(
22923                "hit {} #{} {} [{} {}] {} expand:{}",
22924                hit.handle,
22925                hit.rank,
22926                hit.path,
22927                hit.confidence,
22928                format_score(hit.score, true),
22929                hit.preview,
22930                hit.expand
22931            );
22932        }
22933    }
22934    if report.truncated {
22935        println!(
22936            "budget truncated items:{} bytes:{}",
22937            report.max_items, report.max_bytes
22938        );
22939    }
22940    if let Some(guard) = &report.scale_guard {
22941        println!("scale guard [{}]: {}", guard.level, guard.warning);
22942        println!(
22943            "signals preview-symbols:{} symbol-families:{} raw-symbols:{} preview-hits:{} hits:{} indexed:{} skipped:{} budget-items:{} budget-bytes:{}",
22944            guard.signals.preview_symbols,
22945            guard.signals.symbol_families,
22946            guard.signals.raw_symbol_matches,
22947            guard.signals.preview_hits,
22948            guard.signals.returned_hits,
22949            guard.signals.indexed_artifacts,
22950            guard.signals.skipped_artifacts,
22951            guard.signals.max_items,
22952            guard.signals.max_bytes
22953        );
22954        for command in &guard.narrow_commands {
22955            println!("narrow: {command}");
22956        }
22957    }
22958}
22959
22960pub(crate) fn collect_source_files(path: &std::path::Path) -> Result<Vec<PathBuf>> {
22961    let mut files = Vec::new();
22962    if path.is_file() {
22963        files.push(path.to_path_buf());
22964        return Ok(files);
22965    }
22966    let walker = ignore::WalkBuilder::new(path)
22967        .hidden(true)
22968        .git_ignore(true)
22969        .build();
22970    for entry in walker {
22971        let entry = entry?;
22972        if entry.file_type().is_some_and(|ft| ft.is_file()) {
22973            let p = entry.path();
22974            if let Some(ext) = p.extension() {
22975                let ext = ext.to_string_lossy();
22976                if matches!(
22977                    ext.as_ref(),
22978                    "rs" | "py"
22979                        | "ts"
22980                        | "tsx"
22981                        | "js"
22982                        | "jsx"
22983                        | "kt"
22984                        | "kts"
22985                        | "zig"
22986                        | "sh"
22987                        | "bash"
22988                        | "zsh"
22989                ) {
22990                    files.push(p.to_path_buf());
22991                }
22992            }
22993        }
22994    }
22995    Ok(files)
22996}
22997
22998#[cfg(test)]
22999mod tests {
23000    use super::*;
23001
23002    use std::cell::RefCell;
23003    use substrate::{ConvexEdgeRow, ConvexGraphClient, ConvexGraphStore, ConvexNodeRow};
23004    fn parse_cli<I, T>(itr: I) -> Cli
23005    where
23006        I: IntoIterator<Item = T> + Send + 'static,
23007        T: Into<std::ffi::OsString> + Clone + Send + 'static,
23008    {
23009        std::thread::Builder::new()
23010            .name("cli-parse".to_string())
23011            .stack_size(16 * 1024 * 1024)
23012            .spawn(move || Cli::parse_from(itr))
23013            .unwrap()
23014            .join()
23015            .unwrap()
23016    }
23017
23018    fn try_parse_cli<I, T>(itr: I) -> std::result::Result<Cli, clap::Error>
23019    where
23020        I: IntoIterator<Item = T> + Send + 'static,
23021        T: Into<std::ffi::OsString> + Clone + Send + 'static,
23022    {
23023        std::thread::Builder::new()
23024            .name("cli-try-parse".to_string())
23025            .stack_size(16 * 1024 * 1024)
23026            .spawn(move || Cli::try_parse_from(itr))
23027            .unwrap()
23028            .join()
23029            .unwrap()
23030    }
23031
23032    #[derive(Default)]
23033    struct MemoryConvexGraphClient {
23034        nodes: RefCell<BTreeMap<String, ConvexNodeRow>>,
23035        edges: RefCell<BTreeMap<String, ConvexEdgeRow>>,
23036    }
23037
23038    impl ConvexGraphClient for MemoryConvexGraphClient {
23039        fn upsert_node_row(&self, row: &ConvexNodeRow) -> Result<()> {
23040            self.nodes
23041                .borrow_mut()
23042                .insert(row.external_id.clone(), row.clone());
23043            Ok(())
23044        }
23045
23046        fn upsert_edge_row(&self, row: &ConvexEdgeRow) -> Result<()> {
23047            self.edges
23048                .borrow_mut()
23049                .insert(row.edge_key.clone(), row.clone());
23050            Ok(())
23051        }
23052
23053        fn delete_node_row(&self, external_id: &str) -> Result<usize> {
23054            Ok(usize::from(
23055                self.nodes.borrow_mut().remove(external_id).is_some(),
23056            ))
23057        }
23058
23059        fn delete_edge_row(&self, edge_key: &str) -> Result<usize> {
23060            Ok(usize::from(
23061                self.edges.borrow_mut().remove(edge_key).is_some(),
23062            ))
23063        }
23064
23065        fn node_row(&self, external_id: &str) -> Result<Option<ConvexNodeRow>> {
23066            Ok(self.nodes.borrow().get(external_id).cloned())
23067        }
23068
23069        fn node_rows(&self) -> Result<Vec<ConvexNodeRow>> {
23070            Ok(self.nodes.borrow().values().cloned().collect())
23071        }
23072
23073        fn edge_rows(&self) -> Result<Vec<ConvexEdgeRow>> {
23074            Ok(self.edges.borrow().values().cloned().collect())
23075        }
23076
23077        fn node_rows_by_kind(&self, kind: &str) -> Result<Vec<ConvexNodeRow>> {
23078            Ok(self
23079                .nodes
23080                .borrow()
23081                .values()
23082                .filter(|row| row.kind == kind)
23083                .cloned()
23084                .collect())
23085        }
23086
23087        fn outgoing_edge_rows(
23088            &self,
23089            from_external_id: &str,
23090            kind: Option<&str>,
23091        ) -> Result<Vec<ConvexEdgeRow>> {
23092            Ok(self
23093                .edges
23094                .borrow()
23095                .values()
23096                .filter(|row| row.from_external_id == from_external_id)
23097                .filter(|row| kind.is_none_or(|kind| row.kind == kind))
23098                .cloned()
23099                .collect())
23100        }
23101    }
23102
23103    fn init_git_repo(path: &Path) {
23104        let status = std::process::Command::new("git")
23105            .args(["init"])
23106            .current_dir(path)
23107            .status()
23108            .unwrap();
23109        assert!(status.success(), "git init failed");
23110
23111        let status = std::process::Command::new("git")
23112            .args(["add", "."])
23113            .current_dir(path)
23114            .status()
23115            .unwrap();
23116        assert!(status.success(), "git add failed");
23117
23118        let status = std::process::Command::new("git")
23119            .args([
23120                "-c",
23121                "user.name=tsift-tests",
23122                "-c",
23123                "user.email=tsift-tests@example.com",
23124                "commit",
23125                "--quiet",
23126                "-m",
23127                "init",
23128            ])
23129            .current_dir(path)
23130            .status()
23131            .unwrap();
23132        assert!(status.success(), "git commit failed");
23133    }
23134
23135    fn write_empty_root_index(root: &Path) {
23136        let index_dir = root.join(".tsift");
23137        fs::create_dir_all(&index_dir).unwrap();
23138        fs::write(index_dir.join("index.db"), "").unwrap();
23139    }
23140
23141    fn write_repeated_lines(path: &Path, line: &str, lines: usize) -> PathBuf {
23142        if let Some(parent) = path.parent() {
23143            fs::create_dir_all(parent).unwrap();
23144        }
23145        let body = std::iter::repeat_n(line, lines)
23146            .collect::<Vec<_>>()
23147            .join("\n");
23148        fs::write(path, format!("{body}\n")).unwrap();
23149        path.to_path_buf()
23150    }
23151
23152    // --- classify_task ---
23153
23154    #[test]
23155    fn route_search_defaults_to_haiku() {
23156        let (tier, model) = classify_task("find all uses of authenticate");
23157        assert_eq!(tier, "haiku");
23158        assert!(
23159            model.contains("haiku"),
23160            "expected haiku model, got {}",
23161            model
23162        );
23163    }
23164
23165    #[test]
23166    fn route_edit_keywords_to_sonnet() {
23167        for kw in &[
23168            "edit the file",
23169            "fix the bug",
23170            "update the config",
23171            "remove dead code",
23172            "create a new module",
23173        ] {
23174            let (tier, _) = classify_task(kw);
23175            assert_eq!(tier, "sonnet", "expected sonnet for {:?}", kw);
23176        }
23177    }
23178
23179    #[test]
23180    fn route_architecture_keywords_to_opus() {
23181        for kw in &[
23182            "design the API",
23183            "architecture review",
23184            "plan the migration",
23185            "analyze the system",
23186            "evaluate trade-offs",
23187        ] {
23188            let (tier, _) = classify_task(kw);
23189            assert_eq!(tier, "opus", "expected opus for {:?}", kw);
23190        }
23191    }
23192
23193    #[test]
23194    fn route_architecture_beats_edit() {
23195        // "design and implement" — architecture signal wins (checked first)
23196        let (tier, _) = classify_task("design and implement the new auth service");
23197        assert_eq!(tier, "opus");
23198    }
23199
23200    #[test]
23201    fn cli_accepts_global_compact_flag() {
23202        let cli = parse_cli(["tsift", "--compact", "status"]);
23203        assert!(cli.compact);
23204        assert!(matches!(cli.command, Some(Commands::Status { .. })));
23205    }
23206
23207    #[test]
23208    fn summarize_diff_scope_matches_relative_directory() {
23209        let root = Path::new("/repo");
23210        let extract_scope = resolve_extract_scope(root, Path::new("src/feature")).unwrap();
23211
23212        assert!(summarize_diff_matches_scope(
23213            Path::new("/repo/src/feature/main.rs"),
23214            &extract_scope
23215        ));
23216        assert!(!summarize_diff_matches_scope(
23217            Path::new("/repo/src/other/main.rs"),
23218            &extract_scope
23219        ));
23220    }
23221
23222    #[test]
23223    fn summarize_diff_scope_matches_relative_file() {
23224        let root = Path::new("/repo");
23225        let extract_scope = resolve_extract_scope(root, Path::new("src/feature/main.rs")).unwrap();
23226
23227        assert!(summarize_diff_matches_scope(
23228            Path::new("/repo/src/feature/main.rs"),
23229            &extract_scope
23230        ));
23231        assert!(!summarize_diff_matches_scope(
23232            Path::new("/repo/src/feature/lib.rs"),
23233            &extract_scope
23234        ));
23235    }
23236
23237    #[test]
23238    fn summarize_extract_scope_walks_relative_paths_from_root() {
23239        let dir = tempfile::tempdir().unwrap();
23240        let source_dir = dir.path().join("src");
23241        std::fs::create_dir_all(&source_dir).unwrap();
23242        let main_rs = source_dir.join("main.rs");
23243        std::fs::write(&main_rs, "fn alpha() {}\n").unwrap();
23244
23245        let extract_scope = resolve_extract_scope(dir.path(), Path::new("src")).unwrap();
23246        let files = collect_source_files(&extract_scope).unwrap();
23247
23248        assert_eq!(files, vec![main_rs]);
23249    }
23250
23251    #[test]
23252    fn summarize_extract_base_uses_nested_path_instead_of_project_root() {
23253        let dir = tempfile::tempdir().unwrap();
23254        let nested = dir.path().join("src/nested");
23255        std::fs::create_dir_all(&nested).unwrap();
23256        std::fs::write(dir.path().join("root.rs"), "fn root_level() {}\n").unwrap();
23257        let nested_file = nested.join("main.rs");
23258        std::fs::write(&nested_file, "fn nested_only() {}\n").unwrap();
23259
23260        let extract_base = resolve_extract_base(&nested).unwrap();
23261        let extract_scope = resolve_extract_scope(&extract_base, Path::new(".")).unwrap();
23262        let files = collect_source_files(&extract_scope).unwrap();
23263
23264        assert_eq!(extract_scope, nested);
23265        assert_eq!(files, vec![nested_file]);
23266    }
23267
23268    #[test]
23269    fn summarize_extract_base_uses_parent_of_file_path() {
23270        let dir = tempfile::tempdir().unwrap();
23271        let nested = dir.path().join("src/nested");
23272        std::fs::create_dir_all(&nested).unwrap();
23273        let file_path = nested.join("main.rs");
23274        std::fs::write(&file_path, "fn nested_only() {}\n").unwrap();
23275
23276        let extract_base = resolve_extract_base(&file_path).unwrap();
23277
23278        assert_eq!(extract_base, nested);
23279    }
23280
23281    #[test]
23282    fn summarize_extract_scope_normalizes_dotdot_segments() {
23283        let dir = tempfile::tempdir().unwrap();
23284        let source_dir = dir.path().join("src");
23285        std::fs::create_dir_all(&source_dir).unwrap();
23286
23287        let extract_scope = resolve_extract_scope(dir.path(), Path::new("src/../src")).unwrap();
23288
23289        assert_eq!(extract_scope, source_dir.canonicalize().unwrap());
23290        assert!(summarize_diff_matches_scope(
23291            &source_dir.join("main.rs"),
23292            &extract_scope
23293        ));
23294    }
23295
23296    #[cfg(unix)]
23297    #[test]
23298    fn summarize_extract_scope_canonicalizes_absolute_symlink_paths() {
23299        use std::os::unix::fs::symlink;
23300
23301        let dir = tempfile::tempdir().unwrap();
23302        let real_root = dir.path().join("real");
23303        let source_dir = real_root.join("src");
23304        std::fs::create_dir_all(&source_dir).unwrap();
23305        let symlink_scope = dir.path().join("scope-link");
23306        symlink(&source_dir, &symlink_scope).unwrap();
23307
23308        let extract_scope = resolve_extract_scope(&real_root, &symlink_scope).unwrap();
23309
23310        assert_eq!(extract_scope, source_dir.canonicalize().unwrap());
23311        assert!(summarize_diff_matches_scope(
23312            &source_dir.join("lib.rs"),
23313            &extract_scope
23314        ));
23315    }
23316
23317    #[test]
23318    fn summarize_diff_extract_includes_untracked_files() {
23319        let dir = tempfile::tempdir().unwrap();
23320        std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
23321        init_git_repo(dir.path());
23322
23323        let source_dir = dir.path().join("src");
23324        std::fs::create_dir_all(&source_dir).unwrap();
23325        let new_file = source_dir.join("new.rs");
23326        std::fs::write(&new_file, "fn alpha_helper() {}\n").unwrap();
23327
23328        let files = summarize::git_changed_files(dir.path()).unwrap();
23329
23330        assert_eq!(files.existing, vec![new_file]);
23331        assert!(files.deleted.is_empty());
23332    }
23333
23334    #[test]
23335    fn summarize_diff_extract_treats_unborn_head_as_untracked_only() {
23336        let dir = tempfile::tempdir().unwrap();
23337        let status = std::process::Command::new("git")
23338            .args(["init"])
23339            .current_dir(dir.path())
23340            .status()
23341            .unwrap();
23342        assert!(status.success(), "git init failed");
23343
23344        let source_dir = dir.path().join("src");
23345        std::fs::create_dir_all(&source_dir).unwrap();
23346        let new_file = source_dir.join("new.rs");
23347        std::fs::write(&new_file, "fn alpha_helper() {}\n").unwrap();
23348
23349        let files = summarize::git_changed_files(dir.path()).unwrap();
23350
23351        assert_eq!(files.existing, vec![new_file]);
23352        assert!(files.deleted.is_empty());
23353    }
23354
23355    #[test]
23356    fn summarize_diff_extract_tracks_deleted_files() {
23357        let dir = tempfile::tempdir().unwrap();
23358        let source_dir = dir.path().join("src");
23359        std::fs::create_dir_all(&source_dir).unwrap();
23360        let deleted_file = source_dir.join("gone.rs");
23361        std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
23362        init_git_repo(dir.path());
23363
23364        std::fs::remove_file(&deleted_file).unwrap();
23365
23366        let files = summarize::git_changed_files(dir.path()).unwrap();
23367
23368        assert!(files.existing.is_empty());
23369        assert_eq!(files.deleted, vec![deleted_file]);
23370    }
23371
23372    #[test]
23373    fn summarize_diff_extract_tracks_git_renames() {
23374        let dir = tempfile::tempdir().unwrap();
23375        let source_dir = dir.path().join("src");
23376        std::fs::create_dir_all(&source_dir).unwrap();
23377        let old_file = source_dir.join("old.rs");
23378        let new_file = source_dir.join("new.rs");
23379        std::fs::write(&old_file, "fn stale() {}\n").unwrap();
23380        init_git_repo(dir.path());
23381
23382        let status = std::process::Command::new("git")
23383            .args(["mv", "src/old.rs", "src/new.rs"])
23384            .current_dir(dir.path())
23385            .status()
23386            .unwrap();
23387        assert!(status.success(), "git mv failed");
23388
23389        let files = summarize::git_changed_files(dir.path()).unwrap();
23390
23391        assert_eq!(files.existing, vec![new_file]);
23392        assert_eq!(files.deleted, vec![old_file]);
23393    }
23394
23395    #[test]
23396    fn summarize_diff_extract_deletes_removed_summary_rows() {
23397        let dir = tempfile::tempdir().unwrap();
23398        let source_dir = dir.path().join("src");
23399        std::fs::create_dir_all(&source_dir).unwrap();
23400        let deleted_file = source_dir.join("gone.rs");
23401        std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
23402        std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
23403        init_git_repo(dir.path());
23404
23405        let summary_db =
23406            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23407        summary_db
23408            .insert(&summarize::Summary {
23409                id: 0,
23410                symbol_name: "stale".to_string(),
23411                file_path: "src/gone.rs".to_string(),
23412                content_hash: "hash1".to_string(),
23413                summary: "stale summary".to_string(),
23414                entities: None,
23415                relationships: None,
23416                concept_labels: None,
23417                extracted_at: "1700000000".to_string(),
23418                model: "test".to_string(),
23419                tokens_input: Some(100),
23420                tokens_output: Some(50),
23421            })
23422            .unwrap();
23423
23424        std::fs::remove_file(&deleted_file).unwrap();
23425
23426        cmd_summarize(
23427            None,
23428            None,
23429            Some(PathBuf::from("src")),
23430            true,
23431            false,
23432            dir.path(),
23433            false,
23434            true,
23435            false,
23436            false,
23437            false,
23438        )
23439        .unwrap();
23440
23441        assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
23442    }
23443
23444    #[test]
23445    fn summarize_diff_extract_deletes_renamed_summary_rows() {
23446        let dir = tempfile::tempdir().unwrap();
23447        let source_dir = dir.path().join("src");
23448        std::fs::create_dir_all(&source_dir).unwrap();
23449        let old_file = source_dir.join("old.rs");
23450        std::fs::write(&old_file, "fn stale() {}\n").unwrap();
23451        std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
23452        init_git_repo(dir.path());
23453
23454        let summary_db =
23455            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23456        summary_db
23457            .insert(&summarize::Summary {
23458                id: 0,
23459                symbol_name: "stale".to_string(),
23460                file_path: "src/old.rs".to_string(),
23461                content_hash: "hash1".to_string(),
23462                summary: "stale summary".to_string(),
23463                entities: None,
23464                relationships: None,
23465                concept_labels: None,
23466                extracted_at: "1700000000".to_string(),
23467                model: "test".to_string(),
23468                tokens_input: Some(100),
23469                tokens_output: Some(50),
23470            })
23471            .unwrap();
23472
23473        let status = std::process::Command::new("git")
23474            .args(["mv", "src/old.rs", "src/new.rs"])
23475            .current_dir(dir.path())
23476            .status()
23477            .unwrap();
23478        assert!(status.success(), "git mv failed");
23479
23480        cmd_summarize(
23481            None,
23482            None,
23483            Some(PathBuf::from("src")),
23484            true,
23485            false,
23486            dir.path(),
23487            false,
23488            true,
23489            false,
23490            false,
23491            false,
23492        )
23493        .unwrap();
23494
23495        assert!(summary_db.get_by_file("src/old.rs").unwrap().is_empty());
23496    }
23497
23498    #[test]
23499    fn summarize_full_extract_deletes_removed_summary_rows_when_scope_is_empty() {
23500        let dir = tempfile::tempdir().unwrap();
23501        let source_dir = dir.path().join("src");
23502        std::fs::create_dir_all(&source_dir).unwrap();
23503        let deleted_file = source_dir.join("gone.rs");
23504        std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
23505
23506        let summary_db =
23507            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
23508        summary_db
23509            .insert(&summarize::Summary {
23510                id: 0,
23511                symbol_name: "stale".to_string(),
23512                file_path: "src/gone.rs".to_string(),
23513                content_hash: "hash1".to_string(),
23514                summary: "stale summary".to_string(),
23515                entities: None,
23516                relationships: None,
23517                concept_labels: None,
23518                extracted_at: "1700000000".to_string(),
23519                model: "test".to_string(),
23520                tokens_input: Some(100),
23521                tokens_output: Some(50),
23522            })
23523            .unwrap();
23524
23525        std::fs::remove_file(&deleted_file).unwrap();
23526
23527        cmd_summarize(
23528            None,
23529            None,
23530            Some(PathBuf::from("src")),
23531            false,
23532            false,
23533            dir.path(),
23534            false,
23535            true,
23536            false,
23537            false,
23538            false,
23539        )
23540        .unwrap();
23541
23542        assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
23543    }
23544
/// Extraction must fail fast while the summary writer lock is held.
///
/// Seeds one cached summary for `src/lib.rs`, takes the writer lock through
/// `hold_writer_lock`, then checks that `cmd_summarize` returns an error that
/// names the active extractor and mentions `tsift summarize --extract`.
#[test]
fn summarize_extract_fails_fast_when_summary_writer_lock_is_live() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    // Minimal project: a single Rust file under `src/`.
    let src_dir = root.join("src");
    std::fs::create_dir_all(&src_dir).unwrap();
    let lib_rs = src_dir.join("lib.rs");
    std::fs::write(&lib_rs, "fn helper() {}\n").unwrap();
    let bytes = std::fs::read(&lib_rs).unwrap();

    let db_path = root.join(".tsift/summaries.db");
    {
        // Scoped so the database handle is dropped before the lock is taken.
        let db = summarize::SummaryDb::open(&db_path).unwrap();
        let cached = summarize::Summary {
            id: 0,
            symbol_name: "lib.rs".to_string(),
            file_path: "src/lib.rs".to_string(),
            content_hash: summarize::content_hash(&bytes),
            summary: "cached summary".to_string(),
            entities: None,
            relationships: None,
            concept_labels: None,
            extracted_at: "1700000000".to_string(),
            model: "test".to_string(),
            tokens_input: Some(100),
            tokens_output: Some(50),
        };
        db.insert(&cached).unwrap();
    }

    // The guard must stay alive for the duration of the command under test.
    let lock_path = summarize::writer_lock_path(&db_path);
    let _guard = hold_writer_lock(&lock_path);

    let failure = cmd_summarize(
        None,
        None,
        Some(PathBuf::from("src")),
        false,
        false,
        root,
        false,
        true,
        false,
        false,
        false,
    )
    .unwrap_err()
    .to_string();

    assert!(failure.contains("another tsift summarize extractor is already active"));
    assert!(failure.contains("tsift summarize --extract"));
}
23596
/// Running `cmd_summarize` against an empty directory must return a
/// "no summaries.db found" error and must not create the database file.
#[test]
fn summarize_stats_fails_closed_when_cache_missing() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    let failure = cmd_summarize(
        None, None, None, false, true, root, false, false, false, false, false,
    )
    .unwrap_err();

    let mentions_missing_cache = failure.to_string().contains("no summaries.db found");
    assert!(mentions_missing_cache, "got: {failure}");

    // The failed query must not have created the cache as a side effect.
    let cache_file = root.join(".tsift/summaries.db");
    assert!(!cache_file.exists());
}
23621
/// `cmd_summarize` must still succeed while a rollback-journal lock is held
/// on the summaries database (per the test name, through a snapshot fallback).
///
/// One summary row is inserted and the handle dropped before
/// `hold_rollback_journal_lock` takes the lock.
#[test]
fn summarize_stats_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let dir = tempfile::tempdir().unwrap();
    let db_path = dir.path().join(".tsift/summaries.db");

    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    summary_db
        .insert(&summarize::Summary {
            id: 0,
            symbol_name: "alpha_helper".to_string(),
            file_path: "src/lib.rs".to_string(),
            content_hash: "hash1".to_string(),
            summary: "cached summary".to_string(),
            entities: None,
            relationships: None,
            concept_labels: None,
            extracted_at: "1700000000".to_string(),
            model: "claude-haiku-4-5-20251001".to_string(),
            tokens_input: Some(100),
            tokens_output: Some(40),
        })
        .unwrap();
    drop(summary_db);

    // The guard must outlive the command so the lock is live during the query.
    let _lock = hold_rollback_journal_lock(&db_path);

    let result = cmd_summarize(
        None,
        None,
        None,
        false,
        true,
        dir.path(),
        false,
        false,
        false,
        false,
        false,
    );

    // Report the underlying error instead of a bare `is_ok()` failure so a
    // regression is diagnosable from the test output alone.
    if let Err(err) = result {
        panic!("expected summarize to succeed while the journal is locked, got: {err}");
    }
}
23662
/// Looking up the `alpha_helper` summary must still succeed while a
/// rollback-journal lock is held on the summaries database (per the test
/// name, through a snapshot fallback).
#[test]
fn summarize_symbol_query_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let dir = tempfile::tempdir().unwrap();
    let db_path = dir.path().join(".tsift/summaries.db");

    let summary_db = summarize::SummaryDb::open(&db_path).unwrap();
    summary_db
        .insert(&summarize::Summary {
            id: 0,
            symbol_name: "alpha_helper".to_string(),
            file_path: "src/lib.rs".to_string(),
            content_hash: "hash1".to_string(),
            summary: "cached summary".to_string(),
            entities: None,
            relationships: None,
            concept_labels: None,
            extracted_at: "1700000000".to_string(),
            model: "claude-haiku-4-5-20251001".to_string(),
            tokens_input: Some(100),
            tokens_output: Some(40),
        })
        .unwrap();
    drop(summary_db);

    // The guard must outlive the command so the lock is live during the query.
    let _lock = hold_rollback_journal_lock(&db_path);

    let result = cmd_summarize(
        Some("alpha_helper".to_string()),
        None,
        None,
        false,
        false,
        dir.path(),
        false,
        true,
        false,
        false,
        false,
    );

    // Report the underlying error instead of a bare `is_ok()` failure so a
    // regression is diagnosable from the test output alone.
    if let Err(err) = result {
        panic!("expected symbol query to succeed while the journal is locked, got: {err}");
    }
}
23703
/// A query issued from `src/nested` must succeed against the summaries
/// database stored under the temp-dir root and must not create a second
/// `.tsift/summaries.db` inside the nested directory.
#[test]
fn summarize_cmd_uses_ancestor_project_root_for_nested_paths() {
    let dir = tempfile::tempdir().unwrap();
    let nested = dir.path().join("src/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let summary_db =
        summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
    summary_db
        .insert(&summarize::Summary {
            id: 0,
            symbol_name: "alpha_helper".to_string(),
            file_path: "src/lib.rs".to_string(),
            content_hash: "hash1".to_string(),
            summary: "cached summary".to_string(),
            entities: None,
            relationships: None,
            concept_labels: None,
            extracted_at: "1700000000".to_string(),
            model: "claude-haiku-4-5-20251001".to_string(),
            tokens_input: Some(100),
            tokens_output: Some(40),
        })
        .unwrap();

    let result = cmd_summarize(
        Some("alpha_helper".to_string()),
        None,
        None,
        false,
        false,
        &nested,
        false,
        true,
        false,
        false,
        false,
    );

    // Report the underlying error instead of a bare `is_ok()` failure so a
    // regression is diagnosable from the test output alone.
    if let Err(err) = result {
        panic!("expected summarize to succeed from a nested path, got: {err}");
    }
    assert!(!nested.join(".tsift/summaries.db").exists());
}
23746
/// With two submodules declared in `.gitmodules` and one index file per
/// scope, `find_symbols_db_for_file` must pick the `beta` index and the
/// `src/beta` source root for a file that lives under `src/beta`.
#[test]
fn summarize_extract_uses_matching_scoped_index_for_workspace_file() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    let gitmodules = concat!(
        "[submodule \"src/alpha\"]\n",
        "\tpath = src/alpha\n",
        "\turl = https://example.com/alpha\n",
        "[submodule \"src/beta\"]\n",
        "\tpath = src/beta\n",
        "\turl = https://example.com/beta\n",
    );
    std::fs::write(root.join(".gitmodules"), gitmodules).unwrap();

    let alpha_root = root.join("src/alpha");
    let beta_root = root.join("src/beta");
    for submodule_root in [&alpha_root, &beta_root] {
        std::fs::create_dir_all(submodule_root.join("src")).unwrap();
    }
    for scope in ["alpha", "beta"] {
        std::fs::create_dir_all(root.join(format!(".tsift/indexes/{scope}"))).unwrap();
    }

    std::fs::write(alpha_root.join("src/lib.rs"), "fn alpha_helper() {}\n").unwrap();
    let beta_file = beta_root.join("src/lib.rs");
    std::fs::write(&beta_file, "fn beta_helper() {}\n").unwrap();

    // Empty placeholder index files: only their paths matter to the lookup here.
    for scope in ["alpha", "beta"] {
        std::fs::write(root.join(format!(".tsift/indexes/{scope}/index.db")), "").unwrap();
    }

    let resolved = find_symbols_db_for_file(root, &beta_file)
        .unwrap()
        .expect("expected matching scoped index");

    let expected_db = root.join(".tsift/indexes/beta/index.db");
    assert_eq!(resolved.db_path, expected_db);
    assert_eq!(resolved.source_root, beta_root);
}
23784
23785    // --- apply_edit_op ---
23786
23787    fn make_op(old: &str, new: &str, replace_all: bool) -> EditOp {
23788        EditOp {
23789            file: PathBuf::from("dummy.txt"),
23790            old: old.to_string(),
23791            new: new.to_string(),
23792            replace_all,
23793        }
23794    }
23795
/// A unique match is replaced once and reported as one replacement.
#[test]
fn edit_replaces_single_occurrence() {
    let op = make_op("world", "rust", false);
    let (edited, replacements) = apply_edit_op("hello world", &op).unwrap();
    assert_eq!(edited, "hello rust");
    assert_eq!(replacements, 1);
}
23804
/// With `replace_all` set, all three matches are rewritten and counted.
#[test]
fn edit_replace_all_replaces_every_occurrence() {
    let op = make_op("foo", "bar", true);
    let (edited, replacements) = apply_edit_op("foo foo foo", &op).unwrap();
    assert_eq!(edited, "bar bar bar");
    assert_eq!(replacements, 3);
}
23813
/// An edit whose search text is absent from the content is rejected.
#[test]
fn edit_fails_when_old_not_found() {
    let op = make_op("missing", "x", false);
    let outcome = apply_edit_op("hello world", &op);
    assert!(outcome.is_err());
}
23820
/// Two matches without `replace_all` must fail, and the error text must
/// report the match count ("2 times").
#[test]
fn edit_fails_when_ambiguous_without_replace_all() {
    let op = make_op("foo", "bar", false);
    let message = apply_edit_op("foo foo", &op).unwrap_err().to_string();
    assert!(message.contains("2 times"), "got: {message}");
}
23828
/// An edit whose replacement equals its search text is rejected.
#[test]
fn edit_fails_when_old_equals_new() {
    let op = make_op("hello", "hello", false);
    let outcome = apply_edit_op("hello", &op);
    assert!(outcome.is_err());
}
23835
/// When the commit hook fails on the second file (commit index 1), both
/// files must still hold their original contents afterwards.
#[test]
fn edit_batch_rolls_back_when_later_swap_fails() {
    let workspace = tempfile::tempdir().unwrap();
    let alpha = workspace.path().join("alpha.txt");
    let beta = workspace.path().join("beta.txt");
    fs::write(&alpha, "alpha old\n").unwrap();
    fs::write(&beta, "beta old\n").unwrap();

    // The same "old" -> "new" single replacement is applied to each file.
    let edits = [&alpha, &beta]
        .into_iter()
        .map(|target| EditOp {
            file: target.clone(),
            old: "old".to_string(),
            new: "new".to_string(),
            replace_all: false,
        })
        .collect();
    let batch = EditBatch { edits };

    let plan = build_edit_plan(&batch).unwrap();
    // Inject a failure at the second commit step.
    let outcome = apply_edit_plan_atomically_inner(plan, |commit_index, _| match commit_index {
        1 => bail!("simulated swap failure"),
        _ => Ok(()),
    });
    let Err(failure) = outcome else {
        panic!("expected simulated swap failure");
    };

    assert!(failure.to_string().contains("simulated swap failure"));
    assert_eq!(fs::read_to_string(&alpha).unwrap(), "alpha old\n");
    assert_eq!(fs::read_to_string(&beta).unwrap(), "beta old\n");
}
23876
23877    // --- SQL introspection ---
23878
23879    fn setup_test_db() -> (tempfile::NamedTempFile, Connection) {
23880        let tmp = tempfile::NamedTempFile::new().unwrap();
23881        let conn = Connection::open(tmp.path()).unwrap();
23882        conn.execute_batch(
23883            "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT);
23884             INSERT INTO users VALUES (1, 'Alice', 'alice@example.com');
23885             INSERT INTO users VALUES (2, 'Bob', NULL);
23886             CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER NOT NULL, title TEXT NOT NULL, body TEXT,
23887                 FOREIGN KEY(user_id) REFERENCES users(id));
23888             INSERT INTO posts VALUES (1, 1, 'Hello World', 'First post');
23889             INSERT INTO posts VALUES (2, 1, 'Second', NULL);
23890             INSERT INTO posts VALUES (3, 2, 'Bob post', 'Content here');"
23891        ).unwrap();
23892        (tmp, conn)
23893    }
23894
23895    // --- rewrite_command ---
23896
/// A bare `rg <pattern>` becomes an exact, normal-budget envelope search.
#[test]
fn rewrite_rg_simple_pattern() {
    let rewritten = rewrite_command("rg authenticate");
    assert_eq!(
        rewritten.as_deref(),
        Some(r#"tsift --envelope search "authenticate" --exact --budget normal"#)
    );
}
23905
/// A trailing positional path is forwarded as `--path`.
#[test]
fn rewrite_rg_with_path() {
    let rewritten = rewrite_command("rg authenticate src/");
    let expected =
        r#"tsift --envelope search "authenticate" --exact --budget normal --path "src/""#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
23917
/// The `-i` flag does not appear in the rewritten command; pattern and path
/// are carried over.
#[test]
fn rewrite_rg_with_flags_ignored() {
    let rewritten = rewrite_command("rg -i authenticate src/");
    let expected =
        r#"tsift --envelope search "authenticate" --exact --budget normal --path "src/""#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
23929
/// `-t rs` consumes its value, so the pattern is the next positional token.
#[test]
fn rewrite_rg_with_type_flag() {
    let rewritten = rewrite_command("rg -t rs authenticate");
    assert_eq!(
        rewritten.as_deref(),
        Some(r#"tsift --envelope search "authenticate" --exact --budget normal"#)
    );
}
23939
/// Pipe chains cannot be translated, so no rewrite is produced.
#[test]
fn rewrite_rg_pipe_passthrough() {
    let piped = "rg authenticate | head -5";
    assert_eq!(rewrite_command(piped), None);
}
23946
/// `rg --files` listings are left untouched (no rewrite).
#[test]
fn rewrite_rg_files_passthrough() {
    let listing = "rg --files src/tsift .agent-doc logs";
    assert_eq!(rewrite_command(listing), None);
}
23952
/// `find` invocations are left untouched (no rewrite).
#[test]
fn rewrite_find_passthrough() {
    let find_command = "find src/tsift .agent-doc -type f -name '*.rs'";
    assert_eq!(rewrite_command(find_command), None);
}
23958
/// Recursive `grep -r` is rewritten to the same search form as `rg`.
#[test]
fn rewrite_grep_recursive() {
    let rewritten = rewrite_command("grep -r authenticate src/");
    let expected =
        r#"tsift --envelope search "authenticate" --exact --budget normal --path "src/""#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
23970
/// `grep` without `-r` on a single file is left untouched (no rewrite).
#[test]
fn rewrite_grep_non_recursive_passthrough() {
    let single_file_grep = "grep authenticate file.txt";
    assert_eq!(rewrite_command(single_file_grep), None);
}
23976
/// A command that is already a `tsift` invocation is returned unchanged.
#[test]
fn rewrite_tsift_passthrough() {
    let command = "tsift search \"foo\"";
    assert_eq!(rewrite_command(command).as_deref(), Some(command));
}
23982
/// A `tsift search` command without a timeout gets `--timeout 0` appended.
#[test]
fn rewrite_run_tsift_search_disables_timeout_by_default() {
    let effective = effective_rewrite_run_command("tsift search hookcaps --exact --path /tmp/x");
    let expected = "tsift search hookcaps --exact --path /tmp/x --timeout 0";
    assert_eq!(effective, expected);
}
23991
/// A caller-supplied `--timeout` is kept; the command comes back unchanged.
#[test]
fn rewrite_run_preserves_explicit_search_timeout() {
    let command = "tsift search hookcaps --exact --path /tmp/x --timeout 5";
    let effective = effective_rewrite_run_command(command);
    assert_eq!(effective, command);
}
24002
/// `echo cargo build` produces no rewrite.
#[test]
fn rewrite_unrelated_passthrough() {
    let unrelated = "echo cargo build";
    assert_eq!(rewrite_command(unrelated), None);
}
24008
/// A double-quoted multi-word pattern is kept as one search term.
#[test]
fn rewrite_rg_quoted_pattern() {
    let rewritten = rewrite_command("rg \"fn main\"");
    assert_eq!(
        rewritten.as_deref(),
        Some(r#"tsift --envelope search "fn main" --exact --budget normal"#)
    );
}
24017
/// Plain `git diff` maps to `tsift diff-digest .`.
#[test]
fn rewrite_git_diff_to_diff_digest() {
    let rewritten = rewrite_command("git diff");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest ."));
}
24023
/// `git diff --cached` forwards the `--cached` flag to the digest.
#[test]
fn rewrite_git_diff_cached_to_diff_digest() {
    let rewritten = rewrite_command("git diff --cached");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest --cached ."));
}
24029
/// A pathspec after `--` becomes the quoted digest target.
#[test]
fn rewrite_git_diff_with_path_to_diff_digest() {
    let rewritten = rewrite_command("git diff -- src/");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest \"src/\""));
}
24035
/// `git diff <revision>` is left untouched (no rewrite).
#[test]
fn rewrite_git_diff_with_revision_passthrough() {
    let revision_diff = "git diff HEAD~1";
    assert_eq!(rewrite_command(revision_diff), None);
}
24041
/// `git show <rev>` maps to a revision-scoped diff digest.
#[test]
fn rewrite_git_show_to_revision_diff_digest() {
    let rewritten = rewrite_command("git show HEAD~1");
    let expected = r#"tsift diff-digest --revision "HEAD~1" ."#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
24050
/// `git log -p -1 <rev>` maps to a revision-scoped diff digest.
#[test]
fn rewrite_git_log_patch_history_to_revision_diff_digest() {
    let rewritten = rewrite_command("git log -p -1 HEAD~2");
    let expected = r#"tsift diff-digest --revision "HEAD~2" ."#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
24059
/// `cat` on a markdown file with an `agent_doc_session` front matter and 90
/// prompt lines is rewritten to a `session-digest` with `--source markdown`.
#[test]
fn rewrite_cat_long_agent_doc_session_to_session_digest() {
    let workspace = tempfile::tempdir().unwrap();
    let session = workspace.path().join("tsift.md");

    let prompts: String = (0..90)
        .map(|index| format!("❯ prompt {index}?\n"))
        .collect();
    let transcript =
        format!("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n{prompts}");
    fs::write(&session, transcript).unwrap();

    let quoted_session = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {}", quoted_session));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source markdown",
        shell_quote(&resolve_digest_context_path(&session)),
        quoted_session
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24080
/// `head -n 120` on a 120-line JSONL transcript of assistant messages is
/// rewritten to a `session-digest` with `--source claude-jsonl`.
#[test]
fn rewrite_head_long_claude_jsonl_to_session_digest() {
    let workspace = tempfile::tempdir().unwrap();
    let session = workspace.path().join("session.jsonl");

    let record =
        r#"{"message":{"role":"assistant","content":[{"type":"text","text":"❯ do [#yyhd]"}]}}"#;
    // 120 identical records, each terminated by a newline.
    fs::write(&session, format!("{record}\n").repeat(120)).unwrap();

    let quoted_session = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("head -n 120 {}", quoted_session));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source claude-jsonl",
        shell_quote(&resolve_digest_context_path(&session)),
        quoted_session
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24105
/// `head -n 120` on a 120-line JSONL transcript of `event_msg` records is
/// rewritten to a `session-digest` with `--source codex-jsonl`.
#[test]
fn rewrite_head_long_codex_jsonl_to_session_digest() {
    let workspace = tempfile::tempdir().unwrap();
    let session = workspace.path().join("codex.jsonl");

    let record = r#"{"type":"event_msg","payload":{"type":"user_message","message":"do [#cdxlog]. spec-test-build-install-commit-push"}}"#;
    // 120 identical records, each terminated by a newline.
    fs::write(&session, format!("{record}\n").repeat(120)).unwrap();

    let quoted_session = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("head -n 120 {}", quoted_session));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source codex-jsonl",
        shell_quote(&resolve_digest_context_path(&session)),
        quoted_session
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24129
/// `tail -n 20` on a 120-line JSONL transcript produces no rewrite.
#[test]
fn rewrite_small_transcript_window_passthrough() {
    let workspace = tempfile::tempdir().unwrap();
    let session = workspace.path().join("session.jsonl");

    let record = r#"{"message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}"#;
    // 120 identical records, each terminated by a newline.
    fs::write(&session, format!("{record}\n").repeat(120)).unwrap();

    let command = format!("tail -n 20 {}", shell_quote(session.to_str().unwrap()));
    assert_eq!(rewrite_command(&command), None);
}
24146
/// `sed -n '1,120p'` on an agent-doc markdown session with 120 heading lines
/// is rewritten to a `session-digest` with `--source markdown`.
#[test]
fn rewrite_sed_large_agent_doc_range_to_session_digest() {
    let workspace = tempfile::tempdir().unwrap();
    let session = workspace.path().join("tsift.md");

    let headings: String = (0..120)
        .map(|index| format!("### Re: topic {index}\n"))
        .collect();
    let transcript =
        format!("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n{headings}");
    fs::write(&session, transcript).unwrap();

    let quoted_session = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("sed -n '1,120p' {}", quoted_session));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source markdown",
        shell_quote(&resolve_digest_context_path(&session)),
        quoted_session
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24170
/// `cat` on a 120-line `.log` file of `claude_start` entries is rewritten to
/// a `session-digest` with `--source agent-doc-log`.
#[test]
fn rewrite_cat_large_agent_doc_log_to_session_digest() {
    let workspace = tempfile::tempdir().unwrap();
    let session = workspace.path().join("tsift.log");

    let entry = "[1776528398] claude_start mode=fresh_restart restart_count=1";
    // 120 identical entries, each terminated by a newline.
    fs::write(&session, format!("{entry}\n").repeat(120)).unwrap();

    let quoted_session = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {}", quoted_session));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source agent-doc-log",
        shell_quote(&resolve_digest_context_path(&session)),
        quoted_session
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24191
/// For a session file inside a declared submodule (with a `.git` pointer
/// file), the digest `--path` must be the submodule directory itself.
#[test]
fn rewrite_session_reads_prefer_submodule_root_for_digest_path() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    let gitmodules = concat!(
        "[submodule \"src/tsift\"]\n",
        "\tpath = src/tsift\n",
        "\turl = https://example.com/tsift\n",
    );
    fs::write(root.join(".gitmodules"), gitmodules).unwrap();

    let submodule = root.join("src/tsift");
    fs::create_dir_all(submodule.join("tasks")).unwrap();
    let git_pointer = "gitdir: ../../.git/modules/src/tsift\n";
    fs::write(submodule.join(".git"), git_pointer).unwrap();

    let session = submodule.join("tasks/plan.md");
    let prompts: String = (0..90)
        .map(|index| format!("❯ prompt {index}?\n"))
        .collect();
    let transcript =
        format!("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n{prompts}");
    fs::write(&session, transcript).unwrap();

    let quoted_session = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {}", quoted_session));

    let expected = format!(
        "tsift session-digest --path {} --input {} --source markdown",
        shell_quote(submodule.to_str().unwrap()),
        quoted_session
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24228
/// `cat` on a 120-line README with no session front matter produces no
/// rewrite.
#[test]
fn rewrite_regular_markdown_read_passthrough() {
    let workspace = tempfile::tempdir().unwrap();
    let readme = workspace.path().join("README.md");
    // 120 identical lines, each terminated by a newline.
    fs::write(&readme, "plain markdown\n".repeat(120)).unwrap();

    let command = format!("cat {}", shell_quote(readme.to_str().unwrap()));
    assert_eq!(rewrite_command(&command), None);
}
24241
/// In a directory prepared with `write_empty_root_index`, `cat` on a
/// 120-line `src/lib.rs` becomes an 80-line `source-read` starting at line 1.
#[test]
fn rewrite_cat_large_source_to_source_read_in_indexed_repo() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    write_empty_root_index(root);
    let source = write_repeated_lines(&root.join("src/lib.rs"), "fn demo() {}", 120);

    let command = format!("cat {}", shell_quote(source.to_str().unwrap()));
    let rewritten = rewrite_command(&command);

    let expected = format!(
        r#"tsift --envelope source-read "src/lib.rs" --path {} --start 1 --lines 80 --budget normal"#,
        shell_quote(&root.to_string_lossy())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24258
/// `head -n 20` on a 120-line indexed source file produces no rewrite.
#[test]
fn rewrite_head_small_source_window_passthrough() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    write_empty_root_index(root);
    let source = write_repeated_lines(&root.join("src/lib.rs"), "fn demo() {}", 120);

    let command = format!("head -n 20 {}", shell_quote(source.to_str().unwrap()));

    assert_eq!(rewrite_command(&command), None);
}
24272
/// `sed -n '40,160p'` maps to a `source-read` starting at line 40 and
/// spanning 121 lines (both range ends inclusive).
#[test]
fn rewrite_sed_large_source_range_to_source_read() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    write_empty_root_index(root);
    let source = write_repeated_lines(&root.join("src/lib.rs"), "fn demo() {}", 200);

    let command = format!("sed -n '40,160p' {}", shell_quote(source.to_str().unwrap()));
    let rewritten = rewrite_command(&command);

    let expected = format!(
        r#"tsift --envelope source-read "src/lib.rs" --path {} --start 40 --lines 121 --budget normal"#,
        shell_quote(&root.to_string_lossy())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24292
/// `tail -n 120` on a 200-line file maps to a `source-read` anchored at the
/// end of the file: start line 81, 120 lines.
#[test]
fn rewrite_tail_large_source_window_preserves_tail_anchor() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    write_empty_root_index(root);
    let source = write_repeated_lines(&root.join("src/lib.rs"), "fn demo() {}", 200);

    let command = format!("tail -n 120 {}", shell_quote(source.to_str().unwrap()));
    let rewritten = rewrite_command(&command);

    let expected = format!(
        r#"tsift --envelope source-read "src/lib.rs" --path {} --start 81 --lines 120 --budget normal"#,
        shell_quote(&root.to_string_lossy())
    );
    assert_eq!(rewritten.as_deref(), Some(expected.as_str()));
}
24312
/// `cat` on a 120-line `notes.txt` produces no rewrite even though the
/// directory has a root index.
#[test]
fn rewrite_large_non_source_read_passthrough_even_when_indexed() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    write_empty_root_index(root);
    let notes = write_repeated_lines(&root.join("notes.txt"), "plain text", 120);

    let command = format!("cat {}", shell_quote(notes.to_str().unwrap()));

    assert_eq!(rewrite_command(&command), None);
}
24323
/// Without a root index, `cat` on a 120-line source file produces no
/// rewrite.
#[test]
fn rewrite_large_source_read_passthrough_without_index() {
    let workspace = tempfile::tempdir().unwrap();
    let source_path = workspace.path().join("src/lib.rs");
    let source = write_repeated_lines(&source_path, "fn demo() {}", 120);

    let command = format!("cat {}", shell_quote(source.to_str().unwrap()));

    assert_eq!(rewrite_command(&command), None);
}
24333
/// `cargo test` is wrapped in the digest runner as a `test` run with the
/// `cargo` runner.
#[test]
fn rewrite_cargo_test_to_digest_runner() {
    let rewritten = rewrite_command("cargo test --lib");
    let expected = r#"tsift --envelope __digest-runner --kind "test" --path "." --shell-command "cargo test --lib" --runner "cargo""#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
24344
/// A direct `pytest` call is wrapped in the digest runner as a `test` run
/// with the `pytest` runner.
#[test]
fn rewrite_pytest_to_digest_runner() {
    let rewritten = rewrite_command("pytest -q tests/test_cli.py");
    let expected = r#"tsift --envelope __digest-runner --kind "test" --path "." --shell-command "pytest -q tests/test_cli.py" --runner "pytest""#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
24355
/// `python -m pytest` is wrapped the same way as a direct `pytest` call,
/// with the `pytest` runner.
#[test]
fn rewrite_python_m_pytest_to_digest_runner() {
    let rewritten = rewrite_command("python -m pytest tests/test_cli.py");
    let expected = r#"tsift --envelope __digest-runner --kind "test" --path "." --shell-command "python -m pytest tests/test_cli.py" --runner "pytest""#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
24366
/// `cargo build` is wrapped in the digest runner as a `log` run, with no
/// `--runner` argument.
#[test]
fn rewrite_cargo_build_to_log_digest_runner() {
    let rewritten = rewrite_command("cargo build --release");
    let expected = r#"tsift --envelope __digest-runner --kind "log" --path "." --shell-command "cargo build --release""#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
24377
/// `cargo install` is wrapped in the digest runner as a `log` run, with no
/// `--runner` argument.
#[test]
fn rewrite_cargo_install_to_log_digest_runner() {
    let rewritten = rewrite_command("cargo install --path . --force");
    let expected = r#"tsift --envelope __digest-runner --kind "log" --path "." --shell-command "cargo install --path . --force""#;
    assert_eq!(rewritten.as_deref(), Some(expected));
}
24388
/// A test command piped into another program produces no rewrite.
#[test]
fn rewrite_metacharacter_command_passthrough() {
    let piped = "cargo test | head";
    assert_eq!(rewrite_command(piped), None);
}
24394
/// A global flag before the `search` subcommand does not hide it: the cap
/// is 50 lines and strips the `Strategy:` header.
#[test]
fn rewrite_output_cap_detects_search_even_with_global_flag() {
    let search_cap = rewrite_output_cap("tsift --compact search foo").expect("cap");
    assert_eq!(search_cap.max_lines, 50);
    assert_eq!(search_cap.strip_prefix, Some("Strategy:"));
}
24401
/// Commands using `--json`, `--schema` or `--envelope` get no output cap.
#[test]
fn rewrite_output_cap_skips_structured_output() {
    let structured_commands = [
        "tsift search foo --json",
        "tsift --schema graph foo",
        "tsift --envelope search foo",
    ];
    for command in structured_commands {
        assert!(rewrite_output_cap(command).is_none(), "{command}");
    }
}
24408
/// With `json_output` and `envelope` set, the already-enveloped digest-runner
/// command comes back without any extra flags.
#[test]
fn rewrite_output_format_forwards_envelope_to_digest_runner() {
    let requested = OutputFormat {
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        schema: false,
        envelope: true,
    };
    let rewritten = rewrite_command("cargo test --lib").expect("rewrite");

    let forwarded = apply_rewrite_output_format(&rewritten, requested);

    let expected = r#"tsift --envelope __digest-runner --kind "test" --path "." --shell-command "cargo test --lib" --runner "cargo""#;
    assert_eq!(forwarded, expected);
}
24428
24429    #[test]
24430    fn rewrite_output_format_forwards_json_when_requested() {
24431        let command = rewrite_command("cargo build --release").expect("rewrite");
24432        let forwarded = apply_rewrite_output_format(
24433            &command,
24434            OutputFormat {
24435                json_output: true,
24436                compact: false,
24437                pretty: true,
24438                terse: false,
24439                schema: false,
24440                envelope: false,
24441            },
24442        );
24443        assert_eq!(
24444            forwarded,
24445            "tsift --pretty --envelope __digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\""
24446        );
24447    }
24448
24449    #[test]
24450    fn output_cap_strips_search_header_and_truncates() {
24451        let capped = apply_output_cap(
24452            b"Strategy: exact | Indexed: 0 | Skipped: 0\n\nline1\nline2\nline3\n",
24453            OutputCap {
24454                max_lines: 2,
24455                strip_prefix: Some("Strategy:"),
24456            },
24457        );
24458        assert_eq!(
24459            capped,
24460            "line1\nline2\n... (+1 more lines; rerun the underlying tsift command directly for the full output)\n"
24461        );
24462    }
24463
24464    #[test]
24465    fn sql_schema_overview_lists_tables() {
24466        let (_tmp, conn) = setup_test_db();
24467        let tables = schema_overview(&conn).unwrap();
24468        let names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
24469        assert_eq!(names, &["posts", "users"]);
24470    }
24471
24472    #[test]
24473    fn sql_schema_overview_row_counts() {
24474        let (_tmp, conn) = setup_test_db();
24475        let tables = schema_overview(&conn).unwrap();
24476        let users = tables.iter().find(|t| t.name == "users").unwrap();
24477        let posts = tables.iter().find(|t| t.name == "posts").unwrap();
24478        assert_eq!(users.row_count, 2);
24479        assert_eq!(posts.row_count, 3);
24480    }
24481
24482    #[test]
24483    fn sql_table_columns_metadata() {
24484        let (_tmp, conn) = setup_test_db();
24485        let cols = table_columns(&conn, "users").unwrap();
24486        assert_eq!(cols.len(), 3);
24487        assert_eq!(cols[0].name, "id");
24488        assert!(cols[0].pk);
24489        assert_eq!(cols[1].name, "name");
24490        assert!(cols[1].notnull);
24491        assert_eq!(cols[2].name, "email");
24492        assert!(!cols[2].notnull);
24493    }
24494
24495    #[test]
24496    fn sql_execute_query_returns_rows() {
24497        let (_tmp, conn) = setup_test_db();
24498        let (columns, rows) =
24499            execute_query(&conn, "SELECT name, email FROM users ORDER BY id").unwrap();
24500        assert_eq!(columns, &["name", "email"]);
24501        assert_eq!(rows.len(), 2);
24502        assert_eq!(rows[0][0], serde_json::json!("Alice"));
24503        assert_eq!(rows[0][1], serde_json::json!("alice@example.com"));
24504        assert_eq!(rows[1][1], serde_json::Value::Null);
24505    }
24506
24507    #[test]
24508    fn sql_execute_query_aggregate() {
24509        let (_tmp, conn) = setup_test_db();
24510        let (columns, rows) = execute_query(&conn, "SELECT COUNT(*) as cnt FROM posts").unwrap();
24511        assert_eq!(columns, &["cnt"]);
24512        assert_eq!(rows[0][0], serde_json::json!(3));
24513    }
24514
24515    #[test]
24516    fn sql_execute_query_join() {
24517        let (_tmp, conn) = setup_test_db();
24518        let (_cols, rows) = execute_query(
24519            &conn,
24520            "SELECT u.name, p.title FROM users u JOIN posts p ON u.id = p.user_id ORDER BY p.id",
24521        )
24522        .unwrap();
24523        assert_eq!(rows.len(), 3);
24524        assert_eq!(rows[0][0], serde_json::json!("Alice"));
24525        assert_eq!(rows[2][0], serde_json::json!("Bob"));
24526    }
24527
24528    #[test]
24529    fn sql_open_db_read_only() {
24530        let (tmp, _conn) = setup_test_db();
24531        drop(_conn);
24532        let ro_conn = open_db(tmp.path()).unwrap();
24533        let result = ro_conn.execute("INSERT INTO users VALUES (99, 'Fail', NULL)", []);
24534        assert!(result.is_err(), "read-only connection should reject writes");
24535    }
24536
24537    #[test]
24538    fn sql_empty_table_schema() {
24539        let tmp = tempfile::NamedTempFile::new().unwrap();
24540        let conn = Connection::open(tmp.path()).unwrap();
24541        conn.execute_batch("CREATE TABLE empty_tbl (id INTEGER PRIMARY KEY, data BLOB)")
24542            .unwrap();
24543        let tables = schema_overview(&conn).unwrap();
24544        assert_eq!(tables[0].row_count, 0);
24545        assert_eq!(tables[0].columns.len(), 2);
24546    }
24547
24548    // --- graph command ---
24549
24550    fn setup_graph_index() -> tempfile::TempDir {
24551        let dir = tempfile::tempdir().unwrap();
24552        std::fs::write(
24553            dir.path().join("main.rs"),
24554            "fn helper() { println!(\"hi\"); }\nfn main() { helper(); Vec::new(); }",
24555        )
24556        .unwrap();
24557        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
24558        db.apply_changes(dir.path()).unwrap();
24559        dir
24560    }
24561
24562    fn setup_traversal_project() -> tempfile::TempDir {
24563        let dir = setup_graph_index();
24564        let task_dir = dir.path().join("tasks/software");
24565        std::fs::create_dir_all(&task_dir).unwrap();
24566        std::fs::write(
24567            task_dir.join("tsift.md"),
24568            r#"---
24569agent_doc_session: tsift-v0.1
24570agent_doc_format: template
24571---
24572
24573## Exchange
24574
24575<!-- agent:exchange patch=append -->
24576❯ do [#kgnv]
24577Completed `#kgnv`; touched files `main.rs`; tests `cargo test traversal_graph`; follow-up `#gfix`.
24578<!-- /agent:exchange -->
24579
24580<!-- agent:queue -->
24581dispatch #spec-test-build-install-commit-push
24582- do [#kgnv]
24583<!-- /agent:queue -->
24584
24585## Backlog
24586
24587<!-- agent:backlog -->
24588- [ ] [#kgnv] Fix helper traversal handles while preserving graph navigation.
24589<!-- /agent:backlog -->
24590"#,
24591        )
24592        .unwrap();
24593        dir
24594    }
24595
24596    fn setup_dependency_dag_project() -> tempfile::TempDir {
24597        let dir = tempfile::tempdir().unwrap();
24598        std::fs::write(
24599            dir.path().join("main.rs"),
24600            "fn shared_helper() {}\nfn main() { shared_helper(); }\n",
24601        )
24602        .unwrap();
24603        std::fs::write(
24604            dir.path().join("Cargo.toml"),
24605            "[package]\nname = \"dag-fixture\"\n",
24606        )
24607        .unwrap();
24608        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
24609        db.apply_changes(dir.path()).unwrap();
24610
24611        let task_dir = dir.path().join("tasks/software");
24612        std::fs::create_dir_all(&task_dir).unwrap();
24613        std::fs::write(
24614            task_dir.join("tsift.md"),
24615            r#"---
24616agent_doc_session: tsift-dag
24617agent_doc_format: template
24618---
24619
24620## Exchange
24621
24622<!-- agent:exchange patch=append -->
24623Completed `#alpha`; touched files `main.rs`; tests `cargo test dependency_dag`; follow-up `#gamma`.
24624<!-- /agent:exchange -->
24625
24626## Backlog
24627
24628<!-- agent:backlog -->
24629- [ ] [#prep] Prepare Cargo.toml configuration before shared helper work.
24630- [ ] [#alpha] Update shared_helper in main.rs after #prep.
24631- [ ] [#beta] Refactor shared_helper tests in main.rs.
24632- [ ] [#gamma] Follow-up review for graph navigation.
24633<!-- /agent:backlog -->
24634"#,
24635        )
24636        .unwrap();
24637        dir
24638    }
24639
24640    fn setup_dependency_dag_cycle_project() -> tempfile::TempDir {
24641        let dir = setup_graph_index();
24642        let task_dir = dir.path().join("tasks/software");
24643        std::fs::create_dir_all(&task_dir).unwrap();
24644        std::fs::write(
24645            task_dir.join("tsift.md"),
24646            r#"---
24647agent_doc_session: tsift-dag-cycle
24648agent_doc_format: template
24649---
24650
24651## Backlog
24652
24653<!-- agent:backlog -->
24654- [ ] [#left] Left side depends on #right.
24655- [ ] [#right] Right side depends on #left.
24656<!-- /agent:backlog -->
24657"#,
24658        )
24659        .unwrap();
24660        dir
24661    }
24662
24663    fn seed_traversal_semantic_summaries(dir: &Path) {
24664        let summary_db = summarize::SummaryDb::open(&dir.join(".tsift/summaries.db")).unwrap();
24665        summary_db
24666            .insert(&summarize::Summary {
24667                id: 0,
24668                symbol_name: "helper".to_string(),
24669                file_path: "main.rs".to_string(),
24670                content_hash: "hash-main".to_string(),
24671                summary: "helper builds graph navigation handles for traversal.".to_string(),
24672                entities: Some(vec![
24673                    summarize::Entity {
24674                        name: "helper".to_string(),
24675                        kind: "function".to_string(),
24676                        description: "Builds graph navigation handles.".to_string(),
24677                    },
24678                    summarize::Entity {
24679                        name: "TraversalGraph".to_string(),
24680                        kind: "type".to_string(),
24681                        description: "Carries GraphStore-backed traversal rows.".to_string(),
24682                    },
24683                ]),
24684                relationships: Some(vec![summarize::Relationship {
24685                    from: "helper".to_string(),
24686                    to: "TraversalGraph".to_string(),
24687                    kind: "uses".to_string(),
24688                }]),
24689                concept_labels: Some(vec![
24690                    "graph navigation".to_string(),
24691                    "semantic extraction".to_string(),
24692                ]),
24693                extracted_at: "1700000000".to_string(),
24694                model: "test-model".to_string(),
24695                tokens_input: Some(10),
24696                tokens_output: Some(5),
24697            })
24698            .unwrap();
24699    }
24700
24701    #[test]
24702    fn graph_callers_query() {
24703        let dir = setup_graph_index();
24704        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
24705        let callers = db.callers_of("helper").unwrap();
24706        assert_eq!(callers.len(), 1);
24707        assert_eq!(callers[0].caller_name, "main");
24708    }
24709
24710    #[test]
24711    fn graph_callees_query() {
24712        let dir = setup_graph_index();
24713        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
24714        let callees = db.callees_of("main").unwrap();
24715        let names: Vec<&str> = callees.iter().map(|e| e.callee_name.as_str()).collect();
24716        assert!(names.contains(&"helper"));
24717        assert!(names.contains(&"new"));
24718    }
24719
24720    #[test]
24721    fn graph_no_callers_returns_empty() {
24722        let dir = setup_graph_index();
24723        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
24724        let callers = db.callers_of("nonexistent").unwrap();
24725        assert!(callers.is_empty());
24726    }
24727
24728    #[test]
24729    fn graph_cmd_autoindexes_missing_index_by_default() {
24730        let dir = tempfile::tempdir().unwrap();
24731        std::fs::write(
24732            dir.path().join("main.rs"),
24733            "fn helper() {}\nfn main() { helper(); }\n",
24734        )
24735        .unwrap();
24736        let result = cmd_graph(
24737            "helper",
24738            dir.path(),
24739            true,
24740            false,
24741            None,
24742            20,
24743            false,
24744            true,
24745            false,
24746            false,
24747            false,
24748            false,
24749            false,
24750            TagpathSearchOpts::default(),
24751        );
24752
24753        assert!(result.is_ok());
24754        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
24755        let summary = db.compute_changes(dir.path()).unwrap();
24756        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
24757    }
24758
24759    #[test]
24760    fn traversal_graph_has_stable_typed_handles() {
24761        let dir = setup_traversal_project();
24762        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24763        let graph_again = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24764
24765        let file = resolve_traversal_node(&graph, "main.rs").unwrap();
24766        let symbol = resolve_traversal_node(&graph, "helper").unwrap();
24767        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
24768        let session = resolve_traversal_node(&graph, "tsift-v0.1").unwrap();
24769
24770        assert!(file.handle.starts_with("gfil-"));
24771        assert!(symbol.handle.starts_with("gsym-"));
24772        assert!(backlog.handle.starts_with("gbak-"));
24773        assert!(session.handle.starts_with("gses-"));
24774
24775        assert_eq!(
24776            symbol.handle,
24777            resolve_traversal_node(&graph_again, "helper")
24778                .unwrap()
24779                .handle
24780        );
24781        assert_eq!(
24782            backlog.handle,
24783            resolve_traversal_node(&graph_again, "#kgnv")
24784                .unwrap()
24785                .handle
24786        );
24787    }
24788
24789    #[test]
24790    fn traversal_graph_links_backlog_items_to_code_tokens() {
24791        let dir = setup_traversal_project();
24792        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24793        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
24794        let helper = resolve_traversal_node(&graph, "helper").unwrap();
24795
24796        assert!(graph.edges.iter().any(|edge| {
24797            edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
24798        }));
24799    }
24800
24801    #[test]
24802    fn session_hinted_traversal_skips_global_call_edges() {
24803        let dir = setup_traversal_project();
24804        let session = dir.path().join("tasks/software/tsift.md");
24805        let bounded = build_traversal_graph_source(dir.path(), &session, None).unwrap();
24806        let backlog = resolve_traversal_node(&bounded, "#kgnv").unwrap();
24807        let helper = resolve_traversal_node(&bounded, "helper").unwrap();
24808
24809        assert!(bounded.edges.iter().any(|edge| {
24810            edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
24811        }));
24812        assert!(
24813            !bounded.edges.iter().any(|edge| edge.relation == "calls"),
24814            "session-hinted graph-db projections should not materialize unrelated global call edges"
24815        );
24816
24817        let full = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
24818        assert!(
24819            full.edges.iter().any(|edge| edge.relation == "calls"),
24820            "root/full projections still carry the complete indexed call graph"
24821        );
24822    }
24823
24824    #[test]
24825    fn agent_doc_task_path_infers_matching_workspace_scope() {
24826        let dir = tempfile::tempdir().unwrap();
24827        std::fs::create_dir_all(dir.path().join("src/tsift")).unwrap();
24828        std::fs::create_dir_all(dir.path().join("tasks/software")).unwrap();
24829        std::fs::write(
24830            dir.path().join(".gitmodules"),
24831            "[submodule \"src/tsift\"]\n\tpath = src/tsift\n\turl = https://example.invalid/tsift.git\n",
24832        )
24833        .unwrap();
24834        let task = dir.path().join("tasks/software/tsift.md");
24835        std::fs::write(&task, "# tsift\n").unwrap();
24836
24837        let targets = resolve_search_index_targets(dir.path(), &task, None, false).unwrap();
24838        let query_db_path = resolve_query_db_path(dir.path(), &task, None).unwrap();
24839        let cfg = config::Config::load(dir.path()).unwrap();
24840
24841        assert_eq!(targets.len(), 1);
24842        assert_eq!(targets[0].scope_name.as_deref(), Some("tsift"));
24843        assert_eq!(targets[0].source_root, dir.path().join("src/tsift"));
24844        assert!(
24845            targets[0]
24846                .db_path
24847                .ends_with(".tsift/indexes/tsift/index.db")
24848        );
24849        assert_eq!(query_db_path, cfg.db_path_for(dir.path(), "tsift"));
24850    }
24851
24852    #[test]
24853    fn traversal_graph_links_agent_doc_queue_job_packets_to_backlog() {
24854        let dir = setup_traversal_project();
24855        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24856        let job = resolve_traversal_node(&graph, "do #kgnv").unwrap();
24857        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
24858
24859        assert_eq!(job.kind, "job_packet");
24860        assert!(job.handle.starts_with("gjob-"));
24861        assert!(graph.edges.iter().any(|edge| {
24862            edge.from == job.handle && edge.to == backlog.handle && edge.relation == "targets"
24863        }));
24864
24865        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
24866        let jobs = store.nodes_by_kind("job_packet").unwrap();
24867        assert!(
24868            jobs.iter()
24869                .any(|node| node.properties.get("ref_id") == Some(&"kgnv".to_string())),
24870            "expected queued job packet in graph store, got {jobs:?}"
24871        );
24872    }
24873
24874    #[test]
24875    fn traversal_graph_includes_routes_and_handler_edges() {
24876        let dir = tempfile::tempdir().unwrap();
24877        std::fs::write(
24878            dir.path().join("api.py"),
24879            r#"@router.get("/items")
24880def list_items():
24881    return []
24882"#,
24883        )
24884        .unwrap();
24885        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
24886        db.apply_changes(dir.path()).unwrap();
24887
24888        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24889        let route = resolve_traversal_node(&graph, "/items").unwrap();
24890        let handler = resolve_traversal_node(&graph, "list_items").unwrap();
24891
24892        assert_eq!(route.kind, "route");
24893        assert!(graph.edges.iter().any(|edge| {
24894            edge.from == route.handle && edge.to == handler.handle && edge.relation == "handled_by"
24895        }));
24896    }
24897
24898    #[test]
24899    fn traversal_neighborhood_handles_prioritizes_high_signal_edges_when_limited() {
24900        let edges = vec![
24901            TraversalEdge {
24902                from: "origin".to_string(),
24903                to: "aaa_low".to_string(),
24904                relation: "unknown".to_string(),
24905                label: None,
24906                weight: 1,
24907            },
24908            TraversalEdge {
24909                from: "origin".to_string(),
24910                to: "zzz_high".to_string(),
24911                relation: "mentions".to_string(),
24912                label: None,
24913                weight: 1,
24914            },
24915        ];
24916
24917        let handles = traversal_neighborhood_handles(&edges, "origin", 1, 2);
24918
24919        assert!(handles.contains("origin"));
24920        assert!(handles.contains("zzz_high"), "{handles:?}");
24921        assert!(!handles.contains("aaa_low"), "{handles:?}");
24922    }
24923
24924    #[test]
24925    fn traversal_materializes_provider_neutral_sqlite_graph() {
24926        let dir = setup_traversal_project();
24927        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24928        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
24929
24930        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
24931        let backlog_nodes = store.nodes_by_kind("backlog").unwrap();
24932        assert!(
24933            backlog_nodes.iter().any(|node| node.id == backlog.handle
24934                && node.properties.get("ref_id") == Some(&"kgnv".to_string())),
24935            "expected materialized backlog node, got {backlog_nodes:?}"
24936        );
24937        assert!(
24938            store
24939                .all_nodes()
24940                .unwrap()
24941                .iter()
24942                .any(|node| node.kind == GRAPH_PROJECTION_META_KIND
24943                    && node.properties.get("projection_version")
24944                        == Some(&GRAPH_PROJECTION_VERSION.to_string())),
24945            "expected projection metadata node"
24946        );
24947        let source_handles = store.nodes_by_kind("source_handle").unwrap();
24948        assert!(
24949            source_handles
24950                .iter()
24951                .any(|node| node.properties.get("file") == Some(&"main.rs".to_string())),
24952            "expected bounded source_handle rows, got {source_handles:?}"
24953        );
24954        let worker_context = store.nodes_by_kind("worker_context").unwrap();
24955        assert!(
24956            worker_context
24957                .iter()
24958                .any(|node| node.properties.get("target")
24959                    == Some(&"tasks/software/tsift.md".to_string())),
24960            "expected bounded worker_context rows, got {worker_context:?}"
24961        );
24962        let worker_results = store.nodes_by_kind("worker_result").unwrap();
24963        assert!(
24964            worker_results.iter().any(|node| {
24965                node.properties.get("ref_id") == Some(&"kgnv".to_string())
24966                    && node.properties.get("status") == Some(&"completed".to_string())
24967                    && node.properties.get("touched_files") == Some(&"main.rs".to_string())
24968                    && node.properties.get("follow_up_ids") == Some(&"gfix".to_string())
24969            }),
24970            "expected worker_result rows, got {worker_results:?}"
24971        );
24972    }
24973
24974    #[test]
24975    fn traversal_projection_materializes_cached_semantic_rows() {
24976        let dir = setup_traversal_project();
24977        seed_traversal_semantic_summaries(dir.path());
24978        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24979        let helper = resolve_traversal_node(&graph, "helper").unwrap();
24980        let concept = resolve_traversal_node(&graph, "graph navigation").unwrap();
24981        let entity = resolve_traversal_node(&graph, "TraversalGraph").unwrap();
24982
24983        assert_eq!(concept.kind, "semantic_concept");
24984        assert_eq!(entity.kind, "semantic_entity");
24985        assert!(concept.handle.starts_with("gcon-"));
24986        assert!(entity.handle.starts_with("gent-"));
24987
24988        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
24989        assert!(
24990            store
24991                .nodes_by_kind("semantic_concept")
24992                .unwrap()
24993                .iter()
24994                .any(|node| node.label == "semantic extraction"
24995                    && node.properties.contains_key("embedding")),
24996            "expected persisted concept embeddings"
24997        );
24998        assert!(
24999            store
25000                .outgoing_edges(&helper.handle, Some("mentions_concept"))
25001                .unwrap()
25002                .iter()
25003                .any(|edge| edge.to_id == concept.handle),
25004            "expected helper symbol to link to cached summary concept"
25005        );
25006        assert!(
25007            store
25008                .outgoing_edges(
25009                    &semantic_entity_handle("helper", "function"),
25010                    Some("semantic_relation")
25011                )
25012                .unwrap()
25013                .iter()
25014                .any(|edge| edge.to_id == entity.handle
25015                    && edge.properties.get("relationship_kind") == Some(&"uses".to_string())),
25016            "expected LLM relationship rows projected into GraphStore"
25017        );
25018    }
25019
25020    #[test]
25021    fn semantic_related_query_uses_persisted_graph_embeddings() {
25022        let dir = setup_traversal_project();
25023        seed_traversal_semantic_summaries(dir.path());
25024        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25025        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25026
25027        let report = semantic_related_report_from_store(
25028            dir.path(),
25029            None,
25030            "graph navigation",
25031            5,
25032            SemanticRelatedKind::Concept,
25033            &store,
25034        )
25035        .unwrap();
25036
25037        assert_eq!(report.embedding_model, SEMANTIC_EMBEDDING_MODEL);
25038        assert!(
25039            report
25040                .items
25041                .iter()
25042                .any(|item| item.label == "graph navigation"
25043                    && item.kind == "semantic_concept"
25044                    && item.score > 0.9),
25045            "expected nearest concept match from graph embeddings, got {:?}",
25046            report.items
25047        );
25048    }
25049
25050    #[test]
25051    fn graph_db_related_query_uses_semantic_seeds_and_incident_neighborhoods() {
25052        let dir = setup_traversal_project();
25053        seed_traversal_semantic_summaries(dir.path());
25054        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25055        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25056
25057        let report = graph_db_report_from_store(
25058            dir.path(),
25059            None,
25060            "sqlite",
25061            GraphDbQuery::Related {
25062                query: "graph navigation".to_string(),
25063                kind: SemanticRelatedKind::All,
25064                depth: 1,
25065                seed_limit: 2,
25066                limit: 20,
25067            },
25068            &store,
25069            sqlite_graph_freshness(&store, "root").unwrap(),
25070            Vec::new(),
25071        )
25072        .unwrap();
25073
25074        let knowledge = report.knowledge_retrieval.as_ref().unwrap();
25075        assert_eq!(knowledge.mode, "semantic_seeded_neighborhood");
25076        assert_eq!(knowledge.seed_kind, "all");
25077        assert_eq!(knowledge.depth, 1);
25078        assert!(
25079            knowledge
25080                .diagnostics
25081                .iter()
25082                .any(|diagnostic| diagnostic.contains("incident"))
25083        );
25084        assert!(
25085            report
25086                .semantic_related
25087                .iter()
25088                .any(|item| item.label == "graph navigation"
25089                    && item.kind == "semantic_concept"
25090                    && item.score > 0.9),
25091            "expected natural-language query to seed the graph navigation concept, got {:?}",
25092            report.semantic_related
25093        );
25094        assert!(
25095            report
25096                .nodes
25097                .iter()
25098                .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation")
25099        );
25100        assert!(
25101            report
25102                .nodes
25103                .iter()
25104                .any(|node| node.kind == "symbol" && node.label == "helper"),
25105            "incident expansion from semantic seed should recover source symbols, got {:?}",
25106            report
25107                .nodes
25108                .iter()
25109                .map(|node| (&node.kind, &node.label))
25110                .collect::<Vec<_>>()
25111        );
25112        assert!(
25113            report
25114                .edges
25115                .iter()
25116                .any(|edge| edge.kind == "mentions_concept")
25117        );
25118    }
25119
25120    #[test]
25121    fn graph_db_semantic_seeded_neighborhood_scores_before_caps() {
25122        let mut nodes = vec![
25123            SubstrateGraphNode::new("seed", "semantic_concept", "graph budget"),
25124            SubstrateGraphNode::new("zzz_high", "symbol", "high_signal"),
25125        ];
25126        let mut edges = vec![SubstrateGraphEdge::new(
25127            "zzz_high",
25128            "seed",
25129            "mentions_concept",
25130        )];
25131        for idx in 0..24 {
25132            let id = format!("aaa_low_{idx:02}");
25133            nodes.push(SubstrateGraphNode::new(
25134                id.clone(),
25135                "note",
25136                format!("low {idx}"),
25137            ));
25138            edges.push(SubstrateGraphEdge::new(id, "seed", "weak_link"));
25139        }
25140        let mut store = SqliteGraphStore::in_memory().unwrap();
25141        store
25142            .replace_projection(&GraphProjection { nodes, edges })
25143            .unwrap();
25144
25145        let subgraph =
25146            graph_db_semantic_seeded_neighborhood(&store, &["seed".to_string()], 1, 3).unwrap();
25147
25148        assert_eq!(subgraph.nodes.len(), 3);
25149        assert_eq!(subgraph.nodes[0].id, "seed");
25150        assert_eq!(
25151            subgraph.nodes[1].id, "zzz_high",
25152            "expected semantic mention edge to survive caps before lexicographic low-signal nodes: {:?}",
25153            subgraph.nodes
25154        );
25155        assert!(subgraph.truncated);
25156        assert!(
25157            subgraph
25158                .diagnostics
25159                .iter()
25160                .any(|diagnostic| diagnostic.contains("per-node edge scan cap")),
25161            "{:?}",
25162            subgraph.diagnostics
25163        );
25164        assert!(
25165            subgraph
25166                .diagnostics
25167                .iter()
25168                .any(|diagnostic| diagnostic.contains("skipped")),
25169            "{:?}",
25170            subgraph.diagnostics
25171        );
25172    }
25173
25174    #[test]
25175    fn conflict_matrix_uses_semantic_rows_as_dispatch_ranking_signal() {
25176        let dir = setup_traversal_project();
25177        seed_traversal_semantic_summaries(dir.path());
25178        init_git_repo(dir.path());
25179        let session = dir.path().join("tasks/software/tsift.md");
25180        refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
25181        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25182        let freshness = sqlite_graph_freshness(&store, "root").unwrap();
25183        let evidence = graph_db_evidence_report_from_store(GraphDbEvidenceInput {
25184            root: dir.path(),
25185            scope: None,
25186            backend: "sqlite",
25187            target: "kgnv",
25188            depth: 4,
25189            limit: 8,
25190            store: &store,
25191            freshness,
25192            warnings: Vec::new(),
25193        })
25194        .unwrap();
25195        assert!(
25196            evidence
25197                .semantic_related
25198                .iter()
25199                .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation"),
25200            "expected semantic evidence rows, got {:?}",
25201            evidence
25202                .semantic_related
25203                .iter()
25204                .map(|node| (&node.kind, &node.label))
25205                .collect::<Vec<_>>()
25206        );
25207
25208        let cached_diff = diff_digest::compute(
25209            dir.path(),
25210            diff_digest::DiffDigestOptions {
25211                cached: true,
25212                revision: None,
25213                max_parsed_files: None,
25214            },
25215        )
25216        .unwrap();
25217        let impact_report = impact::compute(
25218            dir.path(),
25219            impact::ImpactOptions {
25220                cached: true,
25221                revision: None,
25222                scope: None,
25223                limit: 10,
25224            },
25225        )
25226        .unwrap();
25227        let graph_nodes = store.all_nodes().unwrap();
25228        let graph_index = conflict_matrix_graph_index(&graph_nodes);
25229        let semantic_candidate = conflict_matrix_candidate_from_evidence(
25230            dir.path(),
25231            &evidence,
25232            &graph_index,
25233            &cached_diff,
25234            &impact_report,
25235        );
25236        assert!(semantic_candidate.semantic_dispatch_score > 0);
25237        assert!(
25238            semantic_candidate
25239                .semantic_dispatch_reasons
25240                .iter()
25241                .any(|reason| reason.contains("semantic_concept") && reason.contains("owned file")),
25242            "expected semantic ranking explanations, got {:?}",
25243            semantic_candidate.semantic_dispatch_reasons
25244        );
25245        assert!(
25246            semantic_candidate
25247                .semantic_related
25248                .iter()
25249                .any(|item| item.label == "graph navigation")
25250        );
25251
25252        let mut plain_candidate = semantic_candidate.clone();
25253        plain_candidate.target = "plain".to_string();
25254        plain_candidate.semantic_related.clear();
25255        plain_candidate.semantic_dispatch_score = 0;
25256        plain_candidate.semantic_dispatch_reasons.clear();
25257        let mut ranked = [plain_candidate, semantic_candidate];
25258        ranked.sort_by(|left, right| {
25259            left.risk
25260                .cmp(&right.risk)
25261                .then_with(|| left.risk_score.cmp(&right.risk_score))
25262                .then_with(|| {
25263                    right
25264                        .semantic_dispatch_score
25265                        .cmp(&left.semantic_dispatch_score)
25266                })
25267                .then_with(|| left.target.cmp(&right.target))
25268        });
25269        assert_eq!(ranked[0].target, "kgnv");
25270    }
25271
25272    #[test]
25273    fn dependency_dag_extracts_explicit_overlap_and_follow_up_edges() {
25274        let dir = setup_dependency_dag_project();
25275        let session = dir.path().join("tasks/software/tsift.md");
25276        let report = build_dependency_dag_report(dir.path(), None, &[], 4, 12).unwrap();
25277
25278        assert_eq!(report.contract_version, "dependency-dag-v1");
25279        assert_eq!(
25280            report.targets,
25281            vec![
25282                "prep".to_string(),
25283                "alpha".to_string(),
25284                "beta".to_string(),
25285                "gamma".to_string()
25286            ]
25287        );
25288        assert!(report.edges.iter().any(|edge| {
25289            edge.from == "prep" && edge.to == "alpha" && edge.kind == "explicit_depends_on"
25290        }));
25291        assert!(report.edges.iter().any(|edge| {
25292            edge.from == "alpha" && edge.to == "gamma" && edge.kind == "worker_result_follow_up"
25293        }));
25294        assert!(report.edges.iter().any(|edge| {
25295            edge.from == "alpha"
25296                && edge.to == "beta"
25297                && edge.kind == "shared_resource"
25298                && edge.shared_files.contains(&"main.rs".to_string())
25299                && edge.shared_symbols.contains(&"shared_helper".to_string())
25300        }));
25301        assert!(
25302            !report.cycle_diagnostics.has_cycles,
25303            "{:?}",
25304            report.cycle_diagnostics
25305        );
25306        assert_eq!(report.topo_batches[0].targets, vec!["prep".to_string()]);
25307        assert_eq!(report.topo_batches[1].targets, vec!["alpha".to_string()]);
25308        assert!(
25309            report.replay_commands[0].contains("dependency-dag"),
25310            "{:?}",
25311            report.replay_commands
25312        );
25313
25314        cmd_dependency_dag(
25315            &session,
25316            None,
25317            &["alpha".to_string(), "beta".to_string()],
25318            4,
25319            12,
25320            OutputFormat {
25321                json_output: true,
25322                compact: false,
25323                pretty: false,
25324                terse: false,
25325                schema: false,
25326                envelope: false,
25327            },
25328        )
25329        .unwrap();
25330    }
25331
25332    #[test]
25333    fn dependency_dag_reports_cycles_from_explicit_depends_on_text() {
25334        let dir = setup_dependency_dag_cycle_project();
25335        let report = build_dependency_dag_report(dir.path(), None, &[], 4, 12).unwrap();
25336
25337        assert!(report.cycle_diagnostics.has_cycles);
25338        assert_eq!(
25339            report.cycle_diagnostics.blocked_nodes,
25340            vec!["left".to_string(), "right".to_string()]
25341        );
25342        assert!(report.cycle_diagnostics.cycle_edges.iter().any(|edge| {
25343            edge.from == "left" && edge.to == "right" && edge.kind == "explicit_depends_on"
25344        }));
25345        assert!(report.cycle_diagnostics.cycle_edges.iter().any(|edge| {
25346            edge.from == "right" && edge.to == "left" && edge.kind == "explicit_depends_on"
25347        }));
25348    }
25349
25350    #[test]
25351    fn traversal_projection_queries_match_sqlite_and_convex_stores() {
25352        let dir = setup_traversal_project();
25353        let source_graph = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
25354        let projection = traversal_projection_from_graph(dir.path(), None, &source_graph).unwrap();
25355
25356        let mut sqlite = SqliteGraphStore::in_memory().unwrap();
25357        sqlite.replace_projection(&projection).unwrap();
25358        let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
25359        projection.upsert_into(&convex).unwrap();
25360
25361        let sqlite_graph = traversal_graph_from_store(dir.path(), &sqlite).unwrap();
25362        let convex_graph = traversal_graph_from_store(dir.path(), &convex).unwrap();
25363        assert_eq!(sqlite_graph.nodes.len(), convex_graph.nodes.len());
25364        assert_eq!(sqlite_graph.edges.len(), convex_graph.edges.len());
25365
25366        let sqlite_backlog = resolve_traversal_node(&sqlite_graph, "#kgnv").unwrap();
25367        let convex_helper = resolve_traversal_node(&convex_graph, "helper").unwrap();
25368        assert!(convex_graph.edges.iter().any(|edge| {
25369            edge.from == sqlite_backlog.handle
25370                && edge.to == convex_helper.handle
25371                && edge.relation == "mentions"
25372        }));
25373    }
25374
25375    #[test]
25376    fn graph_db_api_queries_sqlite_neighborhood_and_schema() {
25377        let dir = setup_traversal_project();
25378        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25379        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25380        let freshness = sqlite_graph_freshness(&store, "root").unwrap();
25381        assert_eq!(freshness.status, "current");
25382
25383        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
25384        let report = graph_db_report_from_store(
25385            dir.path(),
25386            None,
25387            "sqlite",
25388            GraphDbQuery::Neighborhood {
25389                id: backlog.handle.clone(),
25390                depth: 1,
25391                edge_kind: Some("mentions".to_string()),
25392                cursor: None,
25393                limit: None,
25394                property_filters: Vec::new(),
25395            },
25396            &store,
25397            freshness,
25398            Vec::new(),
25399        )
25400        .unwrap();
25401        assert!(
25402            report
25403                .edges
25404                .iter()
25405                .any(|edge| edge.from_id == backlog.handle && edge.kind == "mentions"),
25406            "expected backlog mention edge, got {:?}",
25407            report.edges
25408        );
25409        assert!(
25410            report.ranked_neighbors.iter().any(|neighbor| {
25411                neighbor.depth == Some(1)
25412                    && neighbor.edge_kinds.iter().any(|kind| kind == "mentions")
25413                    && neighbor.node_id != backlog.handle
25414                    && neighbor.handle_coverage_pct >= 95.0
25415                    && neighbor.duplicate_name_precision >= 0.99
25416            }),
25417            "expected ranked neighborhood neighbors with quality scores, got {:?}",
25418            report.ranked_neighbors
25419        );
25420        assert!(report.ranked_neighbors.len() <= GRAPH_DB_RANKED_NEIGHBOR_CAP);
25421        let ranking_gate = report.neighborhood_ranking_gate.as_ref().unwrap();
25422        assert!(!ranking_gate.ranked_output_default);
25423        assert_eq!(ranking_gate.default_order, "stable_node_id");
25424        assert!(
25425            ranking_gate
25426                .diagnostics
25427                .iter()
25428                .any(|diagnostic| diagnostic.contains("score-capped")),
25429            "{ranking_gate:?}"
25430        );
25431        assert!(
25432            ranking_gate
25433                .required_metrics
25434                .iter()
25435                .any(|metric| metric == "handle_coverage_pct")
25436        );
25437        assert!(
25438            ranking_gate
25439                .required_metrics
25440                .iter()
25441                .any(|metric| metric == "duplicate_name_precision")
25442        );
25443        assert!(
25444            report
25445                .page
25446                .as_ref()
25447                .unwrap()
25448                .diagnostics
25449                .iter()
25450                .any(|diagnostic| diagnostic.contains("idx_graph_edges_from_kind")),
25451            "expected SQLite neighborhood query plan diagnostics, got {:?}",
25452            report.page.as_ref().unwrap().diagnostics
25453        );
25454        let edge_id = graph_db_edge_key(
25455            report
25456                .edges
25457                .iter()
25458                .find(|edge| edge.from_id == backlog.handle && edge.kind == "mentions")
25459                .unwrap(),
25460        );
25461
25462        let edge_report = graph_db_report_from_store(
25463            dir.path(),
25464            None,
25465            "sqlite",
25466            GraphDbQuery::Edge {
25467                id: edge_id.clone(),
25468            },
25469            &store,
25470            sqlite_graph_freshness(&store, "root").unwrap(),
25471            Vec::new(),
25472        )
25473        .unwrap();
25474        assert_eq!(
25475            edge_report.edge.as_ref().map(graph_db_edge_key),
25476            Some(edge_id.clone())
25477        );
25478
25479        let edges_report = graph_db_report_from_store(
25480            dir.path(),
25481            None,
25482            "sqlite",
25483            GraphDbQuery::Edges {
25484                edge_kind: Some("mentions".to_string()),
25485                cursor: None,
25486                limit: Some(2),
25487                property_filters: Vec::new(),
25488            },
25489            &store,
25490            sqlite_graph_freshness(&store, "root").unwrap(),
25491            Vec::new(),
25492        )
25493        .unwrap();
25494        assert!(edges_report.edges.iter().any(|edge| edge.id == edge_id));
25495        assert_eq!(
25496            edges_report.page.as_ref().unwrap().returned_edges,
25497            edges_report.edges.len()
25498        );
25499
25500        let incident_report = graph_db_report_from_store(
25501            dir.path(),
25502            None,
25503            "sqlite",
25504            GraphDbQuery::Incident {
25505                id: backlog.handle.clone(),
25506                edge_kind: Some("mentions".to_string()),
25507                cursor: None,
25508                limit: Some(1),
25509                property_filters: Vec::new(),
25510            },
25511            &store,
25512            sqlite_graph_freshness(&store, "root").unwrap(),
25513            Vec::new(),
25514        )
25515        .unwrap();
25516        assert_eq!(incident_report.page.as_ref().unwrap().returned_edges, 1);
25517        assert!(
25518            incident_report
25519                .edges
25520                .iter()
25521                .all(|edge| edge.from_id == backlog.handle || edge.to_id == backlog.handle),
25522            "{:?}",
25523            incident_report.edges
25524        );
25525
25526        let schema_report = graph_db_report_from_store(
25527            dir.path(),
25528            None,
25529            "sqlite",
25530            GraphDbQuery::Schema,
25531            &store,
25532            sqlite_graph_freshness(&store, "root").unwrap(),
25533            Vec::new(),
25534        )
25535        .unwrap();
25536        assert!(
25537            schema_report
25538                .schema
25539                .unwrap()
25540                .operations
25541                .iter()
25542                .any(|operation| operation.command.starts_with("neighborhood"))
25543        );
25544    }
25545
25546    #[test]
25547    fn graph_db_status_uses_snapshot_fallback_when_rollback_journal_is_locked() {
25548        let dir = setup_traversal_project();
25549        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25550        let graph_db = dir.path().join(".tsift/graph.db");
25551        let _lock = hold_rollback_journal_lock(&graph_db);
25552
25553        let report =
25554            graph_db_operator_report_from_disk(dir.path(), None, &graph_db, "status", None, vec![])
25555                .unwrap();
25556
25557        assert_eq!(report.status, "current");
25558        assert_eq!(
25559            report.recovery,
25560            Some(index::ReadOnlyRecovery::SnapshotFallback)
25561        );
25562        assert!(
25563            report
25564                .warnings
25565                .iter()
25566                .any(|warning| warning.contains("rollback-journal lock")),
25567            "expected rollback-journal recovery warning, got {:?}",
25568            report.warnings
25569        );
25570    }
25571
25572    #[test]
25573    fn graph_db_status_copies_wal_sidecars_when_locked() {
25574        let dir = setup_traversal_project();
25575        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25576        let graph_db = dir.path().join(".tsift/graph.db");
25577        let _lock = hold_wal_database_lock(&graph_db);
25578
25579        let report =
25580            graph_db_operator_report_from_disk(dir.path(), None, &graph_db, "status", None, vec![])
25581                .unwrap();
25582
25583        assert_eq!(report.status, "current");
25584        assert_eq!(
25585            report.recovery,
25586            Some(index::ReadOnlyRecovery::SnapshotFallbackWal)
25587        );
25588        assert!(
25589            report
25590                .warnings
25591                .iter()
25592                .any(|warning| warning.contains("WAL-aware snapshot fallback")),
25593            "expected WAL recovery warning, got {:?}",
25594            report.warnings
25595        );
25596    }
25597
25598    #[test]
25599    fn graph_db_evidence_uses_snapshot_fallback_when_graph_db_is_locked() {
25600        let dir = setup_traversal_project();
25601        let session = dir.path().join("tasks/software/tsift.md");
25602        refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
25603        let graph_db = dir.path().join(".tsift/graph.db");
25604        let _lock = hold_rollback_journal_lock(&graph_db);
25605
25606        let result = cmd_graph_db(
25607            &session,
25608            None,
25609            GraphDbBackend::Sqlite,
25610            None,
25611            GraphDbQuery::Evidence {
25612                target: "kgnv".to_string(),
25613                depth: 3,
25614                limit: 8,
25615            },
25616            OutputFormat {
25617                json_output: false,
25618                compact: true,
25619                pretty: false,
25620                terse: false,
25621                schema: false,
25622                envelope: false,
25623            },
25624        );
25625
25626        assert!(result.is_ok());
25627    }
25628
25629    fn current_graph_db_freshness() -> GraphDbFreshnessReport {
25630        GraphDbFreshnessReport {
25631            status: "current".to_string(),
25632            fail_closed: false,
25633            projection_version: Some(GRAPH_PROJECTION_VERSION.to_string()),
25634            content_hash: Some("fixture".to_string()),
25635            source_watermark: None,
25636            diagnostics: Vec::new(),
25637        }
25638    }
25639
25640    #[test]
25641    fn graph_db_evidence_fails_closed_with_repair_command_for_stale_freshness() {
25642        let dir = setup_traversal_project();
25643        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
25644        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
25645        let stale = GraphDbFreshnessReport {
25646            status: "stale".to_string(),
25647            fail_closed: true,
25648            projection_version: Some("old-v0".to_string()),
25649            content_hash: None,
25650            source_watermark: None,
25651            diagnostics: vec!["projection content hash is missing".to_string()],
25652        };
25653
25654        let err = match graph_db_evidence_report_from_store(GraphDbEvidenceInput {
25655            root: dir.path(),
25656            scope: None,
25657            backend: "sqlite",
25658            target: "kgnv",
25659            depth: 3,
25660            limit: 8,
25661            store: &store,
25662            freshness: stale,
25663            warnings: Vec::new(),
25664        }) {
25665            Ok(_) => panic!("stale graph freshness should fail closed"),
25666            Err(err) => err,
25667        };
25668        let message = err.to_string();
25669        assert!(message.contains("failed closed"), "{message}");
25670        assert!(message.contains("graph-db --path"), "{message}");
25671        assert!(message.contains("refresh --json"), "{message}");
25672    }
25673
25674    fn paged_graph_ids(
25675        store: &impl GraphStore,
25676        cursor: Option<&str>,
25677    ) -> (Vec<String>, GraphDbPageReport) {
25678        let report = graph_db_report_from_store(
25679            Path::new("."),
25680            None,
25681            "fixture",
25682            GraphDbQuery::Kind {
25683                kind: "backlog".to_string(),
25684                cursor: cursor.map(str::to_string),
25685                limit: Some(2),
25686                property_filters: vec!["phase=open".to_string()],
25687            },
25688            store,
25689            current_graph_db_freshness(),
25690            Vec::new(),
25691        )
25692        .unwrap();
25693        (
25694            report.nodes.iter().map(|node| node.id.clone()).collect(),
25695            report.page.unwrap(),
25696        )
25697    }
25698
25699    #[test]
25700    fn graph_db_query_pagination_and_filters_match_sqlite_and_convex() {
25701        let nodes = (0..5)
25702            .map(|idx| {
25703                let phase = if idx == 1 { "closed" } else { "open" };
25704                SubstrateGraphNode::new(format!("gbak-{idx:02}"), "backlog", format!("#{idx:02}"))
25705                    .with_property("phase", phase)
25706            })
25707            .collect::<Vec<_>>();
25708        let projection = GraphProjection {
25709            nodes,
25710            edges: Vec::new(),
25711        };
25712        let sqlite = SqliteGraphStore::in_memory().unwrap();
25713        projection.upsert_into(&sqlite).unwrap();
25714        let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
25715        projection.upsert_into(&convex).unwrap();
25716
25717        let (sqlite_first_ids, sqlite_first_page) = paged_graph_ids(&sqlite, None);
25718        let (convex_first_ids, convex_first_page) = paged_graph_ids(&convex, None);
25719        assert_eq!(sqlite_first_ids, vec!["gbak-00", "gbak-02"]);
25720        assert_eq!(sqlite_first_ids, convex_first_ids);
25721        assert_eq!(sqlite_first_page.next_cursor.as_deref(), Some("gbak-02"));
25722        assert!(sqlite_first_page.truncated);
25723        assert_eq!(
25724            sqlite_first_page.returned_nodes,
25725            convex_first_page.returned_nodes
25726        );
25727        assert_eq!(
25728            sqlite_first_page.property_filters,
25729            convex_first_page.property_filters
25730        );
25731        assert!(
25732            sqlite_first_page
25733                .diagnostics
25734                .iter()
25735                .any(|diagnostic| diagnostic.contains("idx_graph_nodes_kind")),
25736            "expected SQLite kind query plan diagnostics, got {:?}",
25737            sqlite_first_page.diagnostics
25738        );
25739
25740        let cursor = sqlite_first_page.next_cursor.as_deref();
25741        let (sqlite_next_ids, sqlite_next_page) = paged_graph_ids(&sqlite, cursor);
25742        let (convex_next_ids, convex_next_page) = paged_graph_ids(&convex, cursor);
25743        assert_eq!(sqlite_next_ids, vec!["gbak-03", "gbak-04"]);
25744        assert_eq!(sqlite_next_ids, convex_next_ids);
25745        assert_eq!(sqlite_next_page.next_cursor, None);
25746        assert!(!sqlite_next_page.truncated);
25747        assert_eq!(
25748            sqlite_next_page.returned_nodes,
25749            convex_next_page.returned_nodes
25750        );
25751        assert_eq!(
25752            sqlite_next_page.property_filters,
25753            convex_next_page.property_filters
25754        );
25755    }
25756
25757    #[test]
25758    fn traversal_shortest_path_crosses_artifacts_and_symbols() {
25759        let dir = setup_traversal_project();
25760        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25761        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
25762        let main = resolve_traversal_node(&graph, "main").unwrap();
25763
25764        let path = traversal_shortest_handles(&graph.edges, &backlog.handle, &main.handle).unwrap();
25765        assert_eq!(path.first(), Some(&backlog.handle));
25766        assert_eq!(path.last(), Some(&main.handle));
25767        assert!(
25768            path.len() >= 3,
25769            "expected backlog -> symbol -> main, got {path:?}"
25770        );
25771    }
25772
25773    #[test]
25774    fn traversal_report_recommends_next_bugfix_nodes() {
25775        let dir = setup_traversal_project();
25776        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25777        let report = traversal_report(dir.path(), None, graph, Some("#kgnv"), None, 1, 50).unwrap();
25778
25779        assert_eq!(report.mode, "neighborhood");
25780        assert!(
25781            report
25782                .recommendations
25783                .iter()
25784                .any(|rec| rec.label == "helper" && rec.reason.contains("matched")),
25785            "expected helper recommendation, got {:?}",
25786            report.recommendations
25787        );
25788        assert!(
25789            !report.exploration.source_windows.is_empty(),
25790            "expected exploration source windows"
25791        );
25792        assert!(
25793            report
25794                .exploration
25795                .no_reread_guidance
25796                .contains("avoid whole-file reads")
25797        );
25798    }
25799
25800    #[test]
25801    fn traversal_graph_refreshes_stale_index_before_loading_symbols() {
25802        let dir = setup_traversal_project();
25803        std::thread::sleep(std::time::Duration::from_millis(50));
25804        std::fs::write(
25805            dir.path().join("main.rs"),
25806            "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n",
25807        )
25808        .unwrap();
25809
25810        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25811
25812        assert!(
25813            graph
25814                .warnings
25815                .iter()
25816                .any(|warning| warning.contains("index refreshed")
25817                    && warning.contains("graph traversal packet")),
25818            "expected refresh diagnostic, got {:?}",
25819            graph.warnings
25820        );
25821        assert!(resolve_traversal_node(&graph, "fresh_helper").is_some());
25822
25823        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
25824        let summary = db.compute_changes(dir.path()).unwrap();
25825        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
25826    }
25827
25828    #[test]
25829    fn traversal_graph_falls_back_to_raw_source_when_stale_refresh_is_blocked() {
25830        let dir = setup_traversal_project();
25831        let db_path = dir.path().join(".tsift/index.db");
25832        let _writer = hold_writer_lock(&index::writer_lock_path(&db_path));
25833        std::thread::sleep(std::time::Duration::from_millis(50));
25834        std::fs::write(
25835            dir.path().join("main.rs"),
25836            "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n",
25837        )
25838        .unwrap();
25839
25840        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25841        let file = resolve_traversal_node(&graph, "main.rs").unwrap();
25842
25843        assert!(
25844            graph
25845                .warnings
25846                .iter()
25847                .any(|warning| warning.contains("falling back to raw source file nodes")),
25848            "expected raw-source fallback diagnostic, got {:?}",
25849            graph.warnings
25850        );
25851        assert!(
25852            file.detail
25853                .as_deref()
25854                .is_some_and(|detail| detail.contains("raw source fallback")),
25855            "expected raw-source detail, got {:?}",
25856            file.detail
25857        );
25858        assert!(
25859            file.expand.contains("source-read"),
25860            "expected source-read fallback command, got {}",
25861            file.expand
25862        );
25863        assert!(
25864            resolve_traversal_node(&graph, "helper").is_none(),
25865            "stale symbol evidence should be skipped when refresh is blocked"
25866        );
25867    }
25868
25869    #[test]
25870    fn traversal_cmd_supports_json_and_html_outputs() {
25871        let dir = setup_traversal_project();
25872        cmd_traverse(
25873            Some("#kgnv"),
25874            Some("main"),
25875            dir.path(),
25876            None,
25877            1,
25878            50,
25879            TraverseFormat::Json,
25880            false,
25881            false,
25882            false,
25883            None,
25884        )
25885        .unwrap();
25886        cmd_traverse(
25887            None,
25888            None,
25889            dir.path(),
25890            None,
25891            1,
25892            50,
25893            TraverseFormat::Html,
25894            false,
25895            false,
25896            false,
25897            None,
25898        )
25899        .unwrap();
25900    }
25901
25902    #[test]
25903    fn traversal_html_renders_inline_graph_visualization() {
25904        let dir = setup_traversal_project();
25905        seed_traversal_semantic_summaries(dir.path());
25906        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25907        let report = traversal_report(dir.path(), None, graph, None, None, 1, 50).unwrap();
25908        let html = traversal_report_html(&report).unwrap();
25909
25910        assert!(html.contains("id=\"graph-canvas\""));
25911        assert!(html.contains("semantic_concept"));
25912        assert!(html.contains("graph navigation"));
25913        assert!(html.contains("JSON.parse"));
25914    }
25915
25916    #[test]
25917    fn compact_helpers_trim_scores_and_snippets() {
25918        assert_eq!(format_score(0.12345, true), "0.12");
25919        assert_eq!(format_score(0.12345, false), "0.1235");
25920        let snippet = compact_snippet("    first line with useful context\nsecond");
25921        assert_eq!(snippet.as_deref(), Some("first line with useful context"));
25922    }
25923
25924    #[test]
25925    fn compact_members_caps_list() {
25926        let members: Vec<graph::CommunityMember> = ["a", "b", "c", "d", "e", "f"]
25927            .iter()
25928            .map(|n| graph::CommunityMember::new(*n))
25929            .collect();
25930        assert_eq!(compact_members(&members, 5), "a, b, c, d, e (+1 more)");
25931    }
25932
25933    #[test]
25934    fn abbreviate_kind_maps_common_kinds() {
25935        assert_eq!(abbreviate_kind("function"), "fn");
25936        assert_eq!(abbreviate_kind("method"), "meth");
25937        assert_eq!(abbreviate_kind("class"), "cls");
25938        assert_eq!(abbreviate_kind("interface"), "iface");
25939        assert_eq!(abbreviate_kind("type_alias"), "type");
25940        assert_eq!(abbreviate_kind("data_class"), "data_cls");
25941        assert_eq!(abbreviate_kind("sealed_class"), "sealed_cls");
25942        assert_eq!(abbreviate_kind("enum_class"), "enum_cls");
25943        assert_eq!(abbreviate_kind("companion_object"), "comp_obj");
25944        assert_eq!(abbreviate_kind("object"), "obj");
25945        assert_eq!(abbreviate_kind("heading"), "h");
25946        assert_eq!(abbreviate_kind("code_block"), "code");
25947        // short kinds pass through
25948        assert_eq!(abbreviate_kind("struct"), "struct");
25949        assert_eq!(abbreviate_kind("trait"), "trait");
25950        assert_eq!(abbreviate_kind("enum"), "enum");
25951        assert_eq!(abbreviate_kind("const"), "const");
25952        assert_eq!(abbreviate_kind("unknown_kind"), "unknown_kind");
25953    }
25954
25955    #[test]
25956    fn abbreviate_match_type_maps_search_types() {
25957        assert_eq!(abbreviate_match_type("exact_name"), "exact");
25958        assert_eq!(abbreviate_match_type("partial_tags"), "partial");
25959        assert_eq!(abbreviate_match_type("all_tags"), "all_tags");
25960        assert_eq!(abbreviate_match_type("other_type"), "other_type");
25961    }
25962
25963    #[test]
25964    fn explain_compact_groups_edges_by_file() {
25965        let edges = vec![
25966            index::StoredEdge {
25967                caller_file: "src/main.rs".to_string(),
25968                caller_name: "main".to_string(),
25969                caller_line: 1,
25970                callee_name: "helper".to_string(),
25971                call_site_line: 2,
25972                tagpath_handle: None,
25973            },
25974            index::StoredEdge {
25975                caller_file: "src/main.rs".to_string(),
25976                caller_name: "main".to_string(),
25977                caller_line: 1,
25978                callee_name: "render".to_string(),
25979                call_site_line: 3,
25980                tagpath_handle: None,
25981            },
25982        ];
25983        let lines = format_edge_groups(&edges, false);
25984        assert_eq!(lines, vec!["  src/main.rs (2): helper, render"]);
25985    }
25986
25987    #[test]
25988    fn search_hit_groups_preserve_file_counts_and_samples() {
25989        let dir = tempfile::tempdir().unwrap();
25990        let root = dir.path();
25991        let main_rs = root.join("src/main.rs");
25992        fs::create_dir_all(main_rs.parent().unwrap()).unwrap();
25993        fs::write(&main_rs, "claudescore-3 anchor\nclaudescore-3 follow-up\n").unwrap();
25994        let freshness = exact_search_file_timestamp(&main_rs);
25995        let hits = vec![
25996            sift::SearchHit {
25997                artifact_id: "a".to_string(),
25998                artifact_kind: sift::ContextArtifactKind::File,
25999                path: main_rs.display().to_string(),
26000                rank: 1,
26001                score: 10.0,
26002                confidence: sift::ScoreConfidence::High,
26003                location: Some("line 3".to_string()),
26004                snippet: "claudescore-3 anchor".to_string(),
26005                provenance: sift::ArtifactProvenance {
26006                    adapter: sift::AcquisitionAdapterKind::FileSystem,
26007                    source: "ripgrep -F".to_string(),
26008                    synthetic: false,
26009                },
26010                freshness: freshness.clone(),
26011                budget: sift::ArtifactBudget::from_text("claudescore-3 anchor", 1),
26012            },
26013            sift::SearchHit {
26014                artifact_id: "b".to_string(),
26015                artifact_kind: sift::ContextArtifactKind::File,
26016                path: main_rs.display().to_string(),
26017                rank: 2,
26018                score: 9.0,
26019                confidence: sift::ScoreConfidence::High,
26020                location: Some("line 7".to_string()),
26021                snippet: "claudescore-3 follow-up".to_string(),
26022                provenance: sift::ArtifactProvenance {
26023                    adapter: sift::AcquisitionAdapterKind::FileSystem,
26024                    source: "ripgrep -F".to_string(),
26025                    synthetic: false,
26026                },
26027                freshness: freshness.clone(),
26028                budget: sift::ArtifactBudget::from_text("claudescore-3 follow-up", 1),
26029            },
26030            sift::SearchHit {
26031                artifact_id: "c".to_string(),
26032                artifact_kind: sift::ContextArtifactKind::File,
26033                path: main_rs.display().to_string(),
26034                rank: 3,
26035                score: 8.0,
26036                confidence: sift::ScoreConfidence::High,
26037                location: Some("line 9".to_string()),
26038                snippet: "claudescore-3 tail".to_string(),
26039                provenance: sift::ArtifactProvenance {
26040                    adapter: sift::AcquisitionAdapterKind::FileSystem,
26041                    source: "ripgrep -F".to_string(),
26042                    synthetic: false,
26043                },
26044                freshness,
26045                budget: sift::ArtifactBudget::from_text("claudescore-3 tail", 1),
26046            },
26047        ];
26048
26049        let groups = group_search_hits(&hits, root, false);
26050        assert_eq!(groups.len(), 1);
26051        assert_eq!(groups[0].path, "src/main.rs");
26052        assert_eq!(groups[0].hits, 3);
26053        assert_eq!(
26054            groups[0].samples,
26055            vec![
26056                "line 3: claudescore-3 anchor".to_string(),
26057                "line 7: claudescore-3 follow-up".to_string()
26058            ]
26059        );
26060        assert!(should_collapse_search_hits(&hits, root, false));
26061    }
26062
26063    #[test]
26064    fn dense_edge_groups_trigger_collapse() {
26065        let edges = vec![
26066            index::StoredEdge {
26067                caller_file: "src/main.rs".to_string(),
26068                caller_name: "main".to_string(),
26069                caller_line: 1,
26070                callee_name: "helper".to_string(),
26071                call_site_line: 2,
26072                tagpath_handle: None,
26073            },
26074            index::StoredEdge {
26075                caller_file: "src/main.rs".to_string(),
26076                caller_name: "beta".to_string(),
26077                caller_line: 5,
26078                callee_name: "helper".to_string(),
26079                call_site_line: 6,
26080                tagpath_handle: None,
26081            },
26082            index::StoredEdge {
26083                caller_file: "src/main.rs".to_string(),
26084                caller_name: "gamma".to_string(),
26085                caller_line: 9,
26086                callee_name: "helper".to_string(),
26087                call_site_line: 10,
26088                tagpath_handle: None,
26089            },
26090        ];
26091        assert!(should_collapse_edge_groups(&edges));
26092    }
26093
26094    // --- workspace indexing ---
26095
26096    fn setup_workspace() -> tempfile::TempDir {
26097        let dir = tempfile::tempdir().unwrap();
26098        let root = dir.path();
26099        std::fs::write(
26100            root.join(".gitmodules"),
26101            r#"[submodule "src/alpha"]
26102	path = src/alpha
26103	url = https://example.com/alpha
26104[submodule "src/beta"]
26105	path = src/beta
26106	url = https://example.com/beta
26107"#,
26108        )
26109        .unwrap();
26110        let alpha = root.join("src/alpha");
26111        let beta = root.join("src/beta");
26112        std::fs::create_dir_all(&alpha).unwrap();
26113        std::fs::create_dir_all(&beta).unwrap();
26114        std::fs::write(
26115            alpha.join("lib.rs"),
26116            "fn alpha_helper() {}\nfn alpha_main() { alpha_helper(); }",
26117        )
26118        .unwrap();
26119        std::fs::write(beta.join("lib.rs"), "fn beta_func() {}").unwrap();
26120        dir
26121    }
26122
26123    fn setup_workspace_with_duplicate_leaf_names() -> tempfile::TempDir {
26124        let dir = tempfile::tempdir().unwrap();
26125        let root = dir.path();
26126        std::fs::write(
26127            root.join(".gitmodules"),
26128            r#"[submodule "pkg/app/foo"]
26129	path = pkg/app/foo
26130	url = https://example.com/pkg-app-foo
26131[submodule "vendor/foo"]
26132	path = vendor/foo
26133	url = https://example.com/vendor-foo
26134"#,
26135        )
26136        .unwrap();
26137        let pkg_foo = root.join("pkg/app/foo");
26138        let vendor_foo = root.join("vendor/foo");
26139        std::fs::create_dir_all(&pkg_foo).unwrap();
26140        std::fs::create_dir_all(&vendor_foo).unwrap();
26141        std::fs::write(
26142            pkg_foo.join("lib.rs"),
26143            "fn pkg_only() {}\nfn shared_name() { pkg_only(); }\n",
26144        )
26145        .unwrap();
26146        std::fs::write(
26147            vendor_foo.join("lib.rs"),
26148            "fn vendor_only() {}\nfn shared_name() { vendor_only(); }\n",
26149        )
26150        .unwrap();
26151        dir
26152    }
26153
26154    #[test]
26155    fn workspace_index_creates_per_submodule_dbs() {
26156        let dir = setup_workspace();
26157        cmd_index(
26158            dir.path(),
26159            false,
26160            false,
26161            false,
26162            false,
26163            false,
26164            true,
26165            None,
26166            false,
26167            false,
26168            false,
26169            false,
26170            false,
26171            false,
26172        )
26173        .unwrap();
26174        assert!(dir.path().join(".tsift/indexes/alpha/index.db").exists());
26175        assert!(dir.path().join(".tsift/indexes/beta/index.db").exists());
26176    }
26177
26178    #[test]
26179    fn workspace_index_single_submodule() {
26180        let dir = setup_workspace();
26181        cmd_index(
26182            dir.path(),
26183            false,
26184            false,
26185            false,
26186            false,
26187            false,
26188            false,
26189            Some("alpha"),
26190            false,
26191            false,
26192            false,
26193            false,
26194            false,
26195            false,
26196        )
26197        .unwrap();
26198        assert!(dir.path().join(".tsift/indexes/alpha/index.db").exists());
26199        assert!(!dir.path().join(".tsift/indexes/beta/index.db").exists());
26200    }
26201
26202    #[test]
26203    fn workspace_index_single_submodule_errors_on_unknown_scope() {
26204        let dir = setup_workspace();
26205
26206        let err = cmd_index(
26207            dir.path(),
26208            false,
26209            false,
26210            false,
26211            false,
26212            false,
26213            false,
26214            Some("missing"),
26215            false,
26216            false,
26217            false,
26218            false,
26219            false,
26220            false,
26221        )
26222        .unwrap_err();
26223
26224        let msg = err.to_string();
26225        assert!(msg.contains("unknown scope `missing`"));
26226        assert!(msg.contains("Available scopes: alpha, beta"));
26227        assert!(!dir.path().join(".tsift/indexes/missing/index.db").exists());
26228    }
26229
26230    #[test]
26231    fn workspace_index_uses_unique_scope_ids_when_leaf_names_collide() {
26232        let dir = setup_workspace_with_duplicate_leaf_names();
26233        cmd_index(
26234            dir.path(),
26235            false,
26236            false,
26237            false,
26238            false,
26239            false,
26240            true,
26241            None,
26242            false,
26243            false,
26244            false,
26245            false,
26246            false,
26247            false,
26248        )
26249        .unwrap();
26250
26251        assert!(
26252            dir.path()
26253                .join(".tsift/indexes/pkg/app/foo/index.db")
26254                .exists()
26255        );
26256        assert!(
26257            dir.path()
26258                .join(".tsift/indexes/vendor/foo/index.db")
26259                .exists()
26260        );
26261    }
26262
26263    #[test]
26264    fn federated_search_across_submodules() {
26265        let dir = setup_workspace();
26266        cmd_index(
26267            dir.path(),
26268            false,
26269            false,
26270            false,
26271            false,
26272            false,
26273            true,
26274            None,
26275            false,
26276            false,
26277            false,
26278            false,
26279            false,
26280            false,
26281        )
26282        .unwrap();
26283        let (hits, _diag) = federated_symbol_search(
26284            dir.path(),
26285            "alpha_helper",
26286            10,
26287            &TagpathSearchOpts {
26288                no_tagpath: true,
26289                strict: false,
26290            },
26291        )
26292        .unwrap();
26293        assert!(
26294            !hits.is_empty(),
26295            "should find alpha_helper via federated search"
26296        );
26297    }
26298
26299    #[test]
26300    fn federated_search_respects_isolation() {
26301        let dir = setup_workspace();
26302        let tsift_dir = dir.path().join(".tsift");
26303        std::fs::create_dir_all(&tsift_dir).unwrap();
26304        std::fs::write(
26305            tsift_dir.join("config.toml"),
26306            r#"
26307[overrides.alpha]
26308tier = "isolated"
26309"#,
26310        )
26311        .unwrap();
26312        cmd_index(
26313            dir.path(),
26314            false,
26315            false,
26316            false,
26317            false,
26318            false,
26319            true,
26320            None,
26321            false,
26322            false,
26323            false,
26324            false,
26325            false,
26326            false,
26327        )
26328        .unwrap();
26329        let (hits, _diag) = federated_symbol_search(
26330            dir.path(),
26331            "alpha_helper",
26332            10,
26333            &TagpathSearchOpts {
26334                no_tagpath: true,
26335                strict: false,
26336            },
26337        )
26338        .unwrap();
26339        assert!(
26340            hits.is_empty(),
26341            "isolated submodule should not appear in federated search"
26342        );
26343    }
26344
26345    #[test]
26346    fn federated_lexical_search_respects_isolation() {
26347        let dir = setup_workspace();
26348        let tsift_dir = dir.path().join(".tsift");
26349        std::fs::create_dir_all(&tsift_dir).unwrap();
26350        std::fs::write(
26351            tsift_dir.join("config.toml"),
26352            r#"
26353[overrides.alpha]
26354tier = "isolated"
26355"#,
26356        )
26357        .unwrap();
26358        cmd_index(
26359            dir.path(),
26360            false,
26361            false,
26362            false,
26363            false,
26364            false,
26365            true,
26366            None,
26367            false,
26368            false,
26369            false,
26370            false,
26371            false,
26372            false,
26373        )
26374        .unwrap();
26375
26376        let response = federated_sift_search(
26377            dir.path(),
26378            &dir.path().join(".tsift/search-cache"),
26379            "fn",
26380            10,
26381            0,
26382            "lexical",
26383        )
26384        .unwrap();
26385
26386        assert!(
26387            !response.hits.is_empty(),
26388            "shared scopes should still contribute lexical hits"
26389        );
26390        assert!(
26391            response
26392                .hits
26393                .iter()
26394                .all(|hit| hit.path.ends_with("src/beta/lib.rs")),
26395            "isolated scope should not leak lexical hits: {:?}",
26396            response.hits
26397        );
26398    }
26399
26400    #[test]
26401    fn federated_lexical_search_respects_private_tier() {
26402        let dir = setup_workspace();
26403        let tsift_dir = dir.path().join(".tsift");
26404        std::fs::create_dir_all(&tsift_dir).unwrap();
26405        std::fs::write(
26406            tsift_dir.join("config.toml"),
26407            r#"
26408[overrides.alpha]
26409tier = "private"
26410"#,
26411        )
26412        .unwrap();
26413        cmd_index(
26414            dir.path(),
26415            false,
26416            false,
26417            false,
26418            false,
26419            false,
26420            true,
26421            None,
26422            false,
26423            false,
26424            false,
26425            false,
26426            false,
26427            false,
26428        )
26429        .unwrap();
26430
26431        let response = federated_sift_search(
26432            dir.path(),
26433            &dir.path().join(".tsift/search-cache"),
26434            "fn",
26435            10,
26436            0,
26437            "lexical",
26438        )
26439        .unwrap();
26440
26441        assert!(
26442            !response.hits.is_empty(),
26443            "shared scopes should still contribute lexical hits"
26444        );
26445        assert!(
26446            response
26447                .hits
26448                .iter()
26449                .all(|hit| hit.path.ends_with("src/beta/lib.rs")),
26450            "private scope should not leak lexical hits: {:?}",
26451            response.hits
26452        );
26453    }
26454
26455    #[test]
26456    fn scoped_search_finds_submodule_symbols() {
26457        let dir = setup_workspace();
26458        cmd_index(
26459            dir.path(),
26460            false,
26461            false,
26462            false,
26463            false,
26464            false,
26465            true,
26466            None,
26467            false,
26468            false,
26469            false,
26470            false,
26471            false,
26472            false,
26473        )
26474        .unwrap();
26475        let cfg = config::Config::load(dir.path()).unwrap();
26476        let db_path = cfg.db_path_for(dir.path(), "alpha");
26477        let db = index::IndexDb::open(&db_path).unwrap();
26478        let hits = db.symbol_search("alpha_main", 10).unwrap();
26479        assert!(!hits.is_empty());
26480        assert_eq!(hits[0].name, "alpha_main");
26481    }
26482
26483    #[test]
26484    fn scoped_search_cmd_errors_on_unknown_scope() {
26485        let dir = setup_workspace();
26486
26487        let err = cmd_search(
26488            "alpha_main".to_string(),
26489            Some(dir.path().to_path_buf()),
26490            5,
26491            Some("lexical".to_string()),
26492            Some("missing".to_string()),
26493            false,
26494            false,
26495            false,
26496            0,
26497            false,
26498            false,
26499            false,
26500            false,
26501            false,
26502            false,
26503        )
26504        .unwrap_err();
26505
26506        let msg = err.to_string();
26507        assert!(msg.contains("unknown scope `missing`"));
26508        assert!(msg.contains("Available scopes: alpha, beta"));
26509    }
26510
26511    #[test]
26512    fn scoped_search_cmd_errors_on_ambiguous_legacy_scope_name() {
26513        let dir = setup_workspace_with_duplicate_leaf_names();
26514        cmd_index(
26515            dir.path(),
26516            false,
26517            false,
26518            false,
26519            false,
26520            false,
26521            true,
26522            None,
26523            false,
26524            false,
26525            false,
26526            false,
26527            false,
26528            false,
26529        )
26530        .unwrap();
26531
26532        let err = cmd_search(
26533            "vendor_only".to_string(),
26534            Some(dir.path().to_path_buf()),
26535            5,
26536            Some("lexical".to_string()),
26537            Some("foo".to_string()),
26538            false,
26539            false,
26540            false,
26541            0,
26542            false,
26543            false,
26544            false,
26545            false,
26546            false,
26547            false,
26548        )
26549        .unwrap_err();
26550
26551        let msg = err.to_string();
26552        assert!(msg.contains("ambiguous scope `foo`"));
26553        assert!(msg.contains("pkg/app/foo"));
26554        assert!(msg.contains("vendor/foo"));
26555    }
26556
26557    #[test]
26558    fn scoped_graph_query() {
26559        let dir = setup_workspace();
26560        cmd_index(
26561            dir.path(),
26562            false,
26563            false,
26564            false,
26565            false,
26566            false,
26567            true,
26568            None,
26569            false,
26570            false,
26571            false,
26572            false,
26573            false,
26574            false,
26575        )
26576        .unwrap();
26577        let cfg = config::Config::load(dir.path()).unwrap();
26578        let db_path = cfg.db_path_for(dir.path(), "alpha");
26579        let db = index::IndexDb::open(&db_path).unwrap();
26580        let callees = db.callees_of("alpha_main").unwrap();
26581        let names: Vec<&str> = callees.iter().map(|e| e.callee_name.as_str()).collect();
26582        assert!(names.contains(&"alpha_helper"));
26583    }
26584
26585    fn assert_workspace_query_requires_scope(err: anyhow::Error) {
26586        let msg = err.to_string();
26587        assert!(msg.contains("require `--scope <scope>`"), "{msg}");
26588        assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
26589        assert!(msg.contains("Indexed scopes: alpha, beta"), "{msg}");
26590        assert!(
26591            !msg.contains("no index found at"),
26592            "workspace query should fail with scope guidance, got: {msg}"
26593        );
26594    }
26595
26596    fn assert_workspace_search_requires_explicit_target(err: anyhow::Error) {
26597        let msg = err.to_string();
26598        assert!(
26599            msg.contains("requires `--scope <scope>` or `--federated`"),
26600            "{msg}"
26601        );
26602        assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
26603        assert!(msg.contains("Indexed scopes: alpha, beta"), "{msg}");
26604        assert!(
26605            !msg.contains("autoindexing index"),
26606            "workspace search should fail before creating a shared root index: {msg}"
26607        );
26608    }
26609
26610    #[test]
26611    fn graph_cmd_requires_scope_for_workspace_root_without_shared_index() {
26612        let dir = setup_workspace();
26613        cmd_index(
26614            dir.path(),
26615            false,
26616            false,
26617            false,
26618            false,
26619            false,
26620            true,
26621            None,
26622            false,
26623            false,
26624            false,
26625            false,
26626            false,
26627            false,
26628        )
26629        .unwrap();
26630
26631        let err = cmd_graph(
26632            "alpha_main",
26633            dir.path(),
26634            false,
26635            false,
26636            None,
26637            20,
26638            false,
26639            false,
26640            false,
26641            false,
26642            false,
26643            false,
26644            false,
26645            TagpathSearchOpts::default(),
26646        )
26647        .unwrap_err();
26648
26649        assert_workspace_query_requires_scope(err);
26650    }
26651
26652    #[test]
26653    fn graph_cmd_infers_scope_from_nested_workspace_path() {
26654        let dir = setup_workspace();
26655        cmd_index(
26656            dir.path(),
26657            false,
26658            false,
26659            false,
26660            false,
26661            false,
26662            true,
26663            None,
26664            false,
26665            false,
26666            false,
26667            false,
26668            false,
26669            false,
26670        )
26671        .unwrap();
26672        let nested = dir.path().join("src/alpha/nested");
26673        std::fs::create_dir_all(&nested).unwrap();
26674
26675        let result = cmd_graph(
26676            "alpha_main",
26677            &nested,
26678            false,
26679            false,
26680            None,
26681            20,
26682            false,
26683            false,
26684            false,
26685            false,
26686            false,
26687            false,
26688            false,
26689            TagpathSearchOpts::default(),
26690        );
26691
26692        assert!(result.is_ok());
26693    }
26694
26695    #[test]
26696    fn communities_cmd_requires_scope_for_workspace_root_without_shared_index() {
26697        let dir = setup_workspace();
26698        cmd_index(
26699            dir.path(),
26700            false,
26701            false,
26702            false,
26703            false,
26704            false,
26705            true,
26706            None,
26707            false,
26708            false,
26709            false,
26710            false,
26711            false,
26712            false,
26713        )
26714        .unwrap();
26715
26716        let err = cmd_communities(
26717            dir.path(),
26718            None,
26719            1,
26720            10,
26721            false,
26722            false,
26723            false,
26724            false,
26725            false,
26726            false,
26727            TagpathSearchOpts::default(),
26728        )
26729        .unwrap_err();
26730
26731        assert_workspace_query_requires_scope(err);
26732    }
26733
26734    #[test]
26735    fn communities_cmd_infers_scope_from_nested_workspace_path() {
26736        let dir = setup_workspace();
26737        cmd_index(
26738            dir.path(),
26739            false,
26740            false,
26741            false,
26742            false,
26743            false,
26744            true,
26745            None,
26746            false,
26747            false,
26748            false,
26749            false,
26750            false,
26751            false,
26752        )
26753        .unwrap();
26754        let nested = dir.path().join("src/alpha/nested");
26755        std::fs::create_dir_all(&nested).unwrap();
26756
26757        let result = cmd_communities(
26758            &nested,
26759            None,
26760            1,
26761            10,
26762            false,
26763            false,
26764            false,
26765            false,
26766            false,
26767            false,
26768            TagpathSearchOpts::default(),
26769        );
26770
26771        assert!(result.is_ok());
26772    }
26773
26774    #[test]
26775    fn path_cmd_requires_scope_for_workspace_root_without_shared_index() {
26776        let dir = setup_workspace();
26777        cmd_index(
26778            dir.path(),
26779            false,
26780            false,
26781            false,
26782            false,
26783            false,
26784            true,
26785            None,
26786            false,
26787            false,
26788            false,
26789            false,
26790            false,
26791            false,
26792        )
26793        .unwrap();
26794
26795        let err = cmd_path(
26796            "alpha_main",
26797            "alpha_helper",
26798            dir.path(),
26799            None,
26800            false,
26801            false,
26802            false,
26803            false,
26804            false,
26805            TagpathSearchOpts::default(),
26806        )
26807        .unwrap_err();
26808
26809        assert_workspace_query_requires_scope(err);
26810    }
26811
26812    #[test]
26813    fn path_cmd_infers_scope_from_nested_workspace_path() {
26814        let dir = setup_workspace();
26815        cmd_index(
26816            dir.path(),
26817            false,
26818            false,
26819            false,
26820            false,
26821            false,
26822            true,
26823            None,
26824            false,
26825            false,
26826            false,
26827            false,
26828            false,
26829            false,
26830        )
26831        .unwrap();
26832        let nested = dir.path().join("src/alpha/nested");
26833        std::fs::create_dir_all(&nested).unwrap();
26834
26835        let result = cmd_path(
26836            "alpha_main",
26837            "alpha_helper",
26838            &nested,
26839            None,
26840            false,
26841            false,
26842            false,
26843            false,
26844            false,
26845            TagpathSearchOpts::default(),
26846        );
26847
26848        assert!(result.is_ok());
26849    }
26850
26851    #[test]
26852    fn path_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
26853        let dir = setup_graph_index();
26854        let db_path = dir.path().join(".tsift/index.db");
26855        let _lock = hold_rollback_journal_lock(&db_path);
26856
26857        let result = cmd_path(
26858            "main",
26859            "helper",
26860            dir.path(),
26861            None,
26862            false,
26863            false,
26864            false,
26865            false,
26866            false,
26867            TagpathSearchOpts::default(),
26868        );
26869
26870        assert!(result.is_ok());
26871    }
26872
26873    #[test]
26874    fn explain_cmd_requires_scope_for_workspace_root_without_shared_index() {
26875        let dir = setup_workspace();
26876        cmd_index(
26877            dir.path(),
26878            false,
26879            false,
26880            false,
26881            false,
26882            false,
26883            true,
26884            None,
26885            false,
26886            false,
26887            false,
26888            false,
26889            false,
26890            false,
26891        )
26892        .unwrap();
26893
26894        let err = cmd_explain(
26895            "alpha_main",
26896            dir.path(),
26897            None,
26898            15,
26899            false,
26900            false,
26901            false,
26902            false,
26903            false,
26904            false,
26905            false,
26906        )
26907        .unwrap_err();
26908
26909        assert_workspace_query_requires_scope(err);
26910    }
26911
26912    #[test]
26913    fn explain_cmd_infers_scope_from_nested_workspace_path() {
26914        let dir = setup_workspace();
26915        cmd_index(
26916            dir.path(),
26917            false,
26918            false,
26919            false,
26920            false,
26921            false,
26922            true,
26923            None,
26924            false,
26925            false,
26926            false,
26927            false,
26928            false,
26929            false,
26930        )
26931        .unwrap();
26932        let nested = dir.path().join("src/alpha/nested");
26933        std::fs::create_dir_all(&nested).unwrap();
26934
26935        let result = cmd_explain(
26936            "alpha_main",
26937            &nested,
26938            None,
26939            15,
26940            false,
26941            false,
26942            false,
26943            false,
26944            false,
26945            false,
26946            false,
26947        );
26948
26949        assert!(result.is_ok());
26950    }
26951
26952    #[test]
26953    fn explain_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
26954        let dir = setup_graph_index();
26955        let db_path = dir.path().join(".tsift/index.db");
26956        let _lock = hold_rollback_journal_lock(&db_path);
26957
26958        let result = cmd_explain(
26959            "main",
26960            dir.path(),
26961            None,
26962            15,
26963            false,
26964            false,
26965            false,
26966            false,
26967            false,
26968            false,
26969            false,
26970        );
26971
26972        assert!(result.is_ok());
26973    }
26974
26975    // --- community detection ---
26976
26977    #[test]
26978    fn community_detection_groups_related() {
26979        let dir = setup_graph_index();
26980        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
26981        let edges = db.all_edges().unwrap();
26982        let result = graph::detect_communities(&edges);
26983        assert!(result.node_count > 0);
26984        assert!(!result.communities.is_empty());
26985    }
26986
26987    #[test]
26988    fn community_cmd_autoindexes_missing_index_by_default() {
26989        let dir = tempfile::tempdir().unwrap();
26990        let result = cmd_communities(
26991            dir.path(),
26992            None,
26993            2,
26994            10,
26995            false,
26996            false,
26997            false,
26998            false,
26999            false,
27000            false,
27001            TagpathSearchOpts::default(),
27002        );
27003
27004        assert!(result.is_ok());
27005        assert!(dir.path().join(".tsift/index.db").exists());
27006    }
27007
27008    // --- path ---
27009
27010    #[test]
27011    fn path_finds_connected_symbols() {
27012        let dir = setup_graph_index();
27013        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
27014        let edges = db.all_edges().unwrap();
27015        let result = graph::shortest_path(&edges, "main", "helper");
27016        assert!(result.is_some());
27017        let path = result.unwrap();
27018        assert_eq!(path.hops, 1);
27019    }
27020
27021    #[test]
27022    fn path_returns_none_for_unknown() {
27023        let dir = setup_graph_index();
27024        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
27025        let edges = db.all_edges().unwrap();
27026        assert!(graph::shortest_path(&edges, "main", "nonexistent").is_none());
27027    }
27028
27029    #[test]
27030    fn path_cmd_autoindexes_missing_index_by_default() {
27031        let dir = tempfile::tempdir().unwrap();
27032        let result = cmd_path(
27033            "a",
27034            "b",
27035            dir.path(),
27036            None,
27037            false,
27038            false,
27039            false,
27040            false,
27041            false,
27042            TagpathSearchOpts::default(),
27043        );
27044
27045        assert!(result.is_ok());
27046        assert!(dir.path().join(".tsift/index.db").exists());
27047    }
27048
27049    // --- explain ---
27050
27051    #[test]
27052    fn explain_shows_symbol_info() {
27053        let dir = setup_graph_index();
27054        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
27055        let symbols = db.symbol_info("main").unwrap();
27056        assert!(!symbols.is_empty());
27057        assert_eq!(symbols[0].name, "main");
27058        assert_eq!(symbols[0].kind, "function");
27059    }
27060
27061    #[test]
27062    fn explain_cmd_autoindexes_missing_index_by_default() {
27063        let dir = tempfile::tempdir().unwrap();
27064        let result = cmd_explain(
27065            "main",
27066            dir.path(),
27067            None,
27068            15,
27069            false,
27070            false,
27071            false,
27072            false,
27073            false,
27074            false,
27075            false,
27076        );
27077
27078        assert!(result.is_ok());
27079        assert!(dir.path().join(".tsift/index.db").exists());
27080    }
27081
27082    fn hold_write_lock(db_path: &std::path::Path) -> Connection {
27083        let conn = Connection::open(db_path).unwrap();
27084        conn.execute_batch("BEGIN IMMEDIATE").unwrap();
27085        conn
27086    }
27087
27088    fn hold_writer_lock(lock_path: &std::path::Path) -> std::fs::File {
27089        use fs4::fs_std::FileExt;
27090        use std::io::Write;
27091
27092        let mut file = std::fs::OpenOptions::new()
27093            .read(true)
27094            .write(true)
27095            .create(true)
27096            .truncate(false)
27097            .open(lock_path)
27098            .unwrap();
27099        assert!(file.try_lock_exclusive().unwrap());
27100        writeln!(file, "{}", std::process::id()).unwrap();
27101        file
27102    }
27103
27104    fn hold_rollback_journal_lock(db_path: &std::path::Path) -> Connection {
27105        let conn = Connection::open(db_path).unwrap();
27106        conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
27107            .unwrap();
27108        std::fs::write(substrate::rollback_journal_path(db_path), "locked").unwrap();
27109        conn
27110    }
27111
27112    fn hold_wal_database_lock(db_path: &std::path::Path) -> Connection {
27113        let conn = Connection::open(db_path).unwrap();
27114        conn.execute_batch(
27115            "PRAGMA journal_mode=WAL;
27116             PRAGMA wal_autocheckpoint=0;
27117             CREATE TABLE IF NOT EXISTS wal_lock_probe (id INTEGER PRIMARY KEY);
27118             INSERT INTO wal_lock_probe DEFAULT VALUES;
27119             PRAGMA locking_mode=EXCLUSIVE;
27120             BEGIN EXCLUSIVE;",
27121        )
27122        .unwrap();
27123        assert!(substrate::wal_sidecar_path(db_path).exists());
27124        conn
27125    }
27126
27127    #[test]
27128    fn index_cmd_reports_wal_sidecar_diagnostics_without_tsift_writer_lock() {
27129        let dir = setup_graph_index();
27130        let db_path = dir.path().join(".tsift/index.db");
27131        let _lock = hold_wal_database_lock(&db_path);
27132
27133        let err = cmd_index(
27134            dir.path(),
27135            false,
27136            false,
27137            false,
27138            false,
27139            false,
27140            false,
27141            None,
27142            false,
27143            false,
27144            false,
27145            false,
27146            false,
27147            false,
27148        )
27149        .unwrap_err();
27150
27151        let msg = err.to_string();
27152        assert!(msg.contains("indexing"));
27153        assert!(msg.contains("lock diagnostics:"));
27154        assert!(msg.contains("lock: absent"));
27155        assert!(msg.contains("wal: present") || msg.contains("shm: present"));
27156        assert!(msg.contains("wedged writer holding live WAL sidecars"));
27157        assert!(msg.contains("snapshot fallback"));
27158    }
27159
27160    #[test]
27161    fn search_cmd_succeeds_while_writer_lock_is_held() {
27162        let dir = setup_graph_index();
27163        let db_path = dir.path().join(".tsift/index.db");
27164        let _lock = hold_write_lock(&db_path);
27165
27166        let result = cmd_search(
27167            "main".to_string(),
27168            Some(dir.path().to_path_buf()),
27169            5,
27170            Some("lexical".to_string()),
27171            None,
27172            false,
27173            false,
27174            false,
27175            0,
27176            true,
27177            false,
27178            false,
27179            false,
27180            false,
27181            false,
27182        );
27183
27184        assert!(result.is_ok());
27185    }
27186
27187    #[test]
27188    fn search_cmd_uses_snapshot_fallback_when_rollback_journal_lock_appears_after_precheck() {
27189        let dir = setup_graph_index();
27190        let _hook = install_search_post_precheck_lock(dir.path().join(".tsift/index.db"));
27191
27192        let result = cmd_search(
27193            "main".to_string(),
27194            Some(dir.path().to_path_buf()),
27195            5,
27196            Some("lexical".to_string()),
27197            None,
27198            false,
27199            false,
27200            false,
27201            0,
27202            true,
27203            false,
27204            false,
27205            false,
27206            false,
27207            false,
27208        );
27209
27210        assert!(result.is_ok());
27211    }
27212
27213    #[test]
27214    fn search_cmd_uses_wal_snapshot_fallback_when_lock_appears_after_precheck() {
27215        let dir = setup_graph_index();
27216        let _hook = install_search_post_precheck_wal_lock(dir.path().join(".tsift/index.db"));
27217
27218        let result = cmd_search(
27219            "main".to_string(),
27220            Some(dir.path().to_path_buf()),
27221            5,
27222            Some("lexical".to_string()),
27223            None,
27224            false,
27225            false,
27226            false,
27227            0,
27228            true,
27229            false,
27230            false,
27231            false,
27232            false,
27233            false,
27234        );
27235
27236        assert!(result.is_ok());
27237    }
27238
27239    #[test]
27240    fn search_cmd_fails_fast_when_autoindex_disabled_and_index_is_stale() {
27241        let dir = setup_graph_index();
27242        std::thread::sleep(std::time::Duration::from_millis(50));
27243        std::fs::write(
27244            dir.path().join("main.rs"),
27245            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
27246        )
27247        .unwrap();
27248
27249        let err = cmd_search(
27250            "helper".to_string(),
27251            Some(dir.path().to_path_buf()),
27252            5,
27253            Some("lexical".to_string()),
27254            None,
27255            false,
27256            false,
27257            false,
27258            0,
27259            false,
27260            false,
27261            false,
27262            false,
27263            false,
27264            false,
27265        )
27266        .unwrap_err();
27267
27268        assert!(err.to_string().contains("search aborted"));
27269        assert!(err.to_string().contains("index is stale"));
27270        assert!(err.to_string().contains("--no-autoindex"));
27271    }
27272
27273    #[test]
27274    fn search_cmd_reports_stale_when_root_index_is_locked_by_rollback_journal() {
27275        let dir = setup_graph_index();
27276        std::thread::sleep(std::time::Duration::from_millis(50));
27277        std::fs::write(
27278            dir.path().join("main.rs"),
27279            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
27280        )
27281        .unwrap();
27282        let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/index.db"));
27283
27284        let err = cmd_search(
27285            "helper".to_string(),
27286            Some(dir.path().to_path_buf()),
27287            5,
27288            Some("lexical".to_string()),
27289            None,
27290            false,
27291            false,
27292            false,
27293            0,
27294            false,
27295            false,
27296            false,
27297            false,
27298            false,
27299            false,
27300        )
27301        .unwrap_err();
27302
27303        assert!(err.to_string().contains("search aborted"));
27304        assert!(err.to_string().contains("index is stale"));
27305        assert!(!err.to_string().contains("database is locked"));
27306    }
27307
27308    #[test]
27309    fn search_cmd_autoindexes_stale_index_by_default() {
27310        let dir = setup_graph_index();
27311        std::thread::sleep(std::time::Duration::from_millis(50));
27312        std::fs::write(
27313            dir.path().join("main.rs"),
27314            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
27315        )
27316        .unwrap();
27317
27318        let result = cmd_search(
27319            "helper".to_string(),
27320            Some(dir.path().to_path_buf()),
27321            5,
27322            Some("lexical".to_string()),
27323            None,
27324            false,
27325            false,
27326            true,
27327            0,
27328            false,
27329            false,
27330            false,
27331            false,
27332            false,
27333            false,
27334        );
27335
27336        assert!(result.is_ok());
27337
27338        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
27339        let summary = db.compute_changes(dir.path()).unwrap();
27340        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27341    }
27342
27343    #[test]
27344    fn search_cmd_keeps_read_only_results_when_active_writer_blocks_autoindex() {
27345        let dir = setup_graph_index();
27346        std::thread::sleep(std::time::Duration::from_millis(50));
27347        std::fs::write(
27348            dir.path().join("main.rs"),
27349            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
27350        )
27351        .unwrap();
27352        let _lock = hold_writer_lock(&dir.path().join(".tsift/index.lock"));
27353
27354        let result = cmd_search(
27355            "helper".to_string(),
27356            Some(dir.path().to_path_buf()),
27357            5,
27358            Some("lexical".to_string()),
27359            None,
27360            false,
27361            false,
27362            true,
27363            0,
27364            false,
27365            false,
27366            false,
27367            false,
27368            false,
27369            false,
27370        );
27371
27372        assert!(result.is_ok());
27373
27374        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
27375        let summary = db.compute_changes(dir.path()).unwrap();
27376        assert_eq!(summary.modified, 1);
27377    }
27378
27379    #[test]
27380    fn search_cmd_autoindex_reports_lock_diagnostics_when_rollback_journal_blocks_writer() {
27381        let dir = setup_graph_index();
27382        std::thread::sleep(std::time::Duration::from_millis(50));
27383        std::fs::write(
27384            dir.path().join("main.rs"),
27385            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
27386        )
27387        .unwrap();
27388        let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/index.db"));
27389
27390        let err = cmd_search(
27391            "helper".to_string(),
27392            Some(dir.path().to_path_buf()),
27393            5,
27394            Some("lexical".to_string()),
27395            None,
27396            false,
27397            false,
27398            true,
27399            0,
27400            false,
27401            false,
27402            false,
27403            false,
27404            false,
27405            false,
27406        )
27407        .unwrap_err();
27408
27409        let msg = err.to_string();
27410        assert!(msg.contains("autoindexing index"));
27411        assert!(msg.contains("lock diagnostics:"));
27412        assert!(msg.contains("journal: present"));
27413        assert!(msg.contains("next: inspect the host for a wedged rollback-journal writer"));
27414    }
27415
27416    #[test]
27417    fn search_cmd_uses_ancestor_project_root_for_nested_paths() {
27418        let dir = setup_graph_index();
27419        let nested = dir.path().join("src/nested");
27420        std::fs::create_dir_all(&nested).unwrap();
27421
27422        let result = cmd_search(
27423            "helper".to_string(),
27424            Some(nested.clone()),
27425            5,
27426            Some("lexical".to_string()),
27427            None,
27428            false,
27429            false,
27430            true,
27431            0,
27432            false,
27433            false,
27434            false,
27435            false,
27436            false,
27437            false,
27438        );
27439
27440        assert!(result.is_ok());
27441        assert!(!nested.join(".tsift/index.db").exists());
27442    }
27443
27444    #[test]
27445    fn exact_search_returns_literal_matches() {
27446        let dir = tempfile::tempdir().unwrap();
27447        std::fs::write(dir.path().join("notes.txt"), "alpha\nclaudescore-3\nbeta\n").unwrap();
27448
27449        let response = run_exact_search_with_timeout(dir.path(), "claudescore-3", 5, 0).unwrap();
27450
27451        assert_eq!(response.strategy, "exact");
27452        assert_eq!(response.hits.len(), 1);
27453        assert!(response.hits[0].path.ends_with("notes.txt"));
27454        assert_eq!(response.hits[0].location.as_deref(), Some("line 2"));
27455        assert!(response.hits[0].snippet.contains("claudescore-3"));
27456    }
27457
27458    #[test]
27459    fn exact_search_skips_stale_index_precheck() {
27460        let dir = setup_graph_index();
27461        std::thread::sleep(std::time::Duration::from_millis(50));
27462        std::fs::write(
27463            dir.path().join("main.rs"),
27464            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n",
27465        )
27466        .unwrap();
27467
27468        let result = cmd_search(
27469            "println!(\"updated\")".to_string(),
27470            Some(dir.path().to_path_buf()),
27471            5,
27472            Some("exact".to_string()),
27473            None,
27474            false,
27475            false,
27476            false,
27477            0,
27478            false,
27479            false,
27480            false,
27481            false,
27482            false,
27483            false,
27484        );
27485
27486        assert!(result.is_ok());
27487    }
27488
27489    #[test]
27490    fn workspace_exact_search_does_not_require_shared_root_index() {
27491        let dir = setup_workspace();
27492        cmd_index(
27493            dir.path(),
27494            false,
27495            false,
27496            false,
27497            false,
27498            false,
27499            true,
27500            None,
27501            false,
27502            false,
27503            false,
27504            false,
27505            false,
27506            false,
27507        )
27508        .unwrap();
27509
27510        let result = cmd_search(
27511            "alpha_helper".to_string(),
27512            Some(dir.path().to_path_buf()),
27513            5,
27514            Some("exact".to_string()),
27515            None,
27516            false,
27517            false,
27518            false,
27519            0,
27520            false,
27521            false,
27522            false,
27523            false,
27524            false,
27525            false,
27526        );
27527
27528        assert!(result.is_ok());
27529        assert!(!dir.path().join(".tsift/index.db").exists());
27530    }
27531
27532    #[test]
27533    fn identifier_like_query_prefers_exact_search() {
27534        assert!(query_prefers_exact_search("claudescore-3"));
27535        assert!(query_prefers_exact_search("alpha_helper"));
27536        assert!(query_prefers_exact_search("src/main.rs"));
27537        assert!(query_prefers_exact_search("crate::module"));
27538        assert!(!query_prefers_exact_search("authenticate"));
27539        assert!(!query_prefers_exact_search("fn main"));
27540        assert!(!query_prefers_exact_search("."));
27541    }
27542
27543    #[test]
27544    fn resolve_search_strategy_auto_promotes_identifier_like_queries() {
27545        assert_eq!(resolve_search_strategy("claudescore-3", None), "exact");
27546        assert_eq!(resolve_search_strategy("authenticate", None), "lexical");
27547        assert_eq!(
27548            resolve_search_strategy("claudescore-3", Some("hybrid".to_string())),
27549            "hybrid"
27550        );
27551    }
27552
27553    #[test]
27554    fn workspace_identifier_like_search_auto_uses_exact_backend() {
27555        let dir = setup_workspace();
27556        cmd_index(
27557            dir.path(),
27558            false,
27559            false,
27560            false,
27561            false,
27562            false,
27563            true,
27564            None,
27565            false,
27566            false,
27567            false,
27568            false,
27569            false,
27570            false,
27571        )
27572        .unwrap();
27573
27574        let result = cmd_search(
27575            "alpha_helper".to_string(),
27576            Some(dir.path().to_path_buf()),
27577            5,
27578            None,
27579            None,
27580            false,
27581            false,
27582            false,
27583            0,
27584            false,
27585            false,
27586            false,
27587            false,
27588            false,
27589            false,
27590        );
27591
27592        assert!(result.is_ok());
27593        assert!(!dir.path().join(".tsift/index.db").exists());
27594    }
27595
27596    #[test]
27597    fn index_cmd_uses_ancestor_project_root_for_nested_paths() {
27598        let dir = setup_graph_index();
27599        let nested = dir.path().join("src/nested");
27600        std::fs::create_dir_all(&nested).unwrap();
27601        std::fs::write(nested.join("extra.rs"), "fn nested_helper() {}\n").unwrap();
27602
27603        let result = cmd_index(
27604            &nested, false, false, false, false, false, false, None, false, false, false, false,
27605            false, false,
27606        );
27607
27608        assert!(result.is_ok());
27609        assert!(dir.path().join(".tsift/index.db").exists());
27610        assert!(!nested.join(".tsift/index.db").exists());
27611    }
27612
27613    #[test]
27614    fn workspace_index_cmd_uses_ancestor_project_root_for_nested_paths() {
27615        let dir = setup_workspace();
27616        let nested = dir.path().join("docs/nested");
27617        std::fs::create_dir_all(&nested).unwrap();
27618
27619        let result = cmd_index(
27620            &nested, false, false, false, false, false, true, None, false, false, false, false,
27621            false, false,
27622        );
27623
27624        let cfg = config::Config::load(dir.path()).unwrap();
27625
27626        assert!(result.is_ok());
27627        assert!(cfg.db_path_for(dir.path(), "alpha").exists());
27628        assert!(cfg.db_path_for(dir.path(), "beta").exists());
27629    }
27630
27631    #[test]
27632    fn status_cmd_autoindexes_missing_workspace_scopes() {
27633        let dir = setup_workspace();
27634        let cfg = config::Config::load(dir.path()).unwrap();
27635        let alpha = config::Config::resolve_submodule(dir.path(), "alpha").unwrap();
27636        let alpha_db_path = cfg.db_path_for(dir.path(), &alpha.id);
27637        let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();
27638        alpha_db.apply_changes(&alpha.source_root).unwrap();
27639
27640        let beta_db_path = cfg.db_path_for(dir.path(), "beta");
27641        assert!(!beta_db_path.exists());
27642
27643        cmd_status(dir.path(), false, true, false, false, false, false).unwrap();
27644
27645        assert!(beta_db_path.exists());
27646        let report = status::check_status(dir.path()).unwrap();
27647        assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
27648    }
27649
27650    #[test]
27651    fn status_cmd_autoindexes_workspace_when_all_scopes_are_missing() {
27652        let dir = setup_workspace();
27653        let cfg = config::Config::load(dir.path()).unwrap();
27654
27655        cmd_status(dir.path(), false, true, false, false, false, false).unwrap();
27656
27657        assert!(cfg.db_path_for(dir.path(), "alpha").exists());
27658        assert!(cfg.db_path_for(dir.path(), "beta").exists());
27659        let report = status::check_status(dir.path()).unwrap();
27660        assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
27661    }
27662
27663    #[test]
27664    fn status_cmd_fix_refreshes_stale_index() {
27665        let dir = setup_graph_index();
27666        std::thread::sleep(std::time::Duration::from_millis(50));
27667        std::fs::write(
27668            dir.path().join("main.rs"),
27669            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
27670        )
27671        .unwrap();
27672
27673        let report = status::check_status(dir.path()).unwrap();
27674        assert!(matches!(report.index, status::IndexStatus::Stale { .. }));
27675
27676        cmd_status(dir.path(), true, true, false, false, false, false).unwrap();
27677
27678        let report = status::check_status(dir.path()).unwrap();
27679        assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
27680    }
27681
27682    #[test]
27683    fn status_cmd_reports_wal_snapshot_recovery_without_tsift_writer_lock() {
27684        let dir = setup_graph_index();
27685        let db_path = dir.path().join(".tsift/index.db");
27686        let _lock = hold_wal_database_lock(&db_path);
27687
27688        cmd_status(dir.path(), false, true, false, false, false, false).unwrap();
27689
27690        let report = status::check_status(dir.path()).unwrap();
27691        assert!(matches!(
27692            report.index,
27693            status::IndexStatus::Fresh {
27694                recovery: Some(index::ReadOnlyRecovery::SnapshotFallbackWal),
27695                ..
27696            }
27697        ));
27698        let locks = status::check_locks(dir.path(), None, None).unwrap();
27699        assert!(matches!(
27700            locks.writer_lock,
27701            status::WriterLockStatus::Absent { .. }
27702        ));
27703        assert!(locks.wal_sidecar.present || locks.shared_memory_sidecar.present);
27704        assert!(
27705            locks
27706                .recommended_action
27707                .contains("wedged writer holding live WAL sidecars")
27708        );
27709    }
27710
27711    #[test]
27712    fn locks_report_uses_ancestor_project_root_for_nested_paths() {
27713        let dir = setup_graph_index();
27714        let nested = dir.path().join("src/nested");
27715        std::fs::create_dir_all(&nested).unwrap();
27716
27717        let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
27718        let report = status::check_locks(&root, Some(&nested), None).unwrap();
27719
27720        assert_eq!(report.source_root, dir.path());
27721        assert_eq!(report.db_path, dir.path().join(".tsift/index.db"));
27722    }
27723
27724    #[test]
27725    fn workspace_locks_report_infers_scope_from_nested_path() {
27726        let dir = setup_workspace();
27727        cmd_index(
27728            dir.path(),
27729            false,
27730            false,
27731            false,
27732            false,
27733            false,
27734            true,
27735            None,
27736            false,
27737            false,
27738            false,
27739            false,
27740            false,
27741            false,
27742        )
27743        .unwrap();
27744        let nested = dir.path().join("src/alpha/nested");
27745        std::fs::create_dir_all(&nested).unwrap();
27746
27747        let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
27748        let report = status::check_locks(&root, Some(&nested), None).unwrap();
27749        let cfg = config::Config::load(dir.path()).unwrap();
27750
27751        assert_eq!(report.label, "submodule `alpha` index");
27752        assert_eq!(report.source_root, dir.path().join("src/alpha"));
27753        assert_eq!(report.db_path, cfg.db_path_for(dir.path(), "alpha"));
27754        assert_eq!(
27755            report.reindex_command,
27756            format!("tsift index --submodule alpha {}", dir.path().display())
27757        );
27758    }
27759
27760    #[test]
27761    fn scoped_search_cmd_autoindexes_stale_submodule_index_by_default() {
27762        let dir = setup_workspace();
27763        cmd_index(
27764            dir.path(),
27765            false,
27766            false,
27767            false,
27768            false,
27769            false,
27770            true,
27771            None,
27772            false,
27773            false,
27774            false,
27775            false,
27776            false,
27777            false,
27778        )
27779        .unwrap();
27780
27781        let alpha = dir.path().join("src/alpha/lib.rs");
27782        std::thread::sleep(std::time::Duration::from_millis(50));
27783        std::fs::write(
27784            &alpha,
27785            "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27786        )
27787        .unwrap();
27788
27789        let result = cmd_search(
27790            "alpha_helper".to_string(),
27791            Some(dir.path().to_path_buf()),
27792            5,
27793            Some("lexical".to_string()),
27794            Some("alpha".to_string()),
27795            false,
27796            false,
27797            true,
27798            0,
27799            false,
27800            false,
27801            false,
27802            false,
27803            false,
27804            false,
27805        );
27806
27807        assert!(result.is_ok());
27808
27809        let cfg = config::Config::load(dir.path()).unwrap();
27810        let db = index::IndexDb::open_read_only(&cfg.db_path_for(dir.path(), "alpha")).unwrap();
27811        let summary = db.compute_changes(&dir.path().join("src/alpha")).unwrap();
27812        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27813    }
27814
27815    #[test]
27816    fn scoped_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
27817        let dir = setup_workspace();
27818        cmd_index(
27819            dir.path(),
27820            false,
27821            false,
27822            false,
27823            false,
27824            false,
27825            true,
27826            None,
27827            false,
27828            false,
27829            false,
27830            false,
27831            false,
27832            false,
27833        )
27834        .unwrap();
27835
27836        let alpha = dir.path().join("src/alpha/lib.rs");
27837        std::thread::sleep(std::time::Duration::from_millis(50));
27838        std::fs::write(
27839            &alpha,
27840            "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27841        )
27842        .unwrap();
27843
27844        let cfg = config::Config::load(dir.path()).unwrap();
27845        let _lock = hold_rollback_journal_lock(&cfg.db_path_for(dir.path(), "alpha"));
27846
27847        let err = cmd_search(
27848            "alpha_helper".to_string(),
27849            Some(dir.path().to_path_buf()),
27850            5,
27851            Some("lexical".to_string()),
27852            Some("alpha".to_string()),
27853            false,
27854            false,
27855            false,
27856            0,
27857            false,
27858            false,
27859            false,
27860            false,
27861            false,
27862            false,
27863        )
27864        .unwrap_err();
27865
27866        assert!(err.to_string().contains("search aborted"));
27867        assert!(err.to_string().contains("submodule `alpha` index"));
27868        assert!(!err.to_string().contains("database is locked"));
27869    }
27870
27871    #[test]
27872    fn federated_search_cmd_autoindexes_stale_indexes_by_default() {
27873        let dir = setup_workspace();
27874        cmd_index(
27875            dir.path(),
27876            false,
27877            false,
27878            false,
27879            false,
27880            false,
27881            true,
27882            None,
27883            false,
27884            false,
27885            false,
27886            false,
27887            false,
27888            false,
27889        )
27890        .unwrap();
27891
27892        let alpha = dir.path().join("src/alpha/lib.rs");
27893        std::thread::sleep(std::time::Duration::from_millis(50));
27894        std::fs::write(
27895            &alpha,
27896            "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27897        )
27898        .unwrap();
27899
27900        let result = cmd_search(
27901            "alpha_helper".to_string(),
27902            Some(dir.path().to_path_buf()),
27903            5,
27904            Some("lexical".to_string()),
27905            None,
27906            true,
27907            false,
27908            true,
27909            0,
27910            false,
27911            false,
27912            false,
27913            false,
27914            false,
27915            false,
27916        );
27917
27918        assert!(result.is_ok());
27919
27920        let cfg = config::Config::load(dir.path()).unwrap();
27921        let db = index::IndexDb::open_read_only(&cfg.db_path_for(dir.path(), "alpha")).unwrap();
27922        let summary = db.compute_changes(&dir.path().join("src/alpha")).unwrap();
27923        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27924    }
27925
27926    #[test]
27927    fn federated_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
27928        let dir = setup_workspace();
27929        cmd_index(
27930            dir.path(),
27931            false,
27932            false,
27933            false,
27934            false,
27935            false,
27936            true,
27937            None,
27938            false,
27939            false,
27940            false,
27941            false,
27942            false,
27943            false,
27944        )
27945        .unwrap();
27946
27947        let alpha = dir.path().join("src/alpha/lib.rs");
27948        std::thread::sleep(std::time::Duration::from_millis(50));
27949        std::fs::write(
27950            &alpha,
27951            "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27952        )
27953        .unwrap();
27954
27955        let cfg = config::Config::load(dir.path()).unwrap();
27956        let _lock = hold_rollback_journal_lock(&cfg.db_path_for(dir.path(), "alpha"));
27957
27958        let err = cmd_search(
27959            "alpha_helper".to_string(),
27960            Some(dir.path().to_path_buf()),
27961            5,
27962            Some("lexical".to_string()),
27963            None,
27964            true,
27965            false,
27966            false,
27967            30,
27968            false,
27969            false,
27970            false,
27971            false,
27972            false,
27973            false,
27974        )
27975        .unwrap_err();
27976
27977        assert!(err.to_string().contains("stale"));
27978        assert!(err.to_string().contains("submodule `alpha` index"));
27979        assert!(!err.to_string().contains("database is locked"));
27980    }
27981
27982    #[test]
27983    fn workspace_search_cmd_requires_explicit_target_without_shared_root_index() {
27984        let dir = setup_workspace();
27985        cmd_index(
27986            dir.path(),
27987            false,
27988            false,
27989            false,
27990            false,
27991            false,
27992            true,
27993            None,
27994            false,
27995            false,
27996            false,
27997            false,
27998            false,
27999            false,
28000        )
28001        .unwrap();
28002
28003        let err = cmd_search(
28004            "alpha_helper".to_string(),
28005            Some(dir.path().to_path_buf()),
28006            5,
28007            Some("lexical".to_string()),
28008            None,
28009            false,
28010            false,
28011            true,
28012            0,
28013            false,
28014            false,
28015            false,
28016            false,
28017            false,
28018            false,
28019        )
28020        .unwrap_err();
28021
28022        assert_workspace_search_requires_explicit_target(err);
28023        assert!(!dir.path().join(".tsift/index.db").exists());
28024    }
28025
28026    #[test]
28027    fn workspace_search_cmd_infers_scope_from_nested_path() {
28028        let dir = setup_workspace();
28029        cmd_index(
28030            dir.path(),
28031            false,
28032            false,
28033            false,
28034            false,
28035            false,
28036            true,
28037            None,
28038            false,
28039            false,
28040            false,
28041            false,
28042            false,
28043            false,
28044        )
28045        .unwrap();
28046        let nested = dir.path().join("src/alpha/nested");
28047        std::fs::create_dir_all(&nested).unwrap();
28048
28049        let result = cmd_search(
28050            "alpha_helper".to_string(),
28051            Some(nested),
28052            5,
28053            Some("lexical".to_string()),
28054            None,
28055            false,
28056            false,
28057            false,
28058            0,
28059            false,
28060            false,
28061            false,
28062            false,
28063            false,
28064            false,
28065        );
28066
28067        assert!(result.is_ok());
28068    }
28069
28070    #[test]
28071    fn resolve_query_db_path_infers_matching_duplicate_leaf_scope_from_nested_path() {
28072        let dir = setup_workspace_with_duplicate_leaf_names();
28073        cmd_index(
28074            dir.path(),
28075            false,
28076            false,
28077            false,
28078            false,
28079            false,
28080            true,
28081            None,
28082            false,
28083            false,
28084            false,
28085            false,
28086            false,
28087            false,
28088        )
28089        .unwrap();
28090        let nested = dir.path().join("vendor/foo/nested");
28091        std::fs::create_dir_all(&nested).unwrap();
28092
28093        let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
28094        let db_path = resolve_query_db_path(&root, &nested, None).unwrap();
28095        let cfg = config::Config::load(dir.path()).unwrap();
28096
28097        assert_eq!(db_path, cfg.db_path_for(dir.path(), "vendor/foo"));
28098    }
28099
28100    #[test]
28101    fn graph_cmd_succeeds_while_writer_lock_is_held() {
28102        let dir = setup_graph_index();
28103        let db_path = dir.path().join(".tsift/index.db");
28104        let _lock = hold_write_lock(&db_path);
28105
28106        let result = cmd_graph(
28107            "main",
28108            dir.path(),
28109            false,
28110            false,
28111            None,
28112            20,
28113            false,
28114            true,
28115            false,
28116            false,
28117            false,
28118            false,
28119            false,
28120            TagpathSearchOpts::default(),
28121        );
28122
28123        assert!(result.is_ok());
28124    }
28125
28126    #[test]
28127    fn graph_cmd_autoindexes_stale_index_by_default() {
28128        let dir = setup_graph_index();
28129        std::thread::sleep(std::time::Duration::from_millis(50));
28130        std::fs::write(
28131            dir.path().join("main.rs"),
28132            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
28133        )
28134        .unwrap();
28135
28136        let result = cmd_graph(
28137            "helper",
28138            dir.path(),
28139            true,
28140            false,
28141            None,
28142            20,
28143            false,
28144            true,
28145            false,
28146            false,
28147            false,
28148            false,
28149            false,
28150            TagpathSearchOpts::default(),
28151        );
28152
28153        assert!(result.is_ok());
28154        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
28155        let summary = db.compute_changes(dir.path()).unwrap();
28156        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
28157    }
28158
28159    #[test]
28160    fn graph_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
28161        let dir = setup_graph_index();
28162        let db_path = dir.path().join(".tsift/index.db");
28163        let _lock = hold_rollback_journal_lock(&db_path);
28164
28165        let result = cmd_graph(
28166            "main",
28167            dir.path(),
28168            false,
28169            false,
28170            None,
28171            20,
28172            false,
28173            true,
28174            false,
28175            false,
28176            false,
28177            false,
28178            false,
28179            TagpathSearchOpts::default(),
28180        );
28181
28182        assert!(result.is_ok());
28183    }
28184
28185    #[test]
28186    fn graph_cmd_uses_ancestor_project_root_for_nested_paths() {
28187        let dir = setup_graph_index();
28188        let nested = dir.path().join("src/nested");
28189        std::fs::create_dir_all(&nested).unwrap();
28190
28191        let result = cmd_graph(
28192            "helper",
28193            &nested,
28194            true,
28195            false,
28196            None,
28197            20,
28198            false,
28199            false,
28200            false,
28201            false,
28202            false,
28203            false,
28204            false,
28205            TagpathSearchOpts::default(),
28206        );
28207
28208        assert!(result.is_ok());
28209    }
28210
28211    #[test]
28212    fn communities_cmd_succeeds_while_writer_lock_is_held() {
28213        let dir = setup_graph_index();
28214        let _lock = hold_writer_lock(&dir.path().join(".tsift/index.lock"));
28215
28216        let result = cmd_communities(
28217            dir.path(),
28218            None,
28219            1,
28220            10,
28221            false,
28222            false,
28223            false,
28224            false,
28225            false,
28226            false,
28227            TagpathSearchOpts::default(),
28228        );
28229
28230        assert!(result.is_ok());
28231    }
28232
28233    #[test]
28234    fn communities_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
28235        let dir = setup_graph_index();
28236        let db_path = dir.path().join(".tsift/index.db");
28237        let _lock = hold_rollback_journal_lock(&db_path);
28238
28239        let result = cmd_communities(
28240            dir.path(),
28241            None,
28242            1,
28243            10,
28244            false,
28245            false,
28246            false,
28247            false,
28248            false,
28249            false,
28250            TagpathSearchOpts::default(),
28251        );
28252
28253        assert!(result.is_ok());
28254    }
28255
28256    #[test]
28257    fn lint_finds_entities_from_project_root_index_db() {
28258        let dir = tempfile::tempdir().unwrap();
28259        std::fs::write(dir.path().join("main.rs"), "fn alpha_helper() {}\n").unwrap();
28260        std::fs::write(
28261            dir.path().join("README.md"),
28262            "alpha_helper should be backticked.\n",
28263        )
28264        .unwrap();
28265        cmd_index(
28266            dir.path(),
28267            false,
28268            false,
28269            false,
28270            false,
28271            false,
28272            false,
28273            None,
28274            false,
28275            false,
28276            false,
28277            false,
28278            false,
28279            false,
28280        )
28281        .unwrap();
28282
28283        let root = lint::find_project_root_for_path(&dir.path().join("README.md"))
28284            .unwrap()
28285            .unwrap();
28286        let entities = lint::collect_entities_from_index_path(&root).unwrap();
28287        let result = lint::lint_markdown(&dir.path().join("README.md"), &entities).unwrap();
28288
28289        assert!(
28290            result
28291                .annotations
28292                .iter()
28293                .any(|ann| ann.text == "alpha_helper")
28294        );
28295    }
28296
28297    // --- search timeout ---
28298
28299    #[test]
28300    fn search_direct_runs_ok() {
28301        let dir = tempfile::tempdir().unwrap();
28302        let search_dir = dir.path().to_path_buf();
28303        let cache_dir = search_dir.join(".tsift/search-cache");
28304        std::fs::write(search_dir.join("test.rs"), "fn main() {}").unwrap();
28305        let result = run_sift_search(&search_dir, &cache_dir, "main", 1, "lexical");
28306        assert!(result.is_ok(), "direct search should succeed");
28307        assert!(
28308            cache_dir.exists(),
28309            "search should create the configured cache dir"
28310        );
28311    }
28312
28313    #[test]
28314    fn search_timeout_zero_disables_timeout() {
28315        let dir = tempfile::tempdir().unwrap();
28316        let search_dir = dir.path().to_path_buf();
28317        let cache_dir = search_dir.join(".tsift/search-cache");
28318        std::fs::write(search_dir.join("test.rs"), "fn main() {}").unwrap();
28319        let result = run_search_with_timeout(&search_dir, &cache_dir, "main", 1, 0, "lexical", &[]);
28320        assert!(result.is_ok(), "timeout=0 should still work (no timeout)");
28321        assert!(
28322            cache_dir.exists(),
28323            "timeout=0 should keep using the stable search cache dir"
28324        );
28325    }
28326
28327    #[test]
28328    fn search_timeout_message_reports_missing_index_as_rebuild_needed() {
28329        let dir = tempfile::tempdir().unwrap();
28330        std::fs::write(dir.path().join("main.rs"), "fn main() {}\n").unwrap();
28331        cmd_index(
28332            dir.path(),
28333            false,
28334            false,
28335            false,
28336            false,
28337            false,
28338            false,
28339            None,
28340            false,
28341            false,
28342            false,
28343            false,
28344            false,
28345            false,
28346        )
28347        .unwrap();
28348        let db_path = dir.path().join(".tsift/index.db");
28349        std::fs::remove_file(&db_path).unwrap();
28350        let search_target = SearchIndexTarget {
28351            label: "index".to_string(),
28352            db_path,
28353            source_root: dir.path().to_path_buf(),
28354            scope_name: None,
28355            reindex_cmd: format!("tsift index {}", dir.path().display()),
28356        };
28357
28358        let message = search_timeout_message(1, "lexical", &[search_target]).unwrap();
28359
28360        assert!(message.contains("timed out after 1s"));
28361        assert!(message.contains("index is missing"));
28362        assert!(message.contains("Run `tsift index"));
28363        assert!(!message.contains("search root looks fresh"));
28364    }
28365
28366    #[test]
28367    fn search_worker_output_path_uses_json_suffix() {
28368        let path = next_search_worker_output_path();
28369        assert!(path.extension().is_some_and(|ext| ext == "json"));
28370    }
28371
28372    // --- index quiet mode ---
28373
28374    #[test]
28375    fn index_quiet_suppresses_file_list() {
28376        let dir = setup_graph_index();
28377        let result = cmd_index(
28378            dir.path(),
28379            false,
28380            true,
28381            false,
28382            false,
28383            true,
28384            false,
28385            None,
28386            false,
28387            false,
28388            false,
28389            false,
28390            false,
28391            false,
28392        );
28393        assert!(result.is_ok());
28394    }
28395
28396    #[test]
28397    fn index_exit_code_implies_quiet() {
28398        let dir = setup_graph_index();
28399        let result = cmd_index(
28400            dir.path(),
28401            false,
28402            true,
28403            false,
28404            false,
28405            false,
28406            false,
28407            None,
28408            false,
28409            false,
28410            false,
28411            false,
28412            false,
28413            false,
28414        );
28415        assert!(result.is_ok());
28416    }
28417
28418    #[test]
28419    fn index_quiet_json_omits_changes() {
28420        let dir = setup_graph_index();
28421        let result = cmd_index(
28422            dir.path(),
28423            false,
28424            true,
28425            false,
28426            false,
28427            true,
28428            false,
28429            None,
28430            true,
28431            false,
28432            false,
28433            false,
28434            false,
28435            false,
28436        );
28437        assert!(result.is_ok());
28438    }
28439
28440    #[test]
28441    fn cli_workflow_defaults_to_search_topic() {
28442        let cli = parse_cli(["tsift", "workflow"]);
28443        match cli.command {
28444            Some(Commands::Workflow { topic, json }) => {
28445                assert_eq!(topic, "search");
28446                assert!(!json);
28447            }
28448            _ => panic!("expected Workflow command"),
28449        }
28450    }
28451
28452    #[test]
28453    fn search_workflow_recipe_preserves_handles_across_expansions() {
28454        let recipe = search_workflow_recipe();
28455        let step_names: Vec<&str> = recipe.steps.iter().map(|step| step.name).collect();
28456        assert_eq!(
28457            step_names,
28458            vec![
28459                "exact-anchor",
28460                "semantic-search",
28461                "explain-symbol",
28462                "summarize-selection",
28463                "digest-expansion"
28464            ]
28465        );
28466        assert!(
28467            recipe
28468                .handle_contract
28469                .iter()
28470                .any(|item| item.contains("originating command"))
28471        );
28472        assert!(
28473            recipe.steps[1]
28474                .preserves
28475                .iter()
28476                .any(|item| item.contains("sfam-*"))
28477        );
28478        assert!(
28479            recipe.steps[2]
28480                .preserves
28481                .iter()
28482                .any(|item| item.contains("ecall-*"))
28483        );
28484        assert!(
28485            recipe.steps[4]
28486                .preserves
28487                .iter()
28488                .any(|item| item.contains("artifact handles"))
28489        );
28490    }
28491
28492    // --- JSON compact vs pretty ---
28493
28494    #[test]
28495    fn to_json_compact_default() {
28496        let val = serde_json::json!({"a": 1, "b": [2, 3]});
28497        let compact = to_json(&val, false, false).unwrap();
28498        assert!(!compact.contains('\n'));
28499        assert!(
28500            compact.contains("\"a\":1")
28501                || compact.contains("\"a\": 1")
28502                || compact.contains("\"a\":")
28503        );
28504    }
28505
28506    #[test]
28507    fn to_json_pretty_indents() {
28508        let val = serde_json::json!({"a": 1, "b": [2, 3]});
28509        let pretty = to_json(&val, true, false).unwrap();
28510        assert!(pretty.contains('\n'));
28511        assert!(pretty.contains("  "));
28512    }
28513
28514    #[test]
28515    fn to_json_compact_is_shorter() {
28516        let val =
28517            serde_json::json!({"name": "test", "items": [1, 2, 3], "nested": {"key": "value"}});
28518        let compact = to_json(&val, false, false).unwrap();
28519        let pretty = to_json(&val, true, false).unwrap();
28520        assert!(compact.len() < pretty.len());
28521    }
28522
28523    #[test]
28524    fn terse_renames_keys() {
28525        let val =
28526            serde_json::json!({"caller_file": "a.rs", "caller_name": "main", "call_site_line": 10});
28527        let result = to_json(&val, false, true).unwrap();
28528        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28529        assert!(parsed["_s"].is_object());
28530        let d = &parsed["d"];
28531        assert_eq!(d["cf"], "a.rs");
28532        assert_eq!(d["cn"], "main");
28533        assert_eq!(d["csl"], 10);
28534    }
28535
28536    #[test]
28537    fn terse_schema_only_includes_used_keys() {
28538        let val = serde_json::json!({"name": "test", "score": 0.5});
28539        let result = to_json(&val, false, true).unwrap();
28540        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28541        let schema = parsed["_s"].as_object().unwrap();
28542        assert_eq!(schema["n"], "name");
28543        assert_eq!(schema["sc"], "score");
28544        assert!(!schema.contains_key("cf"));
28545    }
28546
28547    #[test]
28548    fn terse_nested_arrays() {
28549        let val = serde_json::json!({"callers": [{"caller_name": "a", "caller_file": "b.rs", "caller_line": 1, "callee_name": "c", "call_site_line": 2}]});
28550        let result = to_json(&val, false, true).unwrap();
28551        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28552        let d = &parsed["d"];
28553        assert_eq!(d["crs"][0]["cn"], "a");
28554        assert_eq!(d["crs"][0]["cf"], "b.rs");
28555    }
28556
28557    #[test]
28558    fn terse_preserves_unknown_keys() {
28559        let val = serde_json::json!({"custom_field": "value", "name": "test"});
28560        let result = to_json(&val, false, true).unwrap();
28561        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28562        let d = &parsed["d"];
28563        assert_eq!(d["custom_field"], "value");
28564        assert_eq!(d["n"], "test");
28565    }
28566
28567    // --- schema-then-values ---
28568
28569    #[test]
28570    fn schema_converts_homogeneous_arrays() {
28571        let val = serde_json::json!({"symbols": [
28572            {"name": "foo", "kind": "fn", "line": 10},
28573            {"name": "bar", "kind": "fn", "line": 20}
28574        ]});
28575        let result = to_json_schema(&val, false, false, true).unwrap();
28576        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28577        let syms = &parsed["symbols"];
28578        // serde_json uses BTreeMap — keys sorted alphabetically
28579        assert_eq!(syms["_c"], serde_json::json!(["kind", "line", "name"]));
28580        assert_eq!(syms["_r"][0], serde_json::json!(["fn", 10, "foo"]));
28581        assert_eq!(syms["_r"][1], serde_json::json!(["fn", 20, "bar"]));
28582    }
28583
28584    #[test]
28585    fn schema_skips_short_arrays() {
28586        let val = serde_json::json!({"items": [{"name": "only"}]});
28587        let result = to_json_schema(&val, false, false, true).unwrap();
28588        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28589        assert!(parsed["items"].is_array());
28590        assert_eq!(parsed["items"][0]["name"], "only");
28591    }
28592
28593    #[test]
28594    fn schema_skips_heterogeneous_arrays() {
28595        let val = serde_json::json!({"items": [{"a": 1}, {"b": 2}]});
28596        let result = to_json_schema(&val, false, false, true).unwrap();
28597        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28598        assert!(parsed["items"].is_array());
28599        assert_eq!(parsed["items"][0]["a"], 1);
28600    }
28601
28602    #[test]
28603    fn schema_with_terse_combines() {
28604        let val = serde_json::json!({"callers": [
28605            {"caller_name": "a", "caller_file": "x.rs"},
28606            {"caller_name": "b", "caller_file": "y.rs"}
28607        ]});
28608        let result = to_json_schema(&val, false, true, true).unwrap();
28609        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28610        assert!(parsed["_s"].is_object());
28611        let d = &parsed["d"];
28612        let crs = &d["crs"];
28613        assert!(crs["_c"].is_array());
28614        assert!(crs["_r"].is_array());
28615        // terse: caller_file→cf, caller_name→cn; BTreeMap sorts: cf < cn
28616        assert_eq!(crs["_r"][0], serde_json::json!(["x.rs", "a"]));
28617    }
28618
28619    #[test]
28620    fn schema_preserves_non_object_arrays() {
28621        let val = serde_json::json!({"tags": ["a", "b", "c"]});
28622        let result = to_json_schema(&val, false, false, true).unwrap();
28623        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
28624        assert_eq!(parsed["tags"], serde_json::json!(["a", "b", "c"]));
28625    }
28626
28627    #[test]
28628    fn cli_accepts_global_schema_flag() {
28629        let cli = parse_cli(["tsift", "--schema", "search", "test"]);
28630        assert!(cli.schema);
28631        assert!(matches!(cli.command, Some(Commands::Search { .. })));
28632    }
28633
28634    #[test]
28635    fn cli_accepts_global_envelope_flag() {
28636        let cli = parse_cli([
28637            "tsift",
28638            "--envelope",
28639            "context-pack",
28640            "tasks/software/tsift.md",
28641        ]);
28642        assert!(cli.envelope);
28643        assert!(matches!(cli.command, Some(Commands::ContextPack { .. })));
28644    }
28645
28646    #[test]
28647    fn cli_accepts_locks_command() {
28648        let cli = parse_cli(["tsift", "locks"]);
28649        assert!(matches!(cli.command, Some(Commands::Locks { .. })));
28650    }
28651
28652    #[test]
28653    fn cli_locks_accepts_scope_flag() {
28654        let cli = parse_cli(["tsift", "locks", "--scope", "alpha"]);
28655        match cli.command {
28656            Some(Commands::Locks { scope, .. }) => {
28657                assert_eq!(scope.as_deref(), Some("alpha"));
28658            }
28659            _ => panic!("expected Locks command"),
28660        }
28661    }
28662
28663    #[test]
28664    fn cli_search_accepts_autoindex_flag() {
28665        let cli = parse_cli(["tsift", "search", "test", "--autoindex"]);
28666        match cli.command {
28667            Some(Commands::Search {
28668                autoindex,
28669                no_autoindex,
28670                ..
28671            }) => {
28672                assert!(autoindex);
28673                assert!(!no_autoindex);
28674            }
28675            _ => panic!("expected Search command"),
28676        }
28677    }
28678
28679    #[test]
28680    fn cli_search_accepts_exact_flag() {
28681        let cli = parse_cli(["tsift", "search", "test", "--exact"]);
28682        match cli.command {
28683            Some(Commands::Search {
28684                exact, strategy, ..
28685            }) => {
28686                assert!(exact);
28687                assert!(strategy.is_none());
28688            }
28689            _ => panic!("expected Search command"),
28690        }
28691    }
28692
28693    #[test]
28694    fn cli_parses_diff_digest_command() {
28695        let cli = parse_cli(["tsift", "diff-digest", "--json", "."]);
28696        match cli.command {
28697            Some(Commands::DiffDigest {
28698                json,
28699                path,
28700                cached,
28701                revision,
28702            }) => {
28703                assert!(json);
28704                assert_eq!(path, PathBuf::from("."));
28705                assert!(!cached);
28706                assert!(revision.is_none());
28707            }
28708            _ => panic!("expected DiffDigest command"),
28709        }
28710    }
28711
28712    #[test]
28713    fn cli_rejects_conflicting_diff_digest_modes() {
28714        match try_parse_cli([
28715            "tsift",
28716            "diff-digest",
28717            "--cached",
28718            "--revision",
28719            "HEAD",
28720            ".",
28721        ]) {
28722            Ok(_) => panic!("expected conflicting diff-digest modes to fail"),
28723            Err(err) => {
28724                assert!(err.to_string().contains("--cached"));
28725                assert!(err.to_string().contains("--revision"));
28726            }
28727        }
28728    }
28729
28730    #[test]
28731    fn cli_parses_test_digest_command() {
28732        let cli = parse_cli([
28733            "tsift",
28734            "test-digest",
28735            "--path",
28736            ".",
28737            "--input",
28738            "target/test.log",
28739            "--runner",
28740            "cargo",
28741            "--json",
28742        ]);
28743        match cli.command {
28744            Some(Commands::TestDigest {
28745                json,
28746                path,
28747                input,
28748                runner,
28749            }) => {
28750                assert!(json);
28751                assert_eq!(path, PathBuf::from("."));
28752                assert_eq!(input, Some(PathBuf::from("target/test.log")));
28753                assert_eq!(runner.as_deref(), Some("cargo"));
28754            }
28755            _ => panic!("expected TestDigest command"),
28756        }
28757    }
28758
28759    #[test]
28760    fn cli_parses_log_digest_command() {
28761        let cli = parse_cli([
28762            "tsift",
28763            "log-digest",
28764            "--path",
28765            ".",
28766            "--input",
28767            "target/build.log",
28768            "--json",
28769        ]);
28770        match cli.command {
28771            Some(Commands::LogDigest { json, path, input }) => {
28772                assert!(json);
28773                assert_eq!(path, PathBuf::from("."));
28774                assert_eq!(input, Some(PathBuf::from("target/build.log")));
28775            }
28776            _ => panic!("expected LogDigest command"),
28777        }
28778    }
28779
28780    #[test]
28781    fn cli_parses_metric_digest_command() {
28782        let cli = parse_cli([
28783            "tsift",
28784            "metric-digest",
28785            "--input",
28786            "target/runs.json",
28787            "--baseline",
28788            "target/prior.json",
28789            "--metric",
28790            "session_mae",
28791            "--lower-is-better",
28792            "session_mae",
28793            "--history",
28794            "4",
28795            "--top",
28796            "2",
28797            "--json",
28798        ]);
28799        match cli.command {
28800            Some(Commands::MetricDigest {
28801                input,
28802                baseline,
28803                metrics,
28804                lower_is_better,
28805                history,
28806                top,
28807                json,
28808                ..
28809            }) => {
28810                assert!(json);
28811                assert_eq!(input, Some(PathBuf::from("target/runs.json")));
28812                assert_eq!(baseline, Some(PathBuf::from("target/prior.json")));
28813                assert_eq!(metrics, vec!["session_mae"]);
28814                assert_eq!(lower_is_better, vec!["session_mae"]);
28815                assert_eq!(history, 4);
28816                assert_eq!(top, 2);
28817            }
28818            _ => panic!("expected MetricDigest command"),
28819        }
28820    }
28821
28822    #[test]
28823    fn cli_parses_dci_benchmark_command() {
28824        let cli = parse_cli([
28825            "tsift",
28826            "dci-benchmark",
28827            "--fixture",
28828            "fixtures/dci-search-benchmark.json",
28829            "--json",
28830        ]);
28831        match cli.command {
28832            Some(Commands::DciBenchmark { fixture, json }) => {
28833                assert!(json);
28834                assert_eq!(fixture, PathBuf::from("fixtures/dci-search-benchmark.json"));
28835            }
28836            _ => panic!("expected DciBenchmark command"),
28837        }
28838    }
28839
28840    #[test]
28841    fn cli_parses_session_digest_command() {
28842        let cli = parse_cli([
28843            "tsift",
28844            "session-digest",
28845            "--path",
28846            ".",
28847            "--input",
28848            "target/session.md",
28849            "--source",
28850            "markdown",
28851            "--json",
28852        ]);
28853        match cli.command {
28854            Some(Commands::SessionDigest {
28855                json,
28856                path,
28857                input,
28858                source,
28859            }) => {
28860                assert!(json);
28861                assert_eq!(path, PathBuf::from("."));
28862                assert_eq!(input, Some(PathBuf::from("target/session.md")));
28863                assert_eq!(source.as_deref(), Some("markdown"));
28864            }
28865            _ => panic!("expected SessionDigest command"),
28866        }
28867    }
28868
28869    #[test]
28870    fn cli_parses_session_cost_command() {
28871        let cli = parse_cli([
28872            "tsift",
28873            "session-cost",
28874            "--input",
28875            "target/session.jsonl",
28876            "--source",
28877            "codex-jsonl",
28878            "--json",
28879        ]);
28880        match cli.command {
28881            Some(Commands::SessionCost {
28882                json,
28883                input,
28884                source,
28885            }) => {
28886                assert!(json);
28887                assert_eq!(input, Some(PathBuf::from("target/session.jsonl")));
28888                assert_eq!(source.as_deref(), Some("codex-jsonl"));
28889            }
28890            _ => panic!("expected SessionCost command"),
28891        }
28892    }
28893
28894    #[test]
28895    fn cli_parses_session_review_command() {
28896        let cli = parse_cli([
28897            "tsift",
28898            "session-review",
28899            "tasks/software/tsift.md",
28900            "--next-context",
28901            "--json",
28902        ]);
28903        match cli.command {
28904            Some(Commands::SessionReview {
28905                json,
28906                next_context,
28907                path,
28908                ..
28909            }) => {
28910                assert!(json);
28911                assert!(next_context);
28912                assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
28913            }
28914            _ => panic!("expected SessionReview command"),
28915        }
28916    }
28917
28918    #[test]
28919    fn cli_search_accepts_budget_flags() {
28920        let cli = parse_cli([
28921            "tsift",
28922            "search",
28923            "alpha_helper",
28924            "--max-items",
28925            "3",
28926            "--max-bytes",
28927            "96",
28928        ]);
28929        match cli.command {
28930            Some(Commands::Search {
28931                max_items,
28932                max_bytes,
28933                ..
28934            }) => {
28935                assert_eq!(max_items, Some(3));
28936                assert_eq!(max_bytes, Some(96));
28937            }
28938            _ => panic!("expected Search command"),
28939        }
28940    }
28941
28942    #[test]
28943    fn cli_search_accepts_budget_preset() {
28944        let cli = parse_cli(["tsift", "search", "alpha_helper", "--budget", "small"]);
28945        match cli.command {
28946            Some(Commands::Search { budget, .. }) => {
28947                assert_eq!(budget, Some(ResponseBudgetPreset::Small));
28948            }
28949            _ => panic!("expected Search command"),
28950        }
28951    }
28952
28953    #[test]
28954    fn response_budget_presets_fill_defaults_and_preserve_explicit_caps() {
28955        let small = ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Small), false);
28956        assert_eq!(small.preview_items(), 3);
28957        assert_eq!(small.preview_bytes(), 120);
28958        assert_eq!(small.follow_up_items(), 4);
28959
28960        let overridden =
28961            ResponseBudget::from_cli(Some(7), None, Some(ResponseBudgetPreset::Small), false);
28962        assert_eq!(overridden.preview_items(), 7);
28963        assert_eq!(overridden.preview_bytes(), 120);
28964        assert_eq!(overridden.follow_up_items(), 7);
28965
28966        let envelope_default = ResponseBudget::from_cli(None, None, None, true);
28967        assert!(envelope_default.is_active());
28968    }
28969
28970    #[test]
28971    fn cli_explain_accepts_budget_flags() {
28972        let cli = parse_cli([
28973            "tsift",
28974            "explain",
28975            "alpha_helper",
28976            "--max-items",
28977            "2",
28978            "--max-bytes",
28979            "80",
28980        ]);
28981        match cli.command {
28982            Some(Commands::Explain {
28983                max_items,
28984                max_bytes,
28985                ..
28986            }) => {
28987                assert_eq!(max_items, Some(2));
28988                assert_eq!(max_bytes, Some(80));
28989            }
28990            _ => panic!("expected Explain command"),
28991        }
28992    }
28993
28994    #[test]
28995    fn cli_session_review_accepts_budget_flags() {
28996        let cli = parse_cli([
28997            "tsift",
28998            "session-review",
28999            "tasks/software/tsift.md",
29000            "--max-items",
29001            "4",
29002            "--max-bytes",
29003            "120",
29004        ]);
29005        match cli.command {
29006            Some(Commands::SessionReview {
29007                max_items,
29008                max_bytes,
29009                ..
29010            }) => {
29011                assert_eq!(max_items, Some(4));
29012                assert_eq!(max_bytes, Some(120));
29013            }
29014            _ => panic!("expected SessionReview command"),
29015        }
29016    }
29017
29018    #[test]
29019    fn cli_parses_context_pack_command() {
29020        let cli = parse_cli([
29021            "tsift",
29022            "context-pack",
29023            "tasks/software/tsift.md",
29024            "--test-input",
29025            "target/test.log",
29026            "--runner",
29027            "cargo",
29028            "--log-input",
29029            "target/build.log",
29030            "--max-items",
29031            "3",
29032            "--max-bytes",
29033            "96",
29034            "--json",
29035        ]);
29036        match cli.command {
29037            Some(Commands::ContextPack {
29038                path,
29039                test_input,
29040                runner,
29041                log_input,
29042                json,
29043                max_items,
29044                max_bytes,
29045                budget,
29046                convex_snapshot,
29047            }) => {
29048                assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
29049                assert_eq!(test_input, Some(PathBuf::from("target/test.log")));
29050                assert_eq!(runner.as_deref(), Some("cargo"));
29051                assert_eq!(log_input, Some(PathBuf::from("target/build.log")));
29052                assert!(json);
29053                assert_eq!(max_items, Some(3));
29054                assert_eq!(max_bytes, Some(96));
29055                assert!(budget.is_none());
29056                assert!(convex_snapshot.is_none());
29057            }
29058            _ => panic!("expected ContextPack command"),
29059        }
29060    }
29061
29062    #[test]
29063    fn cli_parses_token_savings_command() {
29064        let cli = parse_cli([
29065            "tsift",
29066            "token-savings",
29067            "--fixture",
29068            "fixtures/tsift-token-savings.json",
29069            "--fail-under",
29070            "--json",
29071        ]);
29072        match cli.command {
29073            Some(Commands::TokenSavings {
29074                fixture,
29075                fail_under,
29076                json,
29077            }) => {
29078                assert_eq!(fixture, PathBuf::from("fixtures/tsift-token-savings.json"));
29079                assert!(fail_under);
29080                assert!(json);
29081            }
29082            _ => panic!("expected TokenSavings command"),
29083        }
29084    }
29085
29086    #[test]
29087    fn token_savings_report_records_fixture_thresholds() {
29088        let raw_symbols = [
29089            "validate_user",
29090            "validateUser",
29091            "ValidateUser",
29092            "validate-user",
29093            "VALIDATE_USER",
29094            "Validate_User",
29095            "raw_symbol",
29096            "rawSymbol",
29097            "RawSymbol",
29098            "raw-symbol",
29099            "RAW_SYMBOL",
29100            "Raw_Symbol",
29101        ]
29102        .iter()
29103        .enumerate()
29104        .map(|(idx, identifier)| TokenSavingsRawSymbol {
29105            identifier: (*identifier).to_string(),
29106            file: format!("src/example_{idx}.rs"),
29107            line: (idx + 1) as u64,
29108            context: "function".to_string(),
29109        })
29110        .collect();
29111        let fixture = TokenSavingsFixture {
29112            schema_version: 1,
29113            description: "fixture".to_string(),
29114            token_estimate: "ceil(utf8_bytes / 4)".to_string(),
29115            cases: vec![TokenSavingsFixtureCase {
29116                name: "search-preview".to_string(),
29117                surface: "search".to_string(),
29118                minimum_savings_percent: 40.0,
29119                raw_symbols,
29120                tagpath_families: vec![
29121                    TokenSavingsFamily {
29122                        canonical: "validate_user".to_string(),
29123                        count: 6,
29124                        aliases: BTreeMap::new(),
29125                    },
29126                    TokenSavingsFamily {
29127                        canonical: "raw_symbol".to_string(),
29128                        count: 6,
29129                        aliases: BTreeMap::new(),
29130                    },
29131                ],
29132                context_pack_inputs: None,
29133                session_review_inputs: None,
29134                source_read_inputs: None,
29135            }],
29136        };
29137
29138        let report = build_token_savings_report(&fixture).unwrap();
29139
29140        assert!(report.pass);
29141        assert_eq!(report.cases[0].raw_symbol_count, 12);
29142        assert_eq!(report.cases[0].family_count, 2);
29143        assert_eq!(report.cases[0].status, "pass");
29144        assert!(report.cases[0].byte_delta > 0);
29145        assert!(report.cases[0].raw_estimated_tokens > report.cases[0].envelope_estimated_tokens);
29146        assert!(report.cases[0].savings_percent >= 40.0);
29147    }
29148
29149    #[test]
29150    fn token_savings_source_read_inputs_preserve_required_anchors() {
29151        let fixture = TokenSavingsFixture {
29152            schema_version: 1,
29153            description: "fixture".to_string(),
29154            token_estimate: "ceil(utf8_bytes / 4)".to_string(),
29155            cases: vec![TokenSavingsFixtureCase {
29156                name: "source-read".to_string(),
29157                surface: "source-read".to_string(),
29158                minimum_savings_percent: 40.0,
29159                raw_symbols: Vec::new(),
29160                tagpath_families: Vec::new(),
29161                context_pack_inputs: None,
29162                session_review_inputs: None,
29163                source_read_inputs: Some(TokenSavingsSourceReadInputs {
29164                    reads: vec![TokenSavingsSourceReadInput {
29165                        command: "sed -n '40,160p' src/main.rs".to_string(),
29166                        file: "src/main.rs".to_string(),
29167                        raw_start: 40,
29168                        raw_lines: 121,
29169                        raw_excerpt: "line 40\n".repeat(121),
29170                        envelope_start: 40,
29171                        envelope_lines: 121,
29172                        required_line_anchors: vec![40, 120, 160],
29173                    }],
29174                }),
29175            }],
29176        };
29177
29178        let report = build_token_savings_report(&fixture).unwrap();
29179
29180        assert!(report.pass);
29181        assert_eq!(report.cases[0].surface, "source-read");
29182        assert!(report.cases[0].savings_percent >= 40.0);
29183    }
29184
29185    #[test]
29186    fn token_savings_source_read_inputs_fail_when_anchor_is_hidden() {
29187        let fixture = TokenSavingsFixture {
29188            schema_version: 1,
29189            description: "fixture".to_string(),
29190            token_estimate: "ceil(utf8_bytes / 4)".to_string(),
29191            cases: vec![TokenSavingsFixtureCase {
29192                name: "source-read".to_string(),
29193                surface: "source-read".to_string(),
29194                minimum_savings_percent: 40.0,
29195                raw_symbols: Vec::new(),
29196                tagpath_families: Vec::new(),
29197                context_pack_inputs: None,
29198                session_review_inputs: None,
29199                source_read_inputs: Some(TokenSavingsSourceReadInputs {
29200                    reads: vec![TokenSavingsSourceReadInput {
29201                        command: "cat src/main.rs".to_string(),
29202                        file: "src/main.rs".to_string(),
29203                        raw_start: 1,
29204                        raw_lines: 200,
29205                        raw_excerpt: "line\n".repeat(200),
29206                        envelope_start: 1,
29207                        envelope_lines: 80,
29208                        required_line_anchors: vec![120],
29209                    }],
29210                }),
29211            }],
29212        };
29213
29214        let err = match build_token_savings_report(&fixture) {
29215            Ok(_) => panic!("hidden anchor should fail the source-read fixture"),
29216            Err(err) => err,
29217        };
29218
29219        assert!(err.to_string().contains("hides required line anchor 120"));
29220    }
29221
29222    #[test]
29223    fn search_budget_report_truncates_symbol_preview_and_emits_stable_handle() {
29224        let response = empty_search_response(Path::new("/repo"), "lexical");
29225        let symbol_hits = vec![index::SymbolHit {
29226            name: "alpha_helper_with_a_long_name".to_string(),
29227            kind: "function".to_string(),
29228            language: "rust".to_string(),
29229            file: "/repo/src/lib.rs".to_string(),
29230            line: 12,
29231            end_line: None,
29232            tags: None,
29233            score: 0.98,
29234            match_type: "exact_name".to_string(),
29235            tagpath_handle: None,
29236        }];
29237
29238        let report = build_search_budget_report(
29239            "alpha_helper_with_a_long_name",
29240            "lexical",
29241            Path::new("/repo"),
29242            &response,
29243            &symbol_hits,
29244            false,
29245            ResponseBudget::new(Some(1), Some(12)),
29246        );
29247
29248        assert_eq!(report.symbols.len(), 1);
29249        assert!(report.symbols[0].handle.starts_with("sfam-"));
29250        assert_eq!(report.symbols[0].tag_alias.as_deref(), Some("alpha/hel..."));
29251        assert_eq!(report.symbols[0].name, "alpha_hel...");
29252        assert_eq!(report.symbols[0].file, "src/lib.rs");
29253        assert!(report.symbols[0].expand.contains("tsift search"));
29254    }
29255
29256    #[test]
29257    fn search_budget_report_groups_repeated_symbols_by_canonical_tag_family() {
29258        let response = empty_search_response(Path::new("/repo"), "lexical");
29259        let symbol_hits = vec![
29260            index::SymbolHit {
29261                name: "alpha_helper".to_string(),
29262                kind: "function".to_string(),
29263                language: "rust".to_string(),
29264                file: "/repo/src/lib.rs".to_string(),
29265                line: 12,
29266                end_line: None,
29267                tags: Some("alpha,helper".to_string()),
29268                score: 0.98,
29269                match_type: "exact_name".to_string(),
29270                tagpath_handle: None,
29271            },
29272            index::SymbolHit {
29273                name: "alphaHelper".to_string(),
29274                kind: "method".to_string(),
29275                language: "rust".to_string(),
29276                file: "/repo/src/main.rs".to_string(),
29277                line: 34,
29278                end_line: None,
29279                tags: Some("alpha,helper".to_string()),
29280                score: 0.93,
29281                match_type: "tag_overlap".to_string(),
29282                tagpath_handle: None,
29283            },
29284            index::SymbolHit {
29285                name: "alpha_helper".to_string(),
29286                kind: "function".to_string(),
29287                language: "rust".to_string(),
29288                file: "/repo/src/worker.rs".to_string(),
29289                line: 56,
29290                end_line: None,
29291                tags: Some("alpha,helper".to_string()),
29292                score: 0.91,
29293                match_type: "tag_overlap".to_string(),
29294                tagpath_handle: None,
29295            },
29296        ];
29297
29298        let report = build_search_budget_report(
29299            "alpha helper",
29300            "lexical",
29301            Path::new("/repo"),
29302            &response,
29303            &symbol_hits,
29304            false,
29305            ResponseBudget::new(Some(5), Some(48)),
29306        );
29307
29308        assert_eq!(report.symbol_total, 1);
29309        assert_eq!(report.raw_symbol_total, 3);
29310        assert_eq!(report.symbols.len(), 1);
29311        assert_eq!(report.symbols[0].tag_alias.as_deref(), Some("alpha/helper"));
29312        assert_eq!(report.symbols[0].match_count, 3);
29313        assert_eq!(report.symbols[0].surface_count, 2);
29314        assert_eq!(report.symbols[0].file_count, 3);
29315        assert_eq!(
29316            report.symbols[0].surface_examples,
29317            vec!["alpha_helper".to_string(), "alphaHelper".to_string()]
29318        );
29319        assert!(report.symbols[0].name.contains("(+1 variant)"));
29320        assert!(report.symbols[0].file.contains("(+2 files)"));
29321        assert!(report.symbols[0].expand.contains("tsift search"));
29322        assert!(report.symbols[0].expand.contains("alpha helper"));
29323    }
29324
29325    #[test]
29326    fn search_budget_report_warns_on_broad_preview_and_lists_narrowing_commands() {
29327        let mut response = empty_search_response(Path::new("/repo"), "lexical");
29328        response.indexed_artifacts = 450;
29329        let symbol_hits = vec![
29330            index::SymbolHit {
29331                name: "alpha_helper".to_string(),
29332                kind: "function".to_string(),
29333                language: "rust".to_string(),
29334                file: "/repo/src/lib.rs".to_string(),
29335                line: 12,
29336                end_line: None,
29337                tags: Some("alpha,helper".to_string()),
29338                score: 0.98,
29339                match_type: "exact_name".to_string(),
29340                tagpath_handle: None,
29341            },
29342            index::SymbolHit {
29343                name: "beta_helper".to_string(),
29344                kind: "function".to_string(),
29345                language: "rust".to_string(),
29346                file: "/repo/src/beta.rs".to_string(),
29347                line: 21,
29348                end_line: None,
29349                tags: Some("beta,helper".to_string()),
29350                score: 0.92,
29351                match_type: "tag_overlap".to_string(),
29352                tagpath_handle: None,
29353            },
29354        ];
29355
29356        let report = build_search_budget_report(
29357            "helper",
29358            "lexical",
29359            Path::new("/repo"),
29360            &response,
29361            &symbol_hits,
29362            false,
29363            ResponseBudget::new(Some(1), Some(64)),
29364        );
29365
29366        let guard = report
29367            .scale_guard
29368            .as_ref()
29369            .expect("broad previews should emit a scale guard");
29370        assert_eq!(guard.level, "high-hit");
29371        assert_eq!(guard.signals.indexed_artifacts, 450);
29372        assert_eq!(guard.signals.raw_symbol_matches, 2);
29373        assert!(
29374            guard
29375                .narrow_commands
29376                .iter()
29377                .any(|command| command.contains("--exact"))
29378        );
29379        assert!(
29380            guard
29381                .narrow_commands
29382                .iter()
29383                .any(|command| command.contains("alpha helper"))
29384        );
29385        assert!(
29386            guard
29387                .narrow_commands
29388                .last()
29389                .unwrap()
29390                .contains("workflow search")
29391        );
29392    }
29393
29394    #[test]
29395    fn explain_budget_report_limits_edges_and_members() {
29396        let symbols = vec![index::StoredSymbol {
29397            name: "alpha_helper".to_string(),
29398            kind: "function".to_string(),
29399            language: "rust".to_string(),
29400            signature: None,
29401            file: "src/lib.rs".to_string(),
29402            line: 10,
29403            end_line: None,
29404            parent_module: None,
29405            visibility: None,
29406            tags: None,
29407            tagpath_handle: None,
29408        }];
29409        let callers = vec![
29410            index::StoredEdge {
29411                caller_file: "src/main.rs".to_string(),
29412                caller_name: "main".to_string(),
29413                caller_line: 1,
29414                callee_name: "alpha_helper".to_string(),
29415                call_site_line: 3,
29416                tagpath_handle: None,
29417            },
29418            index::StoredEdge {
29419                caller_file: "src/worker.rs".to_string(),
29420                caller_name: "worker".to_string(),
29421                caller_line: 5,
29422                callee_name: "alpha_helper".to_string(),
29423                call_site_line: 8,
29424                tagpath_handle: None,
29425            },
29426        ];
29427        let community = graph::Community {
29428            id: 1,
29429            members: vec![
29430                graph::CommunityMember::new("alpha_helper"),
29431                graph::CommunityMember::new("main"),
29432                graph::CommunityMember::new("worker"),
29433            ],
29434            modularity_contribution: 0.5,
29435        };
29436
29437        let report = build_explain_budget_report(
29438            "alpha_helper",
29439            Path::new("/repo"),
29440            &symbols,
29441            &callers,
29442            2,
29443            false,
29444            &[],
29445            0,
29446            false,
29447            Some(&community),
29448            ResponseBudget::new(Some(1), Some(24)),
29449        );
29450
29451        assert_eq!(report.definitions.len(), 1);
29452        assert_eq!(report.callers.len(), 1);
29453        assert!(report.truncated);
29454        assert_eq!(report.community.as_ref().unwrap().members.len(), 1);
29455        assert_eq!(
29456            report.definitions[0].tag_alias.as_deref(),
29457            Some("alpha/helper")
29458        );
29459        assert!(report.callers[0].handle.starts_with("ecall-"));
29460        assert_eq!(report.callers[0].tag_alias.as_deref(), Some("main"));
29461    }
29462
29463    #[test]
29464    fn session_review_next_context_budget_limits_lists() {
29465        let report = session_review::SessionReviewReport {
29466            root: "/repo".to_string(),
29467            target: "tasks/software/tsift.md".to_string(),
29468            target_kind: "file".to_string(),
29469            sessions_considered: 1,
29470            sessions_matched: 1,
29471            claude_sessions: 1,
29472            codex_sessions: 0,
29473            agent_doc_logs: 0,
29474            prompt_target_count: 2,
29475            command_groups: 0,
29476            file_groups: 2,
29477            symbol_groups: 1,
29478            failure_groups: 1,
29479            runtime_event_groups: 0,
29480            restart_churn_groups: 0,
29481            closeout_groups: 0,
29482            usage_samples: 1,
29483            prompt_tokens: 120,
29484            cached_input_tokens: 80,
29485            cache_creation_input_tokens: 0,
29486            output_tokens: 40,
29487            reasoning_output_tokens: 0,
29488            total_tokens: 240,
29489            cached_input_ratio: Some(40.0),
29490            largest_turn_total_tokens: 240,
29491            aggregate_cost: session_review::SessionReviewCostSummary {
29492                scope: "bounded_matched_sessions".to_string(),
29493                sessions: 1,
29494                usage_samples: 1,
29495                prompt_tokens: 120,
29496                cached_input_tokens: 80,
29497                cache_creation_input_tokens: 0,
29498                output_tokens: 40,
29499                reasoning_output_tokens: 0,
29500                total_tokens: 240,
29501                cached_input_ratio: Some(40.0),
29502                largest_turn_total_tokens: 240,
29503            },
29504            latest_session_cost: Some(session_review::SessionReviewCostSummary {
29505                scope: "latest_matched_session".to_string(),
29506                sessions: 1,
29507                usage_samples: 1,
29508                prompt_tokens: 120,
29509                cached_input_tokens: 80,
29510                cache_creation_input_tokens: 0,
29511                output_tokens: 40,
29512                reasoning_output_tokens: 0,
29513                total_tokens: 240,
29514                cached_input_ratio: Some(66.67),
29515                largest_turn_total_tokens: 240,
29516            }),
29517            guardrails: vec![
29518                session_cost::SessionCostGuardrail {
29519                    kind: "cache_resend".to_string(),
29520                    severity: "warn".to_string(),
29521                    message: "cached input ratio was high".to_string(),
29522                    guidance: "compact or restart the session".to_string(),
29523                },
29524                session_cost::SessionCostGuardrail {
29525                    kind: "prompt_budget".to_string(),
29526                    severity: "warn".to_string(),
29527                    message: "largest prompt turn reached 999999 tokens".to_string(),
29528                    guidance: "compact the session before another large turn".to_string(),
29529                },
29530                session_cost::SessionCostGuardrail {
29531                    kind: "restart_loop".to_string(),
29532                    severity: "warn".to_string(),
29533                    message: "restart churn detected".to_string(),
29534                    guidance: "restart cleanly".to_string(),
29535                },
29536                session_cost::SessionCostGuardrail {
29537                    kind: "noop_closeout".to_string(),
29538                    severity: "warn".to_string(),
29539                    message: "commit_already_current appeared 8 times".to_string(),
29540                    guidance: "avoid reopening without new edits".to_string(),
29541                },
29542            ],
29543            loop_clusters: vec![],
29544            file_read_diagnostics: vec![],
29545            prompt_targets: vec![
29546                session_review::SessionReviewPromptTarget {
29547                    text: "do one".to_string(),
29548                    occurrences: 1,
29549                },
29550                session_review::SessionReviewPromptTarget {
29551                    text: "do two".to_string(),
29552                    occurrences: 1,
29553                },
29554            ],
29555            commands: vec![],
29556            touched_files: vec![],
29557            touched_symbols: vec![],
29558            failures: vec![],
29559            runtime_events: vec![],
29560            restart_churn: vec![],
29561            closeout: vec![],
29562            largest_turns: vec![],
29563            sessions: vec![session_review::SessionReviewSession {
29564                source: "claude_jsonl".to_string(),
29565                path: "/tmp/session.jsonl".to_string(),
29566                matched_by: vec!["path".to_string()],
29567                modified_unix_secs: None,
29568                prompt_target_count: 2,
29569                command_groups: 0,
29570                file_groups: 2,
29571                symbol_groups: 1,
29572                failure_groups: 1,
29573                runtime_event_groups: 0,
29574                restart_churn_groups: 0,
29575                closeout_groups: 0,
29576                usage_samples: 1,
29577                prompt_tokens: 120,
29578                cached_input_tokens: 80,
29579                cache_creation_input_tokens: 0,
29580                output_tokens: 40,
29581                reasoning_output_tokens: 0,
29582                total_tokens: 240,
29583                largest_turn_total_tokens: 240,
29584            }],
29585            next_context: session_review::SessionReviewNextContext {
29586                target: "tasks/software/tsift.md".to_string(),
29587                active_prompt_targets: vec!["do one".to_string(), "do two".to_string()],
29588                last_verification: session_review::SessionReviewVerificationState {
29589                    status: "green".to_string(),
29590                    detail: "cargo test".to_string(),
29591                },
29592                touched_files: vec!["src/lib.rs".to_string(), "src/main.rs".to_string()],
29593                touched_symbols: vec!["alpha_helper".to_string(), "main".to_string()],
29594                unresolved_failures: vec![session_review::SessionReviewFailure {
29595                    kind: "timeout".to_string(),
29596                    message: "search timed out".to_string(),
29597                    occurrences: 1,
29598                    command: None,
29599                    session_path: None,
29600                }],
29601                next_digest_commands: vec![
29602                    "tsift session-review --next-context tasks/software/tsift.md".to_string(),
29603                    "tsift diff-digest .".to_string(),
29604                    "tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log".to_string(),
29605                    "tsift log-digest --path . < target/very-long-build-output-file-name-that-must-remain-executable.log".to_string(),
29606                ],
29607            },
29608            warnings: vec![],
29609        };
29610
29611        let budget_report = build_session_review_next_context_budget_report(
29612            &report,
29613            ResponseBudget::new(Some(1), Some(12)),
29614            None,
29615        );
29616
29617        assert!(budget_report.truncated);
29618        assert_eq!(budget_report.prompt_targets, vec!["do one"]);
29619        assert_eq!(budget_report.touched_files, vec!["src/lib.rs"]);
29620        assert!(
29621            budget_report.touched_symbol_refs[0]
29622                .handle
29623                .starts_with("ncsym-")
29624        );
29625        assert_eq!(
29626            budget_report.touched_symbol_refs[0].tag_alias.as_deref(),
29627            Some("alpha/helper")
29628        );
29629        assert!(
29630            budget_report.unresolved_failures[0]
29631                .handle
29632                .starts_with("snf-")
29633        );
29634        assert_eq!(budget_report.next_digest_commands.len(), 4);
29635        assert_eq!(
29636            budget_report.next_digest_commands[2],
29637            "tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log"
29638        );
29639        assert_eq!(budget_report.next_token_actions.len(), 1);
29640        assert_eq!(budget_report.next_token_actions[0].kind, "prompt_budget");
29641
29642        let full_action_report = build_session_review_next_context_budget_report(
29643            &report,
29644            ResponseBudget::new(Some(4), Some(120)),
29645            None,
29646        );
29647        assert_eq!(
29648            full_action_report
29649                .next_token_actions
29650                .iter()
29651                .map(|action| action.kind.as_str())
29652                .collect::<Vec<_>>(),
29653            vec![
29654                "prompt_budget",
29655                "cache_resend",
29656                "restart_loop",
29657                "noop_closeout"
29658            ]
29659        );
29660        assert_eq!(
29661            full_action_report.next_token_actions[0]
29662                .compact_command
29663                .as_deref(),
29664            Some("agent-doc compact \"tasks/software/tsift.md\" --commit")
29665        );
29666        assert_eq!(
29667            full_action_report.next_token_actions[0]
29668                .restart_command
29669                .as_deref(),
29670            Some("agent-doc start \"tasks/software/tsift.md\"")
29671        );
29672        assert!(
29673            full_action_report.next_token_actions[0]
29674                .digest_commands
29675                .iter()
29676                .any(|command| command
29677                    == "tsift --envelope context-pack \"tasks/software/tsift.md\" --budget normal")
29678        );
29679    }
29680
29681    #[test]
29682    fn context_pack_diff_preview_limits_files_and_symbols() {
29683        let report = diff_digest::DiffDigestReport {
29684            root: "/repo".to_string(),
29685            mode: diff_digest::DiffDigestMode::WorkingTree,
29686            revision: None,
29687            files_changed: 2,
29688            files_with_current_summaries: 1,
29689            symbols_touched: 3,
29690            call_edges_added: 1,
29691            call_edges_removed: 0,
29692            files: vec![
29693                diff_digest::DiffDigestFile {
29694                    path: "src/lib.rs".to_string(),
29695                    status: diff_digest::DiffDigestFileStatus::Modified,
29696                    touched_symbols: vec!["alpha_helper".to_string(), "beta_helper".to_string()],
29697                    summary_state: diff_digest::DiffDigestSummaryState::Current,
29698                    current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
29699                        symbol: "alpha_helper".to_string(),
29700                        summary: "alpha helper handles the main alpha workflow".to_string(),
29701                    }],
29702                    added_call_edges: vec!["alpha->beta".to_string()],
29703                    removed_call_edges: vec![],
29704                    warnings: vec!["stale parse".to_string()],
29705                },
29706                diff_digest::DiffDigestFile {
29707                    path: "src/main.rs".to_string(),
29708                    status: diff_digest::DiffDigestFileStatus::Added,
29709                    touched_symbols: vec!["main".to_string()],
29710                    summary_state: diff_digest::DiffDigestSummaryState::Missing,
29711                    current_summaries: vec![],
29712                    added_call_edges: vec![],
29713                    removed_call_edges: vec![],
29714                    warnings: vec![],
29715                },
29716            ],
29717        };
29718
29719        let preview =
29720            build_context_pack_diff_preview(&report, ResponseBudget::new(Some(1), Some(11)), None);
29721
29722        assert!(preview.truncated);
29723        assert_eq!(preview.files.len(), 1);
29724        assert_eq!(preview.files[0].path, "src/lib.rs");
29725        assert_eq!(preview.files[0].touched_symbols, vec!["alpha_he..."]);
29726        assert!(
29727            preview.files[0].touched_symbol_refs[0]
29728                .handle
29729                .starts_with("cdsym-")
29730        );
29731        assert_eq!(
29732            preview.files[0].touched_symbol_refs[0].tag_alias.as_deref(),
29733            Some("alpha/he...")
29734        );
29735        assert!(
29736            preview.files[0].summary_refs[0]
29737                .handle
29738                .starts_with("cdsum-")
29739        );
29740        assert_eq!(
29741            preview.files[0].summary_refs[0].tag_alias.as_deref(),
29742            Some("alpha/he...")
29743        );
29744        assert_eq!(preview.files[0].summary_refs[0].summary, "alpha he...");
29745        assert_eq!(
29746            preview.files[0].summary_refs[0].expand,
29747            "tsift summarize --file \"src/lib.rs\""
29748        );
29749        assert_eq!(preview.files[0].warnings, vec!["stale parse"]);
29750    }
29751
29752    #[test]
29753    fn context_pack_status_reminders_include_stale_index_state() {
29754        let dir = setup_graph_index();
29755        std::thread::sleep(std::time::Duration::from_millis(50));
29756        std::fs::write(
29757            dir.path().join("main.rs"),
29758            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
29759        )
29760        .unwrap();
29761
29762        let reminders = context_pack_status_reminders(dir.path());
29763
29764        assert_eq!(reminders.len(), 1);
29765        assert!(reminders[0].contains("index stale"));
29766        assert!(reminders[0].contains("tsift index ."));
29767    }
29768
29769    // #gdbgatecold regression-lock: the trusted context-pack pipeline must
29770    // share its index-inspection across `prepare_agent_doc_index_gate` and
29771    // `context_pack_status_reminders` (both call `IndexDb::inspect_read_only`
29772    // on the same `(root, .tsift/index.db)` key). With the scope guard
29773    // active in `build_context_pack_report_with_profile`, the second call
29774    // hits the cache, so we should record one miss and at least one hit.
29775    #[test]
29776    fn build_context_pack_reuses_inspect_within_scope() {
29777        let dir = setup_graph_index();
29778        init_git_repo(dir.path());
29779        let _guard = index::InspectScopeGuard::new();
29780        let _ = build_context_pack_report(
29781            dir.path(),
29782            None,
29783            None,
29784            None,
29785            ResponseBudget::new(Some(2), Some(96)),
29786        )
29787        .unwrap();
29788        let (hits, misses) = index::inspect_scope_stats();
29789        assert!(
29790            hits >= 1,
29791            "expected at least one cached inspect within scope (hits={hits}, misses={misses})"
29792        );
29793        assert!(
29794            misses >= 1,
29795            "expected at least one initial inspect miss (hits={hits}, misses={misses})"
29796        );
29797    }
29798
29799    // #gdbgatecold scope-isolation: outside of any scope, every call to
29800    // `IndexDb::inspect_read_only` must hit the disk fresh. This locks in
29801    // the contract that the search/status fast-paths never reuse a cached
29802    // inspection across consecutive top-level calls.
29803    #[test]
29804    fn inspect_read_only_outside_scope_does_not_cache() {
29805        let dir = setup_graph_index();
29806        let db_path = dir.path().join(".tsift/index.db");
29807        let _first = index::IndexDb::inspect_read_only(&db_path, dir.path(), false).unwrap();
29808        let (hits, misses) = index::inspect_scope_stats();
29809        assert_eq!(
29810            (hits, misses),
29811            (0, 0),
29812            "no scope guard => no hits/misses recorded"
29813        );
29814        let _second = index::IndexDb::inspect_read_only(&db_path, dir.path(), false).unwrap();
29815        let (hits, _) = index::inspect_scope_stats();
29816        assert_eq!(hits, 0, "must not reuse inspection outside of any scope");
29817    }
29818
29819    #[test]
29820    fn context_pack_refreshes_stale_index_before_handoff() {
29821        let dir = setup_graph_index();
29822        init_git_repo(dir.path());
29823        std::thread::sleep(std::time::Duration::from_millis(50));
29824        std::fs::write(
29825            dir.path().join("main.rs"),
29826            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n",
29827        )
29828        .unwrap();
29829
29830        let report = build_context_pack_report(
29831            dir.path(),
29832            None,
29833            None,
29834            None,
29835            ResponseBudget::new(Some(2), Some(96)),
29836        )
29837        .unwrap();
29838
29839        assert!(
29840            report
29841                .status_reminders
29842                .iter()
29843                .any(|reminder| reminder.contains("index refreshed")
29844                    && reminder.contains("context-pack handoff")),
29845            "expected context-pack refresh diagnostic, got {:?}",
29846            report.status_reminders
29847        );
29848        assert!(
29849            !report
29850                .status_reminders
29851                .iter()
29852                .any(|reminder| reminder.contains("index stale")),
29853            "stale reminder should be gone after refresh: {:?}",
29854            report.status_reminders
29855        );
29856
29857        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
29858        let summary = db.compute_changes(dir.path()).unwrap();
29859        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
29860    }
29861
29862    #[test]
29863    fn context_pack_materializes_source_handles_into_graph_store() {
29864        let dir = tempfile::tempdir().unwrap();
29865        let packet = ExplorationPacket {
29866            budget: exploration_budget_for_counts(2, 1),
29867            relationship_map: vec![ExplorationRelation {
29868                from: "file:main.rs".to_string(),
29869                relation: "touches_symbol".to_string(),
29870                to: "symbol:helper".to_string(),
29871                label: Some("modified diff".to_string()),
29872            }],
29873            source_windows: vec![ExplorationSourceWindow {
29874                handle: "xwin-test".to_string(),
29875                file: "main.rs".to_string(),
29876                start: 1,
29877                end: 32,
29878                reason: "changed file".to_string(),
29879                expand: "tsift source-read main.rs --path . --start 1 --lines 32".to_string(),
29880            }],
29881            worker_context: vec![ExplorationWorkerContext {
29882                handle: "xwrk-test".to_string(),
29883                target: "tasks/software/tsift.md".to_string(),
29884                summary: "do #kgnv".to_string(),
29885                expand: "tsift --envelope context-pack tasks/software/tsift.md --budget normal"
29886                    .to_string(),
29887            }],
29888            no_reread_guidance: "use windows".to_string(),
29889        };
29890
29891        let packet = materialize_context_pack_exploration_packet(dir.path(), packet).unwrap();
29892        assert_eq!(packet.source_windows[0].handle, "xwin-test");
29893
29894        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
29895        let source_handles = store.nodes_by_kind("source_handle").unwrap();
29896        assert_eq!(source_handles.len(), 1);
29897        assert_eq!(
29898            source_handles[0].properties.get("file"),
29899            Some(&"main.rs".to_string())
29900        );
29901        assert_eq!(
29902            store
29903                .outgoing_edges(&exploration_ref_id("file:main.rs"), Some("touches_symbol"))
29904                .unwrap()
29905                .len(),
29906            1
29907        );
29908        let worker_context = store.nodes_by_kind("worker_context").unwrap();
29909        assert_eq!(worker_context.len(), 1);
29910        assert_eq!(
29911            store
29912                .outgoing_edges("xwrk-test", Some("scopes_source"))
29913                .unwrap()
29914                .len(),
29915            1
29916        );
29917    }
29918
29919    #[test]
29920    fn context_pack_records_graph_orchestration_observability() {
29921        let dir = setup_traversal_project();
29922        init_git_repo(dir.path());
29923        let session = dir.path().join("tasks/software/tsift.md");
29924        refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
29925
29926        let report = build_context_pack_report(
29927            &session,
29928            None,
29929            None,
29930            None,
29931            ResponseBudget::new(Some(4), Some(160)),
29932        )
29933        .unwrap();
29934
29935        assert_eq!(
29936            report.graph_orchestration.contract_version,
29937            CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION
29938        );
29939        assert_eq!(
29940            report
29941                .graph_orchestration
29942                .projection_freshness
29943                .status
29944                .as_str(),
29945            "current"
29946        );
29947        assert!(!report.graph_orchestration.projection_hashes.is_empty());
29948        assert!(
29949            report
29950                .graph_orchestration
29951                .evidence_packet_ids
29952                .iter()
29953                .any(|id| id.starts_with("gevd-")),
29954            "{:?}",
29955            report.graph_orchestration.evidence_packet_ids
29956        );
29957        assert!(
29958            report
29959                .graph_orchestration
29960                .conflict_matrix_decisions
29961                .iter()
29962                .any(|decision| decision.contains("run conflict-matrix")),
29963            "{:?}",
29964            report.graph_orchestration.conflict_matrix_decisions
29965        );
29966        assert!(
29967            report
29968                .graph_orchestration
29969                .follow_up_commands
29970                .iter()
29971                .any(|command| command.contains("conflict-matrix")),
29972            "{:?}",
29973            report.graph_orchestration.follow_up_commands
29974        );
29975        assert!(
29976            !report
29977                .graph_orchestration
29978                .worker_ownership_blocks
29979                .is_empty()
29980        );
29981    }
29982
29983    #[test]
29984    fn convex_sync_report_chunks_upserts_and_tombstones() {
29985        let dir = setup_traversal_project();
29986        let source_graph = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
29987        let projection = traversal_projection_from_graph(dir.path(), None, &source_graph).unwrap();
29988        let mut snapshot = projection.to_convex_rows();
29989        snapshot.nodes.push(ConvexNodeRow {
29990            external_id: "stale-node".to_string(),
29991            kind: "backlog".to_string(),
29992            label: "stale".to_string(),
29993            properties: BTreeMap::new(),
29994            provenance: Vec::new(),
29995            freshness: None,
29996        });
29997        snapshot.edges.clear();
29998        snapshot.edges.push(ConvexEdgeRow {
29999            edge_key: "stale-edge".to_string(),
30000            from_external_id: "stale-node".to_string(),
30001            to_external_id: "stale-node".to_string(),
30002            kind: "mentions".to_string(),
30003            properties: BTreeMap::new(),
30004            provenance: Vec::new(),
30005            freshness: None,
30006        });
30007        let snapshot_path = dir.path().join("convex-snapshot.json");
30008        fs::write(&snapshot_path, serde_json::to_string(&snapshot).unwrap()).unwrap();
30009
30010        let report = build_convex_sync_report(dir.path(), None, Some(&snapshot_path), 2).unwrap();
30011
30012        assert_eq!(report.freshness.status, "stale");
30013        assert!(report.freshness.fail_closed);
30014        assert_eq!(report.node_tombstones, vec!["stale-node".to_string()]);
30015        assert!(
30016            report.edge_upserts.len() > 1,
30017            "snapshot without edges should upsert local edges"
30018        );
30019        assert_eq!(report.edge_tombstones, vec!["stale-edge".to_string()]);
30020        assert_eq!(
30021            report.chunks.first().map(|chunk| chunk.operation.as_str()),
30022            Some("delete_edges"),
30023            "edge tombstones should be planned before node tombstones"
30024        );
30025        assert!(
30026            report
30027                .chunks
30028                .iter()
30029                .any(|chunk| chunk.operation == "upsert_edges" && chunk.count <= 2),
30030            "expected chunked edge upserts, got {:?}",
30031            report.chunks
30032        );
30033    }
30034
30035    #[test]
30036    fn convex_snapshot_validation_fails_closed_when_stale() {
30037        let dir = setup_traversal_project();
30038        build_traversal_graph(dir.path(), dir.path(), None).unwrap();
30039        let snapshot = ConvexProjectionRows::default();
30040        let snapshot_path = dir.path().join("empty-convex-snapshot.json");
30041        fs::write(&snapshot_path, serde_json::to_string(&snapshot).unwrap()).unwrap();
30042
30043        let err = verify_convex_projection_snapshot(dir.path(), None, &snapshot_path).unwrap_err();
30044        assert!(
30045            err.to_string()
30046                .contains("Convex graph projection is not current"),
30047            "{err}"
30048        );
30049    }
30050
30051    #[test]
30052    fn convex_sync_report_marks_live_apply_mode_without_network() {
30053        let dir = setup_traversal_project();
30054        let report =
30055            build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
30056
30057        assert!(!report.dry_run);
30058        assert!(
30059            !report
30060                .diagnostics
30061                .iter()
30062                .any(|diagnostic| diagnostic.contains("dry-run only")),
30063            "apply-mode report should not claim dry-run diagnostics"
30064        );
30065        assert!(
30066            report
30067                .chunks
30068                .iter()
30069                .any(|chunk| chunk.operation == "upsert_nodes"),
30070            "live apply mode should still expose chunked idempotent operations"
30071        );
30072    }
30073
30074    #[test]
30075    fn convex_sync_apply_round_trips_with_http_backend() {
30076        use std::net::TcpListener;
30077        use std::sync::{Arc, Mutex};
30078
30079        let dir = setup_traversal_project();
30080        let report =
30081            build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
30082        let expected_chunks = report.chunks.len();
30083        assert!(expected_chunks > 0);
30084
30085        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
30086        let endpoint = format!("http://{}", listener.local_addr().unwrap());
30087        let operations = Arc::new(Mutex::new(Vec::<String>::new()));
30088        let server_operations = Arc::clone(&operations);
30089        let server = std::thread::spawn(move || {
30090            for _ in 0..expected_chunks {
30091                let (mut stream, _) = listener.accept().unwrap();
30092                let mut reader = BufReader::new(stream.try_clone().unwrap());
30093                let mut request_line = String::new();
30094                reader.read_line(&mut request_line).unwrap();
30095                assert!(request_line.starts_with("POST "));
30096
30097                let mut content_length = 0usize;
30098                loop {
30099                    let mut line = String::new();
30100                    reader.read_line(&mut line).unwrap();
30101                    if line == "\r\n" {
30102                        break;
30103                    }
30104                    if let Some(value) = line.to_ascii_lowercase().strip_prefix("content-length:") {
30105                        content_length = value.trim().parse().unwrap();
30106                    }
30107                }
30108
30109                let mut body = vec![0u8; content_length];
30110                reader.read_exact(&mut body).unwrap();
30111                let request: serde_json::Value = serde_json::from_slice(&body).unwrap();
30112                server_operations
30113                    .lock()
30114                    .unwrap()
30115                    .push(request["operation"].as_str().unwrap().to_string());
30116
30117                let response = br#"{"status":"ok","message":"accepted"}"#;
30118                write!(
30119                    stream,
30120                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
30121                    response.len()
30122                )
30123                .unwrap();
30124                stream.write_all(response).unwrap();
30125            }
30126        });
30127
30128        cmd_convex_sync(
30129            ConvexSyncOptions {
30130                path: dir.path(),
30131                scope: None,
30132                snapshot: None,
30133                chunk_size: 100,
30134                remote_snapshot: false,
30135                apply: true,
30136                endpoint: Some(&endpoint),
30137                auth_token_env: "TSIFT_TEST_CONVEX_AUTH_TOKEN",
30138            },
30139            OutputFormat {
30140                json_output: false,
30141                compact: true,
30142                pretty: false,
30143                terse: false,
30144                schema: false,
30145                envelope: false,
30146            },
30147        )
30148        .unwrap();
30149        server.join().unwrap();
30150
30151        let operations = operations.lock().unwrap().clone();
30152        assert!(operations.contains(&"upsert_nodes".to_string()));
30153        assert!(operations.contains(&"upsert_edges".to_string()));
30154    }
30155
30156    #[test]
30157    fn context_pack_diff_preview_attaches_tag_ontology_refs() {
30158        let root = tempfile::tempdir().unwrap();
30159        fs::create_dir_all(root.path().join(".naming/tags")).unwrap();
30160        fs::write(
30161            root.path().join(".naming/tags/alpha.md"),
30162            "+++\ntag = \"alpha\"\ntitle = \"Alpha Domain\"\ndomain = \"fixture\"\n+++\n\nAlpha definition.\n",
30163        )
30164        .unwrap();
30165        let ontology = load_tag_ontology_preview_context(root.path()).unwrap();
30166        let report = diff_digest::DiffDigestReport {
30167            root: root.path().display().to_string(),
30168            mode: diff_digest::DiffDigestMode::WorkingTree,
30169            revision: None,
30170            files_changed: 1,
30171            files_with_current_summaries: 1,
30172            symbols_touched: 1,
30173            call_edges_added: 0,
30174            call_edges_removed: 0,
30175            files: vec![diff_digest::DiffDigestFile {
30176                path: "src/lib.rs".to_string(),
30177                status: diff_digest::DiffDigestFileStatus::Modified,
30178                touched_symbols: vec!["alpha_helper".to_string()],
30179                summary_state: diff_digest::DiffDigestSummaryState::Current,
30180                current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
30181                    symbol: "alpha_helper".to_string(),
30182                    summary: "alpha helper summary".to_string(),
30183                }],
30184                added_call_edges: vec![],
30185                removed_call_edges: vec![],
30186                warnings: vec![],
30187            }],
30188        };
30189
30190        let preview = build_context_pack_diff_preview(
30191            &report,
30192            ResponseBudget::new(Some(1), Some(80)),
30193            Some(&ontology),
30194        );
30195
30196        let symbol_ref = &preview.files[0].touched_symbol_refs[0].ontology_refs[0];
30197        assert!(symbol_ref.handle.starts_with("tont-"));
30198        assert_eq!(symbol_ref.tag, "alpha");
30199        assert_eq!(symbol_ref.path, ".naming/tags/alpha.md");
30200        assert_eq!(symbol_ref.title.as_deref(), Some("Alpha Domain"));
30201        assert_eq!(symbol_ref.domain.as_deref(), Some("fixture"));
30202        assert_eq!(
30203            preview.files[0].summary_refs[0].ontology_refs[0].path,
30204            ".naming/tags/alpha.md"
30205        );
30206    }
30207
30208    #[test]
30209    fn context_pack_test_preview_limits_failure_groups() {
30210        let report = test_digest::TestDigestReport {
30211            root: "/repo".to_string(),
30212            runner: "cargo".to_string(),
30213            failures: 2,
30214            grouped_failures: 2,
30215            counts: test_digest::TestDigestCounts {
30216                passed: Some(8),
30217                failed: Some(2),
30218                skipped: Some(1),
30219            },
30220            failure_groups: vec![
30221                test_digest::TestDigestFailure {
30222                    tests: vec!["suite::alpha_failure".to_string()],
30223                    message: "assertion failed".to_string(),
30224                    path: Some("src/lib.rs".to_string()),
30225                    line: Some(42),
30226                    column: None,
30227                    occurrences: 1,
30228                    summary_state: test_digest::TestDigestSummaryState::Current,
30229                    current_summaries: vec![test_digest::TestDigestSummarySnippet {
30230                        symbol: "alpha_failure".to_string(),
30231                        summary: "failure summary for alpha test".to_string(),
30232                    }],
30233                },
30234                test_digest::TestDigestFailure {
30235                    tests: vec!["suite::beta_failure".to_string()],
30236                    message: "panic".to_string(),
30237                    path: Some("src/main.rs".to_string()),
30238                    line: Some(7),
30239                    column: None,
30240                    occurrences: 1,
30241                    summary_state: test_digest::TestDigestSummaryState::Missing,
30242                    current_summaries: vec![],
30243                },
30244            ],
30245            warnings: vec!["warning text".to_string()],
30246        };
30247
30248        let preview =
30249            build_context_pack_test_preview(&report, ResponseBudget::new(Some(1), Some(14)), None);
30250
30251        assert!(preview.truncated);
30252        assert_eq!(preview.failure_groups.len(), 1);
30253        assert_eq!(preview.failure_groups[0].tests, vec!["suite::alph..."]);
30254        assert_eq!(preview.failure_groups[0].message, "assertion f...");
30255        assert!(
30256            preview.failure_groups[0].summary_refs[0]
30257                .handle
30258                .starts_with("ctsum-")
30259        );
30260        assert_eq!(
30261            preview.failure_groups[0].summary_refs[0].expand,
30262            "tsift summarize --file \"src/lib.rs\""
30263        );
30264        assert_eq!(preview.warnings, vec!["warning text"]);
30265    }
30266
30267    #[test]
30268    fn context_pack_log_preview_limits_signals_and_refs() {
30269        let report = log_digest::LogDigestReport {
30270            root: "/repo".to_string(),
30271            total_lines: 12,
30272            non_empty_lines: 10,
30273            signal_groups: 2,
30274            repeated_line_groups: 2,
30275            repeated_line_occurrences: 3,
30276            file_ref_groups: 2,
30277            symbol_ref_groups: 2,
30278            stack_groups: 1,
30279            signals: vec![
30280                log_digest::LogDigestSignal {
30281                    severity: "error".to_string(),
30282                    message: "src/lib.rs:42 boom".to_string(),
30283                    path: Some("src/lib.rs".to_string()),
30284                    line: Some(42),
30285                    column: None,
30286                    occurrences: 2,
30287                    summary_state: log_digest::LogDigestSummaryState::Current,
30288                    current_summaries: vec![log_digest::LogDigestSummarySnippet {
30289                        symbol: "alpha_helper".to_string(),
30290                        summary: "alpha helper cached log summary".to_string(),
30291                    }],
30292                },
30293                log_digest::LogDigestSignal {
30294                    severity: "warn".to_string(),
30295                    message: "slow path".to_string(),
30296                    path: None,
30297                    line: None,
30298                    column: None,
30299                    occurrences: 1,
30300                    summary_state: log_digest::LogDigestSummaryState::Unavailable,
30301                    current_summaries: vec![],
30302                },
30303            ],
30304            repeated_lines: vec![
30305                log_digest::LogDigestRepeatedLine {
30306                    line: "retrying work item alpha".to_string(),
30307                    occurrences: 3,
30308                },
30309                log_digest::LogDigestRepeatedLine {
30310                    line: "retrying work item beta".to_string(),
30311                    occurrences: 2,
30312                },
30313            ],
30314            file_refs: vec![
30315                log_digest::LogDigestFileRef {
30316                    path: "src/lib.rs".to_string(),
30317                    line: Some(42),
30318                    column: None,
30319                    occurrences: 2,
30320                    summary_state: log_digest::LogDigestSummaryState::Current,
30321                    current_summaries: vec![log_digest::LogDigestSummarySnippet {
30322                        symbol: "alpha_helper".to_string(),
30323                        summary: "alpha helper cached file summary".to_string(),
30324                    }],
30325                },
30326                log_digest::LogDigestFileRef {
30327                    path: "src/main.rs".to_string(),
30328                    line: Some(7),
30329                    column: None,
30330                    occurrences: 1,
30331                    summary_state: log_digest::LogDigestSummaryState::Missing,
30332                    current_summaries: vec![],
30333                },
30334            ],
30335            symbol_refs: vec![
30336                log_digest::LogDigestSymbolRef {
30337                    symbol: "alpha_helper".to_string(),
30338                    occurrences: 2,
30339                    summary_state: log_digest::LogDigestSummaryState::Current,
30340                    current_summaries: vec![log_digest::LogDigestSummarySnippet {
30341                        symbol: "alpha_helper".to_string(),
30342                        summary: "alpha helper cached symbol summary".to_string(),
30343                    }],
30344                },
30345                log_digest::LogDigestSymbolRef {
30346                    symbol: "beta_helper".to_string(),
30347                    occurrences: 1,
30348                    summary_state: log_digest::LogDigestSummaryState::Missing,
30349                    current_summaries: vec![],
30350                },
30351            ],
30352            stack_traces: vec![log_digest::LogDigestStackGroup {
30353                frames: vec!["frame one".to_string()],
30354                occurrences: 1,
30355            }],
30356            warnings: vec!["warning text".to_string()],
30357        };
30358
30359        let preview =
30360            build_context_pack_log_preview(&report, ResponseBudget::new(Some(1), Some(14)), None);
30361
30362        assert!(preview.truncated);
30363        assert_eq!(preview.signals.len(), 1);
30364        assert_eq!(preview.signals[0].message, "src/lib.rs:...");
30365        assert_eq!(preview.repeated_lines[0].line, "retrying wo...");
30366        assert_eq!(preview.file_refs.len(), 1);
30367        assert_eq!(preview.symbol_refs[0].symbol, "alpha_helper");
30368        assert!(
30369            preview.signals[0].summary_refs[0]
30370                .handle
30371                .starts_with("clsum-")
30372        );
30373        assert!(
30374            preview.file_refs[0].summary_refs[0]
30375                .handle
30376                .starts_with("clfsum-")
30377        );
30378        assert!(
30379            preview.symbol_refs[0].summary_refs[0]
30380                .handle
30381                .starts_with("clssum-")
30382        );
30383        assert_eq!(
30384            preview.symbol_refs[0].summary_refs[0].tag_alias.as_deref(),
30385            Some("alpha/helper")
30386        );
30387        assert_eq!(
30388            preview.symbol_refs[0].summary_refs[0].expand,
30389            "tsift summarize \"alpha_helper\""
30390        );
30391        assert_eq!(preview.warnings, vec!["warning text"]);
30392    }
30393
30394    #[test]
30395    fn cli_search_rejects_exact_with_strategy_flag() {
30396        let cli = try_parse_cli([
30397            "tsift",
30398            "search",
30399            "test",
30400            "--exact",
30401            "--strategy",
30402            "lexical",
30403        ]);
30404        assert!(cli.is_err());
30405    }
30406
30407    #[test]
30408    fn cli_search_autoindexes_by_default() {
30409        let cli = parse_cli(["tsift", "search", "test"]);
30410        match cli.command {
30411            Some(Commands::Search {
30412                autoindex,
30413                no_autoindex,
30414                ..
30415            }) => {
30416                assert!(!autoindex);
30417                assert!(!no_autoindex);
30418                assert!(autoindex || !no_autoindex);
30419            }
30420            _ => panic!("expected Search command"),
30421        }
30422    }
30423
30424    #[test]
30425    fn cli_search_accepts_no_autoindex_flag() {
30426        let cli = parse_cli(["tsift", "search", "test", "--no-autoindex"]);
30427        match cli.command {
30428            Some(Commands::Search {
30429                autoindex,
30430                no_autoindex,
30431                ..
30432            }) => {
30433                assert!(!autoindex);
30434                assert!(no_autoindex);
30435            }
30436            _ => panic!("expected Search command"),
30437        }
30438    }
30439
30440    #[test]
30441    fn cli_search_rejects_conflicting_autoindex_flags() {
30442        let cli = try_parse_cli(["tsift", "search", "test", "--autoindex", "--no-autoindex"]);
30443        assert!(cli.is_err());
30444    }
30445
30446    // --- relativize paths ---
30447
30448    #[test]
30449    fn cli_accepts_global_absolute_flag() {
30450        let cli = parse_cli(["tsift", "--absolute", "status"]);
30451        assert!(cli.absolute);
30452        assert!(matches!(cli.command, Some(Commands::Status { .. })));
30453    }
30454
30455    #[test]
30456    fn cli_accepts_global_tabular_flag() {
30457        let cli = parse_cli(["tsift", "--tabular", "search", "test"]);
30458        assert!(cli.tabular);
30459        assert!(matches!(cli.command, Some(Commands::Search { .. })));
30460    }
30461
30462    #[test]
30463    fn cli_tabular_with_graph() {
30464        let cli = parse_cli(["tsift", "--tabular", "graph", "main"]);
30465        assert!(cli.tabular);
30466        assert!(matches!(cli.command, Some(Commands::Graph { .. })));
30467    }
30468
30469    #[test]
30470    fn cli_tabular_with_communities() {
30471        let cli = parse_cli(["tsift", "--tabular", "communities"]);
30472        assert!(cli.tabular);
30473        assert!(matches!(cli.command, Some(Commands::Communities { .. })));
30474    }
30475
30476    #[test]
30477    fn cli_tabular_with_explain() {
30478        let cli = parse_cli(["tsift", "--tabular", "explain", "main"]);
30479        assert!(cli.tabular);
30480        assert!(matches!(cli.command, Some(Commands::Explain { .. })));
30481    }
30482
30483    #[test]
30484    fn cli_traverse_accepts_path_target_and_html_format() {
30485        let cli = parse_cli([
30486            "tsift", "traverse", "#kgnv", "--to", "main", "--path", ".", "--format", "html",
30487        ]);
30488        match cli.command {
30489            Some(Commands::Traverse {
30490                node,
30491                to,
30492                path,
30493                format,
30494                ..
30495            }) => {
30496                assert_eq!(node.as_deref(), Some("#kgnv"));
30497                assert_eq!(to.as_deref(), Some("main"));
30498                assert_eq!(path, PathBuf::from("."));
30499                assert_eq!(format, TraverseFormat::Html);
30500            }
30501            _ => panic!("expected Traverse command"),
30502        }
30503    }
30504
30505    #[test]
30506    fn cli_parses_semantic_related_command() {
30507        let cli = parse_cli([
30508            "tsift",
30509            "semantic",
30510            "graph navigation",
30511            "--path",
30512            ".",
30513            "--kind",
30514            "all",
30515            "--limit",
30516            "3",
30517            "--json",
30518        ]);
30519        match cli.command {
30520            Some(Commands::Semantic {
30521                query,
30522                path,
30523                kind,
30524                limit,
30525                json,
30526                ..
30527            }) => {
30528                assert_eq!(query, "graph navigation");
30529                assert_eq!(path, PathBuf::from("."));
30530                assert_eq!(kind, SemanticRelatedKind::All);
30531                assert_eq!(limit, 3);
30532                assert!(json);
30533            }
30534            _ => panic!("expected Semantic command"),
30535        }
30536    }
30537
30538    #[test]
30539    fn cli_parses_convex_sync_command() {
30540        let cli = parse_cli([
30541            "tsift",
30542            "convex-sync",
30543            ".",
30544            "--snapshot",
30545            "rows.json",
30546            "--chunk-size",
30547            "25",
30548            "--json",
30549        ]);
30550        match cli.command {
30551            Some(Commands::ConvexSync {
30552                path,
30553                snapshot,
30554                chunk_size,
30555                json,
30556                ..
30557            }) => {
30558                assert_eq!(path, PathBuf::from("."));
30559                assert_eq!(snapshot, Some(PathBuf::from("rows.json")));
30560                assert_eq!(chunk_size, 25);
30561                assert!(json);
30562            }
30563            _ => panic!("expected ConvexSync command"),
30564        }
30565    }
30566
30567    #[test]
30568    fn cli_parses_convex_sync_live_flags() {
30569        let cli = parse_cli([
30570            "tsift",
30571            "convex-sync",
30572            ".",
30573            "--remote-snapshot",
30574            "--apply",
30575            "--endpoint",
30576            "https://example.test/convex-graph",
30577            "--auth-token-env",
30578            "TSIFT_TEST_TOKEN",
30579        ]);
30580        match cli.command {
30581            Some(Commands::ConvexSync {
30582                remote_snapshot,
30583                apply,
30584                endpoint,
30585                auth_token_env,
30586                ..
30587            }) => {
30588                assert!(remote_snapshot);
30589                assert!(apply);
30590                assert_eq!(
30591                    endpoint.as_deref(),
30592                    Some("https://example.test/convex-graph")
30593                );
30594                assert_eq!(auth_token_env, "TSIFT_TEST_TOKEN");
30595            }
30596            _ => panic!("expected ConvexSync command"),
30597        }
30598    }
30599
30600    #[test]
30601    fn cli_parses_graph_db_query() {
30602        let cli = parse_cli([
30603            "tsift",
30604            "graph-db",
30605            "--backend",
30606            "convex-snapshot",
30607            "--convex-snapshot",
30608            "rows.json",
30609            "--json",
30610            "neighborhood",
30611            "gbak-kgnv",
30612            "--depth",
30613            "2",
30614            "--edge-kind",
30615            "mentions",
30616            "--property",
30617            "path=tasks/software/tsift.md",
30618            "--cursor",
30619            "gbak-old",
30620            "--limit",
30621            "10",
30622        ]);
30623        match cli.command {
30624            Some(Commands::GraphDb {
30625                backend,
30626                convex_snapshot,
30627                json,
30628                query,
30629                ..
30630            }) => {
30631                assert_eq!(backend, GraphDbBackend::ConvexSnapshot);
30632                assert_eq!(convex_snapshot, Some(PathBuf::from("rows.json")));
30633                assert!(json);
30634                match query {
30635                    GraphDbQuery::Neighborhood {
30636                        id,
30637                        depth,
30638                        edge_kind,
30639                        cursor,
30640                        limit,
30641                        property_filters,
30642                    } => {
30643                        assert_eq!(id, "gbak-kgnv");
30644                        assert_eq!(depth, 2);
30645                        assert_eq!(edge_kind.as_deref(), Some("mentions"));
30646                        assert_eq!(cursor.as_deref(), Some("gbak-old"));
30647                        assert_eq!(limit, Some(10));
30648                        assert_eq!(
30649                            property_filters,
30650                            vec!["path=tasks/software/tsift.md".to_string()]
30651                        );
30652                    }
30653                    _ => panic!("expected graph-db neighborhood query"),
30654                }
30655            }
30656            _ => panic!("expected GraphDb command"),
30657        }
30658    }
30659
30660    #[test]
30661    fn cli_parses_graph_db_tokensave_backend() {
30662        let cli = parse_cli([
30663            "tsift",
30664            "graph-db",
30665            "--backend",
30666            "tokensave",
30667            "--json",
30668            "node",
30669            "fn:main",
30670        ]);
30671        match cli.command {
30672            Some(Commands::GraphDb {
30673                backend,
30674                json,
30675                query,
30676                ..
30677            }) => {
30678                assert_eq!(backend, GraphDbBackend::Tokensave);
30679                assert!(json);
30680                match query {
30681                    GraphDbQuery::Node { id } => assert_eq!(id, "fn:main"),
30682                    _ => panic!("expected graph-db node query"),
30683                }
30684            }
30685            _ => panic!("expected GraphDb command"),
30686        }
30687    }
30688
30689    #[test]
30690    fn cli_parses_analyze_command() {
30691        let cli = parse_cli([
30692            "tsift", "analyze", ".", "--scope", "core", "--entry", "main", "--entry", "run",
30693            "--limit", "7", "--json",
30694        ]);
30695        match cli.command {
30696            Some(Commands::Analyze {
30697                path,
30698                scope,
30699                entry_points,
30700                limit,
30701                json,
30702            }) => {
30703                assert_eq!(path, PathBuf::from("."));
30704                assert_eq!(scope.as_deref(), Some("core"));
30705                assert_eq!(entry_points, vec!["main".to_string(), "run".to_string()]);
30706                assert_eq!(limit, 7);
30707                assert!(json);
30708            }
30709            _ => panic!("expected Analyze command"),
30710        }
30711    }
30712
30713    #[test]
30714    fn cli_parses_graph_db_related_query() {
30715        let cli = parse_cli([
30716            "tsift",
30717            "graph-db",
30718            "--json",
30719            "related",
30720            "voice avatar memory retrieval",
30721            "--kind",
30722            "all",
30723            "--depth",
30724            "3",
30725            "--seed-limit",
30726            "4",
30727            "--limit",
30728            "12",
30729        ]);
30730        match cli.command {
30731            Some(Commands::GraphDb { json, query, .. }) => {
30732                assert!(json);
30733                match query {
30734                    GraphDbQuery::Related {
30735                        query,
30736                        kind,
30737                        depth,
30738                        seed_limit,
30739                        limit,
30740                    } => {
30741                        assert_eq!(query, "voice avatar memory retrieval");
30742                        assert_eq!(kind, SemanticRelatedKind::All);
30743                        assert_eq!(depth, 3);
30744                        assert_eq!(seed_limit, 4);
30745                        assert_eq!(limit, 12);
30746                    }
30747                    _ => panic!("expected graph-db related query"),
30748                }
30749            }
30750            _ => panic!("expected GraphDb command"),
30751        }
30752    }
30753
30754    #[test]
30755    fn cli_parses_graph_db_compact_query() {
30756        let cli = parse_cli([
30757            "tsift",
30758            "graph-db",
30759            "--path",
30760            ".",
30761            "compact",
30762            "--apply",
30763            "--prune-tombstones",
30764            "--confirmed-convex-reconciled",
30765        ]);
30766        match cli.command {
30767            Some(Commands::GraphDb { query, .. }) => match query {
30768                GraphDbQuery::Compact {
30769                    apply,
30770                    prune_tombstones,
30771                    confirmed_convex_reconciled,
30772                } => {
30773                    assert!(apply);
30774                    assert!(prune_tombstones);
30775                    assert!(confirmed_convex_reconciled);
30776                }
30777                _ => panic!("expected graph-db compact query"),
30778            },
30779            _ => panic!("expected GraphDb command"),
30780        }
30781    }
30782
30783    #[test]
30784    fn cli_parses_impact_command() {
30785        let cli = parse_cli(["tsift", "impact", ".", "--cached", "--limit", "5"]);
30786        match cli.command {
30787            Some(Commands::Impact {
30788                path,
30789                cached,
30790                limit,
30791                ..
30792            }) => {
30793                assert_eq!(path, PathBuf::from("."));
30794                assert!(cached);
30795                assert_eq!(limit, 5);
30796            }
30797            _ => panic!("expected Impact command"),
30798        }
30799    }
30800
30801    #[test]
30802    fn cli_parses_conflict_matrix_command() {
30803        let cli = parse_cli([
30804            "tsift",
30805            "conflict-matrix",
30806            "--path",
30807            "tasks/software/tsift.md",
30808            "--depth",
30809            "4",
30810            "--limit",
30811            "12",
30812            "--impact-limit",
30813            "6",
30814            "--json",
30815            "pwcm",
30816            "#g6kf",
30817        ]);
30818        match cli.command {
30819            Some(Commands::ConflictMatrix {
30820                targets,
30821                path,
30822                depth,
30823                limit,
30824                impact_limit,
30825                json,
30826                ..
30827            }) => {
30828                assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
30829                assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
30830                assert_eq!(depth, 4);
30831                assert_eq!(limit, 12);
30832                assert_eq!(impact_limit, 6);
30833                assert!(json);
30834            }
30835            _ => panic!("expected ConflictMatrix command"),
30836        }
30837    }
30838
30839    #[test]
30840    fn cli_parses_dispatch_trace_command() {
30841        let cli = parse_cli([
30842            "tsift",
30843            "dispatch-trace",
30844            "--path",
30845            "tasks/software/tsift.md",
30846            "--format",
30847            "html",
30848            "--depth",
30849            "4",
30850            "pwcm",
30851            "#g6kf",
30852        ]);
30853        match cli.command {
30854            Some(Commands::DispatchTrace {
30855                targets,
30856                path,
30857                format,
30858                depth,
30859                ..
30860            }) => {
30861                assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
30862                assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
30863                assert_eq!(format, DispatchTraceFormat::Html);
30864                assert_eq!(depth, 4);
30865            }
30866            _ => panic!("expected DispatchTrace command"),
30867        }
30868    }
30869
30870    #[test]
30871    fn cli_parses_dependency_dag_command() {
30872        let cli = parse_cli([
30873            "tsift",
30874            "dependency-dag",
30875            "--path",
30876            "tasks/software/tsift.md",
30877            "--depth",
30878            "5",
30879            "--limit",
30880            "20",
30881            "--json",
30882            "alpha",
30883            "#beta",
30884        ]);
30885        match cli.command {
30886            Some(Commands::DependencyDag {
30887                targets,
30888                path,
30889                depth,
30890                limit,
30891                json,
30892                ..
30893            }) => {
30894                assert_eq!(targets, vec!["alpha".to_string(), "#beta".to_string()]);
30895                assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
30896                assert_eq!(depth, 5);
30897                assert_eq!(limit, 20);
30898                assert!(json);
30899            }
30900            _ => panic!("expected DependencyDag command"),
30901        }
30902    }
30903
30904    #[test]
30905    fn relativize_strips_root_prefix() {
30906        let root = std::path::Path::new("/home/user/project");
30907        assert_eq!(
30908            relativize("/home/user/project/src/main.rs", root),
30909            "src/main.rs"
30910        );
30911    }
30912
30913    #[test]
30914    fn relativize_leaves_non_matching_path() {
30915        let root = std::path::Path::new("/home/user/project");
30916        assert_eq!(
30917            relativize("/other/path/file.rs", root),
30918            "/other/path/file.rs"
30919        );
30920    }
30921
30922    #[test]
30923    fn relativize_leaves_already_relative() {
30924        let root = std::path::Path::new("/home/user/project");
30925        assert_eq!(relativize("src/main.rs", root), "src/main.rs");
30926    }
30927
30928    #[test]
30929    fn relativize_pathbuf_strips_prefix() {
30930        let root = std::path::Path::new("/home/user/project");
30931        let path = std::path::Path::new("/home/user/project/src/lib.rs");
30932        assert_eq!(relativize_pathbuf(path, root), PathBuf::from("src/lib.rs"));
30933    }
30934
30935    #[test]
30936    fn relativize_edges_strips_caller_file() {
30937        let root = std::path::Path::new("/tmp/proj");
30938        let mut edges = vec![index::StoredEdge {
30939            caller_file: "/tmp/proj/src/main.rs".to_string(),
30940            caller_name: "main".to_string(),
30941            caller_line: 1,
30942            callee_name: "helper".to_string(),
30943            call_site_line: 5,
30944            tagpath_handle: None,
30945        }];
30946        relativize_edges(&mut edges, root);
30947        assert_eq!(edges[0].caller_file, "src/main.rs");
30948    }
30949
30950    #[test]
30951    fn relativize_json_paths_strips_known_keys() {
30952        let root = std::path::Path::new("/tmp/proj");
30953        let mut val = serde_json::json!({
30954            "file": "/tmp/proj/src/main.rs",
30955            "path": "/tmp/proj/test.rs",
30956            "name": "/tmp/proj/not-a-path",
30957            "hits": [{"path": "/tmp/proj/nested.rs", "score": 1.0}]
30958        });
30959        relativize_json_paths(&mut val, root);
30960        assert_eq!(val["file"], "src/main.rs");
30961        assert_eq!(val["path"], "test.rs");
30962        assert_eq!(val["name"], "/tmp/proj/not-a-path");
30963        assert_eq!(val["hits"][0]["path"], "nested.rs");
30964    }
30965
30966    // --- limit caps ---
30967
30968    #[test]
30969    fn cli_graph_accepts_limit_flag() {
30970        let cli = parse_cli(["tsift", "graph", "main", "--limit", "5"]);
30971        match cli.command {
30972            Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 5),
30973            _ => panic!("expected Graph command"),
30974        }
30975    }
30976
30977    #[test]
30978    fn cli_graph_default_limit_is_20() {
30979        let cli = parse_cli(["tsift", "graph", "main"]);
30980        match cli.command {
30981            Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 20),
30982            _ => panic!("expected Graph command"),
30983        }
30984    }
30985
30986    #[test]
30987    fn cli_communities_accepts_limit_flag() {
30988        let cli = parse_cli(["tsift", "communities", "--limit", "3"]);
30989        match cli.command {
30990            Some(Commands::Communities { limit, .. }) => assert_eq!(limit, 3),
30991            _ => panic!("expected Communities command"),
30992        }
30993    }
30994
30995    #[test]
30996    fn cli_communities_default_limit_is_10() {
30997        let cli = parse_cli(["tsift", "communities"]);
30998        match cli.command {
30999            Some(Commands::Communities { limit, .. }) => assert_eq!(limit, 10),
31000            _ => panic!("expected Communities command"),
31001        }
31002    }
31003
31004    #[test]
31005    fn cli_explain_accepts_limit_flag() {
31006        let cli = parse_cli(["tsift", "explain", "main", "--limit", "7"]);
31007        match cli.command {
31008            Some(Commands::Explain { limit, .. }) => assert_eq!(limit, 7),
31009            _ => panic!("expected Explain command"),
31010        }
31011    }
31012
31013    #[test]
31014    fn cli_explain_default_limit_is_15() {
31015        let cli = parse_cli(["tsift", "explain", "main"]);
31016        match cli.command {
31017            Some(Commands::Explain { limit, .. }) => assert_eq!(limit, 15),
31018            _ => panic!("expected Explain command"),
31019        }
31020    }
31021
31022    #[test]
31023    fn cli_limit_zero_means_unlimited() {
31024        let cli = parse_cli(["tsift", "graph", "main", "--limit", "0"]);
31025        match cli.command {
31026            Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 0),
31027            _ => panic!("expected Graph command"),
31028        }
31029    }
31030
31031    #[test]
31032    fn graph_cmd_limit_runs_ok() {
31033        let dir = setup_graph_index();
31034        let result = cmd_graph(
31035            "main",
31036            dir.path(),
31037            false,
31038            false,
31039            None,
31040            1,
31041            false,
31042            false,
31043            false,
31044            false,
31045            false,
31046            false,
31047            false,
31048            TagpathSearchOpts::default(),
31049        );
31050        assert!(result.is_ok());
31051    }
31052
31053    #[test]
31054    fn graph_cmd_unlimited_runs_ok() {
31055        let dir = setup_graph_index();
31056        let result = cmd_graph(
31057            "main",
31058            dir.path(),
31059            false,
31060            false,
31061            None,
31062            0,
31063            false,
31064            false,
31065            false,
31066            false,
31067            false,
31068            false,
31069            false,
31070            TagpathSearchOpts::default(),
31071        );
31072        assert!(result.is_ok());
31073    }
31074
31075    #[test]
31076    fn graph_cmd_tabular_runs_ok() {
31077        let dir = setup_graph_index();
31078        let result = cmd_graph(
31079            "main",
31080            dir.path(),
31081            false,
31082            false,
31083            None,
31084            20,
31085            false,
31086            false,
31087            false,
31088            false,
31089            false,
31090            true,
31091            false,
31092            TagpathSearchOpts::default(),
31093        );
31094        assert!(result.is_ok());
31095    }
31096
31097    #[test]
31098    fn communities_cmd_tabular_runs_ok() {
31099        let dir = setup_graph_index();
31100        let result = cmd_communities(
31101            dir.path(),
31102            None,
31103            1,
31104            10,
31105            false,
31106            false,
31107            false,
31108            false,
31109            true,
31110            false,
31111            TagpathSearchOpts::default(),
31112        );
31113        assert!(result.is_ok());
31114    }
31115
31116    #[test]
31117    fn explain_cmd_tabular_runs_ok() {
31118        let dir = setup_graph_index();
31119        let result = cmd_explain(
31120            "main",
31121            dir.path(),
31122            None,
31123            15,
31124            false,
31125            false,
31126            false,
31127            false,
31128            false,
31129            true,
31130            false,
31131        );
31132        assert!(result.is_ok());
31133    }
31134
31135    #[test]
31136    fn traversal_excludes_agent_doc_runtime_paths_from_source_watermark() {
31137        // #gdbcacheprove: .agent-doc runtime markdown (snapshots, baselines, archives,
31138        // session docs, runtime logs) must not contribute to the source watermark, or
31139        // every agent-doc cycle would invalidate the graph-db backend-eval cache and
31140        // force a full rebuild on the next run.
31141        let cases = [
31142            ".agent-doc",
31143            ".agent-doc/snapshots/abc.md",
31144            ".agent-doc/baselines/abc.md",
31145            ".agent-doc/archives/2026.md",
31146            ".agent-doc/runtime/run.jsonl",
31147            "src/foo/.agent-doc",
31148            "src/foo/.agent-doc/snapshots/x.md",
31149            "./.agent-doc/snapshots/x.md",
31150        ];
31151        for path in cases {
31152            assert!(
31153                traversal_relative_path_is_generated_artifact(path),
31154                "expected `{path}` to be excluded from source watermark"
31155            );
31156        }
31157        // Real source paths must NOT be excluded.
31158        for path in [
31159            "src/main.rs",
31160            "tests/perf_gate.rs",
31161            "fixtures/x.json",
31162            "agent-doc/src/lib.rs", // sibling dir without the leading dot
31163            "src/.agent-doc-helper.rs",
31164        ] {
31165            assert!(
31166                !traversal_relative_path_is_generated_artifact(path),
31167                "expected `{path}` to be included in source watermark"
31168            );
31169        }
31170    }
31171
31172    #[test]
31173    fn traversal_excludes_tsift_and_target_runtime_paths_from_source_watermark() {
31174        // #cachelookupshift: the conflict-matrix preparation cache key hashes
31175        // file_state snapshot rows + every markdown file under the root. Any
31176        // .tsift/, target/, or .agent-doc/ path slipping past the filter would
31177        // shift the watermark every run because those directories mutate as a
31178        // side effect of running tsift itself. This test locks the artifact
31179        // filter against regressions for each prefix variant
31180        // (bare, root-anchored, nested, and './' leading).
31181        let cases = [
31182            ".tsift",
31183            ".tsift/index.db",
31184            ".tsift/indexes/foo/index.db",
31185            ".tsift/conflict-matrix-cache/inputs/abc.json",
31186            ".tsift/summaries.db",
31187            "src/foo/.tsift",
31188            "src/foo/.tsift/graph.db",
31189            "./.tsift/index.db",
31190            "target",
31191            "target/debug/build/x",
31192            "target/release/tsift",
31193            "src/foo/target/debug/x",
31194            "./target/release/x",
31195        ];
31196        for path in cases {
31197            assert!(
31198                traversal_relative_path_is_generated_artifact(path),
31199                "expected `{path}` to be excluded from source watermark"
31200            );
31201        }
31202        // Look-alike paths must NOT be excluded — only true artifact dirs.
31203        for path in [
31204            "src/ctx-core-dev/lib/a__target/CHANGELOG.md",
31205            "src/ctx-core-dev/lib/a__target/A__Target/index.d.ts",
31206            "src/tsift-extras/lib.rs",
31207            "tsift/README.md",
31208            "src/targeting.rs",
31209            "src/.tsiftrc",
31210            "src/agent-doc-helper.rs",
31211        ] {
31212            assert!(
31213                !traversal_relative_path_is_generated_artifact(path),
31214                "expected `{path}` to be included in source watermark"
31215            );
31216        }
31217    }
31218
31219    #[test]
31220    fn traversal_source_watermark_is_stable_across_invocations_on_quiescent_root() {
31221        // #cachelookupshift: the conflict-matrix preparation cache only hits
31222        // when traversal_source_watermark returns the same hash for two
31223        // consecutive calls on identical source state. Lock that invariant so
31224        // a future change that folds wall-clock time, a directory mtime, or
31225        // any other non-content input into the hash trips this test before
31226        // regressing the preparation_cache_lookup hit rate. We exercise the
31227        // session_only=true path with a hinted markdown file so the test does
31228        // not need a full index DB to drive the index-snapshot branch.
31229        let dir = tempfile::tempdir().unwrap();
31230        let root = dir.path();
31231        std::fs::create_dir_all(root.join("src")).unwrap();
31232        std::fs::write(root.join("src/main.rs"), "fn main() {}\n").unwrap();
31233        let hint = root.join("README.md");
31234        std::fs::write(&hint, "# stable\n").unwrap();
31235        // Add a generated-artifact directory that must NOT affect the watermark.
31236        std::fs::create_dir_all(root.join(".tsift")).unwrap();
31237        std::fs::write(root.join(".tsift/index.db"), b"placeholder").unwrap();
31238        std::fs::create_dir_all(root.join("target/debug")).unwrap();
31239        std::fs::write(root.join("target/debug/marker"), b"placeholder").unwrap();
31240
31241        let first = traversal_source_watermark(root, &hint, None, true)
31242            .expect("first watermark call must succeed")
31243            .expect("first watermark must produce a hash for hinted markdown");
31244        let second = traversal_source_watermark(root, &hint, None, true)
31245            .expect("second watermark call must succeed")
31246            .expect("second watermark must produce a hash for hinted markdown");
31247        assert_eq!(
31248            first, second,
31249            "watermark must be identical across back-to-back invocations on a quiescent root"
31250        );
31251
31252        // Mutating a generated-artifact file must NOT shift the hash.
31253        std::fs::write(root.join(".tsift/index.db"), b"changed").unwrap();
31254        std::fs::write(root.join("target/debug/marker"), b"changed").unwrap();
31255        let third = traversal_source_watermark(root, &hint, None, true)
31256            .expect("third watermark call must succeed")
31257            .expect("third watermark must produce a hash for hinted markdown");
31258        assert_eq!(
31259            first, third,
31260            "watermark must ignore mutations under .tsift/ and target/"
31261        );
31262
31263        // Mutating the hinted markdown file MUST shift the hash so the
31264        // preparation cache invalidates correctly when user state changes.
31265        // Sleep briefly to push the file mtime past the original even on
31266        // coarse-resolution filesystems.
31267        std::thread::sleep(std::time::Duration::from_millis(20));
31268        std::fs::write(&hint, "# stable edited with longer content\n").unwrap();
31269        let fourth = traversal_source_watermark(root, &hint, None, true)
31270            .expect("fourth watermark call must succeed")
31271            .expect("fourth watermark must produce a hash for hinted markdown");
31272        assert_ne!(
31273            first, fourth,
31274            "watermark must invalidate when the hinted markdown file changes"
31275        );
31276    }
31277
31278    #[test]
31279    fn traversal_source_watermark_uses_summary_rows_not_summaries_db_metadata() {
31280        // #gcachemiss: full-projection cache keys must not miss just because the
31281        // SQLite summary cache file header or mtime churned. Only the semantic rows
31282        // that feed traversal projection should participate in the source watermark.
31283        let dir = tempfile::tempdir().unwrap();
31284        let root = dir.path();
31285        std::fs::write(root.join("README.md"), "# stable\n").unwrap();
31286        let summaries_db_path = root.join(".tsift/summaries.db");
31287        let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
31288        let mut summary = summarize::Summary {
31289            id: 0,
31290            symbol_name: "main".to_string(),
31291            file_path: "src/main.rs".to_string(),
31292            content_hash: "hash-main".to_string(),
31293            summary: "main wires the CLI".to_string(),
31294            entities: Some(vec![summarize::Entity {
31295                name: "Cli".to_string(),
31296                kind: "type".to_string(),
31297                description: "Command-line interface".to_string(),
31298            }]),
31299            relationships: None,
31300            concept_labels: Some(vec!["cli".to_string()]),
31301            extracted_at: "1700000000".to_string(),
31302            model: "test-model".to_string(),
31303            tokens_input: Some(10),
31304            tokens_output: Some(5),
31305        };
31306        summary_db.insert(&summary).unwrap();
31307        drop(summary_db);
31308
31309        let hint = root.join("README.md");
31310        let first = traversal_source_watermark(root, &hint, None, true)
31311            .expect("first watermark call must succeed")
31312            .expect("first watermark must produce a hash");
31313
31314        std::thread::sleep(std::time::Duration::from_millis(20));
31315        let conn = Connection::open(&summaries_db_path).unwrap();
31316        conn.pragma_update(None, "user_version", 1).unwrap();
31317        conn.pragma_update(None, "user_version", 0).unwrap();
31318        drop(conn);
31319
31320        let second = traversal_source_watermark(root, &hint, None, true)
31321            .expect("second watermark call must succeed")
31322            .expect("second watermark must produce a hash");
31323        assert_eq!(
31324            first, second,
31325            "metadata-only summaries.db churn must not invalidate the source watermark"
31326        );
31327
31328        summary.entities = Some(vec![summarize::Entity {
31329            name: "GraphCache".to_string(),
31330            kind: "type".to_string(),
31331            description: "Stable full-projection cache input".to_string(),
31332        }]);
31333        let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
31334        summary_db.delete_by_file("src/main.rs").unwrap();
31335        summary_db.insert(&summary).unwrap();
31336        drop(summary_db);
31337
31338        let third = traversal_source_watermark(root, &hint, None, true)
31339            .expect("third watermark call must succeed")
31340            .expect("third watermark must produce a hash");
31341        assert_ne!(
31342            first, third,
31343            "semantic summary row changes must invalidate the source watermark"
31344        );
31345    }
31346
31347    #[test]
31348    fn full_projection_source_watermark_ignores_source_mtime_when_index_rows_unchanged() {
31349        // #gfullhot: backend-eval full-projection cache keys should be based on
31350        // the indexed graph inputs, not file_state mtimes. Touching a source file
31351        // without changing extracted symbols/call edges must still hit the cache.
31352        let dir = tempfile::tempdir().unwrap();
31353        let root = dir.path();
31354        std::fs::create_dir_all(root.join("src")).unwrap();
31355        std::fs::create_dir_all(root.join(".tsift")).unwrap();
31356        let source = root.join("src/lib.rs");
31357        let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
31358        std::fs::write(&source, source_body).unwrap();
31359        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31360        db.rebuild(root).unwrap();
31361        drop(db);
31362
31363        let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
31364            .unwrap()
31365            .value;
31366        std::thread::sleep(std::time::Duration::from_millis(20));
31367        std::fs::write(&source, source_body).unwrap();
31368        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31369        db.apply_changes(root).unwrap();
31370        drop(db);
31371
31372        let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
31373            .unwrap()
31374            .value;
31375        assert_eq!(
31376            first, second,
31377            "mtime-only source index churn must not invalidate the full-projection cache"
31378        );
31379    }
31380
31381    #[test]
31382    fn full_projection_source_watermark_ignores_session_markdown_churn() {
31383        // #gfullhot: the full-projection performance cache isolates code graph
31384        // and semantic-summary inputs. Current session evidence is measured by
31385        // the bounded real dataset, so unrelated task-doc edits must not force a
31386        // million-row full-projection rebuild.
31387        let dir = tempfile::tempdir().unwrap();
31388        let root = dir.path();
31389        std::fs::create_dir_all(root.join("src")).unwrap();
31390        std::fs::create_dir_all(root.join("tasks/software")).unwrap();
31391        std::fs::create_dir_all(root.join(".tsift")).unwrap();
31392        std::fs::write(root.join("src/lib.rs"), "pub fn alpha() {}\n").unwrap();
31393        let task_doc = root.join("tasks/software/tsift.md");
31394        std::fs::write(
31395            &task_doc,
31396            "---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Initial item\n",
31397        )
31398        .unwrap();
31399        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31400        db.rebuild(root).unwrap();
31401        drop(db);
31402
31403        let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
31404            .unwrap()
31405            .value;
31406        std::fs::write(
31407            &task_doc,
31408            "---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Edited item\n",
31409        )
31410        .unwrap();
31411        let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
31412            .unwrap()
31413            .value;
31414        assert_eq!(
31415            first, second,
31416            "session markdown churn must not invalidate the full-projection code/summary cache"
31417        );
31418    }
31419
31420    #[test]
31421    fn full_projection_cache_hit_skips_provider_neutral_rebuild_after_mtime_churn() {
31422        // #gfullhot: once a full-project projection is cached, repeated samples
31423        // with unchanged graph inputs must report zero source_graph_build and
31424        // projection_rows work even if indexed file mtimes changed.
31425        let dir = tempfile::tempdir().unwrap();
31426        let root = dir.path();
31427        std::fs::create_dir_all(root.join("src")).unwrap();
31428        std::fs::create_dir_all(root.join(".tsift")).unwrap();
31429        let source = root.join("src/lib.rs");
31430        let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
31431        std::fs::write(&source, source_body).unwrap();
31432        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31433        db.rebuild(root).unwrap();
31434        drop(db);
31435
31436        let (_projection, _warnings, _phases, first_stats) =
31437            graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
31438        assert!(
31439            !first_stats.hit,
31440            "the first full-projection run should populate the cache"
31441        );
31442
31443        std::thread::sleep(std::time::Duration::from_millis(20));
31444        std::fs::write(&source, source_body).unwrap();
31445        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31446        db.apply_changes(root).unwrap();
31447        drop(db);
31448
31449        let (_projection, _warnings, phases, second_stats) =
31450            graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
31451        assert!(second_stats.hit, "mtime-only churn should still cache-hit");
31452        let source_graph_build = phases
31453            .iter()
31454            .find(|phase| phase.name == "full_projection.source_graph_build")
31455            .expect("cache hit must report source_graph_build");
31456        let projection_rows = phases
31457            .iter()
31458            .find(|phase| phase.name == "full_projection.projection_rows")
31459            .expect("cache hit must report projection_rows");
31460        assert_eq!(source_graph_build.duration_micros, 0);
31461        assert_eq!(projection_rows.duration_micros, 0);
31462    }
31463}
31464
31465// --- SQL introspection ---
31466
/// Serializable description of one table, as produced by `schema_overview`.
#[derive(Serialize)]
struct TableInfo {
    /// Table name as listed in `sqlite_master`.
    name: String,
    /// Column metadata for the table, from `table_columns`.
    columns: Vec<ColumnInfo>,
    /// Result of `SELECT COUNT(*)` on the table at inspection time.
    row_count: i64,
}
31473
/// Serializable description of one column, built from a `PRAGMA table_info` row.
#[derive(Serialize)]
struct ColumnInfo {
    /// Column name.
    name: String,
    /// Declared column type; serialized under the key `type`.
    #[serde(rename = "type")]
    col_type: String,
    /// Whether the pragma reports the column as NOT NULL.
    notnull: bool,
    /// Whether the pragma's `pk` value is greater than zero.
    pk: bool,
    /// Default value reported by the pragma; omitted from the output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    default_value: Option<String>,
}
31484
31485/// Open a SQLite connection (read-only).
31486pub(crate) fn open_db(path: &std::path::Path) -> Result<Connection> {
31487    let conn = Connection::open_with_flags(
31488        path,
31489        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
31490    )
31491    .with_context(|| format!("opening database: {}", path.display()))?;
31492    Ok(conn)
31493}
31494
31495/// List all user tables with column metadata and row counts.
31496pub(crate) fn schema_overview(conn: &Connection) -> Result<Vec<TableInfo>> {
31497    let mut stmt = conn.prepare(
31498        "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name",
31499    )?;
31500    let table_names: Vec<String> = stmt
31501        .query_map([], |row| row.get(0))?
31502        .collect::<std::result::Result<Vec<_>, _>>()?;
31503
31504    let mut tables = Vec::new();
31505    for tbl in table_names {
31506        let columns = table_columns(conn, &tbl)?;
31507        let row_count: i64 =
31508            conn.query_row(&format!("SELECT COUNT(*) FROM \"{}\"", tbl), [], |row| {
31509                row.get(0)
31510            })?;
31511        tables.push(TableInfo {
31512            name: tbl,
31513            columns,
31514            row_count,
31515        });
31516    }
31517    Ok(tables)
31518}
31519
31520/// Get column metadata for a single table.
31521pub(crate) fn table_columns(conn: &Connection, table: &str) -> Result<Vec<ColumnInfo>> {
31522    let mut stmt = conn.prepare(&format!("PRAGMA table_info(\"{}\")", table))?;
31523    let cols = stmt
31524        .query_map([], |row| {
31525            Ok(ColumnInfo {
31526                name: row.get(1)?,
31527                col_type: row.get::<_, String>(2).unwrap_or_default(),
31528                notnull: row.get::<_, bool>(3).unwrap_or(false),
31529                pk: row.get::<_, i32>(5).unwrap_or(0) > 0,
31530                default_value: row.get(4)?,
31531            })
31532        })?
31533        .collect::<std::result::Result<Vec<_>, _>>()?;
31534    Ok(cols)
31535}
31536
31537/// Execute an arbitrary SQL query and return rows as JSON values.
31538pub(crate) fn execute_query(
31539    conn: &Connection,
31540    sql: &str,
31541) -> Result<(Vec<String>, Vec<Vec<serde_json::Value>>)> {
31542    let mut stmt = conn.prepare(sql).context("preparing SQL query")?;
31543    let col_names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
31544    let col_count = col_names.len();
31545
31546    let mut rows = Vec::new();
31547    let mut query_rows = stmt.query([])?;
31548    while let Some(row) = query_rows.next()? {
31549        let mut vals = Vec::with_capacity(col_count);
31550        for i in 0..col_count {
31551            let val = match row.get_ref(i)? {
31552                rusqlite::types::ValueRef::Null => serde_json::Value::Null,
31553                rusqlite::types::ValueRef::Integer(n) => serde_json::json!(n),
31554                rusqlite::types::ValueRef::Real(f) => serde_json::json!(f),
31555                rusqlite::types::ValueRef::Text(s) => {
31556                    serde_json::Value::String(String::from_utf8_lossy(s).into_owned())
31557                }
31558                rusqlite::types::ValueRef::Blob(b) => {
31559                    serde_json::Value::String(format!("<blob {} bytes>", b.len()))
31560                }
31561            };
31562            vals.push(val);
31563        }
31564        rows.push(vals);
31565    }
31566    Ok((col_names, rows))
31567}
31568
31569// --- Command rewriting for hook integrations and manual bounded execution ---
31570
/// Limits applied to a rewritten command's captured stdout before printing.
#[derive(Clone, Copy)]
struct OutputCap {
    /// Maximum number of non-blank lines kept; the remainder is replaced by a
    /// single "... (+N more lines ...)" marker line.
    max_lines: usize,
    /// When set, lines starting with this prefix are dropped from the output.
    strip_prefix: Option<&'static str>,
}
31576
31577pub(crate) fn execute_rewritten_command(command: &str) -> Result<i32> {
31578    let effective_command = effective_rewrite_run_command(command);
31579    let parts = shell_split(&effective_command);
31580    let Some(program) = parts.first().map(|part| strip_shell_quotes(part)) else {
31581        bail!("rewritten command was empty");
31582    };
31583    let args: Vec<String> = parts[1..]
31584        .iter()
31585        .map(|part| strip_shell_quotes(part).to_string())
31586        .collect();
31587    let mut command = if program == "tsift" {
31588        Command::new(std::env::current_exe().context("resolving current tsift executable")?)
31589    } else {
31590        Command::new(program)
31591    };
31592    let output = command
31593        .args(&args)
31594        .output()
31595        .with_context(|| format!("executing rewritten command `{effective_command}`"))?;
31596
31597    let stdout = if let Some(cap) = rewrite_output_cap(&effective_command) {
31598        apply_output_cap(&output.stdout, cap)
31599    } else {
31600        String::from_utf8_lossy(&output.stdout).into_owned()
31601    };
31602    if !stdout.is_empty() {
31603        print!("{stdout}");
31604    }
31605    if !output.stderr.is_empty() {
31606        eprint!("{}", String::from_utf8_lossy(&output.stderr));
31607    }
31608
31609    Ok(output
31610        .status
31611        .code()
31612        .unwrap_or_else(|| if output.status.success() { 0 } else { 1 }))
31613}
31614
31615fn effective_rewrite_run_command(command: &str) -> String {
31616    let parts = shell_split(command);
31617    if parts.first().map(|part| strip_shell_quotes(part)) != Some("tsift") {
31618        return command.to_string();
31619    }
31620    let structured = parts
31621        .iter()
31622        .skip(1)
31623        .any(|part| strip_shell_quotes(part) == "--timeout");
31624    let subcommand = parts
31625        .iter()
31626        .skip(1)
31627        .map(|part| strip_shell_quotes(part))
31628        .find(|part| !part.starts_with('-'));
31629    if matches!(subcommand, Some("search")) && !structured {
31630        format!("{command} --timeout 0")
31631    } else {
31632        command.to_string()
31633    }
31634}
31635
31636pub(crate) fn apply_rewrite_output_format(command: &str, format: OutputFormat) -> String {
31637    let trimmed = command.trim_start();
31638    let Some(rest) = trimmed.strip_prefix("tsift") else {
31639        return command.to_string();
31640    };
31641    let existing_parts = shell_split(rest);
31642
31643    let mut flags = Vec::new();
31644    if format.compact && !rewrite_has_global_flag(&existing_parts, "--compact") {
31645        flags.push("--compact");
31646    }
31647    if format.pretty && !rewrite_has_global_flag(&existing_parts, "--pretty") {
31648        flags.push("--pretty");
31649    }
31650    if format.terse && !rewrite_has_global_flag(&existing_parts, "--terse") {
31651        flags.push("--terse");
31652    }
31653    if format.schema && !rewrite_has_global_flag(&existing_parts, "--schema") {
31654        flags.push("--schema");
31655    }
31656    if format.envelope {
31657        if !rewrite_has_global_flag(&existing_parts, "--envelope") {
31658            flags.push("--envelope");
31659        }
31660    } else if format.json_output
31661        && !rewrite_has_global_flag(&existing_parts, "--json")
31662        && !rewrite_has_global_flag(&existing_parts, "--envelope")
31663    {
31664        flags.push("--json");
31665    }
31666
31667    if flags.is_empty() {
31668        return command.to_string();
31669    }
31670
31671    let forwarded = flags.join(" ");
31672    if rest.trim().is_empty() {
31673        format!("tsift {forwarded}")
31674    } else {
31675        format!("tsift {forwarded}{rest}")
31676    }
31677}
31678
31679fn rewrite_has_global_flag(parts: &[&str], flag: &str) -> bool {
31680    parts
31681        .iter()
31682        .take_while(|part| {
31683            let value = strip_shell_quotes(part);
31684            value.starts_with('-') || value == "tsift"
31685        })
31686        .any(|part| strip_shell_quotes(part) == flag)
31687}
31688
31689fn rewrite_output_cap(command: &str) -> Option<OutputCap> {
31690    let parts = shell_split(command);
31691    if strip_shell_quotes(parts.first()?) != "tsift" {
31692        return None;
31693    }
31694    let structured = parts.iter().skip(1).any(|part| {
31695        matches!(
31696            strip_shell_quotes(part),
31697            "--json" | "--terse" | "--schema" | "--tabular" | "--envelope"
31698        )
31699    });
31700    if structured {
31701        return None;
31702    }
31703
31704    let subcommand = parts
31705        .iter()
31706        .skip(1)
31707        .map(|part| strip_shell_quotes(part))
31708        .find(|part| !part.starts_with('-'))?;
31709    match subcommand {
31710        "communities" => Some(OutputCap {
31711            max_lines: 80,
31712            strip_prefix: None,
31713        }),
31714        "explain" => Some(OutputCap {
31715            max_lines: 40,
31716            strip_prefix: None,
31717        }),
31718        "graph" => Some(OutputCap {
31719            max_lines: 50,
31720            strip_prefix: None,
31721        }),
31722        "index" => Some(OutputCap {
31723            max_lines: 30,
31724            strip_prefix: None,
31725        }),
31726        "search" => Some(OutputCap {
31727            max_lines: 50,
31728            strip_prefix: Some("Strategy:"),
31729        }),
31730        _ => None,
31731    }
31732}
31733
31734fn apply_output_cap(stdout: &[u8], cap: OutputCap) -> String {
31735    let cleaned = strip_ansi_codes(&String::from_utf8_lossy(stdout));
31736    let mut lines: Vec<String> = cleaned
31737        .lines()
31738        .map(str::trim_end)
31739        .filter(|line| !line.trim().is_empty())
31740        .filter(|line| {
31741            cap.strip_prefix
31742                .map(|prefix| !line.starts_with(prefix))
31743                .unwrap_or(true)
31744        })
31745        .map(ToOwned::to_owned)
31746        .collect();
31747    if lines.len() > cap.max_lines {
31748        let hidden = lines.len() - cap.max_lines;
31749        lines.truncate(cap.max_lines);
31750        lines.push(format!(
31751            "... (+{hidden} more lines; rerun the underlying tsift command directly for the full output)"
31752        ));
31753    }
31754    if lines.is_empty() {
31755        String::new()
31756    } else {
31757        format!("{}\n", lines.join("\n"))
31758    }
31759}
31760
/// Remove `ESC [` control sequences from `input`.
///
/// A sequence starts at `ESC` immediately followed by `[` and runs through the
/// first subsequent character in the range `'@'..='~'`. An unterminated
/// sequence swallows the rest of the input. An `ESC` that is not followed by
/// `[` is kept as-is.
fn strip_ansi_codes(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    let mut rest = input.chars().peekable();
    loop {
        let Some(current) = rest.next() else {
            break;
        };
        let starts_sequence = current == '\u{1b}' && rest.peek() == Some(&'[');
        if !starts_sequence {
            cleaned.push(current);
            continue;
        }
        // Drop the `[` introducer, then everything up to and including the
        // final byte of the sequence.
        rest.next();
        let _ = rest.by_ref().find(|next| ('@'..='~').contains(next));
    }
    cleaned
}
31778
31779/// Attempt to rewrite a shell command to use tsift.
31780/// Returns Some(rewritten) if applicable, None if no match.
31781///
31782/// `pub` (not `pub(crate)`) so the `tsift-sim-world` test-harness crate can
31783/// exercise the rewrite surface as a dev-dependency.
31784pub fn rewrite_command(command: &str) -> Option<String> {
31785    let trimmed = command.trim();
31786
31787    // Already a tsift command — pass through (exit 0, identical)
31788    if trimmed.starts_with("tsift ") || trimmed == "tsift" {
31789        return Some(command.to_string());
31790    }
31791
31792    // rg <pattern> [path] [flags] → tsift search "<pattern>" --exact [--path <path>]
31793    if let Some(rewritten) = rewrite_rg(trimmed) {
31794        return Some(rewritten);
31795    }
31796
31797    // grep -r <pattern> [path] → tsift search "<pattern>" --exact [--path <path>]
31798    if let Some(rewritten) = rewrite_grep(trimmed) {
31799        return Some(rewritten);
31800    }
31801
31802    // git diff / git show / patch-style history → tsift diff-digest
31803    if let Some(rewritten) = rewrite_git_diff(trimmed) {
31804        return Some(rewritten);
31805    }
31806    if let Some(rewritten) = rewrite_git_show(trimmed) {
31807        return Some(rewritten);
31808    }
31809    if let Some(rewritten) = rewrite_git_patch_history(trimmed) {
31810        return Some(rewritten);
31811    }
31812
31813    // long session/doc transcript reads → tsift session-digest
31814    if let Some(rewritten) = rewrite_session_read_command(trimmed) {
31815        return Some(rewritten);
31816    }
31817
31818    // large source-file reads inside indexed repos → tsift source-read windows
31819    if let Some(rewritten) = rewrite_source_read_command(trimmed) {
31820        return Some(rewritten);
31821    }
31822
31823    // cargo test / pytest → tsift-owned test digest wrapper that preserves exit status
31824    if let Some(rewritten) = rewrite_test_command(trimmed) {
31825        return Some(rewritten);
31826    }
31827
31828    // verbose build/check/install commands → tsift-owned log digest wrapper
31829    if let Some(rewritten) = rewrite_log_command(trimmed) {
31830        return Some(rewritten);
31831    }
31832
31833    None
31834}
31835
31836pub(crate) fn no_rewrite_message(command: &str, run: bool) -> String {
31837    let trimmed = command.trim();
31838    let parts = shell_split(trimmed);
31839    let reason = if trimmed.is_empty() {
31840        "empty command"
31841    } else if has_shell_metacharacters(trimmed) {
31842        "shell metacharacters such as pipes, redirection, or background operators are not rewritten"
31843    } else if is_file_listing_command(&parts) {
31844        "file-listing commands keep original shell/find/rg semantics"
31845    } else {
31846        "no supported tsift rewrite matched this command"
31847    };
31848    let action = if run {
31849        "`--run` executes only rewritten commands; run the original command directly if intended"
31850    } else {
31851        "run the original command unchanged"
31852    };
31853    format!("tsift rewrite: no rewrite: {reason}; {action}")
31854}
31855
/// Report whether the tokenized command only lists files rather than searching.
///
/// True for any `find` command, and for `rg` when one of its arguments is
/// `--files` or `--type-list`.
fn is_file_listing_command(parts: &[&str]) -> bool {
    let Some((&program, args)) = parts.split_first() else {
        return false;
    };
    match program {
        "find" => true,
        "rg" => args
            .iter()
            .any(|&arg| arg == "--files" || arg == "--type-list"),
        _ => false,
    }
}
31866
31867/// Rewrite `rg` (ripgrep) commands to tsift search.
31868fn rewrite_rg(cmd: &str) -> Option<String> {
31869    let parts: Vec<&str> = shell_split(cmd);
31870    if parts.is_empty() || parts[0] != "rg" {
31871        return None;
31872    }
31873
31874    // File-listing forms do not have a search pattern. Leave them to the
31875    // original command so roots, globs, and ignore rules keep rg semantics.
31876    if is_file_listing_command(&parts) {
31877        return None;
31878    }
31879
31880    // Skip if rg is used with complex flags we can't translate
31881    // (pipe chains, output redirection, --replace, --count, etc.)
31882    if cmd.contains('|')
31883        || cmd.contains('>')
31884        || cmd.contains("--replace")
31885        || cmd.contains("--count")
31886        || cmd.contains("-c")
31887        || cmd.contains("--files-with-matches")
31888        || cmd.contains("--files-without-match")
31889        || cmd.contains("-l")
31890    {
31891        return None;
31892    }
31893
31894    // Extract the pattern (first non-flag argument after rg)
31895    let mut pattern = None;
31896    let mut path = None;
31897    let mut skip_next = false;
31898
31899    for part in &parts[1..] {
31900        if skip_next {
31901            skip_next = false;
31902            continue;
31903        }
31904        // Flags that take a value
31905        if matches!(
31906            *part,
31907            "-t" | "--type"
31908                | "-g"
31909                | "--glob"
31910                | "-A"
31911                | "-B"
31912                | "-C"
31913                | "--max-count"
31914                | "--max-depth"
31915                | "-m"
31916                | "-e"
31917        ) {
31918            skip_next = true;
31919            continue;
31920        }
31921        // Skip standalone flags
31922        if part.starts_with('-') {
31923            continue;
31924        }
31925        // First positional = pattern, second = path
31926        if pattern.is_none() {
31927            pattern = Some(*part);
31928        } else if path.is_none() {
31929            path = Some(*part);
31930        }
31931    }
31932
31933    Some(build_agent_search_preview_command(pattern?, path))
31934}
31935
31936/// Rewrite `grep -r` commands to tsift search.
31937fn rewrite_grep(cmd: &str) -> Option<String> {
31938    let parts: Vec<&str> = shell_split(cmd);
31939    if parts.is_empty() || parts[0] != "grep" {
31940        return None;
31941    }
31942
31943    // Only rewrite recursive grep
31944    let has_recursive = parts.iter().any(|p| {
31945        *p == "-r"
31946            || *p == "-R"
31947            || *p == "--recursive"
31948            || p.contains('r') && p.starts_with('-') && !p.starts_with("--")
31949    });
31950    if !has_recursive {
31951        return None;
31952    }
31953
31954    // Skip pipe chains
31955    if cmd.contains('|') || cmd.contains('>') {
31956        return None;
31957    }
31958
31959    let mut pattern = None;
31960    let mut path = None;
31961    let mut skip_next = false;
31962
31963    for part in &parts[1..] {
31964        if skip_next {
31965            skip_next = false;
31966            continue;
31967        }
31968        if matches!(*part, "--include" | "--exclude" | "--exclude-dir" | "-e") {
31969            skip_next = true;
31970            continue;
31971        }
31972        if part.starts_with('-') {
31973            continue;
31974        }
31975        if pattern.is_none() {
31976            pattern = Some(*part);
31977        } else if path.is_none() {
31978            path = Some(*part);
31979        }
31980    }
31981
31982    Some(build_agent_search_preview_command(pattern?, path))
31983}
31984
31985fn build_agent_search_preview_command(pattern: &str, path: Option<&str>) -> String {
31986    let mut result = format!(
31987        "tsift --envelope search {} --exact --budget normal",
31988        shell_quote(pattern)
31989    );
31990    if let Some(p) = path {
31991        result.push_str(&format!(" --path {}", shell_quote(p)));
31992    }
31993    result
31994}
31995
31996fn rewrite_git_diff(cmd: &str) -> Option<String> {
31997    if has_shell_metacharacters(cmd) {
31998        return None;
31999    }
32000
32001    let parts: Vec<&str> = shell_split(cmd);
32002    if parts.len() < 2 || parts[0] != "git" || parts[1] != "diff" {
32003        return None;
32004    }
32005    let mut cached = false;
32006    let mut path = None;
32007    let mut after_double_dash = false;
32008
32009    for part in &parts[2..] {
32010        if after_double_dash {
32011            if path.is_none() && !part.starts_with('-') {
32012                path = Some(*part);
32013                continue;
32014            }
32015            return None;
32016        }
32017        match *part {
32018            "--cached" | "--staged" => cached = true,
32019            "--" => after_double_dash = true,
32020            raw if looks_like_path_selector(raw) => {
32021                if path.replace(raw).is_some() {
32022                    return None;
32023                }
32024            }
32025            _ => return None,
32026        }
32027    }
32028
32029    Some(build_diff_digest_command(path.unwrap_or("."), cached, None))
32030}
32031
32032fn rewrite_git_show(cmd: &str) -> Option<String> {
32033    if has_shell_metacharacters(cmd) {
32034        return None;
32035    }
32036
32037    let parts: Vec<&str> = shell_split(cmd);
32038    if parts.len() < 2 || parts[0] != "git" || parts[1] != "show" {
32039        return None;
32040    }
32041
32042    let mut revision = "HEAD";
32043    let mut path = None;
32044    let mut after_double_dash = false;
32045
32046    for part in &parts[2..] {
32047        if after_double_dash {
32048            if path.is_none() && !part.starts_with('-') {
32049                path = Some(*part);
32050                continue;
32051            }
32052            return None;
32053        }
32054        match *part {
32055            "--" => after_double_dash = true,
32056            "-p" | "--patch" | "--stat" => {}
32057            raw if raw.starts_with("--format=") => {}
32058            raw if !raw.starts_with('-') => {
32059                if revision != "HEAD" {
32060                    return None;
32061                }
32062                revision = raw;
32063            }
32064            _ => return None,
32065        }
32066    }
32067
32068    Some(build_diff_digest_command(
32069        path.unwrap_or("."),
32070        false,
32071        Some(revision),
32072    ))
32073}
32074
32075fn rewrite_git_patch_history(cmd: &str) -> Option<String> {
32076    if has_shell_metacharacters(cmd) {
32077        return None;
32078    }
32079
32080    let parts: Vec<&str> = shell_split(cmd);
32081    if parts.len() < 2 || parts[0] != "git" || parts[1] != "log" {
32082        return None;
32083    }
32084
32085    let mut saw_patch = false;
32086    let mut saw_single_commit = false;
32087    let mut revision = "HEAD";
32088    let mut path = None;
32089    let mut after_double_dash = false;
32090    let mut skip_next = false;
32091
32092    for part in &parts[2..] {
32093        if skip_next {
32094            skip_next = false;
32095            if *part == "1" {
32096                saw_single_commit = true;
32097                continue;
32098            }
32099            return None;
32100        }
32101        if after_double_dash {
32102            if path.is_none() && !part.starts_with('-') {
32103                path = Some(*part);
32104                continue;
32105            }
32106            return None;
32107        }
32108        match *part {
32109            "--" => after_double_dash = true,
32110            "-p" | "--patch" => saw_patch = true,
32111            "-1" | "-n1" | "--max-count=1" => saw_single_commit = true,
32112            "-n" | "--max-count" => skip_next = true,
32113            raw if !raw.starts_with('-') => {
32114                if revision != "HEAD" {
32115                    return None;
32116                }
32117                revision = raw;
32118            }
32119            _ => return None,
32120        }
32121    }
32122
32123    if !saw_patch || !saw_single_commit {
32124        return None;
32125    }
32126
32127    Some(build_diff_digest_command(
32128        path.unwrap_or("."),
32129        false,
32130        Some(revision),
32131    ))
32132}
32133
32134fn build_diff_digest_command(path: &str, cached: bool, revision: Option<&str>) -> String {
32135    let mut result = "tsift diff-digest".to_string();
32136    if cached {
32137        result.push_str(" --cached");
32138    }
32139    if let Some(revision) = revision {
32140        result.push_str(&format!(" --revision {}", shell_quote(revision)));
32141    }
32142    if path == "." {
32143        result.push_str(" .");
32144    } else {
32145        result.push_str(&format!(" {}", shell_quote(path)));
32146    }
32147    result
32148}
32149
/// Minimum number of lines a session-file read must cover (explicitly
/// requested, or present in the file) before it is rewritten to
/// `tsift session-digest`. Also the count reported for `+N` line operands.
const SESSION_READ_LINE_THRESHOLD: usize = 80;
/// Minimum number of lines a source-file read must cover before it is
/// rewritten to `tsift source-read`; also caps the window used for
/// whole-file reads.
const SOURCE_READ_LINE_THRESHOLD: usize = 80;
32152
/// Which part of a file a recognised read command (`cat`, `head`, `tail`,
/// `sed -n`) asked for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileReadWindow {
    /// The whole file (`cat`, `bat`, `batcat`).
    FullFile,
    /// The first `lines` lines (`head`).
    FromStart { lines: usize },
    /// The last `lines` lines (`tail`).
    FromEnd { lines: usize },
    /// `lines` lines beginning at 1-based line `start` (`sed -n 'A,Bp'`).
    Range { start: usize, lines: usize },
}
32160
/// A parsed file-read command: which file, how many lines, and which window.
struct FileReadTarget {
    /// File operand with one layer of surrounding shell quotes removed.
    input: String,
    /// Explicit line count from the command; `None` for whole-file reads.
    requested_lines: Option<usize>,
    /// The region of the file the command would print.
    window: FileReadWindow,
}
32166
32167fn rewrite_session_read_command(cmd: &str) -> Option<String> {
32168    if has_shell_metacharacters(cmd) {
32169        return None;
32170    }
32171
32172    let target = parse_file_read_target(cmd)?;
32173    let input_path = Path::new(&target.input);
32174    let source = detect_session_digest_source(input_path)?;
32175
32176    if let Some(requested_lines) = target.requested_lines {
32177        if requested_lines < SESSION_READ_LINE_THRESHOLD {
32178            return None;
32179        }
32180    } else if !file_has_at_least_lines(input_path, SESSION_READ_LINE_THRESHOLD) {
32181        return None;
32182    }
32183
32184    let digest_path = resolve_digest_context_path(input_path);
32185    Some(build_session_digest_command(
32186        &digest_path,
32187        &target.input,
32188        source,
32189    ))
32190}
32191
32192fn rewrite_source_read_command(cmd: &str) -> Option<String> {
32193    if has_shell_metacharacters(cmd) {
32194        return None;
32195    }
32196
32197    let target = parse_file_read_target(cmd)?;
32198    let input_path = Path::new(&target.input);
32199    if !file_is_supported_source(input_path) {
32200        return None;
32201    }
32202
32203    if let Some(requested_lines) = target.requested_lines {
32204        if requested_lines < SOURCE_READ_LINE_THRESHOLD {
32205            return None;
32206        }
32207    } else if !file_has_at_least_lines(input_path, SOURCE_READ_LINE_THRESHOLD) {
32208        return None;
32209    }
32210
32211    let root = lint::find_project_root_for_path(input_path).ok()??;
32212    if !project_has_index(&root) {
32213        return None;
32214    }
32215    let file_abs = input_path.canonicalize().ok()?;
32216    let file_display = relativize_pathbuf(&file_abs, &root)
32217        .to_string_lossy()
32218        .to_string();
32219    let total_lines = count_file_lines(&file_abs)?;
32220    let (start, lines) = source_window_for_read(target.window, total_lines)?;
32221    Some(build_source_read_rewrite_command(
32222        &root,
32223        &file_display,
32224        start,
32225        lines,
32226    ))
32227}
32228
32229fn parse_file_read_target(cmd: &str) -> Option<FileReadTarget> {
32230    let parts: Vec<&str> = shell_split(cmd);
32231    let head = parts.first().copied()?;
32232    match head {
32233        "cat" | "bat" | "batcat" => parse_cat_like_read_target(&parts),
32234        "head" | "tail" => parse_head_tail_read_target(&parts),
32235        "sed" => parse_sed_read_target(&parts),
32236        _ => None,
32237    }
32238}
32239
32240fn parse_cat_like_read_target(parts: &[&str]) -> Option<FileReadTarget> {
32241    let mut input = None;
32242    for part in &parts[1..] {
32243        if part.starts_with('-') {
32244            continue;
32245        }
32246        if input.replace(strip_shell_quotes(part)).is_some() {
32247            return None;
32248        }
32249    }
32250    Some(FileReadTarget {
32251        input: input?.to_string(),
32252        requested_lines: None,
32253        window: FileReadWindow::FullFile,
32254    })
32255}
32256
32257fn parse_head_tail_read_target(parts: &[&str]) -> Option<FileReadTarget> {
32258    let mut requested_lines = 10;
32259    let mut input = None;
32260    let mut index = 1;
32261
32262    while index < parts.len() {
32263        let part = parts[index];
32264        if part == "-n" || part == "--lines" {
32265            index += 1;
32266            requested_lines = parse_requested_line_count(parts.get(index).copied()?)?;
32267            index += 1;
32268            continue;
32269        }
32270        if let Some(raw) = part.strip_prefix("-n")
32271            && !raw.is_empty()
32272        {
32273            requested_lines = parse_requested_line_count(raw)?;
32274            index += 1;
32275            continue;
32276        }
32277        if let Some(raw) = part.strip_prefix("--lines=") {
32278            requested_lines = parse_requested_line_count(raw)?;
32279            index += 1;
32280            continue;
32281        }
32282        if part.starts_with('-') && part[1..].chars().all(|ch| ch.is_ascii_digit()) {
32283            requested_lines = parse_requested_line_count(&part[1..])?;
32284            index += 1;
32285            continue;
32286        }
32287        if input.replace(strip_shell_quotes(part)).is_some() {
32288            return None;
32289        }
32290        index += 1;
32291    }
32292
32293    let window = match parts[0] {
32294        "head" => FileReadWindow::FromStart {
32295            lines: requested_lines,
32296        },
32297        "tail" => FileReadWindow::FromEnd {
32298            lines: requested_lines,
32299        },
32300        _ => return None,
32301    };
32302
32303    Some(FileReadTarget {
32304        input: input?.to_string(),
32305        requested_lines: Some(requested_lines),
32306        window,
32307    })
32308}
32309
32310fn parse_sed_read_target(parts: &[&str]) -> Option<FileReadTarget> {
32311    if parts.len() != 4 || parts[1] != "-n" {
32312        return None;
32313    }
32314
32315    let (start, lines) = parse_sed_print_window(parts[2])?;
32316    Some(FileReadTarget {
32317        input: strip_shell_quotes(parts[3]).to_string(),
32318        requested_lines: Some(lines),
32319        window: FileReadWindow::Range { start, lines },
32320    })
32321}
32322
32323fn parse_requested_line_count(raw: &str) -> Option<usize> {
32324    let trimmed = strip_shell_quotes(raw);
32325    if let Some(number) = trimmed.strip_prefix('+') {
32326        number.parse::<usize>().ok()?;
32327        return Some(SESSION_READ_LINE_THRESHOLD);
32328    }
32329    trimmed.parse::<usize>().ok()
32330}
32331
32332fn parse_sed_print_window(raw: &str) -> Option<(usize, usize)> {
32333    let trimmed = strip_shell_quotes(raw);
32334    let range = trimmed.strip_suffix('p')?;
32335    let (start, end) = range.split_once(',')?;
32336    let start = start.parse::<usize>().ok()?;
32337    let end = end.parse::<usize>().ok()?;
32338    (end >= start).then_some((start, end - start + 1))
32339}
32340
32341fn file_is_supported_source(path: &Path) -> bool {
32342    path.extension()
32343        .and_then(|ext| ext.to_str())
32344        .and_then(graph::lang::Lang::from_extension)
32345        .is_some()
32346}
32347
/// Counts the lines in the file at `path`.
///
/// Lines are delimited by `\n`; a final line without a trailing newline is
/// counted. Counting is byte-based, so lines that are not valid UTF-8 still
/// count. Returns `None` when the file cannot be opened or a read fails
/// (for example when `path` is a directory).
fn count_file_lines(path: &Path) -> Option<usize> {
    let mut reader = BufReader::new(fs::File::open(path).ok()?);
    let mut line = Vec::new();
    let mut count = 0usize;
    loop {
        line.clear();
        match reader.read_until(b'\n', &mut line) {
            Ok(0) => return Some(count),
            Ok(_) => count += 1,
            // Stop on the first read error; retrying a failing read could
            // otherwise repeat forever.
            Err(_) => return None,
        }
    }
}
32357
32358fn source_window_for_read(window: FileReadWindow, total_lines: usize) -> Option<(usize, usize)> {
32359    if total_lines == 0 {
32360        return None;
32361    }
32362    match window {
32363        FileReadWindow::FullFile => Some((1, SOURCE_READ_LINE_THRESHOLD.min(total_lines))),
32364        FileReadWindow::FromStart { lines } => Some((1, lines.min(total_lines))),
32365        FileReadWindow::FromEnd { lines } => {
32366            let bounded = lines.min(total_lines);
32367            Some((total_lines - bounded + 1, bounded))
32368        }
32369        FileReadWindow::Range { start, lines } => {
32370            if start == 0 || start > total_lines {
32371                return None;
32372            }
32373            Some((start, lines.min(total_lines - start + 1)))
32374        }
32375    }
32376}
32377
32378fn build_source_read_rewrite_command(
32379    root: &Path,
32380    file: &str,
32381    start: usize,
32382    lines: usize,
32383) -> String {
32384    format!(
32385        "tsift --envelope source-read {} --path {} --start {} --lines {} --budget normal",
32386        shell_quote(file),
32387        shell_quote(&root.to_string_lossy()),
32388        start,
32389        lines
32390    )
32391}
32392
32393fn project_has_index(root: &Path) -> bool {
32394    let tsift_dir = root.join(".tsift");
32395    tsift_dir.join("index.db").is_file() || directory_contains_index_db(&tsift_dir.join("indexes"))
32396}
32397
/// Recursively searches `path` for a regular file named `index.db`.
///
/// An unreadable or missing directory counts as "not found"; unreadable
/// entries are skipped.
fn directory_contains_index_db(path: &Path) -> bool {
    match fs::read_dir(path) {
        Err(_) => false,
        Ok(entries) => entries.flatten().map(|entry| entry.path()).any(|child| {
            let is_index_file =
                child.file_name().is_some_and(|name| name == "index.db") && child.is_file();
            is_index_file || (child.is_dir() && directory_contains_index_db(&child))
        }),
    }
}
32413
32414fn detect_session_digest_source(path: &Path) -> Option<session_digest::SessionDigestSource> {
32415    match path.extension().and_then(|ext| ext.to_str()) {
32416        Some("md") if file_looks_like_agent_doc_session(path) => {
32417            Some(session_digest::SessionDigestSource::Markdown)
32418        }
32419        Some("jsonl") if file_looks_like_claude_jsonl(path) => {
32420            Some(session_digest::SessionDigestSource::ClaudeJsonl)
32421        }
32422        Some("jsonl") if file_looks_like_codex_jsonl(path) => {
32423            Some(session_digest::SessionDigestSource::CodexJsonl)
32424        }
32425        Some("log") if file_looks_like_agent_doc_log(path) => {
32426            Some(session_digest::SessionDigestSource::AgentDocLog)
32427        }
32428        _ => None,
32429    }
32430}
32431
32432fn file_looks_like_agent_doc_session(path: &Path) -> bool {
32433    let prefix = match read_file_prefix(path, 16 * 1024) {
32434        Some(prefix) => prefix,
32435        None => return false,
32436    };
32437    prefix.contains("agent_doc_session:")
32438        || prefix.contains("<!-- agent:exchange")
32439        || prefix.contains("\n## Exchange")
32440}
32441
32442fn file_looks_like_claude_jsonl(path: &Path) -> bool {
32443    let prefix = match read_file_prefix(path, 16 * 1024) {
32444        Some(prefix) => prefix,
32445        None => return false,
32446    };
32447
32448    prefix
32449        .lines()
32450        .map(str::trim)
32451        .filter(|line| !line.is_empty())
32452        .take(3)
32453        .any(|line| {
32454            let value = match serde_json::from_str::<serde_json::Value>(line) {
32455                Ok(value) => value,
32456                Err(_) => return false,
32457            };
32458            value.get("message").is_some()
32459                || value.get("role").is_some()
32460                || value.get("content").is_some()
32461        })
32462}
32463
32464fn file_looks_like_codex_jsonl(path: &Path) -> bool {
32465    let prefix = match read_file_prefix(path, 16 * 1024) {
32466        Some(prefix) => prefix,
32467        None => return false,
32468    };
32469
32470    prefix
32471        .lines()
32472        .map(str::trim)
32473        .filter(|line| !line.is_empty())
32474        .take(8)
32475        .any(|line| {
32476            let value = match serde_json::from_str::<serde_json::Value>(line) {
32477                Ok(value) => value,
32478                Err(_) => return false,
32479            };
32480            matches!(
32481                value.get("type").and_then(serde_json::Value::as_str),
32482                Some("session_meta" | "response_item" | "event_msg")
32483            )
32484        })
32485}
32486
32487fn file_looks_like_agent_doc_log(path: &Path) -> bool {
32488    let prefix = match read_file_prefix(path, 16 * 1024) {
32489        Some(prefix) => prefix,
32490        None => return false,
32491    };
32492    prefix
32493        .lines()
32494        .map(str::trim)
32495        .filter(|line| !line.is_empty())
32496        .take(8)
32497        .all(|line| line.starts_with('[') && line.contains("] "))
32498}
32499
/// Reads at most `max_bytes` bytes from the start of the file and returns
/// them as text, replacing invalid UTF-8 sequences. Returns `None` when the
/// file cannot be opened or read.
fn read_file_prefix(path: &Path, max_bytes: usize) -> Option<String> {
    let mut bytes = Vec::new();
    let mut limited = BufReader::new(fs::File::open(path).ok()?).take(max_bytes as u64);
    limited.read_to_end(&mut bytes).ok()?;
    Some(String::from_utf8_lossy(&bytes).into_owned())
}
32511
/// Reports whether the file has at least `min_lines` lines, reading no more
/// than that many.
///
/// Lines are delimited by `\n` and counted byte-wise, so lines that are not
/// valid UTF-8 still count. Returns `false` when the file cannot be opened
/// or a read fails before enough lines were seen.
fn file_has_at_least_lines(path: &Path, min_lines: usize) -> bool {
    let Ok(file) = fs::File::open(path) else {
        return false;
    };
    let mut reader = BufReader::new(file);
    let mut line = Vec::new();
    let mut seen = 0usize;
    while seen < min_lines {
        line.clear();
        match reader.read_until(b'\n', &mut line) {
            // End of file or a read error before reaching the target.
            Ok(0) | Err(_) => return false,
            Ok(_) => seen += 1,
        }
    }
    true
}
32525
32526fn build_session_digest_command(
32527    path: &str,
32528    input: &str,
32529    source: session_digest::SessionDigestSource,
32530) -> String {
32531    format!(
32532        "tsift session-digest --path {} --input {} --source {}",
32533        shell_quote(path),
32534        shell_quote(input),
32535        source.cli_arg()
32536    )
32537}
32538
32539fn resolve_digest_context_path(path: &Path) -> String {
32540    lint::resolve_harness_root_or_canonical_path(path)
32541        .map(|root| root.display().to_string())
32542        .unwrap_or_else(|_| ".".to_string())
32543}
32544
32545fn rewrite_test_command(cmd: &str) -> Option<String> {
32546    if has_shell_metacharacters(cmd) {
32547        return None;
32548    }
32549
32550    let parts: Vec<&str> = shell_split(cmd);
32551    if parts.len() >= 2 && parts[0] == "cargo" && parts[1] == "test" {
32552        return Some(build_digest_runner_command("test", ".", Some("cargo"), cmd));
32553    }
32554    if !parts.is_empty() && parts[0] == "pytest" {
32555        return Some(build_digest_runner_command(
32556            "test",
32557            ".",
32558            Some("pytest"),
32559            cmd,
32560        ));
32561    }
32562    if parts.len() >= 3 && parts[0] == "python" && parts[1] == "-m" && parts[2] == "pytest" {
32563        return Some(build_digest_runner_command(
32564            "test",
32565            ".",
32566            Some("pytest"),
32567            cmd,
32568        ));
32569    }
32570    None
32571}
32572
32573fn rewrite_log_command(cmd: &str) -> Option<String> {
32574    if has_shell_metacharacters(cmd) {
32575        return None;
32576    }
32577
32578    let parts: Vec<&str> = shell_split(cmd);
32579    if parts.len() >= 2
32580        && parts[0] == "cargo"
32581        && matches!(parts[1], "build" | "check" | "clippy" | "install")
32582    {
32583        return Some(build_digest_runner_command("log", ".", None, cmd));
32584    }
32585    None
32586}
32587
32588fn build_digest_runner_command(
32589    kind: &str,
32590    path: &str,
32591    runner: Option<&str>,
32592    shell_command: &str,
32593) -> String {
32594    let mut result = format!(
32595        "tsift --envelope __digest-runner --kind {} --path {} --shell-command {}",
32596        shell_quote(kind),
32597        shell_quote(path),
32598        shell_quote(shell_command)
32599    );
32600    if let Some(runner) = runner {
32601        result.push_str(&format!(" --runner {}", shell_quote(runner)));
32602    }
32603    result
32604}
32605
/// Reports whether `cmd` contains a pipe, redirection, or ampersand
/// character (`|`, `>`, `<`, `&`).
fn has_shell_metacharacters(cmd: &str) -> bool {
    cmd.chars().any(|ch| matches!(ch, '|' | '>' | '<' | '&'))
}
32609
/// Removes one layer of matching surrounding quotes (`"…"` or `'…'`).
/// Strings that are not fully wrapped, including a lone quote character,
/// are returned unchanged.
fn strip_shell_quotes(s: &str) -> &str {
    ['"', '\'']
        .iter()
        .find_map(|&quote| s.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(s)
}
32619
/// Heuristically decides whether a bare `git diff` operand is a path rather
/// than a revision.
///
/// An operand counts as a path when it contains `/` or `.`. Revision ranges
/// (`A..B`, `A...B`) are excluded: any `/`-separated component that contains
/// `..` without being exactly `..` marks the operand as a range, while `..`
/// as a whole component (as in `../src`) is still a path.
fn looks_like_path_selector(raw: &str) -> bool {
    let is_revision_range = raw
        .split('/')
        .any(|component| component != ".." && component.contains(".."));
    if is_revision_range {
        return false;
    }
    raw.contains('/') || raw.contains('.')
}
32627
32628#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32629enum DigestRunnerKind {
32630    Test,
32631    Log,
32632}
32633
32634impl DigestRunnerKind {
32635    fn parse(raw: &str) -> Result<Self> {
32636        match raw.trim().to_ascii_lowercase().as_str() {
32637            "test" => Ok(Self::Test),
32638            "log" => Ok(Self::Log),
32639            other => bail!("unsupported digest runner kind `{other}`; expected test or log"),
32640        }
32641    }
32642
32643    fn as_str(self) -> &'static str {
32644        match self {
32645            Self::Test => "test",
32646            Self::Log => "log",
32647        }
32648    }
32649}
32650
/// Splits a command line into whitespace-separated words, borrowing from `s`.
///
/// A word that begins with `"` or `'` extends to the matching closing quote
/// (or to the end of the input when unterminated) and keeps its quotes.
/// Quotes in the middle of a word are not special, and escapes are not
/// interpreted.
fn shell_split(s: &str) -> Vec<&str> {
    let bytes = s.as_bytes();
    let len = bytes.len();
    let mut words = Vec::new();
    let mut cursor = 0;

    loop {
        // Advance past the whitespace separating words.
        cursor += bytes[cursor..]
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count();
        if cursor >= len {
            break;
        }

        let begin = cursor;
        let end = match bytes[begin] {
            quote @ (b'"' | b'\'') => {
                match bytes[begin + 1..].iter().position(|&byte| byte == quote) {
                    // One past the closing quote.
                    Some(offset) => begin + offset + 2,
                    None => len,
                }
            }
            _ => bytes[begin..]
                .iter()
                .position(|byte| byte.is_ascii_whitespace())
                .map_or(len, |offset| begin + offset),
        };
        words.push(&s[begin..end]);
        cursor = end;
    }
    words
}
32683
/// Wraps a string in double quotes for use as a single shell word.
///
/// One layer of matching surrounding quotes (`"…"` or `'…'`) is removed
/// first, so already-quoted input is not quoted twice. Inside the result,
/// the characters that remain special within double quotes (`\`, `"`, `$`
/// and backtick) are backslash-escaped so the shell passes the text through
/// literally. A lone quote character is treated as content, not as a
/// wrapper.
pub(crate) fn shell_quote(s: &str) -> String {
    let unquoted = ['"', '\'']
        .iter()
        .find_map(|&quote| s.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(s);

    let mut quoted = String::with_capacity(unquoted.len() + 2);
    quoted.push('"');
    for ch in unquoted.chars() {
        if matches!(ch, '\\' | '"' | '$' | '`') {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
32706
32707fn empty_search_coverage() -> sift::SearchCoverageSnapshot {
32708    sift::SearchCoverageSnapshot {
32709        mode: sift::SearchCoverageMode::Sealed,
32710        total_sector_count: 0,
32711        mounted_sector_count: 0,
32712        reused_sector_count: 0,
32713        dirty_sector_count: 0,
32714        completed_dirty_sector_count: 0,
32715        rebuilding_sector_count: 0,
32716        resumed_sector_count: 0,
32717        active_rebuild: None,
32718    }
32719}
32720
32721fn aggregate_search_coverage(responses: &[sift::SearchResponse]) -> sift::SearchCoverageSnapshot {
32722    let total_sector_count = responses
32723        .iter()
32724        .map(|response| response.coverage.total_sector_count)
32725        .sum();
32726    let mounted_sector_count = responses
32727        .iter()
32728        .map(|response| response.coverage.mounted_sector_count)
32729        .sum();
32730    let reused_sector_count = responses
32731        .iter()
32732        .map(|response| response.coverage.reused_sector_count)
32733        .sum();
32734    let dirty_sector_count = responses
32735        .iter()
32736        .map(|response| response.coverage.dirty_sector_count)
32737        .sum();
32738    let completed_dirty_sector_count = responses
32739        .iter()
32740        .map(|response| response.coverage.completed_dirty_sector_count)
32741        .sum();
32742    let rebuilding_sector_count = responses
32743        .iter()
32744        .map(|response| response.coverage.rebuilding_sector_count)
32745        .sum();
32746    let resumed_sector_count = responses
32747        .iter()
32748        .map(|response| response.coverage.resumed_sector_count)
32749        .sum();
32750
32751    let mode = if dirty_sector_count == 0 && rebuilding_sector_count == 0 {
32752        sift::SearchCoverageMode::Sealed
32753    } else if completed_dirty_sector_count > 0
32754        || rebuilding_sector_count > 0
32755        || resumed_sector_count > 0
32756    {
32757        sift::SearchCoverageMode::Converging
32758    } else {
32759        sift::SearchCoverageMode::Frontier
32760    };
32761
32762    sift::SearchCoverageSnapshot {
32763        mode,
32764        total_sector_count,
32765        mounted_sector_count,
32766        reused_sector_count,
32767        dirty_sector_count,
32768        completed_dirty_sector_count,
32769        rebuilding_sector_count,
32770        resumed_sector_count,
32771        active_rebuild: responses
32772            .iter()
32773            .find_map(|response| response.coverage.active_rebuild.clone()),
32774    }
32775}
32776
32777fn empty_search_response(root: &Path, strategy: &str) -> sift::SearchResponse {
32778    sift::SearchResponse {
32779        strategy: strategy.to_string(),
32780        root: root.display().to_string(),
32781        indexed_artifacts: 0,
32782        skipped_artifacts: 0,
32783        coverage: empty_search_coverage(),
32784        hits: Vec::new(),
32785    }
32786}
32787
32788fn absolutize_search_hit_paths(response: &mut sift::SearchResponse, search_root: &Path) {
32789    for hit in &mut response.hits {
32790        let path = Path::new(&hit.path);
32791        if path.is_relative() {
32792            hit.path = search_root.join(path).display().to_string();
32793        }
32794    }
32795}
32796
32797fn merge_search_responses(
32798    root: &Path,
32799    strategy: &str,
32800    limit: usize,
32801    responses: Vec<sift::SearchResponse>,
32802) -> sift::SearchResponse {
32803    let indexed_artifacts = responses
32804        .iter()
32805        .map(|response| response.indexed_artifacts)
32806        .sum();
32807    let skipped_artifacts = responses
32808        .iter()
32809        .map(|response| response.skipped_artifacts)
32810        .sum();
32811    let coverage = if responses.is_empty() {
32812        empty_search_coverage()
32813    } else {
32814        aggregate_search_coverage(&responses)
32815    };
32816    let mut hits: Vec<sift::SearchHit> = responses
32817        .into_iter()
32818        .flat_map(|response| response.hits)
32819        .collect();
32820    hits.sort_by(|left, right| {
32821        right
32822            .score
32823            .partial_cmp(&left.score)
32824            .unwrap_or(Ordering::Equal)
32825            .then_with(|| left.path.cmp(&right.path))
32826            .then_with(|| left.location.cmp(&right.location))
32827    });
32828    hits.truncate(limit);
32829    for (rank, hit) in hits.iter_mut().enumerate() {
32830        hit.rank = rank + 1;
32831    }
32832
32833    sift::SearchResponse {
32834        strategy: strategy.to_string(),
32835        root: root.display().to_string(),
32836        indexed_artifacts,
32837        skipped_artifacts,
32838        coverage,
32839        hits,
32840    }
32841}
32842
32843pub(crate) fn federated_sift_search(
32844    root: &Path,
32845    cache_dir: &Path,
32846    query: &str,
32847    limit: usize,
32848    timeout_secs: u64,
32849    strategy: &str,
32850) -> Result<sift::SearchResponse> {
32851    let targets = resolve_search_index_targets(root, root, None, true)?;
32852    if targets.is_empty() {
32853        if config::Config::submodule_dirs(root)?.is_empty() {
32854            return run_search_with_timeout(
32855                root,
32856                cache_dir,
32857                query,
32858                limit,
32859                timeout_secs,
32860                strategy,
32861                &[],
32862            );
32863        }
32864        return Ok(empty_search_response(root, strategy));
32865    }
32866
32867    let mut responses = Vec::with_capacity(targets.len());
32868    for target in &targets {
32869        let mut response = run_search_with_timeout(
32870            &target.source_root,
32871            cache_dir,
32872            query,
32873            limit,
32874            timeout_secs,
32875            strategy,
32876            std::slice::from_ref(target),
32877        )?;
32878        absolutize_search_hit_paths(&mut response, &target.source_root);
32879        response.root = root.display().to_string();
32880        responses.push(response);
32881    }
32882
32883    Ok(merge_search_responses(root, strategy, limit, responses))
32884}
32885
32886/// Federated symbol search across every scoped `.tsift/indexes/<scope>/index.db`
32887/// in the workspace. Per-scope tagpath annotation runs inside the per-scope
32888/// loop so each scope's adapter resolves against its own `.naming.toml` /
32889/// `.naming/index.json` (the workspace root usually has no tagpath of its
32890/// own). The merged `TagpathAnnotationDiagnostic` reports `loaded=true` when
32891/// at least one scope loaded, and `stale=true` with the first stale reason
32892/// when any scope was stale.
32893pub(crate) fn federated_symbol_search(
32894    root: &std::path::Path,
32895    query: &str,
32896    limit: usize,
32897    tagpath_opts: &TagpathSearchOpts,
32898) -> Result<(Vec<index::SymbolHit>, TagpathAnnotationDiagnostic)> {
32899    let cfg = config::Config::load(root)?;
32900    let submodules = config::Config::submodule_dirs(root)?;
32901    let mut all_hits: Vec<index::SymbolHit> = Vec::new();
32902    let mut combined = TagpathAnnotationDiagnostic::default();
32903    for scope in &submodules {
32904        if !cfg.federation_for_scope(scope) {
32905            continue;
32906        }
32907        let db_path = cfg.db_path_for(root, &scope.id);
32908        if !db_path.exists() {
32909            continue;
32910        }
32911        let db = index::IndexDb::open_read_only(&db_path)?;
32912        let mut hits = db.symbol_search(query, limit)?;
32913        let diag = annotate_hits_with_tagpath(&mut hits, &scope.source_root, tagpath_opts)?;
32914        combined.loaded |= diag.loaded;
32915        if diag.stale && !combined.stale {
32916            combined.stale = true;
32917            combined.reason = diag.reason;
32918        }
32919        all_hits.append(&mut hits);
32920    }
32921    all_hits.sort_by(|a, b| {
32922        b.score
32923            .partial_cmp(&a.score)
32924            .unwrap_or(std::cmp::Ordering::Equal)
32925    });
32926    all_hits.truncate(limit);
32927    Ok((all_hits, combined))
32928}
32929
/// One line of ripgrep `--json` output, discriminated by its `type` field
/// (lowercased variant names).
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum RipgrepJsonEvent {
    /// A `match` event together with its payload.
    Match {
        data: RipgrepMatchData,
    },
    /// Any other event type; carries no data.
    #[serde(other)]
    Other,
}
32939
/// Payload of a ripgrep `match` event: the file, the matched line text, and
/// the line number when ripgrep reports one.
#[derive(Debug, Deserialize)]
struct RipgrepMatchData {
    path: RipgrepTextField,
    lines: RipgrepTextField,
    line_number: Option<usize>,
}
32946
/// A ripgrep JSON text wrapper; `text` is `None` when the field is absent.
#[derive(Debug, Deserialize)]
struct RipgrepTextField {
    text: Option<String>,
}
32951
32952pub(crate) fn federated_exact_search(
32953    root: &Path,
32954    query: &str,
32955    limit: usize,
32956    timeout_secs: u64,
32957) -> Result<sift::SearchResponse> {
32958    let cfg = config::Config::load(root)?;
32959    let mut responses = Vec::new();
32960    for scope in config::Config::submodule_dirs(root)? {
32961        if !cfg.federation_for_scope(&scope) {
32962            continue;
32963        }
32964        let mut response =
32965            run_exact_search_with_timeout(&scope.source_root, query, limit, timeout_secs)?;
32966        absolutize_search_hit_paths(&mut response, &scope.source_root);
32967        response.root = root.display().to_string();
32968        responses.push(response);
32969    }
32970
32971    Ok(merge_search_responses(root, "exact", limit, responses))
32972}
32973
32974pub(crate) fn run_sift_search(
32975    search_path: &Path,
32976    cache_dir: &Path,
32977    query: &str,
32978    limit: usize,
32979    strategy: &str,
32980) -> Result<sift::SearchResponse> {
32981    let engine = Sift::builder().with_cache_dir(cache_dir).build();
32982    let options = SearchOptions::default()
32983        .with_limit(limit)
32984        .with_strategy(strategy.to_string());
32985    let input = SearchInput::new(search_path, query).with_options(options);
32986    engine.search(input).context("sift search failed")
32987}
32988
/// Builds the user-facing message shown when an exact search exceeds
/// `timeout_secs` seconds.
fn exact_search_timeout_message(timeout_secs: u64) -> String {
    let mut message = format!("tsift search timed out after {timeout_secs}s (strategy: exact). ");
    message.push_str(
        "Re-run with `--timeout 0` to disable the timeout or narrow `--path` / `--scope`.",
    );
    message
}
32996
/// Builds (without running) the `rg` command for a fixed-string search of
/// `query` under `search_path`, with JSON output, line numbers, and hidden
/// files included. The `--` separator keeps a query that starts with `-`
/// from being parsed as an option.
fn exact_search_command(search_path: &Path, query: &str) -> Command {
    let mut rg = Command::new("rg");
    rg.args([
        "--json",
        "--fixed-strings",
        "--line-number",
        "--hidden",
        "--",
    ]);
    rg.arg(query);
    rg.arg(search_path);
    rg
}
33009
33010fn exact_search_file_timestamp(path: &Path) -> sift::ArtifactFreshness {
33011    let observed_unix_secs = SystemTime::now()
33012        .duration_since(UNIX_EPOCH)
33013        .unwrap_or_default()
33014        .as_secs() as i64;
33015    let modified_unix_secs = fs::metadata(path)
33016        .ok()
33017        .and_then(|metadata| metadata.modified().ok())
33018        .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
33019        .map(|duration| duration.as_secs() as i64);
33020    sift::ArtifactFreshness {
33021        observed_unix_secs,
33022        modified_unix_secs,
33023    }
33024}
33025
33026fn parse_exact_search_output(
33027    search_path: &Path,
33028    limit: usize,
33029    raw: &str,
33030) -> Result<sift::SearchResponse> {
33031    if limit == 0 {
33032        return Ok(sift::SearchResponse {
33033            strategy: "exact".to_string(),
33034            root: search_path.display().to_string(),
33035            indexed_artifacts: 0,
33036            skipped_artifacts: 0,
33037            coverage: empty_search_coverage(),
33038            hits: Vec::new(),
33039        });
33040    }
33041
33042    let mut hits = Vec::new();
33043    for line in raw.lines() {
33044        let event: RipgrepJsonEvent =
33045            serde_json::from_str(line).context("parsing ripgrep exact-search output")?;
33046        let RipgrepJsonEvent::Match { data } = event else {
33047            continue;
33048        };
33049        let Some(path_text) = data.path.text else {
33050            continue;
33051        };
33052        let Some(lines_text) = data.lines.text else {
33053            continue;
33054        };
33055        let path = PathBuf::from(path_text);
33056        let snippet = lines_text.trim_end_matches(['\r', '\n']).to_string();
33057        let rank = hits.len() + 1;
33058        hits.push(sift::SearchHit {
33059            artifact_id: format!(
33060                "exact:{}:{}:{}",
33061                path.display(),
33062                data.line_number.unwrap_or(0),
33063                rank
33064            ),
33065            artifact_kind: sift::ContextArtifactKind::File,
33066            path: path.display().to_string(),
33067            rank,
33068            score: (limit.saturating_sub(rank).saturating_add(1)) as f64,
33069            confidence: sift::ScoreConfidence::High,
33070            location: data.line_number.map(|line| format!("line {}", line)),
33071            snippet: snippet.clone(),
33072            provenance: sift::ArtifactProvenance {
33073                adapter: sift::AcquisitionAdapterKind::FileSystem,
33074                source: "ripgrep -F".to_string(),
33075                synthetic: false,
33076            },
33077            freshness: exact_search_file_timestamp(&path),
33078            budget: sift::ArtifactBudget::from_text(&snippet, 1),
33079        });
33080        if hits.len() >= limit {
33081            break;
33082        }
33083    }
33084
33085    Ok(sift::SearchResponse {
33086        strategy: "exact".to_string(),
33087        root: search_path.display().to_string(),
33088        indexed_artifacts: hits.len(),
33089        skipped_artifacts: 0,
33090        coverage: empty_search_coverage(),
33091        hits,
33092    })
33093}
33094
33095fn exact_search_response_from_process(
33096    search_path: &Path,
33097    limit: usize,
33098    status: std::process::ExitStatus,
33099    stdout: &[u8],
33100    stderr: &[u8],
33101) -> Result<sift::SearchResponse> {
33102    if !status.success() && status.code() != Some(1) {
33103        let message = String::from_utf8_lossy(stderr);
33104        let trimmed = message.trim();
33105        if trimmed.is_empty() {
33106            bail!("ripgrep exact search exited with status {}", status);
33107        }
33108        bail!("{}", trimmed);
33109    }
33110
33111    let raw = String::from_utf8(stdout.to_vec()).context("decoding ripgrep exact-search output")?;
33112    parse_exact_search_output(search_path, limit, &raw)
33113}
33114
33115fn run_exact_search(search_path: &Path, query: &str, limit: usize) -> Result<sift::SearchResponse> {
33116    let output = exact_search_command(search_path, query)
33117        .output()
33118        .context("running exact search with ripgrep")?;
33119    exact_search_response_from_process(
33120        search_path,
33121        limit,
33122        output.status,
33123        &output.stdout,
33124        &output.stderr,
33125    )
33126}
33127
33128pub(crate) fn run_exact_search_with_timeout(
33129    search_path: &Path,
33130    query: &str,
33131    limit: usize,
33132    timeout_secs: u64,
33133) -> Result<sift::SearchResponse> {
33134    if timeout_secs == 0 {
33135        return run_exact_search(search_path, query, limit);
33136    }
33137
33138    let mut child = exact_search_command(search_path, query)
33139        .stdin(Stdio::null())
33140        .stdout(Stdio::piped())
33141        .stderr(Stdio::piped())
33142        .spawn()
33143        .context("spawning timed exact search worker")?;
33144
33145    let timeout = Duration::from_secs(timeout_secs);
33146    let status = wait_for_child_exit(&mut child, timeout)
33147        .context("waiting for timed exact search worker")?;
33148    if status.is_none() {
33149        let _ = child.kill();
33150        let _ = child.wait();
33151        bail!("{}", exact_search_timeout_message(timeout_secs));
33152    }
33153
33154    let status = status.unwrap();
33155    let stdout = read_child_stdout(&mut child)?;
33156    let stderr = read_child_stderr(&mut child)?;
33157    exact_search_response_from_process(
33158        search_path,
33159        limit,
33160        status,
33161        stdout.as_bytes(),
33162        stderr.as_bytes(),
33163    )
33164}
33165
33166pub(crate) fn run_search_with_timeout(
33167    search_path: &Path,
33168    cache_dir: &Path,
33169    query: &str,
33170    limit: usize,
33171    timeout_secs: u64,
33172    strategy: &str,
33173    search_targets: &[SearchIndexTarget],
33174) -> Result<sift::SearchResponse> {
33175    if timeout_secs == 0 {
33176        return run_sift_search(search_path, cache_dir, query, limit, strategy);
33177    }
33178
33179    let output_path = next_search_worker_output_path();
33180    let mut child = Command::new(
33181        std::env::current_exe().context("resolving tsift executable for timed search")?,
33182    )
33183    .arg("__search-worker")
33184    .arg("--path")
33185    .arg(search_path)
33186    .arg("--cache-dir")
33187    .arg(cache_dir)
33188    .arg("--query")
33189    .arg(query)
33190    .arg("--limit")
33191    .arg(limit.to_string())
33192    .arg("--strategy")
33193    .arg(strategy)
33194    .arg("--output")
33195    .arg(&output_path)
33196    .stdin(Stdio::null())
33197    .stdout(Stdio::null())
33198    .stderr(Stdio::piped())
33199    .spawn()
33200    .context("spawning timed sift search worker")?;
33201
33202    let timeout = Duration::from_secs(timeout_secs);
33203    let status =
33204        wait_for_child_exit(&mut child, timeout).context("waiting for timed sift search worker")?;
33205    if status.is_none() {
33206        let _ = child.kill();
33207        let _ = child.wait();
33208        let _ = fs::remove_file(&output_path);
33209        bail!(
33210            "{}",
33211            search_timeout_message(timeout_secs, strategy, search_targets)?
33212        );
33213    }
33214
33215    let status = status.unwrap();
33216    let stderr = read_child_stderr(&mut child)?;
33217    if !status.success() {
33218        let _ = fs::remove_file(&output_path);
33219        let message = stderr.trim();
33220        if message.is_empty() {
33221            bail!("sift search worker exited with status {}", status);
33222        }
33223        bail!("{}", message);
33224    }
33225
33226    let raw = fs::read_to_string(&output_path)
33227        .with_context(|| format!("reading search worker output: {}", output_path.display()))?;
33228    let _ = fs::remove_file(&output_path);
33229    serde_json::from_str(&raw).context("parsing search worker output")
33230}
33231
/// Picks the temporary file a timed search worker writes its response to.
///
/// The file lives in the system temp directory and is named
/// `tsift-search-<pid>-<nanos>.json`, where `<pid>` is this process's id and
/// `<nanos>` is the current time in nanoseconds since the Unix epoch (0 if
/// the clock reads earlier than the epoch). The file itself is not created
/// here.
fn next_search_worker_output_path() -> PathBuf {
    let pid = std::process::id();
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };

    let mut path = std::env::temp_dir();
    path.push(format!("tsift-search-{pid}-{nanos}.json"));
    path
}
33243
33244fn wait_for_child_exit(
33245    child: &mut std::process::Child,
33246    timeout: Duration,
33247) -> Result<Option<std::process::ExitStatus>> {
33248    let started = Instant::now();
33249    loop {
33250        if let Some(status) = child.try_wait()? {
33251            return Ok(Some(status));
33252        }
33253        if started.elapsed() >= timeout {
33254            return Ok(None);
33255        }
33256        let remaining = timeout.saturating_sub(started.elapsed());
33257        std::thread::sleep(remaining.min(Duration::from_millis(10)));
33258    }
33259}
33260
33261fn read_child_stderr(child: &mut std::process::Child) -> Result<String> {
33262    let mut stderr = String::new();
33263    if let Some(mut pipe) = child.stderr.take() {
33264        pipe.read_to_string(&mut stderr)
33265            .context("reading search worker stderr")?;
33266    }
33267    Ok(stderr)
33268}
33269
33270fn read_child_stdout(child: &mut std::process::Child) -> Result<String> {
33271    let mut stdout = String::new();
33272    if let Some(mut pipe) = child.stdout.take() {
33273        pipe.read_to_string(&mut stdout)
33274            .context("reading search worker stdout")?;
33275    }
33276    Ok(stdout)
33277}
33278
33279pub(crate) fn maybe_apply_search_worker_test_hooks() -> Result<()> {
33280    if let Ok(path) = std::env::var("TSIFT_TEST_SEARCH_WORKER_PID_FILE") {
33281        fs::write(&path, std::process::id().to_string())
33282            .with_context(|| format!("writing search worker pid file: {path}"))?;
33283    }
33284    if let Ok(ms) = std::env::var("TSIFT_TEST_SEARCH_WORKER_SLEEP_MS") {
33285        let delay_ms = ms
33286            .parse::<u64>()
33287            .with_context(|| format!("parsing TSIFT_TEST_SEARCH_WORKER_SLEEP_MS={ms}"))?;
33288        std::thread::sleep(Duration::from_millis(delay_ms));
33289    }
33290    Ok(())
33291}
33292
// Test-only, per-thread slot holding at most one pending post-precheck lock
// hook. It is filled by `install_search_post_precheck_lock_hook`, consumed by
// `maybe_apply_search_post_precheck_test_hooks`, and cleared when the
// `SearchPostPrecheckLockGuard` is dropped.
#[cfg(test)]
thread_local! {
    static SEARCH_POST_PRECHECK_LOCK_HOOK: RefCell<Option<SearchPostPrecheckLockHook>> = const { RefCell::new(None) };
}
33297
/// Which kind of SQLite lock the test hook takes on the database.
#[cfg(test)]
enum SearchPostPrecheckLockMode {
    /// Switch to `journal_mode=DELETE`, begin an exclusive transaction, and
    /// write a marker file at the rollback-journal path.
    RollbackJournal,
    /// Switch to `journal_mode=WAL`, write a probe row, and begin an
    /// exclusive transaction under `locking_mode=EXCLUSIVE`.
    Wal,
}
33303
/// A pending test hook: the database to lock and how to lock it.
#[cfg(test)]
struct SearchPostPrecheckLockHook {
    /// Path of the SQLite database the hook opens and locks.
    db_path: PathBuf,
    /// Locking strategy applied when the hook fires.
    mode: SearchPostPrecheckLockMode,
}
33309
/// Guard returned when a post-precheck lock hook is installed; dropping it
/// clears the current thread's hook slot.
#[cfg(test)]
struct SearchPostPrecheckLockGuard;
33312
#[cfg(test)]
impl Drop for SearchPostPrecheckLockGuard {
    /// Empties this thread's hook slot, discarding any hook that was
    /// installed but never consumed.
    fn drop(&mut self) {
        SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
            *slot.borrow_mut() = None;
        });
    }
}
33321
/// Installs a post-precheck lock hook for `db_path` that uses the
/// rollback-journal locking mode. The hook stays installed until it is
/// consumed or the returned guard is dropped.
#[cfg(test)]
fn install_search_post_precheck_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::RollbackJournal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
33326
/// Installs a post-precheck lock hook for `db_path` that uses the WAL
/// locking mode. The hook stays installed until it is consumed or the
/// returned guard is dropped.
#[cfg(test)]
fn install_search_post_precheck_wal_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::Wal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
33331
/// Stores a lock hook for `db_path` / `mode` in the current thread's hook
/// slot and returns the guard that clears the slot on drop.
///
/// # Panics
/// Panics if a hook is already installed on this thread.
#[cfg(test)]
fn install_search_post_precheck_lock_hook(
    db_path: PathBuf,
    mode: SearchPostPrecheckLockMode,
) -> SearchPostPrecheckLockGuard {
    SEARCH_POST_PRECHECK_LOCK_HOOK.with(|cell| {
        let mut slot = cell.borrow_mut();
        assert!(
            slot.is_none(),
            "search post-precheck lock hook already installed"
        );
        *slot = Some(SearchPostPrecheckLockHook { db_path, mode });
    });
    SearchPostPrecheckLockGuard
}
33346
/// Test-build hook: fires the pending post-precheck lock hook, if one is
/// installed on this thread.
///
/// The hook is taken out of the thread-local slot, so it fires at most once
/// per installation. A background thread opens the hook's database, takes
/// the lock described by the hook's mode, signals readiness, keeps the
/// connection open for a further 200 ms, and then drops it. This function
/// returns as soon as readiness is signalled.
///
/// # Errors
/// Fails if the background thread does not signal readiness within one
/// second (including when it panics before signalling).
#[cfg(test)]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
    // Consume the hook; with nothing installed this is a no-op.
    let Some(hook) = SEARCH_POST_PRECHECK_LOCK_HOOK.with(|hook| hook.borrow_mut().take()) else {
        return Ok(());
    };
    let (ready_tx, ready_rx) = std::sync::mpsc::sync_channel(1);
    std::thread::spawn(move || {
        let conn = Connection::open(&hook.db_path).expect("opening db for search lock hook");
        match hook.mode {
            SearchPostPrecheckLockMode::RollbackJournal => {
                conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
                    .expect("acquiring rollback-journal hook lock");
                // Writes a marker file at the rollback-journal path.
                // NOTE(review): presumably so sidecar-based lock detection
                // sees a journal file; confirm against `substrate`.
                fs::write(substrate::rollback_journal_path(&hook.db_path), "locked")
                    .expect("writing rollback journal marker");
            }
            SearchPostPrecheckLockMode::Wal => {
                conn.execute_batch(
                    "PRAGMA journal_mode=WAL;
                     PRAGMA wal_autocheckpoint=0;
                     CREATE TABLE IF NOT EXISTS search_wal_lock_probe (id INTEGER PRIMARY KEY);
                     INSERT INTO search_wal_lock_probe DEFAULT VALUES;
                     PRAGMA locking_mode=EXCLUSIVE;
                     BEGIN EXCLUSIVE;",
                )
                .expect("acquiring WAL hook lock");
                // The hook expects the WAL sidecar file to exist at this point.
                assert!(substrate::wal_sidecar_path(&hook.db_path).exists());
            }
        }
        // Tell the caller the lock is in place, then keep it for 200 ms.
        ready_tx.send(()).expect("signaling search lock hook");
        std::thread::sleep(Duration::from_millis(200));
        drop(conn);
        // Best-effort cleanup of the marker file; runs for both modes and
        // ignores a missing file.
        let _ = fs::remove_file(substrate::rollback_journal_path(&hook.db_path));
    });
    // Block until the lock is held so the caller proceeds against a locked
    // database; give up after one second.
    ready_rx
        .recv_timeout(Duration::from_secs(1))
        .context("waiting for search post-precheck lock hook")?;
    Ok(())
}
33385
/// Non-test build of the post-precheck hook: does nothing and always
/// succeeds.
#[cfg(not(test))]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
    Ok(())
}