Skip to main content

tsift_cli/
lib.rs

1mod cli;
2mod commands;
3mod community_detection;
4mod conflict_matrix;
5mod context_pack;
6mod output;
7mod rewrite;
8mod search_budget;
9mod semantic_edit;
10mod session_review_budget;
11mod token_savings;
12mod workflow;
13
14pub use rewrite::rewrite_command;
15pub(crate) use rewrite::{apply_rewrite_output_format, execute_rewritten_command, no_rewrite_message};
16pub(crate) use community_detection::{
17    CommunityDetectionReport, annotate_community_members_with_context,
18    community_tagpath_cache_part, community_tagpath_cache_part_for_loaded,
19    detect_communities_cached, file_communities_from_callers,
20    graph_effectiveness_blocked, graph_effectiveness_ready,
21    resolve_tagpath_handle_for_callee_edge, update_community_annotation_diagnostics,
22};
23#[allow(unused_imports)]
24pub(crate) use conflict_matrix::{
25    ConflictMatrixCandidate, ConflictMatrixGraphPreparedInputs,
26    ConflictMatrixPreparedInputs, ConflictMatrixReport,
27    ConflictMatrixSemanticRef, ConflictMatrixSharedPreparationSummary,
28    ConflictMatrixWorkerFeedback, ConflictMatrixWorkerPromptPacket,
29    build_conflict_matrix_report, build_conflict_matrix_report_from_prepared_graph,
30    cmd_conflict_matrix, collect_conflict_matrix_evidence_packets,
31    conflict_matrix_candidate_from_evidence, conflict_matrix_graph_index,
32    conflict_matrix_semantic_ref, conflict_matrix_shared_preparation_summary,
33    conflict_matrix_source_handle, conflict_matrix_target_scoped_graph_snapshot,
34    conflict_matrix_worker_feedback,
35    conflict_risk_label, extract_conflict_target_refs, hash_bytes_hex,
36    is_planner_config_path, normalize_conflict_target,
37    prepare_conflict_matrix_graph_orchestration,
38    prepare_conflict_matrix_inputs, resolve_conflict_matrix_targets,
39    sorted_intersection, sorted_set,
40};
41#[allow(unused_imports)]
42pub(crate) use context_pack::{
43    ContextPackReport, ContextPackSummaryRefPreview,
44    build_context_pack_diff_preview, build_context_pack_log_preview,
45    build_context_pack_report, build_context_pack_report_with_profile,
46    build_context_pack_test_preview, context_pack_status_reminders,
47    exploration_ref_id, materialize_context_pack_exploration_packet,
48    print_context_pack_human,
49};
50pub(crate) use search_budget::{
51    SearchBudgetReportInput,
52    apply_search_facet_filters, build_search_budget_follow_up, build_search_budget_report,
53    print_search_budget_human,
54};
55#[allow(unused_imports)]
56pub(crate) use session_review_budget::{
57    SessionReviewBudgetFailurePreview, SessionReviewBudgetReport,
58    SessionReviewNextContextBudgetReport, SessionReviewNextTokenAction,
59    build_session_review_budget_report, build_session_review_next_context_budget_report,
60    print_session_review_budget_human, print_session_review_next_context_budget_human,
61};
62#[cfg(test)]
63use search_budget::{SearchBudgetReport, search_facet_filters_summary};
64pub(crate) use semantic_edit::{
65    AstSpanPreview, EditBatch, EditResult, EditStatus,
66    MarkdownEmbeddedSymbol, MarkdownSpanMetadata, MetricDigestOptions,
67    SemanticEditVerifyOptions, apply_edit_plan_atomically, build_edit_plan, cmd_edit_intents,
68};
69
70#[cfg(test)]
71use rewrite::{apply_output_cap, effective_rewrite_run_command, resolve_digest_context_path, rewrite_output_cap, OutputCap};
72#[cfg(test)]
73use std::io::{BufRead as _, BufReader};
74#[cfg(test)]
75use token_savings::{
76    TokenSavingsFamily, TokenSavingsFixture, TokenSavingsFixtureCase,
77    TokenSavingsMarkdownProjectionInput, TokenSavingsMarkdownProjectionInputs,
78    TokenSavingsRawSymbol, TokenSavingsSourceReadInput, TokenSavingsSourceReadInputs,
79    build_token_savings_report,
80};
81
82use anyhow::{Context, Result, bail};
83use clap::Parser;
84use cli::{Cli, Commands, DispatchTraceFormat, GraphDbQuery, SemanticRelatedKind};
85#[cfg(test)]
86use cli::{GraphDbBackend, TraverseFormat};
87use commands::digests::{
88    cmd_context_pack, cmd_diff_digest, cmd_log_digest, cmd_metric_digest, cmd_session_cost,
89    cmd_session_digest, cmd_session_review_with_budget, cmd_test_digest,
90};
91#[cfg(test)]
92use commands::graph::cmd_explain;
93use commands::graph::{
94    cmd_analyze, cmd_communities, cmd_explain_with_budget, cmd_graph, cmd_path, cmd_traverse,
95};
96#[cfg(test)]
97use commands::index_search::cmd_search;
98use commands::index_search::{cmd_index, cmd_search_with_budget, cmd_search_worker};
99use commands::infra::{
100    StatusCommandOptions, cmd_convex_sync, cmd_edit, cmd_graph_db, cmd_init, cmd_locks,
101    cmd_rewrite, cmd_route, cmd_sql, cmd_status,
102};
103use commands::memory::cmd_memory;
104use commands::quality::{cmd_audit, cmd_audit_tagpath, cmd_lint};
105use commands::summarize::cmd_summarize;
106use flate2::{Compression, read::GzDecoder, write::GzEncoder};
107use output::tagpath::{
108    TagpathAnnotationDiagnostic, TagpathSearchOpts,
109    annotate_communities_with_tagpath, annotate_hits_with_tagpath,
110    annotate_path_nodes_with_tagpath, annotate_stored_edges_with_tagpath,
111    annotate_stored_symbols_with_tagpath,
112};
113#[cfg(test)]
114use output::ResponseBudgetPreset;
115use output::{
116    OutputFormat, ResponseBudget, ToolEnvelope, ToolEnvelopeMetric,
117    ToolEnvelopeSummary, TranscriptArtifactRef,
118};
119use rusqlite::{Connection, OptionalExtension};
120use serde::{Deserialize, Serialize};
121use sift::{SearchInput, SearchOptions, Sift};
122#[cfg(test)]
123use std::cell::RefCell;
124use std::cmp::Ordering;
125use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
126use std::env;
127use std::fs;
128use std::io::{Read as _, Write as _};
129use std::path::{Path, PathBuf};
130use std::process::{Command, Stdio};
131use std::sync::{Mutex, OnceLock};
132use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
133use substrate::{
134    ConvexEdgeRow, ConvexNodeRow, ConvexProjectionRows, GraphEdge as SubstrateGraphEdge,
135    GraphFreshness, GraphNode as SubstrateGraphNode, GraphProjection, GraphPropertyFilter,
136    GraphProvenance, GraphQueryOptions, GraphQueryPage, GraphStore, SQLITE_GRAPH_SCHEMA_VERSION,
137    SqliteGraphStore, SqliteProjectionRefresh,
138    TerseGraphNode as SubstrateTerseGraphNode, TerseGraphEdge as SubstrateTerseGraphEdge,
139};
140use tsift_core::{NeighborhoodScoring, RankedNeighborhoodOptions};
141use tagpath::{family as tagpath_family, ontology as tagpath_ontology};
142#[cfg(test)]
143use tsift_agent_doc::session_cost;
144#[cfg(test)]
145use tsift_agent_doc::session_review;
146use tsift_digest::{diff_digest, log_digest, metric_digest, test_digest};
147use tsift_graph as graph;
148use tsift_index::{config, index, init, multiplicity, walk};
149use tsift_memory::{MemoryEvent, default_memory_db_path, read_memory_events};
150use tsift_quality::{cycle_packet_cache, dci_benchmark, lint, perf_gate, token_gate};
151use tsift_resolution as resolution;
152use tsift_search::{impact, sift};
153use tsift_sqlite as substrate;
154use tsift_status::status;
155use tsift_summarize::summarize;
156#[cfg(feature = "backend-surrealdb")]
157use tsift_surrealdb::SurrealdbGraphStore;
158use tsift_tokensave::TokensaveDb;
159
160#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize)]
161pub(crate) enum GraphDbExperimentalBackend {
162    DuckdbDuckpgq,
163    Falkordb,
164    Ladybug,
165    Kuzu,
166    Surrealdb,
167}
168
169#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
170pub(crate) struct SearchFacetFilters {
171    #[serde(skip_serializing_if = "Vec::is_empty", default)]
172    pub(crate) languages: Vec<String>,
173    #[serde(skip_serializing_if = "Vec::is_empty", default)]
174    pub(crate) kinds: Vec<String>,
175    #[serde(skip_serializing_if = "Vec::is_empty", default)]
176    pub(crate) node_kinds: Vec<String>,
177    #[serde(skip_serializing_if = "Vec::is_empty", default)]
178    pub(crate) sections: Vec<String>,
179    #[serde(skip_serializing_if = "Vec::is_empty", default)]
180    pub(crate) parents: Vec<String>,
181    #[serde(skip_serializing_if = "Vec::is_empty", default)]
182    pub(crate) children: Vec<String>,
183    #[serde(skip_serializing_if = "Vec::is_empty", default)]
184    pub(crate) fence_languages: Vec<String>,
185    #[serde(skip_serializing_if = "Vec::is_empty", default)]
186    pub(crate) list_depths: Vec<usize>,
187    #[serde(skip_serializing_if = "Vec::is_empty", default)]
188    pub(crate) heading_levels: Vec<usize>,
189}
190
191impl SearchFacetFilters {
192    pub(crate) fn is_empty(&self) -> bool {
193        self.languages.is_empty()
194            && self.kinds.is_empty()
195            && self.node_kinds.is_empty()
196            && self.sections.is_empty()
197            && self.parents.is_empty()
198            && self.children.is_empty()
199            && self.fence_languages.is_empty()
200            && self.list_depths.is_empty()
201            && self.heading_levels.is_empty()
202    }
203
204    fn needs_ast_context(&self) -> bool {
205        !self.sections.is_empty()
206            || !self.parents.is_empty()
207            || !self.children.is_empty()
208            || !self.fence_languages.is_empty()
209            || !self.list_depths.is_empty()
210            || !self.heading_levels.is_empty()
211    }
212}
213
214#[derive(Serialize)]
215struct GraphDbBackendPromotionGate {
216    status: String,
217    native_adapter_required: bool,
218    required_checks: Vec<String>,
219}
220
221impl GraphDbExperimentalBackend {
222    fn name(self) -> &'static str {
223        match self {
224            Self::DuckdbDuckpgq => "duckdb-duckpgq",
225            Self::Falkordb => "falkordb",
226            Self::Ladybug => "ladybug",
227            Self::Kuzu => "kuzu",
228            Self::Surrealdb => "surrealdb",
229        }
230    }
231
232    fn adapter_label(self) -> &'static str {
233        match self {
234            Self::DuckdbDuckpgq => "DuckDB/DuckPGQ read-only prototype",
235            Self::Falkordb => "FalkorDB read-only prototype",
236            Self::Ladybug => "Ladybug read-only prototype",
237            Self::Kuzu => "Kuzu (Vela-Engineering/kuzu) read-only prototype",
238            Self::Surrealdb => "SurrealDB read-only prototype",
239        }
240    }
241
242    fn projection_load(self) -> &'static str {
243        match self {
244            Self::Falkordb => {
245                "provider-neutral rows loaded into a FalkorDB-shaped read snapshot for parity and timing only; production FalkorDB storage remains behind backend-eval until a real adapter passes the full-projection gate"
246            }
247            Self::Kuzu => {
248                "provider-neutral rows loaded into a Kuzu-compatible in-process read snapshot for parity and performance gates; production Vela-Engineering/kuzu storage remains behind a future optional adapter"
249            }
250            Self::Surrealdb => {
251                "provider-neutral rows loaded into a SurrealDB-compatible read snapshot for parity and timing only; production SurrealDB storage remains behind backend-eval until a real optional adapter passes the full-projection gate"
252            }
253            _ => {
254                "provider-neutral rows loaded into a dependency-free in-process read snapshot for parity and performance gates"
255            }
256        }
257    }
258
259    fn lock_behavior(self) -> &'static str {
260        match self {
261            Self::Falkordb => {
262                "read-only FalkorDB prototype snapshot; production promotion must prove multi-process writer behavior and local fallback semantics before replacing SQLite"
263            }
264            Self::Kuzu => {
265                "read-only Kuzu prototype snapshot; no SQLite writer lock is taken during benchmarks, and production Vela-Engineering/kuzu promotion must prove concurrent writer semantics before replacing SQLite"
266            }
267            Self::Surrealdb => {
268                "read-only SurrealDB prototype snapshot; production promotion must prove embedded/file-backed writer and read-only lock behavior before replacing SQLite"
269            }
270            _ => "read-only snapshot/row adapter; no writer lock is taken during query benchmarks",
271        }
272    }
273
274    fn install_portability(self) -> &'static str {
275        match self {
276            Self::Falkordb => {
277                "prototype is dependency-free in this binary; production FalkorDB promotion must keep install optional and preserve cargo build/install without a service"
278            }
279            Self::Kuzu => {
280                "prototype is dependency-free in this binary; production Vela-Engineering/kuzu integration must stay optional so cargo build/install works without a native Kuzu toolchain"
281            }
282            Self::Surrealdb => {
283                "prototype is dependency-free in this binary; production SurrealDB integration must stay optional so cargo build/install works without pulling SurrealDB into the default build"
284            }
285            _ => {
286                "prototype is dependency-free in this binary; a production engine adapter must remain optional before promotion"
287            }
288        }
289    }
290
291    fn prototype_hold_reason(self) -> Option<&'static str> {
292        match self {
293            Self::DuckdbDuckpgq => Some(
294                "DuckDB/DuckPGQ remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior",
295            ),
296            Self::Falkordb => Some(
297                "FalkorDB remains behind backend-eval until a production adapter beats SQLite on full_projection conflict-matrix, evidence, dispatch-trace, path tiers, install portability, and lock behavior",
298            ),
299            Self::Ladybug => Some(
300                "Ladybug remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior",
301            ),
302            Self::Kuzu => Some(
303                "Kuzu remains behind backend-eval until a native optional adapter proves projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior",
304            ),
305            Self::Surrealdb => Some(
306                "SurrealDB remains behind backend-eval until a feature-gated optional adapter proves provider-neutral projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior",
307            ),
308        }
309    }
310
311    fn promotion_gate(self) -> GraphDbBackendPromotionGate {
312        match self {
313            Self::DuckdbDuckpgq => GraphDbBackendPromotionGate {
314                status: "hold_native_adapter_required".to_string(),
315                native_adapter_required: true,
316                required_checks: vec![
317                    "native_duckdb_duckpgq_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
318                        .to_string(),
319                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
320                        .to_string(),
321                    "embedded_or_service_lock_behavior_match_or_beat_sqlite".to_string(),
322                    "operator_install_cost_keeps_cargo_build_install_duckdb_extension_free_by_default"
323                        .to_string(),
324                ],
325            },
326            Self::Falkordb => GraphDbBackendPromotionGate {
327                status: "hold_native_adapter_required".to_string(),
328                native_adapter_required: true,
329                required_checks: vec![
330                    "native_falkordb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
331                        .to_string(),
332                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
333                        .to_string(),
334                    "multi_process_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
335                        .to_string(),
336                    "operator_install_cost_keeps_cargo_build_install_service_free_by_default"
337                        .to_string(),
338                ],
339            },
340            Self::Ladybug => GraphDbBackendPromotionGate {
341                status: "hold_native_adapter_required".to_string(),
342                native_adapter_required: true,
343                required_checks: vec![
344                    "native_ladybug_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
345                        .to_string(),
346                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
347                        .to_string(),
348                    "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
349                        .to_string(),
350                    "operator_install_cost_keeps_cargo_build_install_ladybug_free_by_default"
351                        .to_string(),
352                ],
353            },
354            Self::Kuzu => GraphDbBackendPromotionGate {
355                status: "hold_native_adapter_required".to_string(),
356                native_adapter_required: true,
357                required_checks: vec![
358                    "native_kuzu_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
359                        .to_string(),
360                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
361                        .to_string(),
362                    "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
363                        .to_string(),
364                    "operator_install_cost_keeps_cargo_build_install_native_kuzu_free_by_default"
365                        .to_string(),
366                ],
367            },
368            Self::Surrealdb => GraphDbBackendPromotionGate {
369                status: "hold_native_adapter_required".to_string(),
370                native_adapter_required: true,
371                required_checks: vec![
372                    "native_surrealdb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay"
373                        .to_string(),
374                    "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets"
375                        .to_string(),
376                    "embedded_file_backed_writer_and_read_only_lock_behavior_match_or_beat_sqlite"
377                        .to_string(),
378                    "operator_install_cost_keeps_cargo_build_install_surrealdb_free_by_default"
379                        .to_string(),
380                ],
381            },
382        }
383    }
384
385    fn parse(raw: &str) -> Result<Self> {
386        match raw {
387            "duckdb-duckpgq" | "duckdb" | "duckpgq" => Ok(Self::DuckdbDuckpgq),
388            "falkordb" | "falkor" => Ok(Self::Falkordb),
389            "ladybug" => Ok(Self::Ladybug),
390            "kuzu" | "vela-kuzu" => Ok(Self::Kuzu),
391            "surrealdb" | "surreal" | "surreal-db" => Ok(Self::Surrealdb),
392            _ => {
393                bail!(
394                    "unknown backend-eval candidate {raw:?}; expected duckdb-duckpgq, falkordb, ladybug, kuzu, or surrealdb"
395                )
396            }
397        }
398    }
399}
400
401
402pub fn run() -> Result<()> {
403    let cli = Cli::parse();
404    let compact = cli.compact;
405    let pretty = cli.pretty;
406    let terse = cli.terse || cli.ultra_terse;
407    let ultra_terse = cli.ultra_terse;
408    let absolute = cli.absolute;
409    let tabular = cli.tabular;
410    let schema = cli.schema;
411    let envelope = cli.envelope;
412    match cli.command {
413        Some(Commands::Search {
414            query,
415            path,
416            limit,
417            strategy,
418            exact,
419            scope,
420            federated,
421            lang,
422            kind,
423            node_kind,
424            section,
425            parent,
426            child,
427            fence_language,
428            list_depth,
429            heading_level,
430            json,
431            autoindex,
432            no_autoindex,
433            timeout,
434            max_items,
435            max_bytes,
436            budget,
437            no_tagpath,
438            tagpath_strict,
439        }) => cmd_search_with_budget(
440            query,
441            path,
442            limit,
443            if exact {
444                Some("exact".to_string())
445            } else {
446                strategy
447            },
448            scope,
449            federated,
450            json || terse || schema || envelope,
451            autoindex || !no_autoindex,
452            timeout,
453            compact,
454            pretty,
455            terse,
456            ultra_terse,
457            absolute,
458            tabular,
459            schema,
460            envelope,
461            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
462            TagpathSearchOpts {
463                no_tagpath,
464                strict: tagpath_strict,
465            },
466            SearchFacetFilters {
467                languages: lang,
468                kinds: kind,
469                node_kinds: node_kind,
470                sections: section,
471                parents: parent,
472                children: child,
473                fence_languages: fence_language,
474                list_depths: list_depth,
475                heading_levels: heading_level,
476            },
477        ),
478        Some(Commands::SearchWorker {
479            path,
480            cache_dir,
481            query,
482            limit,
483            strategy,
484            output,
485        }) => cmd_search_worker(&path, &cache_dir, &query, limit, &strategy, &output),
486        Some(Commands::DigestRunner {
487            kind,
488            path,
489            runner,
490            shell_command,
491            json,
492        }) => cmd_digest_runner(
493            &kind,
494            &path,
495            runner.as_deref(),
496            &shell_command,
497            OutputFormat {
498                json_output: json || terse || schema || envelope,
499                compact,
500                pretty,
501                terse,
502                ultra_terse,
503                schema,
504                envelope,
505            },
506        ),
507        Some(Commands::Edit { dry_run, file }) => {
508            cmd_edit(dry_run, file, compact, pretty, terse, schema)
509        }
510        Some(Commands::EditIntents {
511            path,
512            scope,
513            file,
514            json,
515            apply,
516            verify,
517            verify_command,
518            max_items,
519            max_bytes,
520            budget,
521        }) => cmd_edit_intents(
522            &path,
523            scope.as_deref(),
524            file,
525            apply,
526            SemanticEditVerifyOptions {
527                enabled: verify,
528                command: verify_command.as_deref(),
529            },
530            OutputFormat {
531                json_output: json || terse || schema || envelope,
532                compact,
533                pretty,
534                terse,
535                ultra_terse,
536                schema,
537                envelope,
538            },
539            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
540        ),
541        Some(Commands::Index {
542            path,
543            rebuild,
544            check,
545            exit_code,
546            prune,
547            quiet,
548            workspace,
549            submodule,
550            json,
551        }) => cmd_index(
552            &path,
553            rebuild,
554            check,
555            exit_code,
556            prune,
557            quiet,
558            workspace,
559            submodule.as_deref(),
560            json || terse || schema || envelope,
561            compact,
562            pretty,
563            terse,
564            absolute,
565            schema,
566        ),
567        Some(Commands::Rewrite { command, run }) => cmd_rewrite(
568            &command,
569            run,
570            OutputFormat {
571                json_output: terse || schema || envelope,
572                compact,
573                pretty,
574                terse,
575                ultra_terse,
576                schema,
577                envelope,
578            },
579        ),
580        Some(Commands::Route { task, id }) => cmd_route(&task, id),
581        Some(Commands::Memory { command }) => {
582            let json = command.json_output();
583            cmd_memory(
584                command,
585                OutputFormat {
586                    json_output: json || terse || schema || envelope,
587                    compact,
588                    pretty,
589                    terse,
590                    ultra_terse,
591                    schema,
592                    envelope,
593                },
594            )
595        }
596        Some(Commands::Finding { command }) => match command {
597            cli::FindingCommand::Add {
598                path,
599                kind,
600                title,
601                body,
602                about,
603                confidence,
604                status,
605                relates,
606                scope,
607                json,
608            } => commands::finding::cmd_finding_add(
609                &path,
610                &kind,
611                &title,
612                &body,
613                &about,
614                confidence,
615                &status,
616                relates.as_deref(),
617                scope.as_deref(),
618                json || terse || schema || envelope,
619                pretty,
620            ),
621            cli::FindingCommand::List {
622                path,
623                about,
624                kind,
625                status,
626                include_stale,
627                scope,
628                json,
629            } => commands::finding::cmd_finding_list(
630                &path,
631                about.as_deref(),
632                kind.as_deref(),
633                status.as_deref(),
634                include_stale,
635                scope.as_deref(),
636                json || terse || schema || envelope,
637                pretty,
638            ),
639            cli::FindingCommand::Harvest { path, scope, json } => {
640                commands::finding::cmd_finding_harvest(
641                    &path,
642                    scope.as_deref(),
643                    json || terse || schema || envelope,
644                    pretty,
645                )
646            }
647            cli::FindingCommand::Promote { id, path, json } => {
648                commands::finding::cmd_finding_promote(
649                    &path,
650                    &id,
651                    json || terse || schema || envelope,
652                    pretty,
653                )
654            }
655        },
656        Some(Commands::Graph {
657            symbol,
658            path,
659            callers,
660            callees,
661            scope,
662            limit,
663            json,
664            no_tagpath,
665            tagpath_strict,
666        }) => cmd_graph(
667            &symbol,
668            &path,
669            callers,
670            callees,
671            scope.as_deref(),
672            limit,
673            json || terse || schema || envelope,
674            compact,
675            pretty,
676            terse,
677            absolute,
678            tabular,
679            schema,
680            TagpathSearchOpts {
681                no_tagpath,
682                strict: tagpath_strict,
683            },
684        ),
685        Some(Commands::Sql {
686            db,
687            query,
688            table,
689            json,
690        }) => cmd_sql(
691            &db,
692            query,
693            table,
694            json || terse || schema || envelope,
695            compact,
696            pretty,
697            terse,
698            schema,
699        ),
700        Some(Commands::Communities {
701            path,
702            scope,
703            min_size,
704            limit,
705            json,
706            no_tagpath,
707            tagpath_strict,
708        }) => cmd_communities(
709            &path,
710            scope.as_deref(),
711            min_size,
712            limit,
713            json || terse || schema || envelope,
714            compact,
715            pretty,
716            terse,
717            tabular,
718            schema,
719            TagpathSearchOpts {
720                no_tagpath,
721                strict: tagpath_strict,
722            },
723        ),
724        Some(Commands::Analyze {
725            path,
726            scope,
727            entry_points,
728            limit,
729            json,
730        }) => cmd_analyze(
731            &path,
732            scope.as_deref(),
733            &entry_points,
734            limit,
735            OutputFormat {
736                json_output: json || terse || schema || envelope,
737                compact,
738                pretty,
739                terse,
740                ultra_terse,
741                schema,
742                envelope,
743            },
744        ),
745        Some(Commands::Path {
746            from,
747            to,
748            path,
749            scope,
750            json,
751            no_tagpath,
752            tagpath_strict,
753        }) => cmd_path(
754            &from,
755            &to,
756            &path,
757            scope.as_deref(),
758            json || terse || schema || envelope,
759            compact,
760            pretty,
761            terse,
762            schema,
763            TagpathSearchOpts {
764                no_tagpath,
765                strict: tagpath_strict,
766            },
767        ),
768        Some(Commands::Explain {
769            symbol,
770            path,
771            scope,
772            limit,
773            json,
774            max_items,
775            max_bytes,
776            budget,
777            no_tagpath,
778            tagpath_strict,
779        }) => cmd_explain_with_budget(
780            &symbol,
781            &path,
782            scope.as_deref(),
783            limit,
784            json || terse || schema || envelope,
785            compact,
786            pretty,
787            terse,
788            ultra_terse,
789            absolute,
790            tabular,
791            schema,
792            envelope,
793            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
794            TagpathSearchOpts {
795                no_tagpath,
796                strict: tagpath_strict,
797            },
798        ),
799        Some(Commands::Traverse {
800            node,
801            to,
802            path,
803            scope,
804            depth,
805            limit,
806            format,
807            convex_snapshot,
808        }) => cmd_traverse(
809            node.as_deref(),
810            to.as_deref(),
811            &path,
812            scope.as_deref(),
813            depth,
814            limit,
815            format,
816            pretty,
817            terse,
818            schema,
819            convex_snapshot.as_deref(),
820        ),
821        Some(Commands::ConvexSync {
822            path,
823            scope,
824            snapshot,
825            chunk_size,
826            remote_snapshot,
827            apply,
828            endpoint,
829            auth_token_env,
830            json,
831        }) => cmd_convex_sync(
832            ConvexSyncOptions {
833                path: &path,
834                scope: scope.as_deref(),
835                snapshot: snapshot.as_deref(),
836                chunk_size,
837                remote_snapshot,
838                apply,
839                endpoint: endpoint.as_deref(),
840                auth_token_env: &auth_token_env,
841            },
842            OutputFormat {
843                json_output: json || terse || schema || envelope,
844                compact,
845                pretty,
846                terse,
847                ultra_terse,
848                schema,
849                envelope,
850            },
851        ),
852        Some(Commands::GraphDb {
853            path,
854            scope,
855            backend,
856            convex_snapshot,
857            json,
858            query,
859        }) => cmd_graph_db(
860            &path,
861            scope.as_deref(),
862            backend,
863            convex_snapshot.as_deref(),
864            query,
865            OutputFormat {
866                json_output: json || terse || schema || envelope,
867                compact,
868                pretty,
869                terse,
870                ultra_terse,
871                schema,
872                envelope,
873            },
874        ),
875        Some(Commands::SourceRead {
876            file,
877            path,
878            start,
879            lines,
880            end,
881            scope,
882            json,
883            max_items,
884            max_bytes,
885            budget,
886        }) => cmd_source_read(
887            &file,
888            &path,
889            start,
890            lines,
891            end,
892            scope.as_deref(),
893            OutputFormat {
894                json_output: json || terse || schema || envelope,
895                compact,
896                pretty,
897                terse,
898                ultra_terse,
899                schema,
900                envelope,
901            },
902            absolute,
903            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
904        ),
905        Some(Commands::MarkdownAst {
906            file,
907            path,
908            node,
909            json,
910            max_items,
911            max_bytes,
912            budget,
913        }) => cmd_markdown_ast(
914            &file,
915            &path,
916            node.as_deref(),
917            OutputFormat {
918                json_output: json || terse || schema || envelope,
919                compact,
920                pretty,
921                terse,
922                ultra_terse,
923                schema,
924                envelope,
925            },
926            absolute,
927            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
928        ),
929        Some(Commands::SymbolRead {
930            symbol,
931            file,
932            path,
933            scope,
934            json,
935            max_items,
936            max_bytes,
937            budget,
938        }) => cmd_symbol_read(
939            &symbol,
940            file.as_deref(),
941            &path,
942            scope.as_deref(),
943            OutputFormat {
944                json_output: json || terse || schema || envelope,
945                compact,
946                pretty,
947                terse,
948                ultra_terse,
949                schema,
950                envelope,
951            },
952            absolute,
953            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
954        ),
955        Some(Commands::Audit {
956            skills_dir,
957            manifest,
958            usage,
959            cleanup,
960            report,
961            json,
962        }) => cmd_audit(
963            &skills_dir,
964            manifest,
965            usage,
966            cleanup,
967            report,
968            json || terse || schema || envelope,
969            compact,
970            pretty,
971            terse,
972            schema,
973        ),
974        Some(Commands::AuditTagpath { path, scope, json }) => cmd_audit_tagpath(
975            &path,
976            scope.as_deref(),
977            json || terse || schema || envelope,
978            pretty,
979            terse,
980            schema,
981        ),
982        Some(Commands::Init {
983            path,
984            codex,
985            opencode,
986            workspace,
987        }) => cmd_init(&path, codex, opencode, workspace),
988        Some(Commands::Lint {
989            file,
990            index,
991            entities_from,
992            json,
993        }) => cmd_lint(
994            &file,
995            index,
996            entities_from,
997            json || terse || schema || envelope,
998            compact,
999            pretty,
1000            terse,
1001            schema,
1002        ),
1003        Some(Commands::Summarize {
1004            symbol,
1005            file,
1006            extract,
1007            diff,
1008            stats,
1009            path,
1010            json,
1011        }) => cmd_summarize(
1012            symbol,
1013            file,
1014            extract,
1015            diff,
1016            stats,
1017            &path,
1018            json || terse || schema || envelope,
1019            compact,
1020            pretty,
1021            terse,
1022            schema,
1023        ),
1024        Some(Commands::Semantic {
1025            query,
1026            path,
1027            scope,
1028            limit,
1029            kind,
1030            json,
1031        }) => cmd_semantic_related(
1032            &query,
1033            &path,
1034            scope.as_deref(),
1035            limit,
1036            kind,
1037            json || terse || schema || envelope,
1038            compact,
1039            pretty,
1040            terse,
1041            schema,
1042        ),
1043        Some(Commands::DiffDigest {
1044            path,
1045            cached,
1046            revision,
1047            max_parsed_files,
1048            json,
1049        }) => cmd_diff_digest(
1050            &path,
1051            cached,
1052            revision.as_deref(),
1053            max_parsed_files,
1054            OutputFormat {
1055                json_output: json || terse || schema || envelope,
1056                compact,
1057                pretty,
1058                terse,
1059                ultra_terse,
1060                schema,
1061                envelope,
1062            },
1063        ),
1064        Some(Commands::Impact {
1065            path,
1066            cached,
1067            revision,
1068            scope,
1069            limit,
1070            json,
1071        }) => cmd_impact(
1072            &path,
1073            cached,
1074            revision.as_deref(),
1075            scope.as_deref(),
1076            limit,
1077            OutputFormat {
1078                json_output: json || terse || schema || envelope,
1079                compact,
1080                pretty,
1081                terse,
1082                ultra_terse,
1083                schema,
1084                envelope,
1085            },
1086        ),
1087        Some(Commands::TestDigest {
1088            path,
1089            input,
1090            runner,
1091            json,
1092        }) => cmd_test_digest(
1093            &path,
1094            input.as_deref(),
1095            runner.as_deref(),
1096            OutputFormat {
1097                json_output: json || terse || schema || envelope,
1098                compact,
1099                pretty,
1100                terse,
1101                ultra_terse,
1102                schema,
1103                envelope,
1104            },
1105        ),
1106        Some(Commands::LogDigest { path, input, json }) => cmd_log_digest(
1107            &path,
1108            input.as_deref(),
1109            OutputFormat {
1110                json_output: json || terse || schema || envelope,
1111                compact,
1112                pretty,
1113                terse,
1114                ultra_terse,
1115                schema,
1116                envelope,
1117            },
1118        ),
1119        Some(Commands::ContextPack {
1120            path,
1121            test_input,
1122            runner,
1123            log_input,
1124            json,
1125            max_items,
1126            max_bytes,
1127            budget,
1128            convex_snapshot,
1129        }) => cmd_context_pack(
1130            &path,
1131            test_input.as_deref(),
1132            runner.as_deref(),
1133            log_input.as_deref(),
1134            OutputFormat {
1135                json_output: json || terse || schema || envelope,
1136                compact,
1137                pretty,
1138                terse,
1139                ultra_terse,
1140                schema,
1141                envelope,
1142            },
1143            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
1144            convex_snapshot.as_deref(),
1145        ),
1146        Some(Commands::ConflictMatrix {
1147            targets,
1148            path,
1149            scope,
1150            depth,
1151            limit,
1152            impact_limit,
1153            json,
1154        }) => cmd_conflict_matrix(
1155            &path,
1156            scope.as_deref(),
1157            &targets,
1158            depth,
1159            limit,
1160            impact_limit,
1161            OutputFormat {
1162                json_output: json || terse || schema || envelope,
1163                compact,
1164                pretty,
1165                terse,
1166                ultra_terse,
1167                schema,
1168                envelope,
1169            },
1170        ),
1171        Some(Commands::DispatchTrace {
1172            targets,
1173            path,
1174            scope,
1175            depth,
1176            limit,
1177            impact_limit,
1178            format,
1179            json,
1180        }) => cmd_dispatch_trace(
1181            DispatchTraceOptions {
1182                path: &path,
1183                scope: scope.as_deref(),
1184                raw_targets: &targets,
1185                depth,
1186                limit,
1187                impact_limit,
1188                trace_format: if json {
1189                    DispatchTraceFormat::Json
1190                } else {
1191                    format
1192                },
1193            },
1194            OutputFormat {
1195                json_output: json || terse || schema || envelope,
1196                compact,
1197                pretty,
1198                terse,
1199                ultra_terse,
1200                schema,
1201                envelope,
1202            },
1203        ),
1204        Some(Commands::DependencyDag {
1205            targets,
1206            path,
1207            scope,
1208            depth,
1209            limit,
1210            json,
1211        }) => cmd_dependency_dag(
1212            &path,
1213            scope.as_deref(),
1214            &targets,
1215            depth,
1216            limit,
1217            OutputFormat {
1218                json_output: json || terse || schema || envelope,
1219                compact,
1220                pretty,
1221                terse,
1222                ultra_terse,
1223                schema,
1224                envelope,
1225            },
1226        ),
1227        Some(Commands::TokenSavings {
1228            fixture,
1229            fail_under,
1230            json,
1231        }) => token_savings::cmd_token_savings(
1232            &fixture,
1233            fail_under,
1234            OutputFormat {
1235                json_output: json || terse || schema || envelope,
1236                compact,
1237                pretty,
1238                terse,
1239                ultra_terse,
1240                schema,
1241                envelope,
1242            },
1243        ),
1244        Some(Commands::MetricDigest {
1245            input,
1246            baseline,
1247            metrics,
1248            lower_is_better,
1249            higher_is_better,
1250            history,
1251            top,
1252            json,
1253        }) => cmd_metric_digest(
1254            MetricDigestOptions {
1255                input_path: input.as_deref(),
1256                baseline_path: baseline.as_deref(),
1257                metrics: &metrics,
1258                lower_is_better: &lower_is_better,
1259                higher_is_better: &higher_is_better,
1260                history,
1261                top,
1262            },
1263            OutputFormat {
1264                json_output: json || terse || schema || envelope,
1265                compact,
1266                pretty,
1267                terse,
1268                ultra_terse,
1269                schema,
1270                envelope,
1271            },
1272        ),
1273        Some(Commands::DciBenchmark { fixture, json }) => cmd_dci_benchmark(
1274            &fixture,
1275            OutputFormat {
1276                json_output: json || terse || schema || envelope,
1277                compact,
1278                pretty,
1279                terse,
1280                ultra_terse,
1281                schema,
1282                envelope,
1283            },
1284        ),
1285        Some(Commands::TokenGate { command }) => {
1286            cmd_token_gate(command, OutputFormat {
1287                json_output: true,
1288                compact,
1289                pretty,
1290                terse,
1291                ultra_terse,
1292                schema,
1293                envelope,
1294            })?;
1295            Ok(())
1296        },
1297        Some(Commands::Workflow { topic, json }) => workflow::cmd_workflow(
1298            &topic,
1299            OutputFormat {
1300                json_output: json || terse || schema || envelope,
1301                compact,
1302                pretty,
1303                terse,
1304                ultra_terse,
1305                schema,
1306                envelope,
1307            },
1308        ),
1309        Some(Commands::SessionDigest {
1310            path,
1311            input,
1312            source,
1313            json,
1314        }) => cmd_session_digest(
1315            &path,
1316            input.as_deref(),
1317            source.as_deref(),
1318            OutputFormat {
1319                json_output: json || terse || schema || envelope,
1320                compact,
1321                pretty,
1322                terse,
1323                ultra_terse,
1324                schema,
1325                envelope,
1326            },
1327        ),
1328        Some(Commands::SessionCost {
1329            input,
1330            source,
1331            json,
1332        }) => cmd_session_cost(
1333            input.as_deref(),
1334            source.as_deref(),
1335            OutputFormat {
1336                json_output: json || terse || schema || envelope,
1337                compact,
1338                pretty,
1339                terse,
1340                ultra_terse,
1341                schema,
1342                envelope,
1343            },
1344        ),
1345        Some(Commands::SessionReview {
1346            path,
1347            next_context,
1348            json,
1349            max_items,
1350            max_bytes,
1351            budget,
1352        }) => cmd_session_review_with_budget(
1353            &path,
1354            next_context,
1355            OutputFormat {
1356                json_output: json || terse || schema || envelope,
1357                compact,
1358                pretty,
1359                terse,
1360                ultra_terse,
1361                schema,
1362                envelope,
1363            },
1364            ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
1365        ),
1366        Some(Commands::Status {
1367            path,
1368            fix,
1369            no_fix,
1370            json,
1371        }) => cmd_status(
1372            &path,
1373            StatusCommandOptions {
1374                fix,
1375                no_fix,
1376                json_output: json || terse || schema || envelope,
1377                compact,
1378                pretty,
1379                terse,
1380                schema,
1381            },
1382        ),
1383        Some(Commands::Locks { path, scope, json }) => cmd_locks(
1384            &path,
1385            scope.as_deref(),
1386            json || terse || schema || envelope,
1387            compact,
1388            pretty,
1389            terse,
1390            schema,
1391        ),
1392        None => {
1393            println!("tsift v{}", env!("CARGO_PKG_VERSION"));
1394            println!("Run `tsift --help` for usage.");
1395            Ok(())
1396        }
1397    }
1398}
1399
/// Classify a task description into a model tier.
///
/// Returns `(tier_name, model_id)`. The description is lower-cased and then
/// scanned for keyword substrings, so matching is case-insensitive and also
/// fires inside longer words (e.g. "explanation" contains "plan").
///
/// Precedence: architecture/design keywords win over edit/write keywords;
/// when nothing matches the task is treated as a search/lookup.
pub fn classify_task(task: &str) -> (&'static str, &'static str) {
    // Architecture/design signals → opus
    const OPUS_SIGNALS: &[&str] = &[
        "architect",
        "architecture",
        "design",
        "plan",
        "strateg",
        "analy",
        "review",
        "evaluate",
        "assess",
    ];
    // Edit/write signals → sonnet ("add " keeps its trailing space on purpose
    // so that words merely starting with "add" do not match).
    const SONNET_SIGNALS: &[&str] = &[
        "edit",
        "write",
        "fix",
        "change",
        "update",
        "create",
        "add ",
        "remove",
        "delete",
        "modify",
        "refactor",
        "implement",
        "build",
    ];

    let normalized = task.to_lowercase();
    let mentions_any =
        |signals: &[&str]| signals.iter().any(|signal| normalized.contains(*signal));

    if mentions_any(OPUS_SIGNALS) {
        ("opus", "claude-opus-4-6")
    } else if mentions_any(SONNET_SIGNALS) {
        ("sonnet", "claude-sonnet-4-6")
    } else {
        // Default: search/lookup → haiku
        ("haiku", "claude-haiku-4-5-20251001")
    }
}
1443
/// Test-only shorthand for [`to_json_schema`] with the `ultra_terse` and
/// `schema` transforms switched off.
#[cfg(test)]
fn to_json<T: serde::Serialize>(val: &T, pretty: bool, terse: bool) -> anyhow::Result<String> {
    let (ultra_terse, schema) = (false, false);
    to_json_schema(val, pretty, terse, ultra_terse, schema)
}
1448
1449/// Add top-level `tagpath_index_stale: true` + `tagpath_stale_reason: <reason>`
1450/// fields to a JSON response when the tagpath adapter reported any helper
1451/// going stale. JSON consumers (`tsift --envelope` / `--json` callers) can
1452/// then act on the same condition the stderr `tagpath_index_stale: …` log
1453/// already surfaces without parsing logs. No-op when `stale=false` or when
1454/// `value` is not a JSON object.
1455pub(crate) fn inject_tagpath_stale_into_json(
1456    value: &mut serde_json::Value,
1457    stale: bool,
1458    reason: Option<&str>,
1459) {
1460    if !stale {
1461        return;
1462    }
1463    if let Some(obj) = value.as_object_mut() {
1464        obj.insert(
1465            "tagpath_index_stale".to_string(),
1466            serde_json::Value::Bool(true),
1467        );
1468        if let Some(reason) = reason {
1469            obj.insert(
1470                "tagpath_stale_reason".to_string(),
1471                serde_json::Value::String(reason.to_string()),
1472            );
1473        }
1474    }
1475}
1476
1477pub(crate) fn to_json_schema<T: serde::Serialize>(
1478    val: &T,
1479    pretty: bool,
1480    terse: bool,
1481    ultra_terse: bool,
1482    schema: bool,
1483) -> anyhow::Result<String> {
1484    if terse || schema {
1485        let value = serde_json::to_value(val)?;
1486        let mut transformed = if terse { terse_transform(value) } else { value };
1487        if ultra_terse {
1488            transformed = ultra_terse_transform(transformed);
1489            transformed = edge_index_transform(transformed);
1490        }
1491        if schema {
1492            transformed = schema_transform(transformed);
1493        }
1494        if terse {
1495            let terse_schema = terse_schema_for(&transformed);
1496            let wrapped = serde_json::json!({"_s": terse_schema, "d": transformed});
1497            if pretty {
1498                Ok(serde_json::to_string_pretty(&wrapped)?)
1499            } else {
1500                Ok(serde_json::to_string(&wrapped)?)
1501            }
1502        } else if pretty {
1503            Ok(serde_json::to_string_pretty(&transformed)?)
1504        } else {
1505            Ok(serde_json::to_string(&transformed)?)
1506        }
1507    } else if pretty {
1508        Ok(serde_json::to_string_pretty(val)?)
1509    } else {
1510        Ok(serde_json::to_string(val)?)
1511    }
1512}
1513
1514pub(crate) fn envelope_metric(label: &str, value: impl ToString) -> ToolEnvelopeMetric {
1515    ToolEnvelopeMetric {
1516        label: label.to_string(),
1517        value: value.to_string(),
1518    }
1519}
1520
/// Remove duplicate strings, keeping the first occurrence of each value and
/// the original relative order.
pub(crate) fn dedupe_preserve_order(values: Vec<String>) -> Vec<String> {
    let mut already_seen = HashSet::new();
    values
        .into_iter()
        // `insert` returns false for a value that is already present.
        .filter(|candidate| already_seen.insert(candidate.clone()))
        .collect()
}
1531
1532pub(crate) fn print_json_or_envelope<T: Serialize>(
1533    report: &T,
1534    format: &OutputFormat,
1535    tool: &str,
1536    view: &str,
1537    summary: ToolEnvelopeSummary,
1538    truncated: bool,
1539    follow_up: Vec<String>,
1540) -> Result<()> {
1541    if format.envelope {
1542        let envelope = ToolEnvelope {
1543            tool,
1544            view,
1545            summary,
1546            truncated,
1547            follow_up: dedupe_preserve_order(follow_up),
1548            report,
1549        };
1550        println!(
1551            "{}",
1552            to_json_schema(
1553                &envelope,
1554                format.pretty,
1555                format.terse,
1556                format.ultra_terse,
1557                format.schema
1558            )?
1559        );
1560    } else {
1561        println!(
1562            "{}",
1563            to_json_schema(
1564                report,
1565                format.pretty,
1566                format.terse,
1567                format.ultra_terse,
1568                format.schema
1569            )?
1570        );
1571    }
1572    Ok(())
1573}
1574
/// Estimate a token count from a byte length as `ceil(bytes / 4)`.
pub(crate) fn estimated_tokens_from_bytes(bytes: usize) -> usize {
    // Heuristic ratio used for the estimate: four bytes per token, rounded up.
    const BYTES_PER_TOKEN: usize = 4;
    bytes.div_ceil(BYTES_PER_TOKEN)
}
1578
1579fn cmd_token_gate(
1580    command: cli::TokenGateCommand,
1581    format: OutputFormat,
1582) -> Result<()> {
1583    match command {
1584        cli::TokenGateCommand::Sample {
1585            surface,
1586            path,
1587            scope,
1588            target,
1589            depth,
1590            sample_index,
1591            json: _,
1592        } => cmd_token_gate_sample(&surface, &path, scope.as_deref(), target.as_deref(), depth, sample_index),
1593        cli::TokenGateCommand::Evaluate {
1594            history,
1595            allowed_regression_percent,
1596            json: _,
1597        } => cmd_token_gate_evaluate(history.as_deref(), allowed_regression_percent, &format),
1598    }
1599}
1600
1601fn cmd_token_gate_sample(
1602    surface: &str,
1603    path: &Path,
1604    scope: Option<&str>,
1605    target: Option<&str>,
1606    depth: usize,
1607    sample_index: usize,
1608) -> Result<()> {
1609    if !token_gate::TOKEN_GATE_SURFACES.contains(&surface) {
1610        bail!(
1611            "unknown surface `{}`; expected one of: {}",
1612            surface,
1613            token_gate::TOKEN_GATE_SURFACES.join(", ")
1614        );
1615    }
1616
1617    let path_str = path.to_string_lossy().to_string();
1618    let tsift_bin = std::env::current_exe()?;
1619
1620    let args: Vec<String> = match surface {
1621        "context_pack" => vec![
1622            "context-pack".to_string(),
1623            "--json".to_string(),
1624            path_str,
1625        ],
1626        "session_review_next_context" => vec![
1627            "session-review".to_string(),
1628            "--json".to_string(),
1629            "--next-context".to_string(),
1630            path_str,
1631        ],
1632        "graph_db_evidence" => {
1633            let tgt = target.unwrap_or("default").to_string();
1634            vec![
1635                "graph-db".to_string(),
1636                "--json".to_string(),
1637                "--path".to_string(),
1638                path_str,
1639                "evidence".to_string(),
1640                tgt,
1641                "--depth".to_string(),
1642                depth.to_string(),
1643            ]
1644        }
1645        "conflict_matrix" => {
1646            let tgt = target.unwrap_or("default").to_string();
1647            let mut a = vec![
1648                "conflict-matrix".to_string(),
1649                "--json".to_string(),
1650                "--path".to_string(),
1651                path_str,
1652                "--depth".to_string(),
1653                depth.to_string(),
1654            ];
1655            if let Some(s) = scope {
1656                a.push("--scope".to_string());
1657                a.push(s.to_string());
1658            }
1659            a.push(tgt);
1660            a
1661        }
1662        "dispatch_trace" => {
1663            let tgt = target.unwrap_or("default").to_string();
1664            vec![
1665                "dispatch-trace".to_string(),
1666                "--json".to_string(),
1667                "--path".to_string(),
1668                path_str,
1669                tgt,
1670            ]
1671        }
1672        _ => bail!("unhandled surface: {}", surface),
1673    };
1674
1675    let start = Instant::now();
1676    let child = Command::new(&tsift_bin)
1677        .args(&args)
1678        .stdout(Stdio::piped())
1679        .stderr(Stdio::piped())
1680        .env("TSIFT_QUIET", "1")
1681        .spawn();
1682    let output = match child {
1683        Ok(c) => c.wait_with_output()?,
1684        Err(e) => bail!("failed to spawn tsift for surface {}: {}", surface, e),
1685    };
1686    let runtime_micros = start.elapsed().as_micros() as f64;
1687
1688    let stdout = String::from_utf8_lossy(&output.stdout);
1689    let envelope_bytes = stdout.trim().len() as f64;
1690    let prompt_tokens = estimated_tokens_from_bytes(stdout.trim().len()) as f64;
1691
1692    let cache_hit_rate_percent = 0.0;
1693    let raw_read_avoidance = 0.0;
1694    let useful_hit_density = if prompt_tokens > 0.0 { 0.5 } else { 0.0 };
1695
1696    let timestamp = iso_timestamp_now();
1697    let id = format!(
1698        "{surface}-baseline-{}-sample-{sample_index}",
1699        &timestamp[..10]
1700    );
1701    let label = format!(
1702        "token-gate baseline {surface} sample {sample_index} for {}",
1703        path.display()
1704    );
1705
1706    let mut metrics = BTreeMap::new();
1707    metrics.insert("prompt_tokens".to_string(), prompt_tokens);
1708    metrics.insert("envelope_bytes".to_string(), envelope_bytes);
1709    metrics.insert("runtime_micros".to_string(), runtime_micros);
1710    metrics.insert("cache_hit_rate_percent".to_string(), cache_hit_rate_percent);
1711    metrics.insert("raw_read_avoidance".to_string(), raw_read_avoidance);
1712    metrics.insert("useful_hit_density".to_string(), useful_hit_density);
1713
1714    let sample = token_gate::TokenGateSample {
1715        label,
1716        id,
1717        timestamp: Some(timestamp),
1718        surface: surface.to_string(),
1719        metrics,
1720    };
1721
1722    println!("{}", serde_json::to_string_pretty(&sample)?);
1723    Ok(())
1724}
1725
1726fn cmd_token_gate_evaluate(
1727    history_path: Option<&Path>,
1728    allowed_regression_percent: f64,
1729    format: &OutputFormat,
1730) -> Result<()> {
1731    let history_path = history_path
1732        .map(PathBuf::from)
1733        .unwrap_or_else(|| {
1734            let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1735            p.push("../../fixtures/token-gate-history.json");
1736            p
1737        });
1738
1739    let raw = std::fs::read_to_string(&history_path)
1740        .with_context(|| format!("failed to read token gate history: {}", history_path.display()))?;
1741    let samples = token_gate::parse_token_history(&raw)?;
1742    let report = token_gate::evaluate_token_gate(&samples, allowed_regression_percent);
1743
1744    if format.json_output {
1745        println!("{}", to_json_schema(&report, format.pretty, format.terse, false, format.schema)?);
1746    } else {
1747        println!("Token Gate Report");
1748        println!("  min_samples: {}", report.min_samples);
1749        println!("  allowed_regression: {:.1}%", report.allowed_regression_percent);
1750        println!("  decision: {:?}", report.decision);
1751        for eval in &report.surface_evaluations {
1752            println!(
1753                "  {} ({} samples): {:?}",
1754                eval.display_name, eval.sample_count, eval.verdict
1755            );
1756            for me in &eval.metric_evaluations {
1757                println!(
1758                    "    {} ({:?}): {}",
1759                    me.metric, me.direction, me.diagnostic
1760                );
1761            }
1762        }
1763        for d in &report.diagnostics {
1764            println!("  ! {}", d);
1765        }
1766    }
1767    Ok(())
1768}
1769
1770fn iso_timestamp_now() -> String {
1771    let dur = SystemTime::now()
1772        .duration_since(UNIX_EPOCH)
1773        .unwrap_or_default();
1774    let total_secs = dur.as_secs();
1775    let days_since_epoch = total_secs / 86400;
1776    let (year, month, day) = days_to_ymd(days_since_epoch);
1777    let time_of_day = total_secs % 86400;
1778    let hour = (time_of_day / 3600) as u8;
1779    let minute = ((time_of_day % 3600) / 60) as u8;
1780    let second = (time_of_day % 60) as u8;
1781    format!(
1782        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
1783        year, month, day, hour, minute, second
1784    )
1785}
1786
1787fn days_to_ymd(mut days: u64) -> (u64, u8, u8) {
1788    let mut year = 1970u64;
1789    loop {
1790        let days_in_year = if is_leap(year) { 366 } else { 365 };
1791        if days < days_in_year {
1792            break;
1793        }
1794        days -= days_in_year;
1795        year += 1;
1796    }
1797    let leap = is_leap(year);
1798    let month_days: [u8; 12] = if leap {
1799        [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
1800    } else {
1801        [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
1802    };
1803    let mut month: u8 = 1;
1804    for &md in &month_days {
1805        if days < md as u64 {
1806            break;
1807        }
1808        days -= md as u64;
1809        month += 1;
1810    }
1811    let day = days as u8 + 1;
1812    (year, month, day)
1813}
1814
/// Gregorian leap-year test: divisible by 400, or divisible by 4 but not
/// by 100.
fn is_leap(year: u64) -> bool {
    if year.is_multiple_of(400) {
        return true;
    }
    year.is_multiple_of(4) && !year.is_multiple_of(100)
}
1818
1819fn persist_transcript_artifact(
1820    root: &Path,
1821    prefix: &str,
1822    suffix: &str,
1823    key: &str,
1824    body: &str,
1825    expand: String,
1826) -> Result<TranscriptArtifactRef> {
1827    let handle = stable_handle(prefix, key);
1828    let artifacts_dir = root.join(".tsift/artifacts");
1829    fs::create_dir_all(&artifacts_dir).with_context(|| {
1830        format!(
1831            "creating transcript artifacts dir: {}",
1832            artifacts_dir.display()
1833        )
1834    })?;
1835    let file_name = format!("{handle}.{suffix}");
1836    let artifact_path = artifacts_dir.join(file_name);
1837    fs::write(&artifact_path, body)
1838        .with_context(|| format!("writing transcript artifact: {}", artifact_path.display()))?;
1839    let rel_path = relativize_pathbuf(&artifact_path, root);
1840    Ok(TranscriptArtifactRef {
1841        handle,
1842        path: rel_path.display().to_string(),
1843        bytes: body.len(),
1844        lines: body.lines().count(),
1845        expand,
1846    })
1847}
1848
/// Maps a long JSON field name to its terse abbreviation.
///
/// Names without an abbreviation are returned unchanged (`refs` maps to
/// itself). The table is mirrored, pair for pair, by `TERSE_PAIRS`; keep the
/// two in sync when adding entries. Note that `success` and `healthy` both
/// abbreviate to `ok`.
fn terse_key(key: &str) -> &str {
    match key {
        "name" => "n",
        "kind" => "k",
        "file" => "f",
        "line" => "l",
        "path" => "p",
        "from" => "fr",
        "type" => "ty",
        "text" => "tx",
        "new" => "nw",
        "run" => "r",
        "use" => "u",
        "score" => "sc",
        "language" => "la",
        "status" => "st",
        "state" => "stt",
        "error" => "err",
        "errors" => "ers",
        "hops" => "hp",
        "tags" => "tg",
        "model" => "ml",
        "skill" => "sk",
        "count" => "ct",
        "total" => "tot",
        "column" => "col",
        "description" => "dsc",
        "end_line" => "el",
        "signature" => "sig",
        "parent_module" => "pm",
        "visibility" => "vis",
        "match_type" => "mt",
        "caller_file" => "cf",
        "caller_name" => "cn",
        "caller_line" => "cl",
        "callee_name" => "en",
        "call_site_line" => "csl",
        "members" => "m",
        "refs" => "refs",
        "role" => "rl",
        "peer" => "pr",
        "modularity" => "q",
        "modularity_contribution" => "mc",
        "iterations" => "it",
        "node_count" => "nc",
        "edge_count" => "ec",
        "community_count" => "cc",
        "communities" => "cms",
        "community" => "cm",
        "community_diagnostics" => "cd",
        "cache_hit" => "cah",
        "tagpath_state" => "tps",
        "tagpath_stale_reason" => "tsr",
        "annotated_community_count" => "acc",
        "annotated_member_count" => "amc",
        "ambiguous_member_count" => "ambc",
        "ambiguous_members" => "amb",
        "candidate_count" => "cand",
        "tagpath_candidate_count" => "tcand",
        "evidence" => "ev",
        "chosen_file" => "chf",
        "symbol" => "s",
        "symbols" => "sy",
        "definitions" => "df",
        "callers" => "crs",
        "callees" => "ces",
        "total_tracked" => "tt",
        "modified" => "md",
        "deleted" => "dl",
        "unchanged" => "uc",
        "changes" => "ch",
        "prune_stats" => "ps",
        "hits" => "h",
        "rank" => "rk",
        "snippet" => "sn",
        "confidence" => "co",
        "index" => "ix",
        "summaries" => "sms",
        "recommendations" => "rec",
        "total_files" => "tf",
        "stale_files" => "sf",
        "last_indexed_secs_ago" => "age",
        "cached_files" => "caf",
        "total_indexed_files" => "tif",
        "coverage_pct" => "cov",
        "symbol_name" => "syn",
        "file_path" => "fp",
        "content_hash" => "hsh",
        "summary" => "sum",
        "tool" => "tl",
        "view" => "vw",
        "truncated" => "tr",
        "follow_up" => "fu",
        "report" => "rp",
        "metrics" => "ms",
        "label" => "lb",
        "value" => "v",
        "command" => "cmd",
        "exit_code" => "xc",
        "success" => "ok",
        "artifact" => "art",
        "digest" => "dg",
        "bytes" => "bt",
        "lines" => "lns",
        "expand" => "xp",
        "entities" => "ent",
        "relationships" => "rel",
        "concept_labels" => "cls",
        "extracted_at" => "at",
        "tokens_input" => "ti",
        "tokens_output" => "tout",
        "total_summaries" => "ts",
        "stale_count" => "stc",
        "total_tokens_input" => "tti",
        "total_tokens_output" => "tto",
        "estimated_tokens_saved" => "ets",
        "files_processed" => "fps",
        "symbols_extracted" => "se",
        "skills_dir" => "sd",
        "healthy" => "ok",
        "broken" => "brk",
        "skills" => "sks",
        "manifest_diffs" => "mdf",
        "similar_pairs" => "sim",
        "usage" => "usg",
        "cleanup" => "cln",
        "has_skill_md" => "hsm",
        "is_symlink" => "isl",
        "issues" => "iss",
        "invocation_count" => "inv",
        "reasons" => "rsn",
        "token_estimate" => "te",
        "skill_a" => "sa",
        "skill_b" => "sb",
        "desc_a" => "da",
        "desc_b" => "db",
        "annotations" => "ann",
        "entity" => "ety",
        "suggestion" => "sug",
        "columns" => "cols",
        "row_count" => "rc",
        "notnull" => "nn",
        "default_value" => "dv",
        "replace_all" => "ra",
        // No abbreviation registered: pass the caller's key through untouched.
        unmapped => unmapped,
    }
}
1996
1997fn terse_transform(val: serde_json::Value) -> serde_json::Value {
1998    match val {
1999        serde_json::Value::Object(map) => {
2000            let mut new_map = serde_json::Map::new();
2001            for (k, v) in map {
2002                new_map.insert(terse_key(&k).to_string(), terse_transform(v));
2003            }
2004            serde_json::Value::Object(new_map)
2005        }
2006        serde_json::Value::Array(arr) => {
2007            serde_json::Value::Array(arr.into_iter().map(terse_transform).collect())
2008        }
2009        other => other,
2010    }
2011}
2012
2013fn ultra_terse_transform(val: serde_json::Value) -> serde_json::Value {
2014    match val {
2015        serde_json::Value::Object(mut map) => {
2016            let is_graph_node =
2017                map.contains_key("id") && map.contains_key("k") && map.contains_key("n");
2018            let is_graph_edge =
2019                map.contains_key("from_id") && map.contains_key("to_id") && map.contains_key("k");
2020            if is_graph_node || is_graph_edge {
2021                map.remove("properties");
2022                map.remove("provenance");
2023                map.remove("freshness");
2024            }
2025            if is_graph_edge
2026                && let Some(serde_json::Value::String(s)) = map.get_mut("k") {
2027                    *s = abbreviate_edge_kind(s).to_string();
2028                }
2029            let is_coverage = map.contains_key("mode")
2030                && (map.contains_key("total_sector_count")
2031                    || map.contains_key("dirty_sector_count"));
2032            if is_coverage {
2033                map.remove("active_rebuild");
2034                map.remove("completed_dirty_sector_count");
2035                map.remove("mounted_sector_count");
2036                map.remove("rebuilding_sector_count");
2037                map.remove("resumed_sector_count");
2038                map.remove("reused_sector_count");
2039            }
2040            if let Some(serde_json::Value::String(s)) = map.get_mut("sn") {
2041                *s = truncate_for_ultra_terse(s, 80);
2042            }
2043            if let Some(serde_json::Value::String(s)) = map.get_mut("snippet") {
2044                *s = truncate_for_ultra_terse(s, 80);
2045            }
2046            let new_map: serde_json::Map<String, serde_json::Value> = map
2047                .into_iter()
2048                .map(|(k, v)| (k, ultra_terse_transform(v)))
2049                .collect();
2050            serde_json::Value::Object(new_map)
2051        }
2052        serde_json::Value::Array(arr) => {
2053            serde_json::Value::Array(arr.into_iter().map(ultra_terse_transform).collect())
2054        }
2055        other => other,
2056    }
2057}
2058
2059fn edge_index_transform(val: serde_json::Value) -> serde_json::Value {
2060    match val {
2061        serde_json::Value::Object(mut map) => {
2062            let node_ids: Option<Vec<String>> = map.get("nodes").and_then(|nodes| {
2063                nodes.as_array().map(|arr| {
2064                    arr.iter()
2065                        .filter_map(|n| n.get("id").and_then(|v| v.as_str()).map(String::from))
2066                        .collect()
2067                })
2068            });
2069            if let Some(ref ids) = node_ids {
2070                let id_map: std::collections::HashMap<&str, usize> = ids
2071                    .iter()
2072                    .enumerate()
2073                    .map(|(i, id)| (id.as_str(), i))
2074                    .collect();
2075                if let Some(serde_json::Value::Array(edges)) = map.get_mut("edges") {
2076                    for edge in edges.iter_mut() {
2077                        if let serde_json::Value::Object(edge_map) = edge {
2078                            if let Some(serde_json::Value::String(fid)) = edge_map.remove("from_id") {
2079                                if let Some(&idx) = id_map.get(fid.as_str()) {
2080                                    edge_map.insert("from".to_string(), serde_json::Value::Number(idx.into()));
2081                                } else {
2082                                    edge_map.insert("from_id".to_string(), serde_json::Value::String(fid));
2083                                }
2084                            }
2085                            if let Some(serde_json::Value::String(tid)) = edge_map.remove("to_id") {
2086                                if let Some(&idx) = id_map.get(tid.as_str()) {
2087                                    edge_map.insert("to".to_string(), serde_json::Value::Number(idx.into()));
2088                                } else {
2089                                    edge_map.insert("to_id".to_string(), serde_json::Value::String(tid));
2090                                }
2091                            }
2092                        }
2093                    }
2094                }
2095            }
2096            let new_map: serde_json::Map<String, serde_json::Value> = map
2097                .into_iter()
2098                .map(|(k, v)| (k, edge_index_transform(v)))
2099                .collect();
2100            serde_json::Value::Object(new_map)
2101        }
2102        serde_json::Value::Array(arr) => {
2103            serde_json::Value::Array(arr.into_iter().map(edge_index_transform).collect())
2104        }
2105        other => other,
2106    }
2107}
2108
/// Shortens `s` to at most `max_len` characters, marking the cut with `...`.
///
/// Text that already fits (measured in characters, not bytes) is returned
/// unchanged. Longer text keeps its first `max_len - 3` characters followed by
/// `...`. When `max_len` is below 3 the result is just `...`.
///
/// The fit check counts characters because the cut itself counts characters:
/// comparing the byte length instead made multi-byte text that was well inside
/// the limit come back with a spurious `...` appended.
fn truncate_for_ultra_terse(s: &str, max_len: usize) -> String {
    // Byte length bounds the character count, so this is a cheap early exit;
    // otherwise probe for a character beyond the limit without counting all.
    if s.len() <= max_len || s.chars().nth(max_len).is_none() {
        return s.to_string();
    }
    let truncated: String = s.chars().take(max_len.saturating_sub(3)).collect();
    format!("{truncated}...")
}
2117
2118fn terse_schema_for(val: &serde_json::Value) -> serde_json::Value {
2119    let mut keys = HashSet::new();
2120    collect_terse_keys(val, &mut keys);
2121    let mut schema = serde_json::Map::new();
2122    for (long, short) in TERSE_PAIRS {
2123        if keys.contains(*short) {
2124            schema.insert(
2125                short.to_string(),
2126                serde_json::Value::String(long.to_string()),
2127            );
2128        }
2129    }
2130    serde_json::Value::Object(schema)
2131}
2132
2133fn collect_terse_keys(val: &serde_json::Value, keys: &mut HashSet<String>) {
2134    match val {
2135        serde_json::Value::Object(map) => {
2136            for (k, v) in map {
2137                keys.insert(k.clone());
2138                collect_terse_keys(v, keys);
2139            }
2140        }
2141        serde_json::Value::Array(arr) => {
2142            for v in arr {
2143                collect_terse_keys(v, keys);
2144            }
2145        }
2146        _ => {}
2147    }
2148}
2149
2150fn schema_transform(val: serde_json::Value) -> serde_json::Value {
2151    match val {
2152        serde_json::Value::Array(arr) if arr.len() >= 2 => {
2153            if let Some(cols) = homogeneous_keys(&arr) {
2154                let rows: Vec<serde_json::Value> = arr
2155                    .into_iter()
2156                    .map(|item| {
2157                        if let serde_json::Value::Object(map) = item {
2158                            let vals: Vec<serde_json::Value> = cols
2159                                .iter()
2160                                .map(|c| map.get(c).cloned().unwrap_or(serde_json::Value::Null))
2161                                .collect();
2162                            serde_json::Value::Array(vals)
2163                        } else {
2164                            item
2165                        }
2166                    })
2167                    .collect();
2168                let col_vals: Vec<serde_json::Value> =
2169                    cols.into_iter().map(serde_json::Value::String).collect();
2170                serde_json::json!({"_c": col_vals, "_r": rows})
2171            } else {
2172                serde_json::Value::Array(arr.into_iter().map(schema_transform).collect())
2173            }
2174        }
2175        serde_json::Value::Array(arr) => {
2176            serde_json::Value::Array(arr.into_iter().map(schema_transform).collect())
2177        }
2178        serde_json::Value::Object(map) => {
2179            let new_map: serde_json::Map<String, serde_json::Value> = map
2180                .into_iter()
2181                .map(|(k, v)| (k, schema_transform(v)))
2182                .collect();
2183            serde_json::Value::Object(new_map)
2184        }
2185        other => other,
2186    }
2187}
2188
2189fn homogeneous_keys(arr: &[serde_json::Value]) -> Option<Vec<String>> {
2190    let first = arr.first()?.as_object()?;
2191    let keys: Vec<String> = first.keys().cloned().collect();
2192    for item in &arr[1..] {
2193        let obj = item.as_object()?;
2194        if obj.len() != keys.len() {
2195            return None;
2196        }
2197        for k in &keys {
2198            if !obj.contains_key(k) {
2199                return None;
2200            }
2201        }
2202    }
2203    Some(keys)
2204}
2205
/// `(long name, terse name)` pairs, listed in the same order and with the same
/// contents as the arms of `terse_key`.
///
/// Used to translate terse keys back to their long names. Two entries share a
/// terse name (`success` and `healthy` both use `ok`), so the reverse mapping
/// is not unique for that key, and `refs` abbreviates to itself.
const TERSE_PAIRS: &[(&str, &str)] = &[
    ("name", "n"),
    ("kind", "k"),
    ("file", "f"),
    ("line", "l"),
    ("path", "p"),
    ("from", "fr"),
    ("type", "ty"),
    ("text", "tx"),
    ("new", "nw"),
    ("run", "r"),
    ("use", "u"),
    ("score", "sc"),
    ("language", "la"),
    ("status", "st"),
    ("state", "stt"),
    ("error", "err"),
    ("errors", "ers"),
    ("hops", "hp"),
    ("tags", "tg"),
    ("model", "ml"),
    ("skill", "sk"),
    ("count", "ct"),
    ("total", "tot"),
    ("column", "col"),
    ("description", "dsc"),
    ("end_line", "el"),
    ("signature", "sig"),
    ("parent_module", "pm"),
    ("visibility", "vis"),
    ("match_type", "mt"),
    ("caller_file", "cf"),
    ("caller_name", "cn"),
    ("caller_line", "cl"),
    ("callee_name", "en"),
    ("call_site_line", "csl"),
    ("members", "m"),
    ("refs", "refs"),
    ("role", "rl"),
    ("peer", "pr"),
    ("modularity", "q"),
    ("modularity_contribution", "mc"),
    ("iterations", "it"),
    ("node_count", "nc"),
    ("edge_count", "ec"),
    ("community_count", "cc"),
    ("communities", "cms"),
    ("community", "cm"),
    ("community_diagnostics", "cd"),
    ("cache_hit", "cah"),
    ("tagpath_state", "tps"),
    ("tagpath_stale_reason", "tsr"),
    ("annotated_community_count", "acc"),
    ("annotated_member_count", "amc"),
    ("ambiguous_member_count", "ambc"),
    ("ambiguous_members", "amb"),
    ("candidate_count", "cand"),
    ("tagpath_candidate_count", "tcand"),
    ("evidence", "ev"),
    ("chosen_file", "chf"),
    ("symbol", "s"),
    ("symbols", "sy"),
    ("definitions", "df"),
    ("callers", "crs"),
    ("callees", "ces"),
    ("total_tracked", "tt"),
    ("modified", "md"),
    ("deleted", "dl"),
    ("unchanged", "uc"),
    ("changes", "ch"),
    ("prune_stats", "ps"),
    ("hits", "h"),
    ("rank", "rk"),
    ("snippet", "sn"),
    ("confidence", "co"),
    ("index", "ix"),
    ("summaries", "sms"),
    ("recommendations", "rec"),
    ("total_files", "tf"),
    ("stale_files", "sf"),
    ("last_indexed_secs_ago", "age"),
    ("cached_files", "caf"),
    ("total_indexed_files", "tif"),
    ("coverage_pct", "cov"),
    ("symbol_name", "syn"),
    ("file_path", "fp"),
    ("content_hash", "hsh"),
    ("summary", "sum"),
    ("tool", "tl"),
    ("view", "vw"),
    ("truncated", "tr"),
    ("follow_up", "fu"),
    ("report", "rp"),
    ("metrics", "ms"),
    ("label", "lb"),
    ("value", "v"),
    ("command", "cmd"),
    ("exit_code", "xc"),
    ("success", "ok"),
    ("artifact", "art"),
    ("digest", "dg"),
    ("bytes", "bt"),
    ("lines", "lns"),
    ("expand", "xp"),
    ("entities", "ent"),
    ("relationships", "rel"),
    ("concept_labels", "cls"),
    ("extracted_at", "at"),
    ("tokens_input", "ti"),
    ("tokens_output", "tout"),
    ("total_summaries", "ts"),
    ("stale_count", "stc"),
    ("total_tokens_input", "tti"),
    ("total_tokens_output", "tto"),
    ("estimated_tokens_saved", "ets"),
    ("files_processed", "fps"),
    ("symbols_extracted", "se"),
    ("skills_dir", "sd"),
    ("healthy", "ok"),
    ("broken", "brk"),
    ("skills", "sks"),
    ("manifest_diffs", "mdf"),
    ("similar_pairs", "sim"),
    ("usage", "usg"),
    ("cleanup", "cln"),
    ("has_skill_md", "hsm"),
    ("is_symlink", "isl"),
    ("issues", "iss"),
    ("invocation_count", "inv"),
    ("reasons", "rsn"),
    ("token_estimate", "te"),
    ("skill_a", "sa"),
    ("skill_b", "sb"),
    ("desc_a", "da"),
    ("desc_b", "db"),
    ("annotations", "ann"),
    ("entity", "ety"),
    ("suggestion", "sug"),
    ("columns", "cols"),
    ("row_count", "rc"),
    ("notnull", "nn"),
    ("default_value", "dv"),
    ("replace_all", "ra"),
];
2350
/// Strips the `root` directory prefix from a path given as text.
///
/// The prefix is `root` (lossily converted to text, trailing `/` removed)
/// followed by a single `/`. Paths that do not start with that prefix are
/// returned unchanged.
pub(crate) fn relativize(path: &str, root: &std::path::Path) -> String {
    let root_text = root.to_string_lossy();
    let mut prefix = String::with_capacity(root_text.len() + 1);
    prefix.push_str(root_text.trim_end_matches('/'));
    prefix.push('/');

    match path.strip_prefix(prefix.as_str()) {
        Some(relative) => relative.to_string(),
        None => path.to_string(),
    }
}
2356
2357fn transcript_artifact_root(path: &Path) -> Result<PathBuf> {
2358    let canonical = path
2359        .canonicalize()
2360        .with_context(|| format!("canonicalizing {}", path.display()))?;
2361    let start = if canonical.is_dir() {
2362        canonical.clone()
2363    } else {
2364        canonical
2365            .parent()
2366            .map(Path::to_path_buf)
2367            .unwrap_or_else(|| canonical.clone())
2368    };
2369
2370    for ancestor in start.ancestors() {
2371        if ancestor.join(".git").exists() || ancestor.join(".gitmodules").is_file() {
2372            return Ok(ancestor.to_path_buf());
2373        }
2374    }
2375
2376    Ok(start)
2377}
2378
/// Returns `path` relative to `root`, or an owned copy of `path` itself when it
/// does not live under `root`.
pub(crate) fn relativize_pathbuf(path: &std::path::Path, root: &std::path::Path) -> PathBuf {
    path.strip_prefix(root).unwrap_or(path).to_path_buf()
}
2384
2385pub(crate) fn relativize_edges(edges: &mut [index::StoredEdge], root: &std::path::Path) {
2386    for edge in edges {
2387        edge.caller_file = relativize(&edge.caller_file, root);
2388    }
2389}
2390
2391pub(crate) fn relativize_symbols(symbols: &mut [index::StoredSymbol], root: &std::path::Path) {
2392    for sym in symbols {
2393        sym.file = relativize(&sym.file, root);
2394    }
2395}
2396
2397pub(crate) fn relativize_symbol_hits(hits: &mut [index::SymbolHit], root: &std::path::Path) {
2398    for hit in hits {
2399        hit.file = relativize(&hit.file, root);
2400    }
2401}
2402
2403
/// Selects which endpoint of a `StoredEdge` a row treats as its primary
/// symbol: the caller (when listing callers) or the callee (when listing
/// callees).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EdgeSide {
    /// The calling end of the edge is the row's primary symbol.
    Caller,
    /// The called end of the edge is the row's primary symbol.
    Callee,
}
2411
/// Object keys whose string values are treated as file paths and rewritten
/// relative to the project root by `relativize_json_inner`.
const JSON_PATH_KEYS: &[&str] = &["file", "path", "caller_file", "file_path"];
2413
2414pub(crate) fn relativize_json_paths(val: &mut serde_json::Value, root: &std::path::Path) {
2415    let root_str = root.to_string_lossy();
2416    let prefix = format!("{}/", root_str.trim_end_matches('/'));
2417    relativize_json_inner(val, &prefix);
2418}
2419
2420fn relativize_json_inner(val: &mut serde_json::Value, prefix: &str) {
2421    match val {
2422        serde_json::Value::Array(arr) => {
2423            for v in arr {
2424                relativize_json_inner(v, prefix);
2425            }
2426        }
2427        serde_json::Value::Object(map) => {
2428            for (k, v) in map.iter_mut() {
2429                if JSON_PATH_KEYS.contains(&k.as_str())
2430                    && let serde_json::Value::String(s) = v
2431                    && let Some(rest) = s.strip_prefix(prefix)
2432                {
2433                    *s = rest.to_string();
2434                }
2435                relativize_json_inner(v, prefix);
2436            }
2437        }
2438        _ => {}
2439    }
2440}
2441
/// Formats `score` with two decimal places in compact mode and four otherwise.
pub(crate) fn format_score(score: f64, compact: bool) -> String {
    let precision: usize = if compact { 2 } else { 4 };
    format!("{score:.precision$}")
}
2449
/// Trims `input` and shortens it to at most `max_chars` characters.
///
/// Text that fits after trimming is returned as is. Longer text keeps its first
/// `max_chars - 3` characters followed by `...`; when `max_chars` is below 3
/// the result is just `...`.
pub(crate) fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    let text = input.trim();
    // No character at index `max_chars` means the text has at most that many.
    if text.chars().nth(max_chars).is_none() {
        return text.to_string();
    }
    let mut shortened: String = text.chars().take(max_chars.saturating_sub(3)).collect();
    shortened.push_str("...");
    shortened
}
2459
2460pub(crate) fn compact_snippet(snippet: &str) -> Option<String> {
2461    snippet
2462        .lines()
2463        .find(|line| !line.trim().is_empty())
2464        .map(|line| truncate_for_compact(line, 100))
2465}
2466
2467pub(crate) fn compact_members(members: &[graph::CommunityMember], limit: usize) -> String {
2468    let names: Vec<&str> = members.iter().map(|m| m.name.as_str()).collect();
2469    if names.len() <= limit {
2470        return names.join(", ");
2471    }
2472    format!(
2473        "{} (+{} more)",
2474        names[..limit].join(", "),
2475        names.len() - limit
2476    )
2477}
2478
2479pub(crate) fn stable_handle(prefix: &str, key: &str) -> String {
2480    let mut hasher = blake3::Hasher::new();
2481    hasher.update(prefix.as_bytes());
2482    hasher.update(&[0]);
2483    hasher.update(key.as_bytes());
2484    let hex = hasher.finalize().to_hex();
2485    format!("{prefix}-{}", &hex[..10])
2486}
2487
/// A tag family reduced to the two strings this file works with.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CanonicalTagFamily {
    /// The family's canonical name, taken from `TagFamily::canonical`.
    canonical: String,
    /// Alias built from the family's tags: `/` separates dimensions and `.`
    /// separates the tags inside one dimension.
    tag_alias: String,
}
2493
2494fn canonical_family_from_tagpath_family(
2495    family: tagpath_family::TagFamily,
2496) -> Option<CanonicalTagFamily> {
2497    let tag_alias = if family.dimensions.is_empty() {
2498        family.tags.join("/")
2499    } else {
2500        family
2501            .dimensions
2502            .iter()
2503            .filter(|dimension| !dimension.tags.is_empty())
2504            .map(|dimension| dimension.tags.join("."))
2505            .collect::<Vec<_>>()
2506            .join("/")
2507    };
2508
2509    if tag_alias.is_empty() {
2510        None
2511    } else {
2512        Some(CanonicalTagFamily {
2513            canonical: family.canonical,
2514            tag_alias,
2515        })
2516    }
2517}
2518
2519fn canonical_tag_family_from_name(name: &str) -> Option<CanonicalTagFamily> {
2520    let trimmed = name.trim();
2521    if trimmed.is_empty() {
2522        return None;
2523    }
2524
2525    canonical_family_from_tagpath_family(tagpath_family::generate_family(trimmed))
2526}
2527
2528fn canonical_tag_family_from_tags(tags: &str) -> Option<CanonicalTagFamily> {
2529    let canonical = tags
2530        .split(',')
2531        .map(str::trim)
2532        .filter(|tag| !tag.is_empty())
2533        .collect::<Vec<_>>()
2534        .join("_");
2535    if canonical.is_empty() {
2536        None
2537    } else {
2538        canonical_family_from_tagpath_family(tagpath_family::generate_family(&canonical))
2539    }
2540}
2541
2542pub(crate) fn canonical_tag_family_from_symbol(name: &str, tags: Option<&str>) -> Option<CanonicalTagFamily> {
2543    tags.and_then(canonical_tag_family_from_tags)
2544        .or_else(|| canonical_tag_family_from_name(name))
2545}
2546
2547fn tag_alias_from_name(name: &str) -> Option<String> {
2548    canonical_tag_family_from_name(name).map(|family| family.tag_alias)
2549}
2550
2551fn tag_alias_from_tags(name: &str, tags: Option<&str>) -> Option<String> {
2552    canonical_tag_family_from_symbol(name, tags).map(|family| family.tag_alias)
2553}
2554
/// Turns a tag alias into a space-separated query string.
///
/// The alias is split on `/` and `.`, each piece is trimmed, blank pieces are
/// dropped, and the rest are joined with single spaces. Returns `None` when
/// nothing remains.
pub(crate) fn family_query_from_tag_alias(tag_alias: &str) -> Option<String> {
    let words: Vec<&str> = tag_alias
        .split(['/', '.'])
        .map(str::trim)
        .filter(|word| !word.is_empty())
        .collect();
    (!words.is_empty()).then(|| words.join(" "))
}
2564
2565#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
2566struct CompactOntologyRefPreview {
2567    handle: String,
2568    tag: String,
2569    path: String,
2570    #[serde(skip_serializing_if = "Option::is_none")]
2571    title: Option<String>,
2572    #[serde(skip_serializing_if = "Option::is_none")]
2573    domain: Option<String>,
2574}
2575
2576#[derive(Clone, Debug)]
2577struct TagOntologyPreviewContext {
2578    project_root: PathBuf,
2579    tags: BTreeMap<String, tagpath_ontology::OntologyTag>,
2580}
2581
2582#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
2583struct CompactSymbolRefPreview {
2584    handle: String,
2585    name: String,
2586    #[serde(skip_serializing_if = "Option::is_none")]
2587    tag_alias: Option<String>,
2588    #[serde(skip_serializing_if = "Vec::is_empty", default)]
2589    ontology_refs: Vec<CompactOntologyRefPreview>,
2590}
2591
2592fn build_compact_symbol_ref(
2593    prefix: &str,
2594    key: &str,
2595    name: &str,
2596    tags: Option<&str>,
2597    max_bytes: usize,
2598) -> CompactSymbolRefPreview {
2599    build_compact_symbol_ref_with_ontology(prefix, key, name, tags, max_bytes, None)
2600}
2601
2602fn build_compact_symbol_ref_with_ontology(
2603    prefix: &str,
2604    key: &str,
2605    name: &str,
2606    tags: Option<&str>,
2607    max_bytes: usize,
2608    ontology: Option<&TagOntologyPreviewContext>,
2609) -> CompactSymbolRefPreview {
2610    let tag_alias = tag_alias_from_tags(name, tags);
2611    let ontology_refs = tag_alias
2612        .as_deref()
2613        .map(|alias| ontology_refs_for_alias(ontology, alias))
2614        .unwrap_or_default();
2615    CompactSymbolRefPreview {
2616        handle: stable_handle(prefix, key),
2617        name: truncate_for_budget(name, max_bytes),
2618        tag_alias: tag_alias.map(|alias| truncate_for_budget(&alias, max_bytes)),
2619        ontology_refs,
2620    }
2621}
2622
2623fn load_tag_ontology_preview_context(root: &Path) -> Option<TagOntologyPreviewContext> {
2624    let report = tagpath_ontology::load_project(root).ok()?;
2625    if report.tags.is_empty() {
2626        return None;
2627    }
2628    Some(TagOntologyPreviewContext {
2629        project_root: report.project_path,
2630        tags: report
2631            .tags
2632            .into_iter()
2633            .map(|tag| (tag.tag.clone(), tag))
2634            .collect(),
2635    })
2636}
2637
2638fn ontology_refs_for_alias(
2639    ontology: Option<&TagOntologyPreviewContext>,
2640    alias: &str,
2641) -> Vec<CompactOntologyRefPreview> {
2642    let Some(ontology) = ontology else {
2643        return Vec::new();
2644    };
2645    let mut seen = BTreeSet::new();
2646    alias
2647        .split('/')
2648        .flat_map(|part| part.split('.'))
2649        .map(str::trim)
2650        .filter(|tag| !tag.is_empty())
2651        .filter_map(|tag| {
2652            let key = tag.to_ascii_lowercase();
2653            if !seen.insert(key.clone()) {
2654                return None;
2655            }
2656            let ontology_tag = ontology.tags.get(&key)?;
2657            let path = relativize_ontology_path(&ontology_tag.path, &ontology.project_root);
2658            Some(CompactOntologyRefPreview {
2659                handle: stable_handle("tont", &format!("{}:{path}", ontology_tag.tag)),
2660                tag: ontology_tag.tag.clone(),
2661                path,
2662                title: ontology_tag.title.clone(),
2663                domain: ontology_tag.domain.clone(),
2664            })
2665        })
2666        .collect()
2667}
2668
/// Renders `path` relative to `root` as text with forward slashes.
///
/// A path outside `root` is rendered in full. Every backslash in the rendered
/// text is replaced by `/`.
fn relativize_ontology_path(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(rest) => rest,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
2675
/// Formats one preview line as `<handle> <name>`, followed by ` tag:<alias>`
/// when an alias is given.
fn format_symbol_preview_line(handle: &str, name: &str, tag_alias: Option<&str>) -> String {
    let mut line = format!("{handle} {name}");
    if let Some(alias) = tag_alias {
        line.push_str(" tag:");
        line.push_str(alias);
    }
    line
}
2682
2683fn format_summary_ref_line(summary: &ContextPackSummaryRefPreview) -> String {
2684    match summary.tag_alias.as_deref() {
2685        Some(alias) => format!(
2686            "{} {} tag:{} expand:{}",
2687            summary.handle, summary.symbol, alias, summary.expand
2688        ),
2689        None => format!(
2690            "{} {} expand:{}",
2691            summary.handle, summary.symbol, summary.expand
2692        ),
2693    }
2694}
2695
2696fn compact_symbol_ref_token(symbol: &CompactSymbolRefPreview) -> String {
2697    match symbol.tag_alias.as_deref() {
2698        Some(alias) => format!("{}@{}", symbol.handle, alias),
2699        None => format!("{}@{}", symbol.handle, symbol.name),
2700    }
2701}
2702
/// Trims `input` and shortens it to at most `max_bytes` bytes of UTF-8.
///
/// Text that fits after trimming is returned as is. With a budget of three
/// bytes or fewer the result is that many `.` characters. Otherwise the result
/// is the longest prefix that ends on a character boundary and fits in
/// `max_bytes - 3` bytes, followed by `...`.
pub(crate) fn truncate_for_budget(input: &str, max_bytes: usize) -> String {
    const ELLIPSIS: &str = "...";

    let trimmed = input.trim();
    if trimmed.len() <= max_bytes {
        return trimmed.to_string();
    }
    if max_bytes <= ELLIPSIS.len() {
        return ".".repeat(max_bytes);
    }

    // Largest character end offset that still leaves room for the ellipsis;
    // zero when even the first character is too wide.
    let room = max_bytes - ELLIPSIS.len();
    let cut = trimmed
        .char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
        .take_while(|&end| end <= room)
        .last()
        .unwrap_or(0);
    format!("{}{ELLIPSIS}", &trimmed[..cut])
}
2727
2728struct TokenCappedPreview {
2729    preview: Vec<SourceLinePreview>,
2730    capped_end: usize,
2731    was_capped: bool,
2732}
2733
2734fn build_token_capped_preview(
2735    all_lines: &[&str],
2736    start: usize,
2737    end: usize,
2738    max_bytes: usize,
2739    token_cap: usize,
2740) -> TokenCappedPreview {
2741    let mut preview = Vec::new();
2742    let mut accumulated_tokens = 0usize;
2743    let mut capped_end = end;
2744    let mut was_capped = false;
2745
2746    for (idx, line) in all_lines[(start - 1)..end].iter().enumerate() {
2747        let truncated = truncate_for_budget(line, max_bytes);
2748        let line_tokens = estimated_tokens_from_bytes(truncated.len());
2749        if accumulated_tokens + line_tokens > token_cap && !preview.is_empty() {
2750            capped_end = start + idx - 1;
2751            was_capped = true;
2752            break;
2753        }
2754        accumulated_tokens += line_tokens;
2755        preview.push(SourceLinePreview {
2756            line: start + idx,
2757            text: truncated,
2758        });
2759    }
2760
2761    TokenCappedPreview {
2762        preview,
2763        capped_end,
2764        was_capped,
2765    }
2766}
2767
/// Maps a symbol kind to its short display form.
///
/// Kinds without a dedicated abbreviation — including the already short
/// `mod`, `struct`, `trait`, `impl`, and `alias` — are returned unchanged.
pub(crate) fn abbreviate_kind(kind: &str) -> &str {
    match kind {
        // Callables and modules.
        "function" => "fn",
        "method" => "meth",
        "module" => "mod",
        // Type-like declarations.
        "class" => "cls",
        "interface" => "iface",
        "type_alias" => "type",
        "data_class" => "data_cls",
        "sealed_class" => "sealed_cls",
        "enum_class" => "enum_cls",
        "companion_object" => "comp_obj",
        "object" => "obj",
        // Markdown structure.
        "heading" => "h",
        "code_block" => "code",
        unchanged => unchanged,
    }
}
2790
/// Maps a graph edge kind to its short code for compact output.
///
/// Arms are listed alphabetically by edge kind. Kinds without a dedicated
/// code are returned unchanged.
pub(crate) fn abbreviate_edge_kind(kind: &str) -> &str {
    match kind {
        "belongs_to" => "bt",
        "calls" => "c",
        "child" => "ch",
        "community_member" => "cm",
        "contains" => "ct",
        "contains_embedded_code" => "cec",
        "contains_embedded_symbol" => "ces",
        "contains_markdown_block" => "cmb",
        "defines" => "d",
        "defines_route" => "dr",
        "embedded_in_fence" => "ef",
        "enclosing_module" => "em",
        "enclosing_section" => "es",
        "explains_result" => "er",
        "explicit_depends_on" => "edo",
        "handled_by" => "hb",
        "handles_route" => "hr",
        "has_ast_span" => "ha",
        "has_vector_handle" => "hv",
        "imports" => "i",
        "mentions" => "m",
        "mentions_concept" => "mc",
        "mentions_entity" => "me",
        "next_sibling" => "nsib",
        "parent" => "p",
        "previous_sibling" => "psib",
        "projects_source" => "psrc",
        "records_memory_event" => "rme",
        "records_memory_source" => "rms",
        "related_concept" => "relc",
        "represents_symbol" => "rs",
        "requests_context" => "rctx",
        "scopes_context" => "sctx",
        "scopes_source" => "ssrc",
        "semantic_relation" => "sr",
        "shared_resource" => "shr",
        "tagged_concept" => "tc",
        "tagged_entity" => "te",
        "targets" => "tgt",
        "uses" => "u",
        "worker_result_follow_up" => "wrf",
        unchanged => unchanged,
    }
}
2837
/// Shortens a search match-type label.
///
/// `exact_name` becomes `exact` and `partial_tags` becomes `partial`; every
/// other label, including `all_tags`, is returned unchanged.
pub(crate) fn abbreviate_match_type(mt: &str) -> &str {
    match mt {
        "exact_name" => "exact",
        "partial_tags" => "partial",
        unchanged => unchanged,
    }
}
2846
2847pub(crate) fn symbol_path_summary(path: &[graph::PathNode]) -> String {
2848    path.iter()
2849        .map(|n| n.name.as_str())
2850        .collect::<Vec<_>>()
2851        .join(" -> ")
2852}
2853
/// Maximum number of distinct sample snippets kept per grouped file.
const SEARCH_GROUP_SAMPLE_LIMIT: usize = 2;

/// Search hits that share one path, as aggregated by `group_search_hits`.
struct SearchHitGroup {
    /// Path shared by the grouped hits (absolute or root-relative, per the caller's flag).
    path: String,
    /// Lowest rank among the grouped hits.
    first_rank: usize,
    /// Highest score among the grouped hits.
    top_score: f64,
    /// `Debug` rendering of the confidence of the first hit seen for this path.
    confidence: String,
    /// Number of hits collapsed into this group.
    hits: usize,
    /// Up to `SEARCH_GROUP_SAMPLE_LIMIT` distinct formatted samples.
    samples: Vec<String>,
}
2864
2865fn format_search_sample(hit: &sift::SearchHit) -> Option<String> {
2866    let snippet = compact_snippet(&hit.snippet)?;
2867    Some(match hit.location.as_deref() {
2868        Some(location) => format!("{location}: {snippet}"),
2869        None => snippet,
2870    })
2871}
2872
2873pub(crate) fn group_search_hits(
2874    hits: &[sift::SearchHit],
2875    root: &Path,
2876    absolute: bool,
2877) -> Vec<SearchHitGroup> {
2878    let mut positions = BTreeMap::new();
2879    let mut groups = Vec::new();
2880    for hit in hits {
2881        let path = if absolute {
2882            hit.path.clone()
2883        } else {
2884            relativize(&hit.path, root)
2885        };
2886        let entry = positions.entry(path.clone()).or_insert_with(|| {
2887            groups.push(SearchHitGroup {
2888                path: path.clone(),
2889                first_rank: hit.rank,
2890                top_score: hit.score,
2891                confidence: format!("{:?}", hit.confidence),
2892                hits: 0,
2893                samples: Vec::new(),
2894            });
2895            groups.len() - 1
2896        });
2897        let group = &mut groups[*entry];
2898        group.hits += 1;
2899        if hit.rank < group.first_rank {
2900            group.first_rank = hit.rank;
2901        }
2902        if hit.score > group.top_score {
2903            group.top_score = hit.score;
2904        }
2905        if let Some(sample) = format_search_sample(hit)
2906            && group.samples.len() < SEARCH_GROUP_SAMPLE_LIMIT
2907            && !group.samples.contains(&sample)
2908        {
2909            group.samples.push(sample);
2910        }
2911    }
2912    groups.sort_by_key(|group| group.first_rank);
2913    groups
2914}
2915
2916pub(crate) fn should_collapse_search_hits(
2917    hits: &[sift::SearchHit],
2918    root: &Path,
2919    absolute: bool,
2920) -> bool {
2921    let groups = group_search_hits(hits, root, absolute);
2922    let max_hits_per_file = groups.iter().map(|group| group.hits).max().unwrap_or(0);
2923    max_hits_per_file >= 3 || (hits.len() >= 6 && groups.len() < hits.len())
2924}
2925
2926pub(crate) fn format_edge_groups(edges: &[index::StoredEdge], use_callers: bool) -> Vec<String> {
2927    let mut grouped: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
2928    for edge in edges {
2929        let key = edge.caller_file.as_str();
2930        let name = if use_callers {
2931            edge.caller_name.as_str()
2932        } else {
2933            edge.callee_name.as_str()
2934        };
2935        let names = grouped.entry(key).or_default();
2936        if !names.contains(&name) {
2937            names.push(name);
2938        }
2939    }
2940
2941    grouped
2942        .into_iter()
2943        .map(|(file, names)| format!("  {} ({}): {}", file, names.len(), names.join(", ")))
2944        .collect()
2945}
2946
2947pub(crate) fn should_collapse_edge_groups(edges: &[index::StoredEdge]) -> bool {
2948    let mut grouped: BTreeMap<&str, usize> = BTreeMap::new();
2949    for edge in edges {
2950        *grouped.entry(edge.caller_file.as_str()).or_default() += 1;
2951    }
2952    let max_hits_per_file = grouped.values().copied().max().unwrap_or(0);
2953    max_hits_per_file >= 3 || (edges.len() >= 6 && grouped.len() < edges.len())
2954}
2955
2956
2957fn resolve_query_index_target(
2958    root: &Path,
2959    path_hint: &Path,
2960    scope: Option<&str>,
2961) -> Result<SearchIndexTarget> {
2962    let cfg = config::Config::load(root)?;
2963    if let Some(scope_name) = scope {
2964        if let Some(scope) = config::Config::find_submodule(root, scope_name)? {
2965            return Ok(SearchIndexTarget {
2966                label: format!("submodule `{}` index", scope.id),
2967                db_path: cfg.db_path_for(root, &scope.id),
2968                source_root: scope.source_root.clone(),
2969                scope_name: Some(scope.id.clone()),
2970                reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
2971            });
2972        }
2973        if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
2974            return Ok(cargo_package_index_target(root, package));
2975        }
2976        config::Config::resolve_submodule(root, scope_name)?;
2977    }
2978
2979    if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
2980        return Ok(SearchIndexTarget {
2981            label: format!("submodule `{}` index", scope.id),
2982            db_path: cfg.db_path_for(root, &scope.id),
2983            source_root: scope.source_root.clone(),
2984            scope_name: Some(scope.id.clone()),
2985            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
2986        });
2987    }
2988
2989    if let Some(package) = multiplicity::infer_cargo_package_from_path(root, path_hint)? {
2990        return Ok(cargo_package_index_target(root, package));
2991    }
2992
2993    if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
2994        return Ok(SearchIndexTarget {
2995            label: format!("submodule `{}` index", scope.id),
2996            db_path: cfg.db_path_for(root, &scope.id),
2997            source_root: scope.source_root.clone(),
2998            scope_name: Some(scope.id.clone()),
2999            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
3000        });
3001    }
3002
3003    let db_path = root.join(".tsift/index.db");
3004    if db_path.exists() {
3005        return Ok(SearchIndexTarget {
3006            label: "index".to_string(),
3007            db_path,
3008            source_root: root.to_path_buf(),
3009            scope_name: None,
3010            reindex_cmd: format!("tsift index {}", root.display()),
3011        });
3012    }
3013
3014    let scopes = config::Config::submodule_dirs(root)?;
3015    if scopes.is_empty() {
3016        return Ok(SearchIndexTarget {
3017            label: "index".to_string(),
3018            db_path,
3019            source_root: root.to_path_buf(),
3020            scope_name: None,
3021            reindex_cmd: format!("tsift index {}", root.display()),
3022        });
3023    }
3024
3025    let available_scopes = scopes
3026        .iter()
3027        .map(|scope| scope.id.as_str())
3028        .collect::<Vec<_>>()
3029        .join(", ");
3030    let indexed_scopes = scopes
3031        .iter()
3032        .filter(|scope| cfg.db_path_for(root, &scope.id).exists())
3033        .map(|scope| scope.id.as_str())
3034        .collect::<Vec<_>>();
3035    let indexed_label = if indexed_scopes.is_empty() {
3036        "none".to_string()
3037    } else {
3038        indexed_scopes.join(", ")
3039    };
3040
3041    bail!(
3042        "workspace root {} has no shared root index at {}. Read-only graph queries require `--scope <scope>` when the workspace is indexed into `.tsift/indexes/*/index.db`. Available scopes: {}. Indexed scopes: {}.",
3043        root.display(),
3044        db_path.display(),
3045        available_scopes,
3046        indexed_label
3047    );
3048}
3049
3050pub(crate) fn resolve_query_db_path(root: &Path, path_hint: &Path, scope: Option<&str>) -> Result<PathBuf> {
3051    Ok(resolve_query_index_target(root, path_hint, scope)?.db_path)
3052}
3053
3054fn ensure_query_index_current(root: &Path, target: &SearchIndexTarget) -> Result<()> {
3055    let state = inspect_search_index(target)?;
3056    let Some(reason) = index_reason_for_state(state) else {
3057        return Ok(());
3058    };
3059
3060    match apply_search_index_update(root, target) {
3061        Ok(_) => {
3062            index::inspect_scope_invalidate_all();
3063            Ok(())
3064        }
3065        Err(err) if is_active_writer_lock_error(&err) && target.db_path.exists() => {
3066            eprintln!(
3067                "note: active tsift writer detected; skipping graph-query autoindex because {}. \
3068                 Continuing with the current read-only index snapshot; graph results may lag. \
3069                 Retry `{}` after the active writer finishes for fresh graph results.",
3070                index_reason_detail(target, reason),
3071                target.reindex_cmd
3072            );
3073            Ok(())
3074        }
3075        Err(err) => Err(err),
3076    }
3077}
3078
3079pub(crate) fn open_index_db(path: &std::path::Path, scope: Option<&str>) -> Result<index::IndexDb> {
3080    let root = lint::resolve_project_root_or_canonical_path(path)?;
3081    let target = resolve_query_index_target(&root, path, scope)?;
3082    ensure_query_index_current(&root, &target)?;
3083    let db_path = target.db_path;
3084    if !db_path.exists() {
3085        bail!(
3086            "no index found at {}. Run `tsift index` first.",
3087            db_path.display()
3088        );
3089    }
3090    index::IndexDb::open_read_only_resilient(&db_path)
3091}
3092
3093pub(crate) fn query_tagpath_root(
3094    root: &std::path::Path,
3095    path_hint: &std::path::Path,
3096    scope: Option<&str>,
3097) -> Result<PathBuf> {
3098    if let Some(scope_name) = scope {
3099        if let Some(scope) = config::Config::find_submodule(root, scope_name)? {
3100            return Ok(scope.source_root);
3101        }
3102        if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
3103            return Ok(package.package_root);
3104        }
3105        config::Config::resolve_submodule(root, scope_name)?;
3106    }
3107    if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
3108        return Ok(scope.source_root);
3109    }
3110    if let Some(package) = multiplicity::infer_cargo_package_from_path(root, path_hint)? {
3111        return Ok(package.package_root);
3112    }
3113    Ok(root.to_path_buf())
3114}
3115
/// A node of the traversal graph.
///
/// When serialized, `None` fields and an empty `properties` map are omitted.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalNode {
    /// Key under which the node is stored in `TraversalGraphBuild::nodes`.
    handle: String,
    kind: String,
    label: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    ref_id: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    line: Option<i64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    /// Free-form string key/value properties attached to the node.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    properties: BTreeMap<String, String>,
    // NOTE(review): presumably a follow-up command for expanding this node — confirm against producers.
    expand: String,
}
3133
/// A directed, typed edge of the traversal graph.
///
/// `from` and `to` hold node handles. `TraversalGraphBuild::add_edge` only
/// creates edges between handles that are already present in its node map.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalEdge {
    from: String,
    to: String,
    /// Relation name; part of the de-duplication key together with `from` and `to`.
    relation: String,
    /// Optional label; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    label: Option<String>,
    weight: usize,
}
3143
/// Accumulator used while assembling a traversal graph.
#[derive(Clone, Debug, Default)]
struct TraversalGraphBuild {
    /// Nodes keyed by handle; the first node inserted for a handle is kept.
    nodes: BTreeMap<String, TraversalNode>,
    /// Edges in insertion order.
    edges: Vec<TraversalEdge>,
    /// `(from, to, relation)` keys of edges already in `edges`, used for de-duplication.
    edge_keys: BTreeSet<(String, String, String)>,
    /// Warnings collected during the build.
    warnings: Vec<String>,
}
3151
// Contract/version identifiers and graph-db scan caps.
// NOTE(review): the consumers of these constants are outside this section —
// confirm usage before changing any value.
pub(crate) const GRAPH_PROJECTION_VERSION: &str = "tsift-traversal-v1";
const GRAPH_DB_EVIDENCE_CONTRACT_VERSION: &str = "graph-db-evidence-v1";
const WORKER_PROMPT_PACKET_CONTRACT_VERSION: &str = "worker-prompt-packet-v1";
const CONFLICT_MATRIX_CONTRACT_VERSION: &str = "conflict-matrix-v1";
const CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION: &str =
    "context-pack-graph-orchestration-v1";
const SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION: &str = "session-review-follow-up-v1";
const DISPATCH_TRACE_CONTRACT_VERSION: &str = "dispatch-trace-v1";
const DEPENDENCY_DAG_CONTRACT_VERSION: &str = "dependency-dag-v1";
const GRAPH_PROJECTION_META_KIND: &str = "projection_meta";
const GRAPH_DB_RANKED_NEIGHBOR_CAP: usize = 12;
const GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP: usize = 16;
const GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP: usize = 64;
3165
/// Node and edge counts reported with a traversal.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalTotals {
    nodes: usize,
    edges: usize,
}
3171
/// A path between two traversal nodes, with its endpoints, hop count, and
/// the nodes and edges along it.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalPathReport {
    from: TraversalNode,
    to: TraversalNode,
    hops: usize,
    nodes: Vec<TraversalNode>,
    edges: Vec<TraversalEdge>,
}
3180
/// A suggested node to look at next, with a textual reason and a numeric score.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalRecommendation {
    handle: String,
    kind: String,
    label: String,
    reason: String,
    score: usize,
    expand: String,
}
3190
/// Serializable result of a traversal query.
///
/// `None` options and an empty `warnings` list are omitted from the
/// serialized output.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    mode: String,
    totals: TraversalTotals,
    #[serde(skip_serializing_if = "Option::is_none")]
    query: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    target: Option<String>,
    nodes: Vec<TraversalNode>,
    edges: Vec<TraversalEdge>,
    #[serde(skip_serializing_if = "Option::is_none")]
    shortest_path: Option<TraversalPathReport>,
    recommendations: Vec<TraversalRecommendation>,
    exploration: ExplorationPacket,
    // NOTE(review): presumably set when nodes/edges were cut by a limit — confirm against the producer.
    truncated: bool,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
3212
/// Serializable result of a semantic-related lookup.
///
/// A `None` scope and an empty `warnings` list are omitted from the
/// serialized output.
#[derive(Debug, Serialize, PartialEq)]
struct SemanticRelatedReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    query: String,
    embedding_model: String,
    count: usize,
    items: Vec<SemanticRelatedItem>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
3225
/// One scored entry of a `SemanticRelatedReport`.
///
/// `None` fields are omitted from the serialized output.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct SemanticRelatedItem {
    handle: String,
    kind: String,
    label: String,
    score: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    file_path: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_symbol: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    expand: String,
}
3240
/// Lookup entry for a symbol: its handle, its traversal node, and a token set.
// NOTE(review): `tokens` are presumably the search tokens derived from the symbol — confirm at the build site.
#[derive(Clone)]
struct TraversalSymbolIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
3247
/// Lookup entry for a file: its handle, its traversal node, and a token set.
// NOTE(review): `tokens` are presumably the search tokens derived from the file — confirm at the build site.
#[derive(Clone)]
struct TraversalFileIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
3254
/// Lookup entry for a route: its handle, its traversal node, and a token set.
// NOTE(review): `tokens` are presumably the search tokens derived from the route — confirm at the build site.
#[derive(Clone)]
struct TraversalRouteIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
3261
/// Lookup entry describing an AST span and the symbol/file handles it relates to.
#[derive(Clone)]
struct TraversalAstSpanIndexEntry {
    handle: String,
    symbol_handle: String,
    file_handle: Option<String>,
    file: String,
    name: String,
    kind: String,
    language: String,
    node_kind: String,
    // NOTE(review): byte offsets are presumably relative to the start of `file` — confirm.
    start_byte: usize,
    end_byte: usize,
    parent_module: Option<String>,
    markdown: Option<MarkdownSpanMetadata>,
}
3277
/// Lookup entry for a multiplicity node: its handle, its traversal node, and a token set.
// NOTE(review): `tokens` are presumably the search tokens derived from the entry — confirm at the build site.
#[derive(Clone)]
struct TraversalMultiplicityIndexEntry {
    handle: String,
    node: TraversalNode,
    tokens: BTreeSet<String>,
}
3284
/// Borrowed entry slices plus string-keyed lookup maps over them.
// NOTE(review): the `Vec<usize>` values are presumably positions into the
// matching slice, keyed by token — confirm where the maps are built.
struct TraversalCodeLookup<'a> {
    symbols: &'a [TraversalSymbolIndexEntry],
    files: &'a [TraversalFileIndexEntry],
    routes: &'a [TraversalRouteIndexEntry],
    multiplicities: &'a [TraversalMultiplicityIndexEntry],
    symbol_index: HashMap<String, Vec<usize>>,
    file_index: HashMap<String, Vec<usize>>,
    route_index: HashMap<String, Vec<usize>>,
    multiplicity_index: HashMap<String, Vec<usize>>,
    file_path_index: HashMap<String, String>,
}
3296
/// Size limits reported with an exploration packet.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationBudget {
    project_size: String,
    max_source_windows: usize,
    lines_per_window: usize,
    relationship_limit: usize,
}
3304
/// One `from --relation--> to` entry of an exploration relationship map.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationRelation {
    from: String,
    relation: String,
    to: String,
    /// Optional label; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    label: Option<String>,
}
3313
/// A range of a source file suggested for reading, with the reason it was chosen.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationSourceWindow {
    handle: String,
    file: String,
    // NOTE(review): `start`/`end` are presumably 1-based inclusive line numbers — confirm.
    start: usize,
    end: usize,
    reason: String,
    expand: String,
}
3323
/// A worker-context entry of an exploration packet: a handle, its target, and a summary.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationWorkerContext {
    handle: String,
    target: String,
    summary: String,
    expand: String,
}
3331
/// Exploration guidance attached to a `TraversalReport`.
///
/// An empty `worker_context` list is omitted from the serialized output.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationPacket {
    budget: ExplorationBudget,
    relationship_map: Vec<ExplorationRelation>,
    source_windows: Vec<ExplorationSourceWindow>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    worker_context: Vec<ExplorationWorkerContext>,
    no_reread_guidance: String,
}
3341
3342impl TraversalGraphBuild {
3343    fn add_node(&mut self, node: TraversalNode) {
3344        self.nodes.entry(node.handle.clone()).or_insert(node);
3345    }
3346
3347    fn add_edge(
3348        &mut self,
3349        from: &str,
3350        to: &str,
3351        relation: &str,
3352        label: Option<String>,
3353        weight: usize,
3354    ) {
3355        if from == to || !self.nodes.contains_key(from) || !self.nodes.contains_key(to) {
3356            return;
3357        }
3358        let key = (from.to_string(), to.to_string(), relation.to_string());
3359        if self.edge_keys.insert(key) {
3360            self.edges.push(TraversalEdge {
3361                from: from.to_string(),
3362                to: to.to_string(),
3363                relation: relation.to_string(),
3364                label,
3365                weight,
3366            });
3367        }
3368    }
3369}
3370
/// Returns the location of the graph substrate database under `root`.
///
/// With a scope the path is `.tsift/indexes/<scope>/graph.db`; without one it
/// is `.tsift/graph.db`.
pub(crate) fn graph_substrate_db_path(root: &Path, scope: Option<&str>) -> PathBuf {
    let Some(scope) = scope else {
        return root.join(".tsift/graph.db");
    };

    let mut db_path = root.join(".tsift/indexes");
    db_path.push(scope);
    db_path.push("graph.db");
    db_path
}
3377
/// Builds the id of the projection metadata record for a scope.
///
/// The id is `projection:tsift-traversal:<scope>`, with `root` standing in
/// when no scope is given.
fn graph_projection_meta_id(scope: Option<&str>) -> String {
    let scope_label = match scope {
        Some(name) => name,
        None => "root",
    };
    format!("projection:tsift-traversal:{}", scope_label)
}
3381
3382pub(crate) fn content_hash<T: Serialize>(value: &T) -> Result<String> {
3383    let bytes = serde_json::to_vec(value)?;
3384    Ok(blake3::hash(&bytes).to_hex().to_string())
3385}
3386
3387fn node_with_content_freshness(mut node: SubstrateGraphNode) -> Result<SubstrateGraphNode> {
3388    let mut hashable = node.clone();
3389    hashable.freshness = None;
3390    node.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
3391    Ok(node)
3392}
3393
3394fn edge_with_content_freshness(mut edge: SubstrateGraphEdge) -> Result<SubstrateGraphEdge> {
3395    let mut hashable = edge.clone();
3396    hashable.freshness = None;
3397    edge.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
3398    Ok(edge)
3399}
3400
/// Number of components in a local semantic embedding vector.
const SEMANTIC_EMBEDDING_DIM: usize = 32;
/// Model name stored in the `embedding_model` property of semantic nodes.
const SEMANTIC_EMBEDDING_MODEL: &str = "tsift-local-hash-v1";
/// Base row cap for memory graph projection; the tsift-memory reader is asked
/// for three times this many events.
const CLAUDE_MEM_GRAPH_LIMIT_PER_TABLE: usize = 200;
3404
3405fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
3406    match kind {
3407        SemanticRelatedKind::Concept => "concept",
3408        SemanticRelatedKind::Entity => "entity",
3409        SemanticRelatedKind::All => "all",
3410    }
3411}
3412
3413fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
3414    format!(
3415        "tsift semantic {} --path {} --kind {} --limit 10",
3416        shell_quote(query),
3417        shell_quote(root.to_string_lossy().as_ref()),
3418        semantic_related_kind_name(kind)
3419    )
3420}
3421
3422fn semantic_embedding(input: &str) -> Vec<f64> {
3423    let mut vector = vec![0.0; SEMANTIC_EMBEDDING_DIM];
3424    let mut tokens = traversal_tokens(input);
3425    if tokens.is_empty() {
3426        let trimmed = input.trim().to_ascii_lowercase();
3427        if !trimmed.is_empty() {
3428            tokens.insert(trimmed);
3429        }
3430    }
3431
3432    for token in tokens {
3433        let hash = blake3::hash(token.as_bytes());
3434        let bytes = hash.as_bytes();
3435        let idx = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
3436        let sign = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
3437        vector[idx] += sign;
3438    }
3439
3440    let norm = vector.iter().map(|value| value * value).sum::<f64>().sqrt();
3441    if norm > 0.0 {
3442        for value in &mut vector {
3443            *value /= norm;
3444        }
3445    }
3446    vector
3447}
3448
3449fn semantic_embedding_property(input: &str) -> String {
3450    semantic_embedding(input)
3451        .iter()
3452        .map(|value| format!("{value:.6}"))
3453        .collect::<Vec<_>>()
3454        .join(",")
3455}
3456
3457fn parse_semantic_embedding_property(value: &str) -> Option<Vec<f64>> {
3458    let parsed = value
3459        .split(',')
3460        .map(str::trim)
3461        .map(str::parse::<f64>)
3462        .collect::<std::result::Result<Vec<_>, _>>()
3463        .ok()?;
3464    (parsed.len() == SEMANTIC_EMBEDDING_DIM).then_some(parsed)
3465}
3466
/// Returns the dot product of two vectors, or `0.0` when their lengths differ.
///
/// No normalization is applied, so the result is a cosine similarity only
/// when both inputs are unit-length vectors.
fn semantic_cosine(left: &[f64], right: &[f64]) -> f64 {
    if left.len() == right.len() {
        left.iter()
            .zip(right)
            .map(|(lhs, rhs)| lhs * rhs)
            .sum::<f64>()
    } else {
        0.0
    }
}
3476
3477fn semantic_entity_handle(name: &str, kind: &str) -> String {
3478    stable_handle(
3479        "gent",
3480        &format!(
3481            "entity:{}:{}",
3482            kind.trim().to_ascii_lowercase(),
3483            name.trim().to_ascii_lowercase()
3484        ),
3485    )
3486}
3487
3488fn semantic_concept_handle(label: &str) -> String {
3489    stable_handle(
3490        "gcon",
3491        &format!("concept:{}", label.trim().to_ascii_lowercase()),
3492    )
3493}
3494
3495fn summary_source_handles(
3496    summary: &summarize::Summary,
3497    file_node_by_path: &BTreeMap<String, String>,
3498    symbol_node_by_file_label: &BTreeMap<(String, String), String>,
3499) -> Vec<String> {
3500    let mut handles = Vec::new();
3501    if let Some(handle) = file_node_by_path.get(&summary.file_path) {
3502        handles.push(handle.clone());
3503    }
3504    if let Some(handle) =
3505        symbol_node_by_file_label.get(&(summary.file_path.clone(), summary.symbol_name.clone()))
3506        && !handles.iter().any(|existing| existing == handle)
3507    {
3508        handles.push(handle.clone());
3509    }
3510    handles
3511}
3512
3513fn semantic_entity_node(
3514    root: &Path,
3515    summary: &summarize::Summary,
3516    name: &str,
3517    kind: &str,
3518    description: &str,
3519    provenance: &GraphProvenance,
3520) -> SubstrateGraphNode {
3521    let handle = semantic_entity_handle(name, kind);
3522    let detail = if description.trim().is_empty() {
3523        format!("{kind} entity from cached summaries")
3524    } else {
3525        format!("{kind}: {description}")
3526    };
3527    SubstrateGraphNode::new(handle.clone(), "semantic_entity", name.to_string())
3528        .with_property("handle", handle)
3529        .with_property("ref_id", name.to_string())
3530        .with_property("detail", detail)
3531        .with_property("entity_kind", kind.to_string())
3532        .with_property("description", description.to_string())
3533        .with_property("source_file", summary.file_path.clone())
3534        .with_property("source_symbol", summary.symbol_name.clone())
3535        .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
3536        .with_property(
3537            "embedding",
3538            semantic_embedding_property(&format!("{name} {kind} {description}")),
3539        )
3540        .with_property(
3541            "expand",
3542            semantic_related_command(root, name, SemanticRelatedKind::Entity),
3543        )
3544        .with_provenance(provenance.clone())
3545}
3546
3547fn semantic_concept_node(
3548    root: &Path,
3549    summary: &summarize::Summary,
3550    label: &str,
3551    provenance: &GraphProvenance,
3552) -> SubstrateGraphNode {
3553    let handle = semantic_concept_handle(label);
3554    SubstrateGraphNode::new(handle.clone(), "semantic_concept", label.to_string())
3555        .with_property("handle", handle)
3556        .with_property("ref_id", label.to_string())
3557        .with_property("detail", "concept label from cached summaries".to_string())
3558        .with_property("source_file", summary.file_path.clone())
3559        .with_property("source_symbol", summary.symbol_name.clone())
3560        .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
3561        .with_property("embedding", semantic_embedding_property(label))
3562        .with_property(
3563            "expand",
3564            semantic_related_command(root, label, SemanticRelatedKind::Concept),
3565        )
3566        .with_provenance(provenance.clone())
3567}
3568
3569fn insert_semantic_edge(
3570    edge_map: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
3571    edge: SubstrateGraphEdge,
3572) {
3573    edge_map
3574        .entry((edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone()))
3575        .or_insert(edge);
3576}
3577
3578fn memory_event_key(event: &MemoryEvent) -> String {
3579    match (event.imported_from.as_deref(), event.imported_id.as_deref()) {
3580        (Some(imported_from), Some(imported_id)) => {
3581            format!("{imported_from}:{imported_id}")
3582        }
3583        _ => event.stable_id(),
3584    }
3585}
3586
3587fn memory_event_label(event: &MemoryEvent) -> String {
3588    let first_line = event
3589        .text
3590        .lines()
3591        .map(str::trim)
3592        .find(|line| !line.is_empty())
3593        .unwrap_or(event.kind.as_str());
3594    match event.kind.as_str() {
3595        "imported_observation" => {
3596            let observation_type = event
3597                .metadata
3598                .get("observation_type")
3599                .map(String::as_str)
3600                .unwrap_or("observation");
3601            truncate_for_compact(&format!("{observation_type}: {first_line}"), 80)
3602        }
3603        "imported_session_summary" => truncate_for_compact(&format!("summary: {first_line}"), 80),
3604        "imported_user_prompt" => truncate_for_compact(&format!("prompt: {first_line}"), 80),
3605        _ => truncate_for_compact(first_line, 80),
3606    }
3607}
3608
3609fn append_tsift_memory_graph_projection_rows(
3610    root: &Path,
3611    nodes: &mut Vec<SubstrateGraphNode>,
3612    edges: &mut Vec<SubstrateGraphEdge>,
3613) -> Result<()> {
3614    let memory_db = default_memory_db_path(root);
3615    if !memory_db.exists() {
3616        return Ok(());
3617    }
3618    let events = match read_memory_events(&memory_db, CLAUDE_MEM_GRAPH_LIMIT_PER_TABLE * 3) {
3619        Ok(events) => events,
3620        Err(_) => return Ok(()),
3621    };
3622    if events.is_empty() {
3623        return Ok(());
3624    }
3625
3626    let mut seen_sessions = BTreeSet::new();
3627    let mut edge_map = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
3628
3629    for event in &events {
3630        let event_id = event.stable_id();
3631        let event_key = memory_event_key(event);
3632        let source_handle = stable_handle("tmemsrc", &event_key);
3633        let semantic_handle = stable_handle("tmemsem", &event_key);
3634        let provenance = GraphProvenance::new("tsift-memory", &event.source_ref);
3635        let imported_from = event.imported_from.as_deref().unwrap_or("native");
3636
3637        if let Some(session_id) = &event.session_id {
3638            let session_handle =
3639                format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
3640            if seen_sessions.insert(session_id.clone()) {
3641                let session_node = SubstrateGraphNode::new(
3642                    session_handle.clone(),
3643                    "memory_session",
3644                    truncate_for_compact(session_id, 80),
3645                )
3646                .with_property("handle", session_handle.clone())
3647                .with_property("ref_id", session_id.clone())
3648                .with_property("session_id", session_id.clone())
3649                .with_property("provider", "tsift-memory")
3650                .with_property(
3651                    "expand",
3652                    format!(
3653                        "tsift memory status {} --json",
3654                        shell_quote(root.to_string_lossy().as_ref())
3655                    ),
3656                )
3657                .with_provenance(provenance.clone());
3658                nodes.push(node_with_content_freshness(session_node)?);
3659            }
3660
3661            insert_semantic_edge(
3662                &mut edge_map,
3663                SubstrateGraphEdge::new(
3664                    session_handle.clone(),
3665                    event_id.clone(),
3666                    "records_memory_event",
3667                )
3668                .with_property("label", "tsift-memory session event")
3669                .with_provenance(provenance.clone()),
3670            );
3671            insert_semantic_edge(
3672                &mut edge_map,
3673                SubstrateGraphEdge::new(
3674                    session_handle,
3675                    source_handle.clone(),
3676                    "records_memory_source",
3677                )
3678                .with_property("label", "tsift-memory session source")
3679                .with_provenance(provenance.clone()),
3680            );
3681        }
3682
3683        let label = memory_event_label(event);
3684        let mut event_node =
3685            SubstrateGraphNode::new(event_id.clone(), "memory_event", event.kind.as_str())
3686                .with_property("handle", event_id.clone())
3687                .with_property("ref_id", event.source_ref.clone())
3688                .with_property("source_ref", event.source_ref.clone())
3689                .with_property("provider", "tsift-memory")
3690                .with_property("memory_kind", event.kind.as_str())
3691                .with_property("imported_from", imported_from)
3692                .with_property("text_preview", truncate_for_compact(&event.text, 240))
3693                .with_property("token_estimate", event.token_estimate.to_string())
3694                .with_property(
3695                    "expand",
3696                    format!(
3697                        "tsift memory status {} --json",
3698                        shell_quote(root.to_string_lossy().as_ref())
3699                    ),
3700                )
3701                .with_provenance(provenance.clone());
3702        if let Some(session_id) = &event.session_id {
3703            event_node = event_node.with_property("session_id", session_id.clone());
3704        }
3705        if let Some(observed_at_unix) = event.observed_at_unix {
3706            event_node = event_node.with_property("observed_at_unix", observed_at_unix.to_string());
3707        }
3708        if let Some(imported_id) = &event.imported_id {
3709            event_node = event_node.with_property("imported_id", imported_id.clone());
3710        }
3711        nodes.push(node_with_content_freshness(event_node)?);
3712
3713        let mut source_node =
3714            SubstrateGraphNode::new(source_handle.clone(), "source_handle", label.clone())
3715                .with_property("handle", source_handle.clone())
3716                .with_property("ref_id", event.source_ref.clone())
3717                .with_property("source_ref", event.source_ref.clone())
3718                .with_property("provider", "tsift-memory")
3719                .with_property("memory_kind", event.kind.as_str())
3720                .with_property("imported_from", imported_from)
3721                .with_property("text_preview", truncate_for_compact(&event.text, 240))
3722                .with_property("token_estimate", event.token_estimate.to_string())
3723                .with_property(
3724                    "expand",
3725                    format!(
3726                        "tsift memory status {} --json",
3727                        shell_quote(root.to_string_lossy().as_ref())
3728                    ),
3729                )
3730                .with_provenance(provenance.clone());
3731        if let Some(session_id) = &event.session_id {
3732            source_node = source_node.with_property("session_id", session_id.clone());
3733        }
3734        if let Some(observed_at_unix) = event.observed_at_unix {
3735            source_node =
3736                source_node.with_property("observed_at_unix", observed_at_unix.to_string());
3737        }
3738        if let Some(imported_id) = &event.imported_id {
3739            source_node = source_node.with_property("imported_id", imported_id.clone());
3740        }
3741        nodes.push(node_with_content_freshness(source_node)?);
3742
3743        insert_semantic_edge(
3744            &mut edge_map,
3745            SubstrateGraphEdge::new(event_id.clone(), source_handle.clone(), "projects_source")
3746                .with_property("label", "tsift-memory source projection")
3747                .with_provenance(provenance.clone()),
3748        );
3749
3750        let semantic_text = format!("{} {}", label, event.text);
3751        let semantic_node =
3752            SubstrateGraphNode::new(semantic_handle.clone(), "semantic_concept", label.clone())
3753                .with_property("handle", semantic_handle.clone())
3754                .with_property("ref_id", event.source_ref.clone())
3755                .with_property("detail", "semantic row from tsift-memory")
3756                .with_property("source_ref", event.source_ref.clone())
3757                .with_property("provider", "tsift-memory")
3758                .with_property("memory_kind", event.kind.as_str())
3759                .with_property("imported_from", imported_from)
3760                .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
3761                .with_property("embedding", semantic_embedding_property(&semantic_text))
3762                .with_property(
3763                    "expand",
3764                    semantic_related_command(root, &label, SemanticRelatedKind::Concept),
3765                )
3766                .with_provenance(provenance.clone());
3767        nodes.push(node_with_content_freshness(semantic_node)?);
3768
3769        insert_semantic_edge(
3770            &mut edge_map,
3771            SubstrateGraphEdge::new(
3772                source_handle.clone(),
3773                semantic_handle.clone(),
3774                "mentions_concept",
3775            )
3776            .with_property("label", "tsift-memory semantic source")
3777            .with_provenance(provenance.clone()),
3778        );
3779    }
3780
3781    for edge in edge_map.into_values() {
3782        edges.push(edge_with_content_freshness(edge)?);
3783    }
3784
3785    Ok(())
3786}
3787
3788fn append_summary_semantic_projection_rows(
3789    root: &Path,
3790    graph: &TraversalGraphBuild,
3791    provenance: &GraphProvenance,
3792    nodes: &mut Vec<SubstrateGraphNode>,
3793    edges: &mut Vec<SubstrateGraphEdge>,
3794) -> Result<()> {
3795    let summaries_db = root.join(".tsift/summaries.db");
3796    if !summaries_db.exists() {
3797        return Ok(());
3798    }
3799
3800    let summary_db = summarize::SummaryDb::open_read_only_resilient(&summaries_db)?;
3801    let summaries = summary_db.all()?;
3802    if summaries.is_empty() {
3803        return Ok(());
3804    }
3805
3806    let file_node_by_path = graph
3807        .nodes
3808        .values()
3809        .filter(|node| node.kind == "file")
3810        .filter_map(|node| {
3811            node.path
3812                .as_ref()
3813                .map(|path| (path.clone(), node.handle.clone()))
3814        })
3815        .collect::<BTreeMap<_, _>>();
3816    let symbol_node_by_file_label = graph
3817        .nodes
3818        .values()
3819        .filter(|node| node.kind == "symbol")
3820        .filter_map(|node| {
3821            Some((
3822                (node.path.clone()?, node.label.clone()),
3823                node.handle.clone(),
3824            ))
3825        })
3826        .collect::<BTreeMap<_, _>>();
3827
3828    let mut semantic_nodes = BTreeMap::<String, SubstrateGraphNode>::new();
3829    let mut semantic_edges = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
3830
3831    for summary in &summaries {
3832        let source_handles =
3833            summary_source_handles(summary, &file_node_by_path, &symbol_node_by_file_label);
3834        let mut entity_ids_by_name = BTreeMap::<String, String>::new();
3835
3836        if let Some(entities) = &summary.entities {
3837            for entity in entities {
3838                let node = semantic_entity_node(
3839                    root,
3840                    summary,
3841                    &entity.name,
3842                    &entity.kind,
3843                    &entity.description,
3844                    provenance,
3845                );
3846                let entity_id = node.id.clone();
3847                entity_ids_by_name.insert(entity.name.to_ascii_lowercase(), entity_id.clone());
3848                semantic_nodes.entry(entity_id.clone()).or_insert(node);
3849
3850                for source_handle in &source_handles {
3851                    insert_semantic_edge(
3852                        &mut semantic_edges,
3853                        SubstrateGraphEdge::new(
3854                            source_handle.clone(),
3855                            entity_id.clone(),
3856                            "mentions_entity",
3857                        )
3858                        .with_property("label", format!("summary entity: {}", entity.name))
3859                        .with_property("source_file", summary.file_path.clone())
3860                        .with_provenance(provenance.clone()),
3861                    );
3862                }
3863            }
3864        }
3865
3866        let mut concept_ids = Vec::new();
3867        if let Some(labels) = &summary.concept_labels {
3868            for label in labels
3869                .iter()
3870                .map(|label| label.trim())
3871                .filter(|label| !label.is_empty())
3872            {
3873                let node = semantic_concept_node(root, summary, label, provenance);
3874                let concept_id = node.id.clone();
3875                semantic_nodes.entry(concept_id.clone()).or_insert(node);
3876                concept_ids.push(concept_id.clone());
3877
3878                for source_handle in &source_handles {
3879                    insert_semantic_edge(
3880                        &mut semantic_edges,
3881                        SubstrateGraphEdge::new(
3882                            source_handle.clone(),
3883                            concept_id.clone(),
3884                            "mentions_concept",
3885                        )
3886                        .with_property("label", format!("summary concept: {label}"))
3887                        .with_property("source_file", summary.file_path.clone())
3888                        .with_provenance(provenance.clone()),
3889                    );
3890                }
3891            }
3892        }
3893
3894        for entity_id in entity_ids_by_name.values() {
3895            for concept_id in &concept_ids {
3896                insert_semantic_edge(
3897                    &mut semantic_edges,
3898                    SubstrateGraphEdge::new(
3899                        entity_id.clone(),
3900                        concept_id.clone(),
3901                        "tagged_concept",
3902                    )
3903                    .with_property("label", "entity concept label".to_string())
3904                    .with_property("source_file", summary.file_path.clone())
3905                    .with_provenance(provenance.clone()),
3906                );
3907            }
3908        }
3909
3910        for idx in 0..concept_ids.len() {
3911            for next_idx in (idx + 1)..concept_ids.len() {
3912                insert_semantic_edge(
3913                    &mut semantic_edges,
3914                    SubstrateGraphEdge::new(
3915                        concept_ids[idx].clone(),
3916                        concept_ids[next_idx].clone(),
3917                        "related_concept",
3918                    )
3919                    .with_property("label", format!("co-occurs in {}", summary.symbol_name))
3920                    .with_property("source_file", summary.file_path.clone())
3921                    .with_provenance(provenance.clone()),
3922                );
3923            }
3924        }
3925
3926        if let Some(relationships) = &summary.relationships {
3927            for relationship in relationships {
3928                let from_id = entity_ids_by_name
3929                    .get(&relationship.from.to_ascii_lowercase())
3930                    .cloned()
3931                    .unwrap_or_else(|| {
3932                        let node = semantic_entity_node(
3933                            root,
3934                            summary,
3935                            &relationship.from,
3936                            "unknown",
3937                            "",
3938                            provenance,
3939                        );
3940                        let id = node.id.clone();
3941                        semantic_nodes.entry(id.clone()).or_insert(node);
3942                        id
3943                    });
3944                let to_id = entity_ids_by_name
3945                    .get(&relationship.to.to_ascii_lowercase())
3946                    .cloned()
3947                    .unwrap_or_else(|| {
3948                        let node = semantic_entity_node(
3949                            root,
3950                            summary,
3951                            &relationship.to,
3952                            "unknown",
3953                            "",
3954                            provenance,
3955                        );
3956                        let id = node.id.clone();
3957                        semantic_nodes.entry(id.clone()).or_insert(node);
3958                        id
3959                    });
3960                insert_semantic_edge(
3961                    &mut semantic_edges,
3962                    SubstrateGraphEdge::new(from_id, to_id, "semantic_relation")
3963                        .with_property("relationship_kind", relationship.kind.clone())
3964                        .with_property("label", relationship.kind.clone())
3965                        .with_property("source_file", summary.file_path.clone())
3966                        .with_property("source_symbol", summary.symbol_name.clone())
3967                        .with_provenance(provenance.clone()),
3968                );
3969            }
3970        }
3971    }
3972
3973    for node in semantic_nodes.into_values() {
3974        nodes.push(node_with_content_freshness(node)?);
3975    }
3976    for edge in semantic_edges.into_values() {
3977        edges.push(edge_with_content_freshness(edge)?);
3978    }
3979
3980    Ok(())
3981}
3982
3983fn projection_content_hash(
3984    nodes: &[SubstrateGraphNode],
3985    edges: &[SubstrateGraphEdge],
3986) -> Result<String> {
3987    #[derive(Serialize)]
3988    struct Payload<'a> {
3989        version: &'static str,
3990        nodes: &'a [SubstrateGraphNode],
3991        edges: &'a [SubstrateGraphEdge],
3992    }
3993
3994    content_hash(&Payload {
3995        version: GRAPH_PROJECTION_VERSION,
3996        nodes,
3997        edges,
3998    })
3999}
4000
4001pub(crate) fn graph_projection_content_hash(projection: &GraphProjection) -> Option<String> {
4002    projection
4003        .nodes
4004        .iter()
4005        .find(|node| node.kind == GRAPH_PROJECTION_META_KIND)
4006        .and_then(|node| node.properties.get("content_hash").cloned())
4007}
4008
4009fn traversal_projection_from_graph(
4010    root: &Path,
4011    scope: Option<&str>,
4012    graph: &TraversalGraphBuild,
4013) -> Result<GraphProjection> {
4014    let provenance = GraphProvenance::new(
4015        "tsift.traverse",
4016        format!("{}:{}", root.display(), scope.unwrap_or("root")),
4017    );
4018    let mut nodes = Vec::with_capacity(graph.nodes.len() + 1);
4019    for node in graph.nodes.values() {
4020        let mut projected =
4021            SubstrateGraphNode::new(node.handle.clone(), node.kind.clone(), node.label.clone())
4022                .with_property("handle", node.handle.clone())
4023                .with_property("expand", node.expand.clone())
4024                .with_provenance(provenance.clone());
4025        if let Some(ref_id) = &node.ref_id {
4026            projected = projected.with_property("ref_id", ref_id.clone());
4027        }
4028        if let Some(path) = &node.path {
4029            projected = projected.with_property("path", path.clone());
4030        }
4031        if let Some(line) = node.line {
4032            projected = projected.with_property("line", line.to_string());
4033        }
4034        if let Some(detail) = &node.detail {
4035            projected = projected.with_property("detail", detail.clone());
4036        }
4037        for (key, value) in &node.properties {
4038            projected = projected.with_property(key.clone(), value.clone());
4039        }
4040        nodes.push(node_with_content_freshness(projected)?);
4041    }
4042
4043    let mut edges = Vec::with_capacity(graph.edges.len());
4044    for edge in &graph.edges {
4045        let mut projected =
4046            SubstrateGraphEdge::new(edge.from.clone(), edge.to.clone(), edge.relation.clone())
4047                .with_property("weight", edge.weight.to_string())
4048                .with_provenance(provenance.clone());
4049        if let Some(label) = &edge.label {
4050            projected = projected.with_property("label", label.clone());
4051        }
4052        edges.push(edge_with_content_freshness(projected)?);
4053    }
4054
4055    append_traversal_context_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
4056    append_summary_semantic_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
4057    append_tsift_memory_graph_projection_rows(root, &mut nodes, &mut edges)?;
4058
4059    let projection_hash = projection_content_hash(&nodes, &edges)?;
4060    let meta = SubstrateGraphNode::new(
4061        graph_projection_meta_id(scope),
4062        GRAPH_PROJECTION_META_KIND,
4063        "tsift traversal projection",
4064    )
4065    .with_property("projection_version", GRAPH_PROJECTION_VERSION)
4066    .with_property("content_hash", projection_hash.clone())
4067    .with_property("root", root.to_string_lossy().to_string())
4068    .with_property("scope", scope.unwrap_or("root"))
4069    .with_property("node_count", graph.nodes.len().to_string())
4070    .with_property("edge_count", graph.edges.len().to_string())
4071    .with_provenance(provenance)
4072    .with_freshness(GraphFreshness::content_hash(projection_hash));
4073    nodes.push(meta);
4074
4075    Ok(GraphProjection { nodes, edges })
4076}
4077
4078#[allow(clippy::too_many_arguments)]
4079fn ensure_traversal_source_handle(
4080    root: &Path,
4081    provenance: &GraphProvenance,
4082    file_node_by_path: &BTreeMap<String, String>,
4083    node: &TraversalNode,
4084    budget: &ExplorationBudget,
4085    source_handle_by_node: &mut BTreeMap<String, String>,
4086    seen_windows: &mut BTreeMap<(String, usize, usize), String>,
4087    nodes: &mut Vec<SubstrateGraphNode>,
4088    edges: &mut Vec<SubstrateGraphEdge>,
4089) -> Result<Option<String>> {
4090    if let Some(handle) = source_handle_by_node.get(&node.handle) {
4091        return Ok(Some(handle.clone()));
4092    }
4093    let Some(window) = exploration_source_window_for_node(root, node, budget) else {
4094        return Ok(None);
4095    };
4096    let window_key = (window.file.clone(), window.start, window.end);
4097    let handle = if let Some(handle) = seen_windows.get(&window_key) {
4098        handle.clone()
4099    } else {
4100        let label = format!("{}:{}-{}", window.file, window.start, window.end);
4101        let projected = SubstrateGraphNode::new(window.handle.clone(), "source_handle", label)
4102            .with_property("handle", window.handle.clone())
4103            .with_property("file", window.file.clone())
4104            .with_property("start", window.start.to_string())
4105            .with_property("end", window.end.to_string())
4106            .with_property("reason", window.reason.clone())
4107            .with_property("expand", window.expand.clone())
4108            .with_provenance(provenance.clone());
4109        nodes.push(node_with_content_freshness(projected)?);
4110
4111        if let Some(file_handle) = file_node_by_path.get(&window.file) {
4112            let edge = SubstrateGraphEdge::new(
4113                window.handle.clone(),
4114                file_handle.clone(),
4115                "expands_source",
4116            )
4117            .with_property("label", window.reason.clone())
4118            .with_provenance(provenance.clone());
4119            edges.push(edge_with_content_freshness(edge)?);
4120        }
4121        if node.kind != "file" {
4122            let edge = SubstrateGraphEdge::new(
4123                window.handle.clone(),
4124                node.handle.clone(),
4125                "anchors_source",
4126            )
4127            .with_property("label", window.reason.clone())
4128            .with_provenance(provenance.clone());
4129            edges.push(edge_with_content_freshness(edge)?);
4130        }
4131        seen_windows.insert(window_key, window.handle.clone());
4132        window.handle
4133    };
4134    source_handle_by_node.insert(node.handle.clone(), handle.clone());
4135    Ok(Some(handle))
4136}
4137
4138fn push_traversal_backlog_target_handles<'a>(
4139    backlog: &TraversalNode,
4140    edges_by_from: &BTreeMap<&'a str, Vec<&'a TraversalEdge>>,
4141    node_by_handle: &BTreeMap<&'a str, &'a TraversalNode>,
4142    max_handles: usize,
4143    seen_target_nodes: &mut BTreeSet<String>,
4144    target_node_handles: &mut Vec<String>,
4145) {
4146    for edge in edges_by_from
4147        .get(backlog.handle.as_str())
4148        .into_iter()
4149        .flatten()
4150        .filter(|edge| edge.relation == "mentions")
4151    {
4152        let Some(target_node) = node_by_handle.get(edge.to.as_str()) else {
4153            continue;
4154        };
4155        if !matches!(
4156            target_node.kind.as_str(),
4157            "file" | "symbol" | "route" | "cargo_package" | "cargo_workspace"
4158        ) {
4159            continue;
4160        }
4161        if target_node
4162            .path
4163            .as_deref()
4164            .zip(backlog.path.as_deref())
4165            .is_some_and(|(target_path, backlog_path)| {
4166                target_path == backlog_path && target_path.ends_with(".md")
4167            })
4168        {
4169            continue;
4170        }
4171        if seen_target_nodes.insert(target_node.handle.clone()) {
4172            target_node_handles.push(target_node.handle.clone());
4173        }
4174        if target_node_handles.len() >= max_handles {
4175            break;
4176        }
4177    }
4178}
4179
4180fn append_traversal_context_projection_rows(
4181    root: &Path,
4182    graph: &TraversalGraphBuild,
4183    provenance: &GraphProvenance,
4184    nodes: &mut Vec<SubstrateGraphNode>,
4185    edges: &mut Vec<SubstrateGraphEdge>,
4186) -> Result<()> {
4187    let budget = exploration_budget_for_counts(graph.nodes.len(), graph.edges.len());
4188    let file_node_by_path = graph
4189        .nodes
4190        .values()
4191        .filter(|node| node.kind == "file")
4192        .filter_map(|node| {
4193            node.path
4194                .as_ref()
4195                .map(|path| (path.clone(), node.handle.clone()))
4196        })
4197        .collect::<BTreeMap<_, _>>();
4198
4199    let node_by_handle = graph
4200        .nodes
4201        .values()
4202        .map(|node| (node.handle.as_str(), node))
4203        .collect::<BTreeMap<_, _>>();
4204    let mut edges_by_from = BTreeMap::<&str, Vec<&TraversalEdge>>::new();
4205    for edge in &graph.edges {
4206        edges_by_from
4207            .entry(edge.from.as_str())
4208            .or_default()
4209            .push(edge);
4210    }
4211    for rows in edges_by_from.values_mut() {
4212        rows.sort_by(|left, right| {
4213            right
4214                .weight
4215                .cmp(&left.weight)
4216                .then(left.relation.cmp(&right.relation))
4217                .then(left.to.cmp(&right.to))
4218        });
4219    }
4220
4221    let mut seen_windows = BTreeMap::<(String, usize, usize), String>::new();
4222    let mut source_handle_by_node = BTreeMap::<String, String>::new();
4223
4224    let mut code_context_count = 0usize;
4225    let code_context_limit = budget.relationship_limit.min(8);
4226    for node in graph.nodes.values() {
4227        if !matches!(
4228            node.kind.as_str(),
4229            "backlog" | "job_packet" | "worker_result"
4230        ) {
4231            continue;
4232        }
4233        let mut target_node_handles = Vec::new();
4234        let mut fallback_target_handles = Vec::new();
4235        let mut seen_target_nodes = BTreeSet::new();
4236        if node.kind == "backlog" || node.kind == "worker_result" {
4237            push_traversal_backlog_target_handles(
4238                node,
4239                &edges_by_from,
4240                &node_by_handle,
4241                budget.max_source_windows,
4242                &mut seen_target_nodes,
4243                &mut target_node_handles,
4244            );
4245            fallback_target_handles.push(node.handle.clone());
4246        } else {
4247            for edge in edges_by_from
4248                .get(node.handle.as_str())
4249                .into_iter()
4250                .flatten()
4251                .filter(|edge| edge.relation == "targets")
4252            {
4253                let Some(backlog) = node_by_handle.get(edge.to.as_str()) else {
4254                    continue;
4255                };
4256                fallback_target_handles.push(backlog.handle.clone());
4257                push_traversal_backlog_target_handles(
4258                    backlog,
4259                    &edges_by_from,
4260                    &node_by_handle,
4261                    budget.max_source_windows,
4262                    &mut seen_target_nodes,
4263                    &mut target_node_handles,
4264                );
4265                if target_node_handles.len() >= budget.max_source_windows {
4266                    break;
4267                }
4268            }
4269            if fallback_target_handles.is_empty() {
4270                continue;
4271            }
4272        }
4273        let code_context = !target_node_handles.is_empty();
4274        if target_node_handles.is_empty() {
4275            target_node_handles = dedupe_preserve_order(fallback_target_handles);
4276        } else if code_context_count >= code_context_limit {
4277            continue;
4278        }
4279
4280        let mut worker_source_handles = Vec::new();
4281        let mut seen_worker_handles = BTreeSet::new();
4282        for target_handle in target_node_handles {
4283            if worker_source_handles.len() >= budget.max_source_windows {
4284                break;
4285            }
4286            let Some(target_node) = node_by_handle.get(target_handle.as_str()) else {
4287                continue;
4288            };
4289            let Some(handle) = ensure_traversal_source_handle(
4290                root,
4291                provenance,
4292                &file_node_by_path,
4293                target_node,
4294                &budget,
4295                &mut source_handle_by_node,
4296                &mut seen_windows,
4297                nodes,
4298                edges,
4299            )?
4300            else {
4301                continue;
4302            };
4303            if seen_worker_handles.insert(handle.clone()) {
4304                worker_source_handles.push(handle);
4305            }
4306        }
4307        if worker_source_handles.is_empty() {
4308            continue;
4309        }
4310        let target = node
4311            .path
4312            .clone()
4313            .unwrap_or_else(|| root.to_string_lossy().to_string());
4314        let summary = node.detail.clone().unwrap_or_else(|| node.label.clone());
4315        let handle = stable_handle("xwrk", &format!("{}:{}:{}", target, node.handle, summary));
4316        let projected = SubstrateGraphNode::new(handle.clone(), "worker_context", summary.clone())
4317            .with_property("handle", handle.clone())
4318            .with_property("target", target.clone())
4319            .with_property("summary", summary)
4320            .with_property(
4321                "source_handle_count",
4322                worker_source_handles.len().to_string(),
4323            )
4324            .with_property(
4325                "expand",
4326                format!(
4327                    "tsift --envelope context-pack {} --budget normal",
4328                    shell_quote(&target)
4329                ),
4330            )
4331            .with_provenance(provenance.clone());
4332        nodes.push(node_with_content_freshness(projected)?);
4333
4334        let request_edge =
4335            SubstrateGraphEdge::new(node.handle.clone(), handle.clone(), "requests_context")
4336                .with_property("label", "bounded worker context".to_string())
4337                .with_provenance(provenance.clone());
4338        edges.push(edge_with_content_freshness(request_edge)?);
4339
4340        for source_handle in &worker_source_handles {
4341            let scope_edge =
4342                SubstrateGraphEdge::new(handle.clone(), source_handle.clone(), "scopes_source")
4343                    .with_property("label", "bounded worker source window".to_string())
4344                    .with_provenance(provenance.clone());
4345            edges.push(edge_with_content_freshness(scope_edge)?);
4346        }
4347        if code_context {
4348            code_context_count += 1;
4349        }
4350    }
4351
4352    Ok(())
4353}
4354
4355fn traversal_node_from_graph_node(root: &Path, node: SubstrateGraphNode) -> TraversalNode {
4356    let handle = node
4357        .properties
4358        .get("handle")
4359        .cloned()
4360        .unwrap_or_else(|| node.id.clone());
4361    TraversalNode {
4362        expand: node
4363            .properties
4364            .get("expand")
4365            .cloned()
4366            .unwrap_or_else(|| traversal_expand_command(root, &handle)),
4367        handle,
4368        kind: node.kind,
4369        label: node.label,
4370        ref_id: node.properties.get("ref_id").cloned(),
4371        path: node.properties.get("path").cloned(),
4372        line: node
4373            .properties
4374            .get("line")
4375            .and_then(|value| value.parse::<i64>().ok()),
4376        detail: node.properties.get("detail").cloned(),
4377        properties: node.properties,
4378    }
4379}
4380
4381fn traversal_graph_from_store(root: &Path, store: &impl GraphStore) -> Result<TraversalGraphBuild> {
4382    let mut graph = TraversalGraphBuild::default();
4383    for node in store.all_nodes()? {
4384        if node.kind == GRAPH_PROJECTION_META_KIND {
4385            continue;
4386        }
4387        graph.add_node(traversal_node_from_graph_node(root, node));
4388    }
4389    for edge in store.all_edges()? {
4390        graph.add_edge(
4391            &edge.from_id,
4392            &edge.to_id,
4393            &edge.kind,
4394            edge.properties.get("label").cloned(),
4395            edge.properties
4396                .get("weight")
4397                .and_then(|value| value.parse::<usize>().ok())
4398                .unwrap_or(1),
4399        );
4400    }
4401    Ok(graph)
4402}
4403
4404pub(crate) fn convex_rows_from_graph_store(
4405    store: &impl GraphStore,
4406) -> Result<ConvexProjectionRows> {
4407    Ok(GraphProjection {
4408        nodes: store.all_nodes()?,
4409        edges: store.all_edges()?,
4410    }
4411    .to_convex_rows())
4412}
4413
/// One index entry (table, index name, indexed fields) as listed by
/// `convex_required_indexes` and reported in `ConvexSyncReport`.
///
/// The derived ordering compares `table`, then `name`, then `fields`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
struct ConvexRequiredIndex {
    /// Table the index belongs to (`"nodes"` or `"edges"` in the required set).
    table: String,
    /// Index name, e.g. `"by_external_id"`.
    name: String,
    /// Indexed field names, in index order.
    fields: Vec<String>,
}
4420
/// One batch of keys planned for a single sync operation; produced by
/// `push_sync_chunks` and sent by `ConvexHttpTransport::apply_chunk`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncChunk {
    /// Operation name, e.g. `"upsert_nodes"` or `"delete_edges"`.
    operation: String,
    /// 1-based position of this chunk within its operation.
    chunk: usize,
    /// Number of keys in `keys`.
    count: usize,
    /// Node external ids or edge keys covered by this chunk.
    keys: Vec<String>,
    /// Upper bound on transport attempts made by `apply_chunk`.
    max_attempts: usize,
    /// Human-readable description of how a failed chunk should be retried.
    retry_policy: String,
}
4430
/// Report-friendly description of the transport configuration, built by
/// `ConvexHttpTransport::summary`. It names the environment variables involved
/// but does not carry the endpoint URL or the token value.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportSummary {
    /// Name of the environment variable that can supply the endpoint.
    endpoint_env: String,
    /// Whether an endpoint was resolved (`summary` always sets this to `true`).
    endpoint_configured: bool,
    /// Name of the environment variable consulted for the auth token.
    auth_token_env: String,
    /// Whether a non-blank auth token was found.
    auth_configured: bool,
    /// Caller-supplied flag passed straight through from `summary`.
    remote_snapshot: bool,
    /// Caller-supplied chunk count passed straight through from `summary`.
    applied_chunks: usize,
}
4440
/// Outcome of sending one sync chunk, as returned by
/// `ConvexHttpTransport::apply_chunk`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportReceipt {
    /// Operation name copied from the chunk.
    operation: String,
    /// Chunk number copied from the chunk.
    chunk: usize,
    /// 1-based attempt on which the transport call returned a response.
    attempt: usize,
    /// Status reported by the backend; `"ok"` when the response omitted one.
    status: String,
    /// Optional message reported by the backend.
    message: Option<String>,
}
4449
/// JSON request body posted to the Convex graph transport endpoint.
///
/// Field names are serialized in camelCase. `projection_meta_id`, `cursor` and
/// `limit` are omitted from the payload when `None`; `projection_hash` has no
/// skip attribute and is therefore always serialized.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportRequest<'a> {
    /// Operation selector, e.g. `"snapshot_meta"` or `"upsert_nodes"`.
    operation: &'a str,
    /// Chunk number for sync operations; snapshot requests send `0`.
    chunk: usize,
    /// Projection version string sent with every request.
    projection_version: &'a str,
    /// Local projection hash; set by `apply_chunk`, `None` for snapshot reads.
    projection_hash: Option<&'a str>,
    /// Id of the projection meta node; only sent with `snapshot_meta`.
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_meta_id: Option<&'a str>,
    /// Node rows to upsert (empty for every other operation).
    node_rows: Vec<ConvexNodeRow>,
    /// Edge rows to upsert (empty for every other operation).
    edge_rows: Vec<ConvexEdgeRow>,
    /// Keys covered by a sync chunk (empty for snapshot reads).
    keys: Vec<String>,
    /// Pagination cursor for the snapshot page operations.
    #[serde(skip_serializing_if = "Option::is_none")]
    cursor: Option<String>,
    /// Optional page-size limit; every call site in this file sends `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    limit: Option<usize>,
}
4467
/// JSON response body returned by the Convex graph transport endpoint.
///
/// Which optional member is populated depends on the operation: `rows` is read
/// by the legacy `snapshot` call, `meta` by `snapshot_meta`, and `page` by the
/// paginated snapshot operations.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportResponse {
    /// Backend status; `"error"` is treated as a failure by `snapshot_meta`.
    status: Option<String>,
    /// Optional human-readable message accompanying the status.
    message: Option<String>,
    /// Full projection rows (legacy single-shot snapshot).
    rows: Option<ConvexProjectionRows>,
    /// Snapshot metadata (`snapshot_meta` operation).
    #[serde(default)]
    meta: Option<ConvexSnapshotMeta>,
    /// One page of rows plus the next cursor (page operations).
    #[serde(default)]
    page: Option<ConvexSnapshotPage>,
}
4479
/// Metadata returned by the `snapshot_meta` operation. Every field defaults
/// when the backend omits it.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotMeta {
    // Captured for completeness/debugging; not currently consumed by the
    // freshness diff (indexes are already validated against the required set
    // via `convex_required_indexes`, and `page_size` is informational only).
    // NOTE(review): `node_count` and `edge_count` are read by
    // `fetch_snapshot_paginated` as `Vec` capacity hints, so their
    // `#[allow(dead_code)]` attributes look stale — confirm and drop them.
    /// Indexes reported by the backend.
    #[serde(default)]
    #[allow(dead_code)]
    indexes: Vec<ConvexRequiredIndex>,
    /// Number of node rows the backend reports.
    #[serde(default)]
    #[allow(dead_code)]
    node_count: Option<usize>,
    /// Number of edge rows the backend reports.
    #[serde(default)]
    #[allow(dead_code)]
    edge_count: Option<usize>,
    /// Remote projection hash; compared with the local hash to short-circuit
    /// the full row download when they match.
    #[serde(default)]
    projection_hash: Option<String>,
    /// Page size reported by the backend.
    #[serde(default)]
    #[allow(dead_code)]
    page_size: Option<usize>,
}
4501
/// Paginated snapshot page response. `rows` is either node rows or edge rows
/// depending on which operation was called; we deserialize as raw values to
/// keep the transport struct shared between both shapes, then narrow per call
/// site.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotPage {
    /// Undecoded rows of this page.
    rows: Vec<serde_json::Value>,
    /// Cursor for the following page; `None` (or absent) ends pagination.
    #[serde(default)]
    next_cursor: Option<String>,
}
4513
/// Result of comparing the local projection against a Convex snapshot; built
/// by `convex_projection_freshness`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexProjectionFreshness {
    /// `"current"`, `"stale"`, or `"unchecked"` (no snapshot supplied).
    status: String,
    /// `true` only for the stale verdict; callers use it to refuse reads.
    fail_closed: bool,
    /// Content hash of the local projection meta node, when present.
    local_hash: Option<String>,
    /// Content hash of the snapshot's projection meta node, when present.
    snapshot_hash: Option<String>,
    /// External ids of local nodes absent from the snapshot.
    missing_nodes: Vec<String>,
    /// External ids of local nodes whose snapshot row differs.
    stale_nodes: Vec<String>,
    /// Edge keys of local edges absent from the snapshot.
    missing_edges: Vec<String>,
    /// Edge keys of local edges whose snapshot row differs.
    stale_edges: Vec<String>,
    /// Human-readable explanations for a non-current verdict.
    diagnostics: Vec<String>,
}
4526
/// Name of the environment variable that `ConvexHttpTransport::from_options`
/// reads when no explicit endpoint is passed; also reported in
/// `ConvexTransportSummary::endpoint_env`.
const DEFAULT_CONVEX_GRAPH_URL_ENV: &str = "TSIFT_CONVEX_GRAPH_URL";
4528
4529impl ConvexProjectionFreshness {
4530    fn current(local_hash: Option<String>, snapshot_hash: Option<String>) -> Self {
4531        Self {
4532            status: "current".to_string(),
4533            fail_closed: false,
4534            local_hash,
4535            snapshot_hash,
4536            missing_nodes: Vec::new(),
4537            stale_nodes: Vec::new(),
4538            missing_edges: Vec::new(),
4539            stale_edges: Vec::new(),
4540            diagnostics: Vec::new(),
4541        }
4542    }
4543}
4544
/// Complete sync plan (and, once applied, its transport outcome) for one
/// project root; assembled by `build_convex_sync_report_with_snapshot`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncReport {
    /// Resolved project root, rendered lossily as a string.
    root: String,
    /// Optional graph scope; omitted from the serialized report when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Path of the local graph database, rendered lossily as a string.
    graph_db: String,
    /// Whether the report was built as a dry run.
    dry_run: bool,
    /// Projection version sent with every transport request.
    projection_version: String,
    /// Content hash of the local projection meta node, when present.
    projection_hash: Option<String>,
    /// Indexes listed by `convex_required_indexes`.
    required_indexes: Vec<ConvexRequiredIndex>,
    /// Local node rows that are missing from or differ in the snapshot.
    node_upserts: Vec<ConvexNodeRow>,
    /// Local edge rows that are missing from or differ in the snapshot.
    edge_upserts: Vec<ConvexEdgeRow>,
    /// External ids present in the snapshot but not locally.
    node_tombstones: Vec<String>,
    /// Edge keys present in the snapshot but not locally.
    edge_tombstones: Vec<String>,
    /// Planned chunks, in application order.
    chunks: Vec<ConvexSyncChunk>,
    /// Freshness verdict for the supplied snapshot.
    freshness: ConvexProjectionFreshness,
    /// Transport summary; `None` in a freshly built report.
    transport: Option<ConvexTransportSummary>,
    /// Transport receipts; empty in a freshly built report.
    receipts: Vec<ConvexTransportReceipt>,
    /// Operator-facing notes about ordering, dry-run mode, and fail-closed state.
    diagnostics: Vec<String>,
    /// Warnings carried over from building the traversal graph.
    warnings: Vec<String>,
}
4566
4567fn convex_required_indexes() -> Vec<ConvexRequiredIndex> {
4568    vec![
4569        ConvexRequiredIndex {
4570            table: "nodes".to_string(),
4571            name: "by_external_id".to_string(),
4572            fields: vec!["externalId".to_string()],
4573        },
4574        ConvexRequiredIndex {
4575            table: "nodes".to_string(),
4576            name: "by_kind".to_string(),
4577            fields: vec!["kind".to_string()],
4578        },
4579        ConvexRequiredIndex {
4580            table: "edges".to_string(),
4581            name: "by_edge_key".to_string(),
4582            fields: vec!["edgeKey".to_string()],
4583        },
4584        ConvexRequiredIndex {
4585            table: "edges".to_string(),
4586            name: "by_from_kind".to_string(),
4587            fields: vec!["fromExternalId".to_string(), "kind".to_string()],
4588        },
4589        ConvexRequiredIndex {
4590            table: "edges".to_string(),
4591            name: "by_to_kind".to_string(),
4592            fields: vec!["toExternalId".to_string(), "kind".to_string()],
4593        },
4594    ]
4595}
4596
4597pub(crate) fn load_convex_projection_rows(path: &Path) -> Result<ConvexProjectionRows> {
4598    let content = fs::read_to_string(path)
4599        .with_context(|| format!("reading Convex projection snapshot {}", path.display()))?;
4600    serde_json::from_str(&content)
4601        .with_context(|| format!("parsing Convex projection snapshot {}", path.display()))
4602}
4603
4604fn convex_projection_row_diagnostics(rows: &ConvexProjectionRows) -> Vec<String> {
4605    let mut diagnostics = Vec::new();
4606    let mut node_counts = BTreeMap::<&str, usize>::new();
4607    for row in &rows.nodes {
4608        *node_counts.entry(row.external_id.as_str()).or_default() += 1;
4609    }
4610    for (external_id, count) in node_counts.iter().filter(|(_, count)| **count > 1) {
4611        diagnostics.push(format!(
4612            "Convex snapshot contains duplicate node externalId {external_id} ({count} rows)"
4613        ));
4614    }
4615
4616    let node_ids = node_counts.keys().copied().collect::<BTreeSet<_>>();
4617    let mut edge_counts = BTreeMap::<&str, usize>::new();
4618    for edge in &rows.edges {
4619        *edge_counts.entry(edge.edge_key.as_str()).or_default() += 1;
4620        if !node_ids.contains(edge.from_external_id.as_str()) {
4621            diagnostics.push(format!(
4622                "Convex snapshot edge {} references missing from node {}",
4623                edge.edge_key, edge.from_external_id
4624            ));
4625        }
4626        if !node_ids.contains(edge.to_external_id.as_str()) {
4627            diagnostics.push(format!(
4628                "Convex snapshot edge {} references missing to node {}",
4629                edge.edge_key, edge.to_external_id
4630            ));
4631        }
4632        let expected_key =
4633            ConvexEdgeRow::stable_key(&edge.from_external_id, &edge.to_external_id, &edge.kind);
4634        if edge.edge_key != expected_key {
4635            diagnostics.push(format!(
4636                "Convex snapshot edge {} has non-canonical key; expected {} for ({}, {}, {})",
4637                edge.edge_key, expected_key, edge.from_external_id, edge.kind, edge.to_external_id
4638            ));
4639        }
4640    }
4641    for (edge_key, count) in edge_counts.iter().filter(|(_, count)| **count > 1) {
4642        diagnostics.push(format!(
4643            "Convex snapshot contains duplicate edgeKey {edge_key} ({count} rows)"
4644        ));
4645    }
4646    diagnostics
4647}
4648
4649pub(crate) fn validate_convex_projection_rows(rows: &ConvexProjectionRows) -> Result<()> {
4650    let diagnostics = convex_projection_row_diagnostics(rows);
4651    if diagnostics.is_empty() {
4652        Ok(())
4653    } else {
4654        bail!("{}", diagnostics.join("; "))
4655    }
4656}
4657
/// HTTP client configuration for the Convex graph transport endpoint.
pub(crate) struct ConvexHttpTransport {
    /// URL every request is POSTed to.
    endpoint: String,
    /// Name of the environment variable the auth token was looked up in.
    auth_token_env: String,
    /// Bearer token, present only when that variable held a non-blank value.
    auth_token: Option<String>,
}
4663
4664impl ConvexHttpTransport {
4665    fn from_options(endpoint: Option<&str>, auth_token_env: &str) -> Result<Self> {
4666        let endpoint = endpoint
4667            .map(str::to_string)
4668            .or_else(|| env::var(DEFAULT_CONVEX_GRAPH_URL_ENV).ok())
4669            .context("Convex transport requires --endpoint or TSIFT_CONVEX_GRAPH_URL")?;
4670        let auth_token = env::var(auth_token_env)
4671            .ok()
4672            .filter(|value| !value.trim().is_empty());
4673        Ok(Self {
4674            endpoint,
4675            auth_token_env: auth_token_env.to_string(),
4676            auth_token,
4677        })
4678    }
4679
4680    fn summary(&self, remote_snapshot: bool, applied_chunks: usize) -> ConvexTransportSummary {
4681        ConvexTransportSummary {
4682            endpoint_env: DEFAULT_CONVEX_GRAPH_URL_ENV.to_string(),
4683            endpoint_configured: true,
4684            auth_token_env: self.auth_token_env.clone(),
4685            auth_configured: self.auth_token.is_some(),
4686            remote_snapshot,
4687            applied_chunks,
4688        }
4689    }
4690
4691    fn post(&self, request: &ConvexTransportRequest<'_>) -> Result<ConvexTransportResponse> {
4692        let mut builder = ureq::post(&self.endpoint);
4693        if let Some(token) = &self.auth_token {
4694            builder = builder.header("Authorization", &format!("Bearer {token}"));
4695        }
4696        builder
4697            .send_json(request)
4698            .with_context(|| format!("calling Convex graph transport {}", self.endpoint))?
4699            .body_mut()
4700            .read_json::<ConvexTransportResponse>()
4701            .with_context(|| format!("parsing Convex graph transport response {}", self.endpoint))
4702    }
4703
4704    /// Fetch a full snapshot of the Convex graph backend.
4705    ///
4706    /// Uses the paginated `snapshot_meta` + `snapshot_nodes_page` +
4707    /// `snapshot_edges_page` triplet so the call works on tables larger than
4708    /// ~5k rows (the single-shot `snapshot` query hits Convex's 15s per-request
4709    /// syscall budget at that scale; see `#convexsnapshotscale`).
4710    ///
4711    /// Falls back to the legacy single-shot `snapshot` operation if the
4712    /// backend doesn't recognize `snapshot_meta` (older deployments that
4713    /// haven't redeployed the new schema).
4714    fn fetch_snapshot(
4715        &self,
4716        projection_version: &str,
4717        scope: Option<&str>,
4718        local_hash: Option<&str>,
4719        local_rows: Option<&ConvexProjectionRows>,
4720    ) -> Result<(ConvexProjectionRows, Vec<String>)> {
4721        match self.fetch_snapshot_paginated(projection_version, scope, local_hash, local_rows) {
4722            Ok(rows) => Ok(rows),
4723            Err(err) => {
4724                // Only fall through to the legacy path if the failure looks
4725                // like "operation unknown" (older backend). Any other failure
4726                // (HTTP timeout, deserialization mismatch) should surface so
4727                // the operator sees the real cause.
4728                let msg = format!("{err:#}");
4729                let is_unknown_op = msg.contains("unknown operation")
4730                    || msg.contains("snapshot_meta")
4731                    || msg.contains("404");
4732                if !is_unknown_op {
4733                    return Err(err);
4734                }
4735                self.fetch_snapshot_legacy(projection_version)
4736                    .map(|rows| (rows, Vec::new()))
4737            }
4738        }
4739    }
4740
4741    fn fetch_snapshot_legacy(&self, projection_version: &str) -> Result<ConvexProjectionRows> {
4742        let response = self.post(&ConvexTransportRequest {
4743            operation: "snapshot",
4744            chunk: 0,
4745            projection_version,
4746            projection_hash: None,
4747            projection_meta_id: None,
4748            node_rows: Vec::new(),
4749            edge_rows: Vec::new(),
4750            keys: Vec::new(),
4751            cursor: None,
4752            limit: None,
4753        })?;
4754        response
4755            .rows
4756            .context("Convex snapshot response did not include rows")
4757    }
4758
4759    fn fetch_snapshot_paginated(
4760        &self,
4761        projection_version: &str,
4762        scope: Option<&str>,
4763        local_hash: Option<&str>,
4764        local_rows: Option<&ConvexProjectionRows>,
4765    ) -> Result<(ConvexProjectionRows, Vec<String>)> {
4766        let projection_meta_id = graph_projection_meta_id(scope);
4767        let meta_response = self.post(&ConvexTransportRequest {
4768            operation: "snapshot_meta",
4769            chunk: 0,
4770            projection_version,
4771            projection_hash: None,
4772            projection_meta_id: Some(&projection_meta_id),
4773            node_rows: Vec::new(),
4774            edge_rows: Vec::new(),
4775            keys: Vec::new(),
4776            cursor: None,
4777            limit: None,
4778        })?;
4779        if matches!(meta_response.status.as_deref(), Some("error")) {
4780            anyhow::bail!(
4781                "Convex snapshot_meta returned error: {}",
4782                meta_response.message.unwrap_or_default()
4783            );
4784        }
4785        let meta = meta_response
4786            .meta
4787            .context("Convex snapshot_meta response did not include meta")?;
4788        if let (Some(remote_hash), Some(local_hash), Some(local_rows)) =
4789            (meta.projection_hash.as_deref(), local_hash, local_rows)
4790            && remote_hash == local_hash
4791        {
4792            return Ok((
4793                local_rows.clone(),
4794                vec![
4795                    "remote projection hash matched local graph; skipped full row-page snapshot diff"
4796                        .to_string(),
4797                ],
4798            ));
4799        }
4800
4801        let mut nodes: Vec<ConvexNodeRow> = Vec::with_capacity(meta.node_count.unwrap_or_default());
4802        let mut node_cursor: Option<String> = None;
4803        loop {
4804            let response = self.post(&ConvexTransportRequest {
4805                operation: "snapshot_nodes_page",
4806                chunk: 0,
4807                projection_version,
4808                projection_hash: None,
4809                projection_meta_id: None,
4810                node_rows: Vec::new(),
4811                edge_rows: Vec::new(),
4812                keys: Vec::new(),
4813                cursor: node_cursor.clone(),
4814                limit: None,
4815            })?;
4816            let page = response
4817                .page
4818                .context("Convex snapshot_nodes_page response did not include page")?;
4819            for raw in page.rows {
4820                let row: ConvexNodeRow =
4821                    serde_json::from_value(raw).context("decoding Convex snapshot node row")?;
4822                nodes.push(row);
4823            }
4824            match page.next_cursor {
4825                Some(next) => node_cursor = Some(next),
4826                None => break,
4827            }
4828        }
4829
4830        let mut edges: Vec<ConvexEdgeRow> = Vec::with_capacity(meta.edge_count.unwrap_or_default());
4831        let mut edge_cursor: Option<String> = None;
4832        loop {
4833            let response = self.post(&ConvexTransportRequest {
4834                operation: "snapshot_edges_page",
4835                chunk: 0,
4836                projection_version,
4837                projection_hash: None,
4838                projection_meta_id: None,
4839                node_rows: Vec::new(),
4840                edge_rows: Vec::new(),
4841                keys: Vec::new(),
4842                cursor: edge_cursor.clone(),
4843                limit: None,
4844            })?;
4845            let page = response
4846                .page
4847                .context("Convex snapshot_edges_page response did not include page")?;
4848            for raw in page.rows {
4849                let row: ConvexEdgeRow =
4850                    serde_json::from_value(raw).context("decoding Convex snapshot edge row")?;
4851                edges.push(row);
4852            }
4853            match page.next_cursor {
4854                Some(next) => edge_cursor = Some(next),
4855                None => break,
4856            }
4857        }
4858
4859        Ok((ConvexProjectionRows { nodes, edges }, Vec::new()))
4860    }
4861
4862    fn apply_chunk(
4863        &self,
4864        report: &ConvexSyncReport,
4865        chunk: &ConvexSyncChunk,
4866    ) -> Result<ConvexTransportReceipt> {
4867        let node_rows = if chunk.operation == "upsert_nodes" {
4868            report
4869                .node_upserts
4870                .iter()
4871                .filter(|row| chunk.keys.contains(&row.external_id))
4872                .cloned()
4873                .collect()
4874        } else {
4875            Vec::new()
4876        };
4877        let edge_rows = if chunk.operation == "upsert_edges" {
4878            report
4879                .edge_upserts
4880                .iter()
4881                .filter(|row| chunk.keys.contains(&row.edge_key))
4882                .cloned()
4883                .collect()
4884        } else {
4885            Vec::new()
4886        };
4887        let request = ConvexTransportRequest {
4888            operation: &chunk.operation,
4889            chunk: chunk.chunk,
4890            projection_version: &report.projection_version,
4891            projection_hash: report.projection_hash.as_deref(),
4892            projection_meta_id: None,
4893            node_rows,
4894            edge_rows,
4895            keys: chunk.keys.clone(),
4896            cursor: None,
4897            limit: None,
4898        };
4899        let mut last_error = None;
4900        for attempt in 1..=chunk.max_attempts {
4901            match self.post(&request) {
4902                Ok(response) => {
4903                    return Ok(ConvexTransportReceipt {
4904                        operation: chunk.operation.clone(),
4905                        chunk: chunk.chunk,
4906                        attempt,
4907                        status: response.status.unwrap_or_else(|| "ok".to_string()),
4908                        message: response.message,
4909                    });
4910                }
4911                Err(err) => {
4912                    last_error = Some(err);
4913                    if attempt < chunk.max_attempts {
4914                        std::thread::sleep(Duration::from_millis(100 * attempt as u64));
4915                    }
4916                }
4917            }
4918        }
4919        Err(last_error.unwrap_or_else(|| anyhow::anyhow!("Convex transport chunk failed")))
4920            .with_context(|| format!("applying Convex {} chunk {}", chunk.operation, chunk.chunk))
4921    }
4922}
4923
4924fn convex_projection_hash(rows: &ConvexProjectionRows, scope: Option<&str>) -> Option<String> {
4925    let meta_id = graph_projection_meta_id(scope);
4926    rows.nodes
4927        .iter()
4928        .find(|row| row.external_id == meta_id && row.kind == GRAPH_PROJECTION_META_KIND)
4929        .and_then(|row| row.properties.get("content_hash").cloned())
4930}
4931
4932fn convex_projection_freshness(
4933    local: &ConvexProjectionRows,
4934    snapshot: Option<&ConvexProjectionRows>,
4935    scope: Option<&str>,
4936) -> ConvexProjectionFreshness {
4937    let local_hash = convex_projection_hash(local, scope);
4938    let Some(snapshot) = snapshot else {
4939        return ConvexProjectionFreshness {
4940            status: "unchecked".to_string(),
4941            fail_closed: false,
4942            local_hash,
4943            snapshot_hash: None,
4944            missing_nodes: Vec::new(),
4945            stale_nodes: Vec::new(),
4946            missing_edges: Vec::new(),
4947            stale_edges: Vec::new(),
4948            diagnostics: vec![
4949                "no Convex snapshot supplied; sync output is a local dry-run plan".to_string(),
4950            ],
4951        };
4952    };
4953
4954    let snapshot_hash = convex_projection_hash(snapshot, scope);
4955    let snapshot_nodes = snapshot
4956        .nodes
4957        .iter()
4958        .map(|row| (row.external_id.as_str(), row))
4959        .collect::<BTreeMap<_, _>>();
4960    let snapshot_edges = snapshot
4961        .edges
4962        .iter()
4963        .map(|row| (row.edge_key.as_str(), row))
4964        .collect::<BTreeMap<_, _>>();
4965
4966    let mut missing_nodes = Vec::new();
4967    let mut stale_nodes = Vec::new();
4968    for row in &local.nodes {
4969        match snapshot_nodes.get(row.external_id.as_str()) {
4970            Some(snapshot_row) if *snapshot_row == row => {}
4971            Some(_) => stale_nodes.push(row.external_id.clone()),
4972            None => missing_nodes.push(row.external_id.clone()),
4973        }
4974    }
4975
4976    let mut missing_edges = Vec::new();
4977    let mut stale_edges = Vec::new();
4978    for row in &local.edges {
4979        match snapshot_edges.get(row.edge_key.as_str()) {
4980            Some(snapshot_row) if *snapshot_row == row => {}
4981            Some(_) => stale_edges.push(row.edge_key.clone()),
4982            None => missing_edges.push(row.edge_key.clone()),
4983        }
4984    }
4985
4986    let hash_current = local_hash.is_some() && local_hash == snapshot_hash;
4987    let rows_current = missing_nodes.is_empty()
4988        && stale_nodes.is_empty()
4989        && missing_edges.is_empty()
4990        && stale_edges.is_empty();
4991    if hash_current && rows_current {
4992        return ConvexProjectionFreshness::current(local_hash, snapshot_hash);
4993    }
4994
4995    let mut diagnostics = Vec::new();
4996    if local_hash != snapshot_hash {
4997        diagnostics.push(format!(
4998            "projection hash mismatch: local={} snapshot={}",
4999            local_hash.as_deref().unwrap_or("missing"),
5000            snapshot_hash.as_deref().unwrap_or("missing")
5001        ));
5002    }
5003    if !missing_nodes.is_empty() || !missing_edges.is_empty() {
5004        diagnostics.push(format!(
5005            "Convex snapshot is missing {} node(s) and {} edge(s)",
5006            missing_nodes.len(),
5007            missing_edges.len()
5008        ));
5009    }
5010    if !stale_nodes.is_empty() || !stale_edges.is_empty() {
5011        diagnostics.push(format!(
5012            "Convex snapshot has {} stale node row(s) and {} stale edge row(s)",
5013            stale_nodes.len(),
5014            stale_edges.len()
5015        ));
5016    }
5017
5018    ConvexProjectionFreshness {
5019        status: "stale".to_string(),
5020        fail_closed: true,
5021        local_hash,
5022        snapshot_hash,
5023        missing_nodes,
5024        stale_nodes,
5025        missing_edges,
5026        stale_edges,
5027        diagnostics,
5028    }
5029}
5030
5031pub(crate) fn verify_convex_projection_snapshot(
5032    root: &Path,
5033    scope: Option<&str>,
5034    snapshot_path: &Path,
5035) -> Result<()> {
5036    let graph_db = graph_substrate_db_path(root, scope);
5037    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
5038    let local = convex_rows_from_graph_store(&store)?;
5039    let snapshot = load_convex_projection_rows(snapshot_path)?;
5040    validate_convex_projection_rows(&snapshot)?;
5041    let freshness = convex_projection_freshness(&local, Some(&snapshot), scope);
5042    if freshness.fail_closed {
5043        bail!(
5044            "Convex graph projection is not current for {}: {}",
5045            root.display(),
5046            freshness.diagnostics.join("; ")
5047        );
5048    }
5049    Ok(())
5050}
5051
5052fn convex_rows_diff(
5053    local: &ConvexProjectionRows,
5054    snapshot: Option<&ConvexProjectionRows>,
5055) -> (
5056    Vec<ConvexNodeRow>,
5057    Vec<ConvexEdgeRow>,
5058    Vec<String>,
5059    Vec<String>,
5060) {
5061    let Some(snapshot) = snapshot else {
5062        return (
5063            local.nodes.clone(),
5064            local.edges.clone(),
5065            Vec::new(),
5066            Vec::new(),
5067        );
5068    };
5069    let local_nodes = local
5070        .nodes
5071        .iter()
5072        .map(|row| (row.external_id.as_str(), row))
5073        .collect::<BTreeMap<_, _>>();
5074    let local_edges = local
5075        .edges
5076        .iter()
5077        .map(|row| (row.edge_key.as_str(), row))
5078        .collect::<BTreeMap<_, _>>();
5079    let snapshot_nodes = snapshot
5080        .nodes
5081        .iter()
5082        .map(|row| (row.external_id.as_str(), row))
5083        .collect::<BTreeMap<_, _>>();
5084    let snapshot_edges = snapshot
5085        .edges
5086        .iter()
5087        .map(|row| (row.edge_key.as_str(), row))
5088        .collect::<BTreeMap<_, _>>();
5089
5090    let node_upserts = local
5091        .nodes
5092        .iter()
5093        .filter(|row| {
5094            snapshot_nodes
5095                .get(row.external_id.as_str())
5096                .is_none_or(|snapshot_row| *snapshot_row != *row)
5097        })
5098        .cloned()
5099        .collect::<Vec<_>>();
5100    let edge_upserts = local
5101        .edges
5102        .iter()
5103        .filter(|row| {
5104            snapshot_edges
5105                .get(row.edge_key.as_str())
5106                .is_none_or(|snapshot_row| *snapshot_row != *row)
5107        })
5108        .cloned()
5109        .collect::<Vec<_>>();
5110    let node_tombstones = snapshot
5111        .nodes
5112        .iter()
5113        .filter(|row| !local_nodes.contains_key(row.external_id.as_str()))
5114        .map(|row| row.external_id.clone())
5115        .collect::<Vec<_>>();
5116    let edge_tombstones = snapshot
5117        .edges
5118        .iter()
5119        .filter(|row| !local_edges.contains_key(row.edge_key.as_str()))
5120        .map(|row| row.edge_key.clone())
5121        .collect::<Vec<_>>();
5122
5123    (node_upserts, edge_upserts, node_tombstones, edge_tombstones)
5124}
5125
5126fn push_sync_chunks(
5127    chunks: &mut Vec<ConvexSyncChunk>,
5128    operation: &str,
5129    keys: Vec<String>,
5130    size: usize,
5131) {
5132    if keys.is_empty() {
5133        return;
5134    }
5135    for (idx, chunk) in keys.chunks(size).enumerate() {
5136        chunks.push(ConvexSyncChunk {
5137            operation: operation.to_string(),
5138            chunk: idx + 1,
5139            count: chunk.len(),
5140            keys: chunk.to_vec(),
5141            max_attempts: 3,
5142            retry_policy:
5143                "retry the whole chunk; rows are idempotent by externalId/edgeKey, stop on a repeated partial failure"
5144                    .to_string(),
5145        });
5146    }
5147}
5148
5149pub(crate) fn build_convex_sync_report_with_snapshot(
5150    path: &Path,
5151    scope: Option<&str>,
5152    snapshot: Option<ConvexProjectionRows>,
5153    chunk_size: usize,
5154    dry_run: bool,
5155) -> Result<ConvexSyncReport> {
5156    if chunk_size == 0 {
5157        bail!("--chunk-size must be greater than zero");
5158    }
5159    let root = lint::resolve_project_root_or_canonical_path(path)?;
5160    let (graph, _refresh) = write_traversal_graph_store(&root, path, scope)?;
5161    let graph_db = graph_substrate_db_path(&root, scope);
5162    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
5163    let local = convex_rows_from_graph_store(&store)?;
5164    let freshness = convex_projection_freshness(&local, snapshot.as_ref(), scope);
5165    let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
5166        convex_rows_diff(&local, snapshot.as_ref());
5167
5168    let mut chunks = Vec::new();
5169    push_sync_chunks(
5170        &mut chunks,
5171        "delete_edges",
5172        edge_tombstones.clone(),
5173        chunk_size,
5174    );
5175    push_sync_chunks(
5176        &mut chunks,
5177        "upsert_nodes",
5178        node_upserts
5179            .iter()
5180            .map(|row| row.external_id.clone())
5181            .collect(),
5182        chunk_size,
5183    );
5184    push_sync_chunks(
5185        &mut chunks,
5186        "upsert_edges",
5187        edge_upserts
5188            .iter()
5189            .map(|row| row.edge_key.clone())
5190            .collect(),
5191        chunk_size,
5192    );
5193    push_sync_chunks(
5194        &mut chunks,
5195        "delete_nodes",
5196        node_tombstones.clone(),
5197        chunk_size,
5198    );
5199
5200    let mut diagnostics = vec![
5201        "apply node upserts before edge upserts; apply edge tombstones before node tombstones"
5202            .to_string(),
5203    ];
5204    if dry_run {
5205        diagnostics.push("dry-run only: no Convex network mutation was attempted".to_string());
5206    }
5207    if freshness.fail_closed {
5208        diagnostics.push(
5209            "Convex-backed traverse/context-pack reads must fail closed until this plan is applied"
5210                .to_string(),
5211        );
5212    }
5213
5214    Ok(ConvexSyncReport {
5215        root: root.to_string_lossy().to_string(),
5216        scope: scope.map(str::to_string),
5217        graph_db: graph_db.to_string_lossy().to_string(),
5218        dry_run,
5219        projection_version: GRAPH_PROJECTION_VERSION.to_string(),
5220        projection_hash: convex_projection_hash(&local, scope),
5221        required_indexes: convex_required_indexes(),
5222        node_upserts,
5223        edge_upserts,
5224        node_tombstones,
5225        edge_tombstones,
5226        chunks,
5227        freshness,
5228        transport: None,
5229        receipts: Vec::new(),
5230        diagnostics,
5231        warnings: graph.warnings,
5232    })
5233}
5234
/// Test-only shortcut for building a Convex sync report.
///
/// When `snapshot_path` is provided, its rows are loaded through
/// `load_convex_projection_rows` and a load failure is returned to the caller.
/// The work is then delegated to `build_convex_sync_report_with_snapshot`,
/// always passing `true` as the final argument.
#[cfg(test)]
fn build_convex_sync_report(
    path: &Path,
    scope: Option<&str>,
    snapshot_path: Option<&Path>,
    chunk_size: usize,
) -> Result<ConvexSyncReport> {
    let snapshot = match snapshot_path {
        Some(snapshot_file) => Some(load_convex_projection_rows(snapshot_file)?),
        None => None,
    };
    build_convex_sync_report_with_snapshot(path, scope, snapshot, chunk_size, true)
}
5245
5246pub(crate) fn print_convex_sync_human(report: &ConvexSyncReport, compact: bool) {
5247    if compact {
5248        println!(
5249            "convex-sync nodes:+{} -{} edges:+{} -{} chunks:{} freshness:{}",
5250            report.node_upserts.len(),
5251            report.node_tombstones.len(),
5252            report.edge_upserts.len(),
5253            report.edge_tombstones.len(),
5254            report.chunks.len(),
5255            report.freshness.status
5256        );
5257        return;
5258    }
5259
5260    println!(
5261        "Convex graph sync {}",
5262        if report.dry_run { "dry-run" } else { "apply" }
5263    );
5264    println!("root: {}", report.root);
5265    println!("graph_db: {}", report.graph_db);
5266    println!(
5267        "upserts: {} node(s), {} edge(s)",
5268        report.node_upserts.len(),
5269        report.edge_upserts.len()
5270    );
5271    println!(
5272        "tombstones: {} node(s), {} edge(s)",
5273        report.node_tombstones.len(),
5274        report.edge_tombstones.len()
5275    );
5276    println!("chunks: {}", report.chunks.len());
5277    println!("freshness: {}", report.freshness.status);
5278    if let Some(transport) = &report.transport {
5279        println!(
5280            "transport: endpoint_env={} auth_env={} applied_chunks={}",
5281            transport.endpoint_env, transport.auth_token_env, transport.applied_chunks
5282        );
5283    }
5284    for receipt in &report.receipts {
5285        println!(
5286            "receipt: {} chunk {} attempt {} {}",
5287            receipt.operation, receipt.chunk, receipt.attempt, receipt.status
5288        );
5289    }
5290    for diagnostic in report
5291        .diagnostics
5292        .iter()
5293        .chain(report.freshness.diagnostics.iter())
5294    {
5295        println!("- {}", diagnostic);
5296    }
5297}
5298
/// Borrowed bundle of settings for a Convex sync invocation.
///
/// All fields are private to this module and borrow from the caller for `'a`.
// NOTE(review): field meanings are inferred from their names (e.g. `apply`
// presumably selects a real sync over a dry run, `auth_token_env` presumably
// names an environment variable) — confirm at the construction site.
pub(crate) struct ConvexSyncOptions<'a> {
    path: &'a Path,
    scope: Option<&'a str>,
    snapshot: Option<&'a Path>,
    chunk_size: usize,
    remote_snapshot: bool,
    apply: bool,
    endpoint: Option<&'a str>,
    auth_token_env: &'a str,
}
5309
/// One field entry of the serialized graph-db schema description.
///
/// Used for both `GraphDbSchema::node_fields` and `GraphDbSchema::edge_fields`;
/// every string is `'static`.
#[derive(Serialize)]
struct GraphDbSchemaField {
    name: &'static str,
    value_type: &'static str,
    description: &'static str,
}
5316
/// One operation entry of the serialized graph-db schema description
/// (see `GraphDbSchema::operations`): a command string plus its description.
#[derive(Serialize)]
struct GraphDbSchemaOperation {
    command: &'static str,
    description: &'static str,
}
5322
/// One named, versioned contract entry listed in
/// `GraphDbSchema::contract_versions`.
#[derive(Serialize)]
struct GraphDbSchemaContract {
    name: &'static str,
    version: &'static str,
    description: &'static str,
}
5329
/// Serialized schema description carried by `GraphDbReport::schema`:
/// contract versions, node and edge field lists, and supported operations.
#[derive(Serialize)]
struct GraphDbSchema {
    contract_versions: Vec<GraphDbSchemaContract>,
    node_fields: Vec<GraphDbSchemaField>,
    edge_fields: Vec<GraphDbSchemaField>,
    operations: Vec<GraphDbSchemaOperation>,
}
5337
/// Freshness section embedded in graph-db reports (for example
/// `GraphDbReport::freshness` and `GraphDbOperatorReport::freshness`).
///
/// Round-trips through serde (both `Serialize` and `Deserialize` are derived).
// NOTE(review): `fail_closed` presumably tells consumers to refuse graph reads
// while the projection is not fresh — confirm against the code that sets it.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFreshnessReport {
    status: String,
    fail_closed: bool,
    projection_version: Option<String>,
    content_hash: Option<String>,
    source_watermark: Option<String>,
    diagnostics: Vec<String>,
}
5347
/// Crate-visible readiness verdict attached to graph reports
/// (`GraphDbReport::readiness`, `GraphDbOperatorReport::readiness`).
///
/// Carries a status string, a fail-closed flag, a reason, free-form
/// diagnostics, and a list of suggested follow-up commands.
#[derive(Clone, Debug, Serialize)]
pub(crate) struct GraphEffectivenessReadiness {
    pub(crate) status: String,
    pub(crate) fail_closed: bool,
    pub(crate) reason: String,
    pub(crate) diagnostics: Vec<String>,
    pub(crate) next_commands: Vec<String>,
}
5356
/// A single `key`/`value` property filter, as carried by
/// `GraphDbQueryOptions::property_filters` and echoed in
/// `GraphDbPageReport::property_filters`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPropertyFilter {
    key: String,
    value: String,
}
5362
/// Optional paging and filtering inputs for a graph-db query.
///
/// `Default` yields no cursor, no limit, and an empty filter list.
#[derive(Clone, Debug, Default)]
struct GraphDbQueryOptions {
    cursor: Option<String>,
    limit: Option<usize>,
    property_filters: Vec<GraphDbPropertyFilter>,
}
5369
/// Paging section of a graph-db report (`GraphDbReport::page`).
///
/// `cursor`, `limit`, and `next_cursor` are omitted from the serialized output
/// when `None`; `diagnostics` is omitted when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPageReport {
    #[serde(skip_serializing_if = "Option::is_none")]
    cursor: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    limit: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
    returned_nodes: usize,
    returned_edges: usize,
    truncated: bool,
    property_filters: Vec<GraphDbPropertyFilter>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    diagnostics: Vec<String>,
}
5385
/// Local alias for `resolution::RankedNeighbor`, the element type of
/// `GraphDbReport::ranked_neighbors`.
type GraphDbRankedNeighbor = resolution::RankedNeighbor;
5387
/// Per-community truncation counts, optionally attached to
/// `GraphDbRankedNeighborhoodComparison::community_truncation_summary`.
// NOTE(review): presumably `fully_kept + partially_pruned + fully_pruned`
// equals `total_communities` — confirm against the code that fills this in.
#[derive(Clone, Debug, Serialize)]
struct CommunityTruncationSummary {
    total_communities: usize,
    fully_kept: usize,
    partially_pruned: usize,
    fully_pruned: usize,
    pruned_community_kinds: Vec<String>,
    pruned_community_top_labels: Vec<String>,
}
5397
/// Metrics comparing a ranked neighborhood traversal with its unranked
/// counterpart (`GraphDbReport::ranked_neighborhood_comparison`).
///
/// `community_truncation_summary` is omitted from the serialized output when
/// `None`.
// NOTE(review): the `*_pct` fields are presumably percentages on a 0–100
// scale and `latency_micros` a duration in microseconds — confirm at the
// producer.
#[derive(Clone, Debug, Serialize)]
struct GraphDbRankedNeighborhoodComparison {
    traversal_nodes: usize,
    traversal_edges: usize,
    pruned_count: usize,
    total_discovered: usize,
    latency_micros: u128,
    overlap_with_unranked_pct: f64,
    useful_hit_density_ranked: f64,
    useful_hit_density_unranked: f64,
    duplicate_name_count_ranked: usize,
    duplicate_name_count_unranked: usize,
    handle_coverage_ranked_pct: f64,
    handle_coverage_unranked_pct: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    community_truncation_summary: Option<CommunityTruncationSummary>,
    diagnostics: Vec<String>,
}
5416
/// One entry of `GraphDbOutputBudgetReport::dropped_by_budget`: what was
/// dropped (`item`, `kind`), how many (`dropped`), and the stated `reason`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
struct GraphDbDroppedByBudget {
    item: String,
    kind: String,
    dropped: usize,
    reason: String,
}
5424
/// Output-budget section of a graph-db report (`GraphDbReport::output_budget`).
///
/// Records the token ceiling and estimate, selected versus candidate node and
/// edge counts, the items dropped to stay within budget, and diagnostics.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
struct GraphDbOutputBudgetReport {
    max_tokens: usize,
    estimated_tokens: usize,
    selected_nodes: usize,
    selected_edges: usize,
    candidate_nodes: usize,
    candidate_edges: usize,
    dropped_by_budget: Vec<GraphDbDroppedByBudget>,
    diagnostics: Vec<String>,
}
5436
/// Knowledge-retrieval section of a graph-db report
/// (`GraphDbReport::knowledge_retrieval`).
///
/// Echoes the retrieval parameters (mode, query, seed settings, depth, limit)
/// together with result counts, a truncation flag, and descriptive strings.
// NOTE(review): `freshness_boundary` and `privacy_boundary` look like
// free-text policy statements — confirm their expected content at the producer.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbKnowledgeRetrieval {
    mode: String,
    query: String,
    seed_kind: String,
    seed_limit: usize,
    seed_count: usize,
    depth: usize,
    limit: usize,
    node_count: usize,
    edge_count: usize,
    truncated: bool,
    traversal: String,
    freshness_boundary: String,
    privacy_boundary: String,
    diagnostics: Vec<String>,
}
5454
/// In-memory subgraph result: full node and edge records plus a truncation
/// flag and diagnostics. Not serializable (no serde derives).
// NOTE(review): the name suggests the subgraph is grown from semantic-search
// seeds — confirm at the function that builds it.
struct GraphDbSemanticSeededSubgraph {
    nodes: Vec<SubstrateGraphNode>,
    edges: Vec<SubstrateGraphEdge>,
    truncated: bool,
    diagnostics: Vec<String>,
}
5461
/// Local alias for `resolution::NeighborhoodRankingGate`, the type of
/// `GraphDbReport::neighborhood_ranking_gate`.
type GraphDbNeighborhoodRankingGate = resolution::NeighborhoodRankingGate;
5463
/// Top-level serialized result of a graph-db query.
///
/// `root`, `backend`, `query`, and `freshness` are always emitted. Every
/// `Option` field is omitted when `None`, and every `Vec` field is omitted
/// when empty, so the serialized output only contains the sections relevant
/// to the query that produced it.
#[derive(Serialize)]
struct GraphDbReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    backend: String,
    query: String,
    freshness: GraphDbFreshnessReport,
    #[serde(skip_serializing_if = "Option::is_none")]
    readiness: Option<GraphEffectivenessReadiness>,
    #[serde(skip_serializing_if = "Option::is_none")]
    schema: Option<GraphDbSchema>,
    #[serde(skip_serializing_if = "Option::is_none")]
    node: Option<SubstrateTerseGraphNode>,
    #[serde(skip_serializing_if = "Option::is_none")]
    edge: Option<SubstrateTerseGraphEdge>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    nodes: Vec<SubstrateTerseGraphNode>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    edges: Vec<SubstrateTerseGraphEdge>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    ranked_neighbors: Vec<GraphDbRankedNeighbor>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    semantic_related: Vec<SemanticRelatedItem>,
    #[serde(skip_serializing_if = "Option::is_none")]
    neighborhood_ranking_gate: Option<GraphDbNeighborhoodRankingGate>,
    #[serde(skip_serializing_if = "Option::is_none")]
    ranked_neighborhood_comparison: Option<GraphDbRankedNeighborhoodComparison>,
    #[serde(skip_serializing_if = "Option::is_none")]
    knowledge_retrieval: Option<GraphDbKnowledgeRetrieval>,
    #[serde(skip_serializing_if = "Option::is_none")]
    output_budget: Option<GraphDbOutputBudgetReport>,
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<substrate::GraphPath>,
    #[serde(skip_serializing_if = "Option::is_none")]
    page: Option<GraphDbPageReport>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
5503
/// In-memory, read-only graph snapshot used by the backend-eval adapters.
///
/// Built by `from_rows`; mutating `GraphStore` methods on it always fail.
struct ExperimentalReadOnlyGraphStore {
    // Which experimental backend this snapshot stands in for; its name is
    // used in the read-only error messages.
    backend: GraphDbExperimentalBackend,
    // Nodes keyed by node id.
    nodes: BTreeMap<String, SubstrateGraphNode>,
    // Edges keyed by `graph_db_edge_key`.
    edges: BTreeMap<String, SubstrateGraphEdge>,
    // Secondary index: node kind -> sorted node ids.
    node_ids_by_kind: BTreeMap<String, Vec<String>>,
    // Secondary index: source node id -> keys of its outgoing edges.
    outgoing_edge_keys_by_from: BTreeMap<String, Vec<String>>,
}
5511
5512impl ExperimentalReadOnlyGraphStore {
5513    fn from_rows(backend: GraphDbExperimentalBackend, rows: &ConvexProjectionRows) -> Result<Self> {
5514        validate_convex_projection_rows(rows)?;
5515        let nodes = rows
5516            .nodes
5517            .iter()
5518            .map(|row| {
5519                let node = SubstrateGraphNode {
5520                    id: row.external_id.clone(),
5521                    kind: row.kind.clone(),
5522                    label: row.label.clone(),
5523                    properties: row.properties.clone(),
5524                    provenance: row.provenance.clone(),
5525                    freshness: row.freshness.clone(),
5526                };
5527                (node.id.clone(), node)
5528            })
5529            .collect::<BTreeMap<_, _>>();
5530        let edges = rows
5531            .edges
5532            .iter()
5533            .map(|row| {
5534                let edge = SubstrateGraphEdge {
5535                    id: row.edge_key.clone(),
5536                    from_id: row.from_external_id.clone(),
5537                    to_id: row.to_external_id.clone(),
5538                    kind: row.kind.clone(),
5539                    properties: row.properties.clone(),
5540                    provenance: row.provenance.clone(),
5541                    freshness: row.freshness.clone(),
5542                };
5543                (graph_db_edge_key(&edge), edge)
5544            })
5545            .collect::<BTreeMap<_, _>>();
5546        let mut node_ids_by_kind = BTreeMap::<String, Vec<String>>::new();
5547        for node in nodes.values() {
5548            node_ids_by_kind
5549                .entry(node.kind.clone())
5550                .or_default()
5551                .push(node.id.clone());
5552        }
5553        for ids in node_ids_by_kind.values_mut() {
5554            ids.sort();
5555        }
5556        let mut outgoing_edge_keys_by_from = BTreeMap::<String, Vec<String>>::new();
5557        for edge in edges.values() {
5558            outgoing_edge_keys_by_from
5559                .entry(edge.from_id.clone())
5560                .or_default()
5561                .push(graph_db_edge_key(edge));
5562        }
5563        for edge_keys in outgoing_edge_keys_by_from.values_mut() {
5564            edge_keys.sort_by(|left_key, right_key| {
5565                let left = &edges[left_key];
5566                let right = &edges[right_key];
5567                left.to_id
5568                    .cmp(&right.to_id)
5569                    .then(left.kind.cmp(&right.kind))
5570                    .then(left_key.cmp(right_key))
5571            });
5572        }
5573        Ok(Self {
5574            backend,
5575            nodes,
5576            edges,
5577            node_ids_by_kind,
5578            outgoing_edge_keys_by_from,
5579        })
5580    }
5581}
5582
5583impl GraphStore for ExperimentalReadOnlyGraphStore {
5584    fn upsert_node(&self, _node: &SubstrateGraphNode) -> Result<()> {
5585        bail!("{} backend-eval adapter is read-only", self.backend.name())
5586    }
5587
5588    fn upsert_edge(&self, _edge: &SubstrateGraphEdge) -> Result<()> {
5589        bail!("{} backend-eval adapter is read-only", self.backend.name())
5590    }
5591
5592    fn delete_node(&self, _id: &str) -> Result<usize> {
5593        bail!("{} backend-eval adapter is read-only", self.backend.name())
5594    }
5595
5596    fn delete_edge(&self, _from_id: &str, _to_id: &str, _kind: &str) -> Result<usize> {
5597        bail!("{} backend-eval adapter is read-only", self.backend.name())
5598    }
5599
5600    fn node(&self, id: &str) -> Result<Option<SubstrateGraphNode>> {
5601        Ok(self.nodes.get(id).cloned())
5602    }
5603
5604    fn all_nodes(&self) -> Result<Vec<SubstrateGraphNode>> {
5605        Ok(self.nodes.values().cloned().collect())
5606    }
5607
5608    fn all_edges(&self) -> Result<Vec<SubstrateGraphEdge>> {
5609        let mut edges = self.edges.values().cloned().collect::<Vec<_>>();
5610        edges.sort_by(|left, right| {
5611            left.from_id
5612                .cmp(&right.from_id)
5613                .then(left.kind.cmp(&right.kind))
5614                .then(left.to_id.cmp(&right.to_id))
5615        });
5616        Ok(edges)
5617    }
5618
5619    fn graph_counts(&self) -> Result<(usize, usize)> {
5620        Ok((self.nodes.len(), self.edges.len()))
5621    }
5622
5623    fn sample_edge(&self, kind: Option<&str>) -> Result<Option<SubstrateGraphEdge>> {
5624        let mut edges = self
5625            .edges
5626            .values()
5627            .filter(|edge| edge.from_id != edge.to_id)
5628            .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
5629            .cloned()
5630            .collect::<Vec<_>>();
5631        edges.sort_by(|left, right| {
5632            left.from_id
5633                .cmp(&right.from_id)
5634                .then(left.kind.cmp(&right.kind))
5635                .then(left.to_id.cmp(&right.to_id))
5636        });
5637        Ok(edges.into_iter().next())
5638    }
5639
5640    fn sample_edge_with_property(
5641        &self,
5642    ) -> Result<Option<(SubstrateGraphEdge, GraphPropertyFilter)>> {
5643        Ok(self
5644            .edges
5645            .values()
5646            .filter(|edge| edge.from_id != edge.to_id)
5647            .filter_map(|edge| {
5648                edge.properties.iter().next().map(|(key, value)| {
5649                    (
5650                        edge,
5651                        GraphPropertyFilter {
5652                            key: key.clone(),
5653                            value: value.clone(),
5654                        },
5655                    )
5656                })
5657            })
5658            .min_by(|(left_edge, left_filter), (right_edge, right_filter)| {
5659                left_filter
5660                    .key
5661                    .cmp(&right_filter.key)
5662                    .then(left_filter.value.cmp(&right_filter.value))
5663                    .then_with(|| graph_db_edge_key(left_edge).cmp(&graph_db_edge_key(right_edge)))
5664            })
5665            .map(|(edge, filter)| (edge.clone(), filter)))
5666    }
5667
5668    fn nodes_by_kind(&self, kind: &str) -> Result<Vec<SubstrateGraphNode>> {
5669        Ok(self
5670            .node_ids_by_kind
5671            .get(kind)
5672            .into_iter()
5673            .flatten()
5674            .filter_map(|id| self.nodes.get(id).cloned())
5675            .collect())
5676    }
5677
5678    fn outgoing_edges(&self, from_id: &str, kind: Option<&str>) -> Result<Vec<SubstrateGraphEdge>> {
5679        Ok(self
5680            .outgoing_edge_keys_by_from
5681            .get(from_id)
5682            .into_iter()
5683            .flatten()
5684            .filter_map(|key| self.edges.get(key))
5685            .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
5686            .cloned()
5687            .collect())
5688    }
5689
5690    fn edges_between_nodes(&self, node_ids: &BTreeSet<String>) -> Result<Vec<SubstrateGraphEdge>> {
5691        Ok(self
5692            .edges
5693            .values()
5694            .filter(|edge| node_ids.contains(&edge.from_id) && node_ids.contains(&edge.to_id))
5695            .cloned()
5696            .collect())
5697    }
5698
5699    fn shortest_path(
5700        &self,
5701        from_id: &str,
5702        to_id: &str,
5703        kind: Option<&str>,
5704    ) -> Result<Option<substrate::GraphPath>> {
5705        if from_id == to_id {
5706            return Ok(Some(substrate::GraphPath {
5707                nodes: vec![from_id.to_string()],
5708                hops: 0,
5709            }));
5710        }
5711
5712        let mut queue = VecDeque::new();
5713        let mut parent = BTreeMap::<String, String>::new();
5714        parent.insert(from_id.to_string(), String::new());
5715        queue.push_back(from_id.to_string());
5716
5717        while let Some(current) = queue.pop_front() {
5718            for edge in self.outgoing_edges(&current, kind)? {
5719                if parent.contains_key(&edge.to_id) {
5720                    continue;
5721                }
5722                parent.insert(edge.to_id.clone(), current.clone());
5723                if edge.to_id == to_id {
5724                    let mut nodes = vec![to_id.to_string()];
5725                    let mut cursor = to_id;
5726                    while let Some(previous) = parent.get(cursor) {
5727                        if previous.is_empty() {
5728                            break;
5729                        }
5730                        nodes.push(previous.clone());
5731                        cursor = previous;
5732                    }
5733                    nodes.reverse();
5734                    return Ok(Some(substrate::GraphPath {
5735                        hops: nodes.len().saturating_sub(1),
5736                        nodes,
5737                    }));
5738                }
5739                queue.push_back(edge.to_id);
5740            }
5741        }
5742
5743        Ok(None)
5744    }
5745
5746    fn reachable_nodes_by_kinds(
5747        &self,
5748        from_id: &str,
5749        kinds: &[&str],
5750        depth: usize,
5751        limit: usize,
5752    ) -> Result<BTreeMap<String, Vec<(SubstrateGraphNode, substrate::GraphPath)>>> {
5753        let requested = kinds.iter().copied().collect::<BTreeSet<_>>();
5754        let mut rows = requested
5755            .iter()
5756            .map(|kind| {
5757                (
5758                    (*kind).to_string(),
5759                    BTreeMap::<String, (SubstrateGraphNode, substrate::GraphPath)>::new(),
5760                )
5761            })
5762            .collect::<BTreeMap<_, _>>();
5763        if requested.is_empty() {
5764            return Ok(BTreeMap::new());
5765        }
5766
5767        let mut seen = BTreeSet::from([from_id.to_string()]);
5768        let mut queue = VecDeque::from([(from_id.to_string(), vec![from_id.to_string()])]);
5769        while let Some((current, path)) = queue.pop_front() {
5770            let current_depth = path.len().saturating_sub(1);
5771            if current_depth >= depth {
5772                continue;
5773            }
5774            for edge in self.outgoing_edges(&current, None)? {
5775                if !seen.insert(edge.to_id.clone()) {
5776                    continue;
5777                }
5778                let Some(node) = self.nodes.get(&edge.to_id).cloned() else {
5779                    continue;
5780                };
5781                let mut next_path = path.clone();
5782                next_path.push(edge.to_id.clone());
5783                let graph_path = substrate::GraphPath {
5784                    hops: next_path.len().saturating_sub(1),
5785                    nodes: next_path.clone(),
5786                };
5787                if requested.contains(node.kind.as_str()) {
5788                    rows.entry(node.kind.clone())
5789                        .or_default()
5790                        .entry(node.id.clone())
5791                        .or_insert((node.clone(), graph_path));
5792                }
5793                queue.push_back((edge.to_id, next_path));
5794            }
5795        }
5796
5797        Ok(rows
5798            .into_iter()
5799            .map(|(kind, values)| {
5800                let mut values = values.into_values().collect::<Vec<_>>();
5801                values.sort_by(|(left_node, left_path), (right_node, right_path)| {
5802                    left_path
5803                        .hops
5804                        .cmp(&right_path.hops)
5805                        .then(left_node.label.cmp(&right_node.label))
5806                        .then(left_node.id.cmp(&right_node.id))
5807                });
5808                if limit > 0 && values.len() > limit {
5809                    values.truncate(limit);
5810                }
5811                (kind, values)
5812            })
5813            .collect())
5814    }
5815}
5816
// Backend-eval tuning values and cache-format version tags.
// NOTE(review): the roles described below are inferred from the constant
// names only; confirm each against its use sites.

// Presumably the default hop cap for backend-eval path probes.
pub(crate) const GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS: usize = 64;
// Presumably the larger hop budgets tried beyond the default cap.
pub(crate) const GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS: [usize; 3] = [128, 256, 512];
// Presumably the hop budget for a direct (single-edge) path probe.
pub(crate) const GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS: usize = 1;
// Presumably the tolerated regression, in percent, for the performance gate.
const GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT: f64 = 10.0;
// Presumably the row count that normalized metrics are expressed per.
pub(crate) const GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT: f64 = 1000.0;
// Presumably the minimum number of repeated samples the gate requires.
const GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS: usize = 3;
// Version tags; presumably compared against cached data to invalidate
// entries written by an older format.
const CONFLICT_MATRIX_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-prep-v1";
const CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-graph-prep-v1";
const GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION: &str = "backend-eval-full-projection-v5";
5826
/// One named phase timing: phase name, duration in microseconds, and a
/// free-text detail string.
///
/// Used by `GraphDbBackendEvalReport::phase_timings` and
/// `GraphDbRefreshSummary::phase_timings`.
#[derive(Clone, Serialize, Deserialize)]
pub(crate) struct GraphDbBackendEvalPhaseTiming {
    name: String,
    duration_micros: u128,
    detail: String,
}
5833
/// Serializable cache record holding a full `GraphProjection` together with
/// the version tag, key, and source watermark it was stored under, plus any
/// warnings.
// NOTE(review): `version` is presumably checked against
// `GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION` on load — confirm in
// the cache reader.
#[derive(Serialize, Deserialize)]
struct GraphDbBackendEvalFullProjectionCache {
    version: String,
    key: String,
    source_watermark: String,
    projection: GraphProjection,
    warnings: Vec<String>,
}
5842
/// Counters describing one use of the full-projection cache.
///
/// `Default` yields a miss (`hit == false`) with all counters at zero.
#[derive(Clone, Default)]
struct GraphDbBackendEvalFullProjectionCacheStats {
    hit: bool,
    disk_bytes: u64,
    json_bytes: u64,
    pruned_files: usize,
    pruned_bytes: u64,
}
5851
/// Serializable per-file row: path, size in bytes, and content hash.
// NOTE(review): presumably these rows are serialized and hashed to form the
// raw source watermark — confirm at the producer.
#[derive(Serialize)]
struct GraphDbBackendEvalRawSourceWatermarkRow {
    path: String,
    bytes: u64,
    content_hash: String,
}
5858
/// A source watermark value paired with a human-readable detail string.
#[derive(Clone)]
struct GraphDbBackendEvalFullProjectionSourceWatermark {
    value: String,
    detail: String,
}
5864
/// Serialized configuration echoed in `GraphDbBackendEvalReport::config`:
/// synthetic dataset sizes, traversal depth and limits, path hop budgets and
/// policy strings, and full-projection settings.
// NOTE(review): `normalization_row_unit` is a `usize` here while
// `GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT` is an `f64` — confirm the
// conversion at the construction site.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalConfig {
    high_degree_nodes: usize,
    high_degree_fanout: usize,
    deep_chain_nodes: usize,
    deep_chain_fanout: usize,
    depth: usize,
    limit: usize,
    impact_limit: usize,
    path_max_hops: usize,
    path_direct_hop_budget: usize,
    path_deep_chain_hop_budget: usize,
    path_extended_hop_budgets: Vec<usize>,
    path_hop_policy: String,
    path_probe_strategy: String,
    path_query_plan_checks: Vec<String>,
    full_projection_enabled: bool,
    full_projection_profile: String,
    normalization_row_unit: usize,
}
5885
/// An operation name paired with a JSON value.
// NOTE(review): presumably the JSON value is a canonical result signature
// compared across backends for parity — confirm at the comparison site.
#[derive(Clone)]
struct GraphDbBackendEvalSignature {
    operation: String,
    value: serde_json::Value,
}
5891
/// Serialized outcome of one evaluated operation on one backend.
///
/// `rows` and `error` are omitted from the output when `None`.
#[derive(Serialize)]
struct GraphDbBackendEvalOperation {
    name: String,
    supported: bool,
    status: String,
    duration_micros: u128,
    #[serde(skip_serializing_if = "Option::is_none")]
    rows: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
}
5903
/// Serialized parity verdict for a backend: whether its results match the
/// SQLite baseline, plus diagnostics.
#[derive(Serialize)]
struct GraphDbBackendEvalParity {
    matches_sqlite: bool,
    diagnostics: Vec<String>,
}
5909
/// Serialized evaluation result for a single backend within a dataset:
/// identity strings, per-operation outcomes, total time in microseconds,
/// the parity verdict, and descriptive lock/portability notes.
#[derive(Serialize)]
struct GraphDbBackendEvalBackendReport {
    backend: String,
    adapter: String,
    read_only: bool,
    projection_load: String,
    operations: Vec<GraphDbBackendEvalOperation>,
    total_micros: u128,
    parity: GraphDbBackendEvalParity,
    lock_behavior: String,
    install_portability: String,
}
5922
/// Serialized evaluation result for one dataset: its name, target count,
/// node and edge counts, and one report per evaluated backend.
#[derive(Serialize)]
struct GraphDbBackendEvalDataset {
    name: String,
    target_count: usize,
    nodes: usize,
    edges: usize,
    backends: Vec<GraphDbBackendEvalBackendReport>,
}
5931
/// Serialized promotion decision for one backend: the decision string, the
/// reasons behind it, and the `GraphDbBackendPromotionGate` it was judged
/// against.
#[derive(Serialize)]
struct GraphDbBackendPromotionDecision {
    backend: String,
    decision: String,
    reasons: Vec<String>,
    gate: GraphDbBackendPromotionGate,
}
5939
/// Serialized description of the backend-eval performance gate
/// (`GraphDbBackendEvalReport::performance_gate`): profiles, thresholds,
/// required metrics, helper commands, and the nested hop-cap and
/// adapter-spike gates.
#[derive(Serialize)]
struct GraphDbBackendEvalPerformanceGate {
    baseline_fixture: String,
    ci_profile: String,
    opt_in_real_profile: String,
    full_projection_cache_hit_gate: String,
    allowed_regression_percent: f64,
    minimum_sample_runs: usize,
    normalized_metric_unit: String,
    required_metrics: Vec<String>,
    digest_command: String,
    repeated_sample_command: String,
    hop_cap_promotion: GraphDbHopCapPromotionGate,
    backend_adapter_spike: GraphDbBackendAdapterSpikeGate,
}
5955
/// Serialized gate describing what is required before the default path hop
/// cap may be raised: current default, candidate tiers, required backend,
/// workloads and metrics, thresholds, and the decision rule text.
#[derive(Serialize)]
struct GraphDbHopCapPromotionGate {
    status: String,
    current_default_hops: usize,
    candidate_hop_tiers: Vec<usize>,
    required_backend: String,
    required_workloads: Vec<String>,
    required_metrics: Vec<String>,
    allowed_regression_percent: f64,
    minimum_sample_runs: usize,
    decision_rule: String,
}
5968
/// Serialized gate for backend adapter spikes: status, candidate backends,
/// required workloads and checks, the decision rule, and the evidence plan.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeGate {
    status: String,
    candidate_backends: Vec<GraphDbBackendAdapterSpikeCandidate>,
    required_workloads: Vec<String>,
    required_checks: Vec<String>,
    decision_rule: String,
    evidence_plan: String,
}
5978
/// One candidate backend listed in
/// `GraphDbBackendAdapterSpikeGate::candidate_backends`, described entirely
/// by strings.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeCandidate {
    backend: String,
    adapter_label: String,
    projection_load: String,
    lock_behavior: String,
    install_portability: String,
}
5987
/// Top-level serialized backend-eval report: configuration, phase timings,
/// per-dataset results, promotion decisions, the performance gate, a metric
/// map, and warnings.
///
/// `scope` is omitted from the output when `None`. `metrics` is a `BTreeMap`,
/// so its entries serialize in key order.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    label: String,
    baseline_backend: String,
    candidates: Vec<String>,
    targets: Vec<String>,
    config: GraphDbBackendEvalConfig,
    phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
    datasets: Vec<GraphDbBackendEvalDataset>,
    promotion: Vec<GraphDbBackendPromotionDecision>,
    performance_gate: GraphDbBackendEvalPerformanceGate,
    metrics: BTreeMap<String, f64>,
    metric_digest_command: String,
    warnings: Vec<String>,
}
6006
/// One named check inside `GraphDbDoctorReport::checks`: its status,
/// fail-closed flag, diagnostics, and suggested repair commands.
#[derive(Clone, Debug, Serialize)]
struct GraphDbDoctorCheck {
    name: String,
    status: String,
    fail_closed: bool,
    diagnostics: Vec<String>,
    repair_commands: Vec<String>,
}
6015
/// Serialized graph-db doctor report: overall status, the individual checks,
/// and the repair commands to run.
///
/// `scope` and `convex_snapshot` are omitted when `None`; `required_indexes`
/// is omitted when empty.
#[derive(Serialize)]
pub(crate) struct GraphDbDoctorReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    backend: String,
    graph_db: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    convex_snapshot: Option<String>,
    status: String,
    fail_closed: bool,
    checks: Vec<GraphDbDoctorCheck>,
    repair_commands: Vec<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    required_indexes: Vec<ConvexRequiredIndex>,
}
6032
/// Count-only summary embedded in `GraphDbDriftReport::summary`.
// NOTE(review): the first six counts presumably equal the lengths of the
// same-named lists on `GraphDbDriftReport` — confirm at the producer.
#[derive(Serialize)]
struct GraphDbDriftSummary {
    node_upserts: usize,
    edge_upserts: usize,
    node_tombstones: usize,
    edge_tombstones: usize,
    stale_nodes: usize,
    stale_edges: usize,
    stale_projection_metadata: usize,
    duplicate_failures: usize,
    orphan_failures: usize,
    missing_required_indexes: usize,
}
6046
/// Serialized drift report between the local graph database and a Convex
/// snapshot: status, hashes, a count summary, the id lists behind the
/// counts, diagnostics, and follow-up commands.
///
/// `scope` is omitted when `None`; `warnings` is omitted when empty.
#[derive(Serialize)]
struct GraphDbDriftReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    graph_db: String,
    convex_snapshot: String,
    status: String,
    graph_reads_allowed: bool,
    projection_version: String,
    local_hash: Option<String>,
    snapshot_hash: Option<String>,
    summary: GraphDbDriftSummary,
    node_upserts: Vec<String>,
    edge_upserts: Vec<String>,
    node_tombstones: Vec<String>,
    edge_tombstones: Vec<String>,
    stale_nodes: Vec<String>,
    stale_edges: Vec<String>,
    diagnostics: Vec<String>,
    next_commands: Vec<String>,
    required_indexes: Vec<ConvexRequiredIndex>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6072
/// Tombstone counts for nodes and edges, plus a total.
// NOTE(review): `total` is presumably `nodes + edges` — confirm at the producer.
#[derive(Clone, Serialize)]
struct GraphDbTombstoneCounts {
    nodes: usize,
    edges: usize,
    total: usize,
}
6079
/// Row and size counts reported by operator commands: node and edge counts,
/// tombstone counts, and optional file and freelist sizes in bytes.
///
/// `file_size_bytes` and `freelist_bytes` are omitted when `None`.
#[derive(Clone, Serialize)]
struct GraphDbOperatorCounts {
    nodes: usize,
    edges: usize,
    tombstones: GraphDbTombstoneCounts,
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    freelist_bytes: Option<u64>,
}
6090
/// Serialized compaction assessment: status, row and size figures, two
/// safety flags, recommendations, and the supporting proof lines.
///
/// Unlike `GraphDbOperatorCounts`, the optional size fields carry no
/// `skip_serializing_if`, so they are emitted even when `None`.
#[derive(Clone, Serialize)]
struct GraphDbCompactionPolicy {
    status: String,
    tombstone_scan_rows: usize,
    live_rows: usize,
    file_size_bytes: Option<u64>,
    freelist_bytes: Option<u64>,
    safe_to_prune_tombstones: bool,
    requires_convex_reconciliation: bool,
    recommendations: Vec<String>,
    proof: Vec<String>,
}
6103
/// Serialized summary of one graph-db refresh: scope, projection version,
/// mode, and counters for tombstoned, upserted, unchanged, deleted, and
/// pruned rows and properties.
///
/// `source_watermark` and the two file-size fields are omitted when `None`;
/// `phase_timings` is omitted when empty.
#[derive(Serialize)]
pub(crate) struct GraphDbRefreshSummary {
    scope: String,
    projection_version: String,
    mode: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_watermark: Option<String>,
    tombstoned_nodes: usize,
    tombstoned_edges: usize,
    upserted_nodes: usize,
    upserted_edges: usize,
    unchanged_nodes: usize,
    unchanged_edges: usize,
    upserted_properties: usize,
    unchanged_properties: usize,
    deleted_properties: usize,
    deleted_nodes: usize,
    deleted_edges: usize,
    pruned_tombstones: usize,
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes_before: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    file_size_bytes_after: Option<u64>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
}
6130
/// Serialized result of a graph-db operator command: the operation and its
/// status, freshness and readiness sections, counts, compaction assessment,
/// and follow-up commands.
///
/// `scope`, `refresh`, and `recovery` are omitted when `None`; `warnings` is
/// omitted when empty.
#[derive(Serialize)]
struct GraphDbOperatorReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    graph_db: String,
    operation: String,
    status: String,
    materialized: bool,
    freshness: GraphDbFreshnessReport,
    readiness: GraphEffectivenessReadiness,
    counts: GraphDbOperatorCounts,
    #[serde(skip_serializing_if = "Option::is_none")]
    refresh: Option<GraphDbRefreshSummary>,
    compaction: GraphDbCompactionPolicy,
    #[serde(skip_serializing_if = "Option::is_none")]
    recovery: Option<index::ReadOnlyRecovery>,
    next_commands: Vec<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6152
/// Serializable before/after report for a graph-db compaction run.
///
/// `scope` is omitted from the serialized output when `None`; `warnings` is
/// omitted when empty.
#[derive(Serialize)]
pub(crate) struct GraphDbCompactionReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    graph_db: String,
    applied: bool,
    pruned_tombstones: usize,
    // Row/size counts and the derived policy, captured on both sides of the run.
    counts_before: GraphDbOperatorCounts,
    counts_after: GraphDbOperatorCounts,
    compaction_before: GraphDbCompactionPolicy,
    compaction_after: GraphDbCompactionPolicy,
    // Signed; presumably so a file that grew can be reported as a negative
    // value — TODO confirm against the code that fills this in.
    reclaimed_bytes: i64,
    next_commands: Vec<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6170
/// One path entry of an evidence report (see the `shortest_paths` field of
/// `GraphDbEvidenceReport`).
///
/// `path` and `expand` are omitted from the serialized output when `None`.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidencePath {
    to: String,
    kind: String,
    label: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    path: Option<substrate::GraphPath>,
    // NOTE(review): looks like a follow-up command string for expanding this
    // entry — confirm against the producer.
    #[serde(skip_serializing_if = "Option::is_none")]
    expand: Option<String>,
}
6181
/// Fixture-coverage record embedded in `GraphDbEvidenceReport`.
///
/// NOTE(review): presumably names the test and fixture that exercise the
/// evidence contract plus the assertions they make — confirm against the
/// producer.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFixtureCoverage {
    test: String,
    fixture: String,
    assertions: Vec<String>,
}
6188
/// Serializable (and deserializable) evidence packet for a single target.
///
/// Serialization: `scope`, `projection_hash`, `output_budget`, and
/// `next_cursor` are omitted when `None`; `warnings` is omitted when empty.
/// Deserialization: `truncated` falls back to `false` and `warnings` to an
/// empty list when the key is absent.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidenceReport {
    root: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    backend: String,
    contract_version: String,
    target: String,
    packet_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    projection_hash: Option<String>,
    freshness: GraphDbFreshnessReport,
    // The target node followed by the node groups reported for it.
    target_node: SubstrateTerseGraphNode,
    worker_context: Vec<SubstrateTerseGraphNode>,
    source_handles: Vec<SubstrateTerseGraphNode>,
    worker_results: Vec<SubstrateTerseGraphNode>,
    semantic_related: Vec<SubstrateTerseGraphNode>,
    shortest_paths: Vec<GraphDbEvidencePath>,
    #[serde(skip_serializing_if = "Option::is_none")]
    output_budget: Option<GraphDbOutputBudgetReport>,
    // NOTE(review): `truncated`/`next_cursor` look like pagination state —
    // confirm against the producer.
    #[serde(default)]
    truncated: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
    // Suggested command lines, grouped by purpose.
    next_commands: Vec<String>,
    replay_commands: Vec<String>,
    repair_commands: Vec<String>,
    fixture_coverage: GraphDbFixtureCoverage,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
6220
/// Bundle of inputs for an evidence query, generic over the `GraphStore`
/// implementation `S` that is read through `store`.
///
/// Everything except `freshness` and `warnings` is borrowed for `'a`; those
/// two are owned by the bundle.
pub(crate) struct GraphDbEvidenceInput<'a, S: GraphStore> {
    root: &'a Path,
    scope: Option<&'a str>,
    backend: &'a str,
    target: &'a str,
    // NOTE(review): presumably traversal depth, result limit, and a resume
    // cursor — confirm against the consumer of this struct.
    depth: usize,
    limit: usize,
    cursor: Option<&'a str>,
    store: &'a S,
    freshness: GraphDbFreshnessReport,
    warnings: Vec<String>,
}
6233
6234impl GraphDbDoctorReport {
6235    fn new(
6236        root: &Path,
6237        scope: Option<&str>,
6238        backend: &str,
6239        graph_db: &Path,
6240        convex_snapshot: Option<&Path>,
6241    ) -> Self {
6242        Self {
6243            root: root.to_string_lossy().to_string(),
6244            scope: scope.map(str::to_string),
6245            backend: backend.to_string(),
6246            graph_db: graph_db.to_string_lossy().to_string(),
6247            convex_snapshot: convex_snapshot.map(|path| path.to_string_lossy().to_string()),
6248            status: "ok".to_string(),
6249            fail_closed: false,
6250            checks: Vec::new(),
6251            repair_commands: Vec::new(),
6252            required_indexes: Vec::new(),
6253        }
6254    }
6255
6256    fn push_check(&mut self, check: GraphDbDoctorCheck) {
6257        self.checks.push(check);
6258    }
6259
6260    fn finalize(&mut self) {
6261        self.fail_closed = self.checks.iter().any(|check| check.fail_closed);
6262        self.status = if self.fail_closed {
6263            "fail_closed"
6264        } else {
6265            "ok"
6266        }
6267        .to_string();
6268        let mut commands = BTreeSet::new();
6269        for check in &self.checks {
6270            commands.extend(check.repair_commands.iter().cloned());
6271        }
6272        self.repair_commands = commands.into_iter().collect();
6273    }
6274
6275    fn summary(&self) -> String {
6276        self.checks
6277            .iter()
6278            .filter(|check| check.fail_closed)
6279            .flat_map(|check| check.diagnostics.iter())
6280            .take(3)
6281            .cloned()
6282            .collect::<Vec<_>>()
6283            .join("; ")
6284    }
6285}
6286
6287fn graph_db_doctor_check(
6288    name: impl Into<String>,
6289    diagnostics: Vec<String>,
6290    repair_commands: Vec<String>,
6291) -> GraphDbDoctorCheck {
6292    let fail_closed = !diagnostics.is_empty();
6293    GraphDbDoctorCheck {
6294        name: name.into(),
6295        status: if fail_closed { "fail_closed" } else { "ok" }.to_string(),
6296        fail_closed,
6297        diagnostics,
6298        repair_commands: if fail_closed {
6299            repair_commands
6300        } else {
6301            Vec::new()
6302        },
6303    }
6304}
6305
6306pub(crate) fn graph_db_scope_arg(scope: Option<&str>) -> String {
6307    scope
6308        .map(|scope| format!(" --scope {}", shell_quote(scope)))
6309        .unwrap_or_default()
6310}
6311
6312fn graph_db_refresh_command(root: &Path, scope: Option<&str>) -> String {
6313    format!(
6314        "tsift graph-db --path {}{} refresh --json",
6315        shell_quote(root.to_string_lossy().as_ref()),
6316        graph_db_scope_arg(scope)
6317    )
6318}
6319
/// Returns the command line used to rebuild graph.db for `root`/`scope`.
///
/// This delegates unchanged to `graph_db_refresh_command`, so the rebuild
/// command is exactly the refresh command.
fn graph_db_rebuild_command(root: &Path, scope: Option<&str>) -> String {
    graph_db_refresh_command(root, scope)
}
6323
6324fn graph_db_backup_rebuild_command(root: &Path, scope: Option<&str>, graph_db: &Path) -> String {
6325    let backup = format!("{}.bak", graph_db.to_string_lossy());
6326    format!(
6327        "mv {} {} && {}",
6328        shell_quote(graph_db.to_string_lossy().as_ref()),
6329        shell_quote(&backup),
6330        graph_db_rebuild_command(root, scope)
6331    )
6332}
6333
6334fn convex_refresh_command(root: &Path, scope: Option<&str>) -> String {
6335    format!(
6336        "tsift convex-sync {}{} --remote-snapshot --apply --json",
6337        shell_quote(root.to_string_lossy().as_ref()),
6338        graph_db_scope_arg(scope)
6339    )
6340}
6341
/// Opens `graph_db` for reading by delegating to
/// `substrate::open_graph_read_only_connection_resilient`.
///
/// NOTE(review): the fallback/recovery behaviour implied by "resilient" is
/// implemented in `substrate`, not here — consult that function for details.
fn open_sqlite_graph_db_readonly(graph_db: &Path) -> Result<substrate::SqliteReadOnlyConnection> {
    substrate::open_graph_read_only_connection_resilient(graph_db)
}
6345
6346fn sqlite_table_exists(conn: &Connection, table: &str) -> Result<bool> {
6347    conn.query_row(
6348        "SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1)",
6349        [table],
6350        |row| row.get::<_, bool>(0),
6351    )
6352    .map_err(Into::into)
6353}
6354
6355fn sqlite_known_table_count(conn: &Connection, table: &str) -> Result<usize> {
6356    let sql = match table {
6357        "graph_nodes" => "SELECT COUNT(*) FROM graph_nodes",
6358        "graph_edges" => "SELECT COUNT(*) FROM graph_edges",
6359        "graph_tombstones" => "SELECT COUNT(*) FROM graph_tombstones",
6360        other => bail!("unsupported graph count table {other}"),
6361    };
6362    conn.query_row(sql, [], |row| row.get::<_, usize>(0))
6363        .map_err(Into::into)
6364}
6365
6366fn sqlite_tombstone_counts(conn: &Connection) -> Result<GraphDbTombstoneCounts> {
6367    if !sqlite_table_exists(conn, "graph_tombstones")? {
6368        return Ok(GraphDbTombstoneCounts {
6369            nodes: 0,
6370            edges: 0,
6371            total: 0,
6372        });
6373    }
6374    let mut stmt =
6375        conn.prepare("SELECT row_kind, COUNT(*) FROM graph_tombstones GROUP BY row_kind")?;
6376    let mut rows = stmt.query([])?;
6377    let mut nodes = 0usize;
6378    let mut edges = 0usize;
6379    while let Some(row) = rows.next()? {
6380        let row_kind: String = row.get(0)?;
6381        let count: usize = row.get(1)?;
6382        match row_kind.as_str() {
6383            "node" => nodes = count,
6384            "edge" => edges = count,
6385            _ => {}
6386        }
6387    }
6388    Ok(GraphDbTombstoneCounts {
6389        nodes,
6390        edges,
6391        total: nodes + edges,
6392    })
6393}
6394
6395fn sqlite_graph_counts_from_cache(
6396    conn: &Connection,
6397    scope: &str,
6398) -> Result<Option<GraphDbOperatorCounts>> {
6399    if !sqlite_table_exists(conn, "graph_operator_stats")? {
6400        return Ok(None);
6401    }
6402    let row = conn
6403        .query_row(
6404            r#"
6405        SELECT nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes
6406        FROM graph_operator_stats
6407        WHERE scope = ?1
6408        "#,
6409            [scope],
6410            |row| {
6411                Ok((
6412                    row.get::<_, usize>(0)?,
6413                    row.get::<_, usize>(1)?,
6414                    row.get::<_, usize>(2)?,
6415                    row.get::<_, usize>(3)?,
6416                    row.get::<_, Option<i64>>(4)?,
6417                    row.get::<_, Option<i64>>(5)?,
6418                ))
6419            },
6420        )
6421        .optional()?;
6422    Ok(row.map(
6423        |(nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes)| {
6424            GraphDbOperatorCounts {
6425                nodes,
6426                edges,
6427                tombstones: GraphDbTombstoneCounts {
6428                    nodes: tombstone_nodes,
6429                    edges: tombstone_edges,
6430                    total: tombstone_nodes + tombstone_edges,
6431                },
6432                file_size_bytes: file_size_bytes
6433                    .and_then(|value| u64::try_from(value).ok())
6434                    .or_else(|| sqlite_database_size_bytes(conn).ok()),
6435                freelist_bytes: freelist_bytes
6436                    .and_then(|value| u64::try_from(value).ok())
6437                    .or_else(|| sqlite_database_freelist_bytes(conn).ok()),
6438            }
6439        },
6440    ))
6441}
6442
6443fn sqlite_graph_counts(conn: &Connection, scope: &str) -> Result<GraphDbOperatorCounts> {
6444    if let Some(counts) = sqlite_graph_counts_from_cache(conn, scope)? {
6445        return Ok(counts);
6446    }
6447    let nodes = if sqlite_table_exists(conn, "graph_nodes")? {
6448        sqlite_known_table_count(conn, "graph_nodes")?
6449    } else {
6450        0
6451    };
6452    let edges = if sqlite_table_exists(conn, "graph_edges")? {
6453        sqlite_known_table_count(conn, "graph_edges")?
6454    } else {
6455        0
6456    };
6457    Ok(GraphDbOperatorCounts {
6458        nodes,
6459        edges,
6460        tombstones: sqlite_tombstone_counts(conn)?,
6461        file_size_bytes: sqlite_database_size_bytes(conn).ok(),
6462        freelist_bytes: sqlite_database_freelist_bytes(conn).ok(),
6463    })
6464}
6465
6466fn sqlite_graph_semantic_node_count(conn: &Connection) -> Result<usize> {
6467    if !sqlite_table_exists(conn, "graph_nodes")? {
6468        return Ok(0);
6469    }
6470    let count: i64 = conn.query_row(
6471        "SELECT COUNT(*) FROM graph_nodes WHERE kind IN ('semantic_concept', 'semantic_entity')",
6472        [],
6473        |row| row.get(0),
6474    )?;
6475    Ok(count as usize)
6476}
6477
6478pub(crate) fn graph_db_compaction_policy(
6479    root: &Path,
6480    scope: Option<&str>,
6481    counts: &GraphDbOperatorCounts,
6482    prune_confirmed: bool,
6483) -> GraphDbCompactionPolicy {
6484    let live_rows = counts.nodes + counts.edges;
6485    let tombstone_scan_rows = counts.tombstones.total;
6486    let tombstone_heavy = tombstone_scan_rows > live_rows.max(1);
6487    let freelist_heavy = counts
6488        .file_size_bytes
6489        .zip(counts.freelist_bytes)
6490        .is_some_and(|(file_size, freelist)| freelist > 0 && freelist >= file_size / 20);
6491    let status = if tombstone_heavy || freelist_heavy {
6492        "recommended"
6493    } else {
6494        "not_needed"
6495    }
6496    .to_string();
6497    let mut recommendations = vec![
6498        convex_refresh_command(root, scope),
6499        graph_db_refresh_command(root, scope),
6500        format!(
6501            "tsift graph-db --path {}{} compact --apply --json",
6502            shell_quote(root.to_string_lossy().as_ref()),
6503            graph_db_scope_arg(scope)
6504        ),
6505    ];
6506    if prune_confirmed {
6507        recommendations.push(format!(
6508            "tsift graph-db --path {}{} compact --apply --prune-tombstones --confirmed-convex-reconciled --json",
6509            shell_quote(root.to_string_lossy().as_ref()),
6510            graph_db_scope_arg(scope)
6511        ));
6512    }
6513    let proof = vec![
6514        format!("{live_rows} live graph row(s)"),
6515        format!("{tombstone_scan_rows} retained tombstone row(s) scanned by status/doctor"),
6516        format!(
6517            "graph.db file_size={} byte(s), freelist={} byte(s)",
6518            counts.file_size_bytes.unwrap_or(0),
6519            counts.freelist_bytes.unwrap_or(0)
6520        ),
6521    ];
6522    GraphDbCompactionPolicy {
6523        status,
6524        tombstone_scan_rows,
6525        live_rows,
6526        file_size_bytes: counts.file_size_bytes,
6527        freelist_bytes: counts.freelist_bytes,
6528        safe_to_prune_tombstones: prune_confirmed,
6529        requires_convex_reconciliation: tombstone_scan_rows > 0 && !prune_confirmed,
6530        recommendations,
6531        proof,
6532    }
6533}
6534
6535fn sqlite_database_size_bytes(conn: &Connection) -> Result<u64> {
6536    let page_count: u64 = conn.query_row("PRAGMA page_count", [], |row| row.get(0))?;
6537    let page_size: u64 = conn.query_row("PRAGMA page_size", [], |row| row.get(0))?;
6538    Ok(page_count.saturating_mul(page_size))
6539}
6540
6541fn sqlite_database_freelist_bytes(conn: &Connection) -> Result<u64> {
6542    let freelist_count: u64 = conn.query_row("PRAGMA freelist_count", [], |row| row.get(0))?;
6543    let page_size: u64 = conn.query_row("PRAGMA page_size", [], |row| row.get(0))?;
6544    Ok(freelist_count.saturating_mul(page_size))
6545}
6546
6547fn sqlite_graph_tombstone_retention_diagnostics(
6548    conn: &Connection,
6549    scope: &str,
6550) -> Result<Vec<String>> {
6551    if !sqlite_table_exists(conn, "graph_tombstones")? {
6552        return Ok(Vec::new());
6553    }
6554    let cached = sqlite_graph_counts_from_cache(conn, scope)?;
6555    let counts = match cached.clone() {
6556        Some(counts) => counts,
6557        None => sqlite_graph_counts(conn, scope)?,
6558    };
6559    let live_rows = counts.nodes + counts.edges;
6560    let file_size = counts.file_size_bytes.unwrap_or(0);
6561    let freelist = counts.freelist_bytes.unwrap_or(0);
6562    let stale_live_tombstones = if cached.is_some() {
6563        0
6564    } else {
6565        let mut live_keys = BTreeSet::new();
6566        if sqlite_table_exists(conn, "graph_nodes")? {
6567            let mut stmt = conn.prepare("SELECT id FROM graph_nodes")?;
6568            for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
6569                live_keys.insert(format!("node:{}", row?));
6570            }
6571        }
6572        if sqlite_table_exists(conn, "graph_edges")? {
6573            let mut stmt = conn.prepare("SELECT edge_key FROM graph_edges")?;
6574            for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
6575                live_keys.insert(format!("edge:{}", row?));
6576            }
6577        }
6578        let mut stale_live_tombstones = 0usize;
6579        let mut stmt = conn.prepare("SELECT row_key FROM graph_tombstones ORDER BY row_key")?;
6580        for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
6581            if live_keys.contains(&row?) {
6582                stale_live_tombstones += 1;
6583            }
6584        }
6585        stale_live_tombstones
6586    };
6587
6588    let mut diagnostics = Vec::new();
6589    if stale_live_tombstones > 0 {
6590        diagnostics.push(format!(
6591            "{stale_live_tombstones} tombstone(s) reference rows that are live again; the next graph-db refresh prunes those stale tombstones before inserting new deletion markers"
6592        ));
6593    }
6594    if counts.tombstones.total > live_rows.max(1) {
6595        let source = if cached.is_some() {
6596            "cached refresh stats"
6597        } else {
6598            "live row scan"
6599        };
6600        diagnostics.push(format!(
6601            "tombstone retention exceeds live graph rows: {} tombstone(s) vs {} live row(s) from {}; graph.db file_size={} byte(s), freelist={} byte(s), status/doctor tombstone scans inspect {} extra row(s). Run convex-sync against the remote snapshot before rebuild/compaction if a remote consumer may still need deletion reconciliation.",
6602            counts.tombstones.total,
6603            live_rows,
6604            source,
6605            file_size,
6606            freelist,
6607            counts.tombstones.total
6608        ));
6609    }
6610    Ok(diagnostics)
6611}
6612
6613fn sqlite_graph_freshness_from_conn(
6614    conn: &Connection,
6615    scope: &str,
6616) -> Result<GraphDbFreshnessReport> {
6617    if !sqlite_table_exists(conn, "graph_projection_versions")? {
6618        return Ok(GraphDbFreshnessReport {
6619            status: "missing".to_string(),
6620            fail_closed: true,
6621            projection_version: None,
6622            content_hash: None,
6623            source_watermark: None,
6624            diagnostics: vec![
6625                "graph projection metadata table is missing; refresh graph.db before trusting reads"
6626                    .to_string(),
6627            ],
6628        });
6629    }
6630    let version = conn
6631        .query_row(
6632            r#"
6633            SELECT projection_version, content_hash, source_watermark
6634            FROM graph_projection_versions
6635            WHERE scope = ?1
6636            "#,
6637            [scope],
6638            |row| {
6639                Ok((
6640                    row.get::<_, String>(0)?,
6641                    row.get::<_, Option<String>>(1)?,
6642                    row.get::<_, Option<String>>(2)?,
6643                ))
6644            },
6645        )
6646        .optional()?;
6647    let Some((projection_version, content_hash, source_watermark)) = version else {
6648        return Ok(GraphDbFreshnessReport {
6649            status: "missing".to_string(),
6650            fail_closed: true,
6651            projection_version: None,
6652            content_hash: None,
6653            source_watermark: None,
6654            diagnostics: vec![
6655                "graph projection metadata is missing; refresh graph.db before trusting reads"
6656                    .to_string(),
6657            ],
6658        });
6659    };
6660
6661    let mut diagnostics = Vec::new();
6662    if projection_version != GRAPH_PROJECTION_VERSION {
6663        diagnostics.push(format!(
6664            "projection version mismatch: expected {} got {}",
6665            GRAPH_PROJECTION_VERSION, projection_version
6666        ));
6667    }
6668    if content_hash.is_none() {
6669        diagnostics.push("projection content hash is missing".to_string());
6670    }
6671    let fail_closed = !diagnostics.is_empty();
6672    Ok(GraphDbFreshnessReport {
6673        status: if fail_closed { "stale" } else { "current" }.to_string(),
6674        fail_closed,
6675        projection_version: Some(projection_version),
6676        content_hash,
6677        source_watermark,
6678        diagnostics,
6679    })
6680}
6681
6682fn graph_db_operator_next_commands(
6683    root: &Path,
6684    scope: Option<&str>,
6685    include_refresh: bool,
6686) -> Vec<String> {
6687    let mut commands = Vec::new();
6688    if include_refresh {
6689        commands.push(graph_db_refresh_command(root, scope));
6690    }
6691    commands.push(format!(
6692        "tsift graph-db --path {}{} doctor --json",
6693        shell_quote(root.to_string_lossy().as_ref()),
6694        graph_db_scope_arg(scope)
6695    ));
6696    commands.push(format!(
6697        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot <rows.json> drift --json",
6698        shell_quote(root.to_string_lossy().as_ref()),
6699        graph_db_scope_arg(scope)
6700    ));
6701    commands.push(format!(
6702        "tsift convex-sync {}{} --remote-snapshot --apply --json",
6703        shell_quote(root.to_string_lossy().as_ref()),
6704        graph_db_scope_arg(scope)
6705    ));
6706    commands
6707}
6708
6709pub(crate) fn graph_db_read_recovery_diagnostic(recovery: index::ReadOnlyRecovery) -> String {
6710    match recovery {
6711        index::ReadOnlyRecovery::SnapshotFallback => {
6712            "graph.db read recovered through snapshot fallback after a rollback-journal lock on the live database".to_string()
6713        }
6714        index::ReadOnlyRecovery::SnapshotFallbackWal => {
6715            "graph.db read recovered through WAL-aware snapshot fallback after copying live -wal/-shm sidecars".to_string()
6716        }
6717    }
6718}
6719
6720fn sqlite_string_set(conn: &Connection, sql: &str) -> Result<BTreeSet<String>> {
6721    let mut stmt = conn.prepare(sql)?;
6722    let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
6723    let mut values = BTreeSet::new();
6724    for row in rows {
6725        values.insert(row?);
6726    }
6727    Ok(values)
6728}
6729
6730fn sqlite_column_names(conn: &Connection, table: &str) -> Result<BTreeSet<String>> {
6731    let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
6732    let rows = stmt.query_map([], |row| row.get::<_, String>(1))?;
6733    let mut columns = BTreeSet::new();
6734    for row in rows {
6735        columns.insert(row?);
6736    }
6737    Ok(columns)
6738}
6739
6740fn sqlite_graph_schema_diagnostics(conn: &Connection) -> Result<Vec<String>> {
6741    let mut diagnostics = Vec::new();
6742    let user_version: i64 =
6743        conn.pragma_query_value(None, "user_version", |row| row.get::<_, i64>(0))?;
6744    if user_version > SQLITE_GRAPH_SCHEMA_VERSION {
6745        diagnostics.push(format!(
6746            "graph.db schema version {user_version} is newer than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
6747        ));
6748    } else if user_version < SQLITE_GRAPH_SCHEMA_VERSION {
6749        diagnostics.push(format!(
6750            "graph.db schema version {user_version} is older than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
6751        ));
6752    }
6753
6754    let tables = sqlite_string_set(
6755        conn,
6756        "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name",
6757    )?;
6758    let required_tables = [
6759        (
6760            "graph_nodes",
6761            vec![
6762                "id",
6763                "kind",
6764                "label",
6765                "properties_json",
6766                "provenance_json",
6767                "freshness_json",
6768                "row_hash",
6769                "source_watermark",
6770            ],
6771        ),
6772        (
6773            "graph_edges",
6774            vec![
6775                "edge_key",
6776                "from_id",
6777                "to_id",
6778                "kind",
6779                "properties_json",
6780                "provenance_json",
6781                "freshness_json",
6782                "row_hash",
6783                "source_watermark",
6784            ],
6785        ),
6786        (
6787            "graph_projection_versions",
6788            vec![
6789                "scope",
6790                "projection_version",
6791                "content_hash",
6792                "source_watermark",
6793                "observed_at_unix",
6794            ],
6795        ),
6796        (
6797            "graph_tombstones",
6798            vec!["row_key", "row_kind", "deleted_at_unix"],
6799        ),
6800        ("graph_node_properties", vec!["node_id", "key", "value"]),
6801        ("graph_edge_properties", vec!["edge_key", "key", "value"]),
6802    ];
6803    for (table, required_columns) in required_tables {
6804        if !tables.contains(table) {
6805            diagnostics.push(format!("graph.db schema drift: missing table {table}"));
6806            continue;
6807        }
6808        let columns = sqlite_column_names(conn, table)?;
6809        for column in required_columns {
6810            if !columns.contains(column) {
6811                diagnostics.push(format!(
6812                    "graph.db schema drift: missing column {table}.{column}"
6813                ));
6814            }
6815        }
6816    }
6817
6818    let indexes = sqlite_string_set(
6819        conn,
6820        "SELECT name FROM sqlite_master WHERE type = 'index' ORDER BY name",
6821    )?;
6822    for index in [
6823        "idx_graph_nodes_kind",
6824        "idx_graph_edges_from_kind",
6825        "idx_graph_edges_to_kind",
6826        "idx_graph_edges_edge_key",
6827        "idx_graph_node_properties_key_value_node",
6828        "idx_graph_edge_properties_key_value_edge",
6829    ] {
6830        if !indexes.contains(index) {
6831            diagnostics.push(format!("graph.db schema drift: missing index {index}"));
6832        }
6833    }
6834
6835    if tables.contains("graph_edges") {
6836        let mut stmt = conn.prepare("PRAGMA foreign_key_list(graph_edges)")?;
6837        let rows = stmt.query_map([], |row| {
6838            Ok((row.get::<_, String>(3)?, row.get::<_, String>(4)?))
6839        })?;
6840        let mut fks = BTreeSet::new();
6841        for row in rows {
6842            fks.insert(row?);
6843        }
6844        for expected in [
6845            ("from_id".to_string(), "id".to_string()),
6846            ("to_id".to_string(), "id".to_string()),
6847        ] {
6848            if !fks.contains(&expected) {
6849                diagnostics.push(format!(
6850                    "graph.db schema drift: missing graph_edges foreign key {} -> graph_nodes.{}",
6851                    expected.0, expected.1
6852                ));
6853            }
6854        }
6855    }
6856
6857    Ok(diagnostics)
6858}
6859
6860fn sqlite_query_diagnostics(conn: &Connection, sql: &str) -> Result<Vec<String>> {
6861    let mut stmt = conn.prepare(sql)?;
6862    let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
6863    let mut diagnostics = Vec::new();
6864    for row in rows {
6865        diagnostics.push(row?);
6866    }
6867    Ok(diagnostics)
6868}
6869
6870fn sqlite_graph_duplicate_diagnostics(conn: &Connection) -> Result<Vec<String>> {
6871    let mut diagnostics = sqlite_query_diagnostics(
6872        conn,
6873        r#"
6874        SELECT 'duplicate graph_nodes.id ' || id || ' (' || COUNT(*) || ' rows)'
6875        FROM graph_nodes
6876        GROUP BY id
6877        HAVING COUNT(*) > 1
6878        ORDER BY id
6879        "#,
6880    )?;
6881    diagnostics.extend(sqlite_query_diagnostics(
6882        conn,
6883        r#"
6884        SELECT 'duplicate graph_edges key ' || from_id || ' -' || kind || '-> ' || to_id || ' (' || COUNT(*) || ' rows)'
6885        FROM graph_edges
6886        GROUP BY from_id, to_id, kind
6887        HAVING COUNT(*) > 1
6888        ORDER BY from_id, kind, to_id
6889        "#,
6890    )?);
6891    diagnostics.extend(sqlite_query_diagnostics(
6892        conn,
6893        r#"
6894        SELECT 'duplicate graph_edges.edge_key ' || edge_key || ' (' || COUNT(*) || ' rows)'
6895        FROM graph_edges
6896        GROUP BY edge_key
6897        HAVING COUNT(*) > 1
6898        ORDER BY edge_key
6899        "#,
6900    )?);
6901    Ok(diagnostics)
6902}
6903
6904fn sqlite_graph_orphan_diagnostics(conn: &Connection) -> Result<Vec<String>> {
6905    sqlite_query_diagnostics(
6906        conn,
6907        r#"
6908        SELECT 'orphan edge missing from node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
6909        FROM graph_edges e
6910        LEFT JOIN graph_nodes n ON n.id = e.from_id
6911        WHERE n.id IS NULL
6912        UNION ALL
6913        SELECT 'orphan edge missing to node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
6914        FROM graph_edges e
6915        LEFT JOIN graph_nodes n ON n.id = e.to_id
6916        WHERE n.id IS NULL
6917        ORDER BY 1
6918        "#,
6919    )
6920}
6921
6922fn sqlite_graph_json_diagnostics(conn: &Connection) -> Result<Vec<String>> {
6923    let mut diagnostics = Vec::new();
6924    let mut node_stmt = conn.prepare(
6925        "SELECT id, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
6926    )?;
6927    let node_rows = node_stmt.query_map([], |row| {
6928        Ok((
6929            row.get::<_, String>(0)?,
6930            row.get::<_, String>(1)?,
6931            row.get::<_, String>(2)?,
6932            row.get::<_, Option<String>>(3)?,
6933        ))
6934    })?;
6935    for row in node_rows {
6936        let (id, properties_json, provenance_json, freshness_json) = row?;
6937        if let Err(err) = serde_json::from_str::<BTreeMap<String, String>>(&properties_json) {
6938            diagnostics.push(format!(
6939                "graph_nodes {id} properties_json is invalid: {err}"
6940            ));
6941        }
6942        if let Err(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json) {
6943            diagnostics.push(format!(
6944                "graph_nodes {id} provenance_json is invalid: {err}"
6945            ));
6946        }
6947        if let Some(freshness_json) = freshness_json
6948            && let Err(err) = serde_json::from_str::<GraphFreshness>(&freshness_json)
6949        {
6950            diagnostics.push(format!("graph_nodes {id} freshness_json is invalid: {err}"));
6951        }
6952    }
6953
6954    let mut edge_stmt = conn.prepare(
6955        "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
6956    )?;
6957    let edge_rows = edge_stmt.query_map([], |row| {
6958        Ok((
6959            row.get::<_, String>(0)?,
6960            row.get::<_, String>(1)?,
6961            row.get::<_, String>(2)?,
6962            row.get::<_, String>(3)?,
6963            row.get::<_, String>(4)?,
6964            row.get::<_, String>(5)?,
6965            row.get::<_, Option<String>>(6)?,
6966        ))
6967    })?;
6968    for row in edge_rows {
6969        let (edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json) =
6970            row?;
6971        let edge = format!("{edge_key} {from_id} -{kind}-> {to_id}");
6972        if let Err(err) = serde_json::from_str::<BTreeMap<String, String>>(&properties_json) {
6973            diagnostics.push(format!(
6974                "graph_edges {edge} properties_json is invalid: {err}"
6975            ));
6976        }
6977        if let Err(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json) {
6978            diagnostics.push(format!(
6979                "graph_edges {edge} provenance_json is invalid: {err}"
6980            ));
6981        }
6982        if let Some(freshness_json) = freshness_json
6983            && let Err(err) = serde_json::from_str::<GraphFreshness>(&freshness_json)
6984        {
6985            diagnostics.push(format!(
6986                "graph_edges {edge} freshness_json is invalid: {err}"
6987            ));
6988        }
6989    }
6990    Ok(diagnostics)
6991}
6992
6993fn sqlite_graph_projection_metadata_diagnostics(
6994    conn: &Connection,
6995    scope: Option<&str>,
6996) -> Result<Vec<String>> {
6997    let mut diagnostics = Vec::new();
6998    let scope_key = scope.unwrap_or("root");
6999    let version = conn
7000        .query_row(
7001            r#"
7002            SELECT projection_version, content_hash, source_watermark
7003            FROM graph_projection_versions
7004            WHERE scope = ?1
7005            "#,
7006            [scope_key],
7007            |row| {
7008                Ok((
7009                    row.get::<_, String>(0)?,
7010                    row.get::<_, Option<String>>(1)?,
7011                    row.get::<_, Option<String>>(2)?,
7012                ))
7013            },
7014        )
7015        .optional()?;
7016    let Some((projection_version, content_hash, _source_watermark)) = version else {
7017        diagnostics.push(format!(
7018            "graph projection metadata is missing for scope {scope_key}"
7019        ));
7020        return Ok(diagnostics);
7021    };
7022    if projection_version != GRAPH_PROJECTION_VERSION {
7023        diagnostics.push(format!(
7024            "projection version mismatch: expected {GRAPH_PROJECTION_VERSION} got {projection_version}"
7025        ));
7026    }
7027    if content_hash.is_none() {
7028        diagnostics.push("projection content hash is missing".to_string());
7029    }
7030
7031    let meta_id = graph_projection_meta_id(scope);
7032    let meta_properties = conn
7033        .query_row(
7034            "SELECT properties_json FROM graph_nodes WHERE id = ?1 AND kind = ?2",
7035            (&meta_id, GRAPH_PROJECTION_META_KIND),
7036            |row| row.get::<_, String>(0),
7037        )
7038        .optional()?;
7039    let Some(meta_properties) = meta_properties else {
7040        diagnostics.push(format!("projection_meta node {meta_id} is missing"));
7041        return Ok(diagnostics);
7042    };
7043    let properties = serde_json::from_str::<BTreeMap<String, String>>(&meta_properties)
7044        .with_context(|| format!("parsing projection_meta properties for {meta_id}"))?;
7045    if properties.get("projection_version").map(String::as_str) != Some(GRAPH_PROJECTION_VERSION) {
7046        diagnostics.push(format!(
7047            "projection_meta node {meta_id} has stale projection_version"
7048        ));
7049    }
7050    if properties.get("content_hash") != content_hash.as_ref() {
7051        diagnostics.push(format!(
7052            "projection_meta node {meta_id} content_hash does not match graph_projection_versions"
7053        ));
7054    }
7055    Ok(diagnostics)
7056}
7057
7058pub(crate) fn sqlite_convex_rows_from_conn(conn: &Connection) -> Result<ConvexProjectionRows> {
7059    let mut node_stmt = conn.prepare(
7060        "SELECT id, kind, label, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
7061    )?;
7062    let node_rows = node_stmt.query_map([], |row| {
7063        let properties_json: String = row.get(3)?;
7064        let provenance_json: String = row.get(4)?;
7065        let freshness_json: Option<String> = row.get(5)?;
7066        Ok((
7067            row.get::<_, String>(0)?,
7068            row.get::<_, String>(1)?,
7069            row.get::<_, String>(2)?,
7070            properties_json,
7071            provenance_json,
7072            freshness_json,
7073        ))
7074    })?;
7075    let mut nodes = Vec::new();
7076    for row in node_rows {
7077        let (external_id, kind, label, properties_json, provenance_json, freshness_json) = row?;
7078        nodes.push(ConvexNodeRow {
7079            external_id,
7080            kind,
7081            label,
7082            properties: serde_json::from_str(&properties_json)?,
7083            provenance: serde_json::from_str(&provenance_json)?,
7084            freshness: freshness_json
7085                .map(|value| serde_json::from_str(&value))
7086                .transpose()?,
7087        });
7088    }
7089
7090    let mut edge_stmt = conn.prepare(
7091        "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
7092    )?;
7093    let edge_rows = edge_stmt.query_map([], |row| {
7094        let properties_json: String = row.get(4)?;
7095        let provenance_json: String = row.get(5)?;
7096        let freshness_json: Option<String> = row.get(6)?;
7097        Ok((
7098            row.get::<_, String>(0)?,
7099            row.get::<_, String>(1)?,
7100            row.get::<_, String>(2)?,
7101            row.get::<_, String>(3)?,
7102            properties_json,
7103            provenance_json,
7104            freshness_json,
7105        ))
7106    })?;
7107    let mut edges = Vec::new();
7108    for row in edge_rows {
7109        let (
7110            edge_key,
7111            from_external_id,
7112            to_external_id,
7113            kind,
7114            properties_json,
7115            provenance_json,
7116            freshness_json,
7117        ) = row?;
7118        edges.push(ConvexEdgeRow {
7119            edge_key,
7120            from_external_id,
7121            to_external_id,
7122            kind,
7123            properties: serde_json::from_str(&properties_json)?,
7124            provenance: serde_json::from_str(&provenance_json)?,
7125            freshness: freshness_json
7126                .map(|value| serde_json::from_str(&value))
7127                .transpose()?,
7128        });
7129    }
7130    Ok(ConvexProjectionRows { nodes, edges })
7131}
7132
7133fn convex_required_index_label(index: &ConvexRequiredIndex) -> String {
7134    format!("{}.{}({})", index.table, index.name, index.fields.join(","))
7135}
7136
7137fn convex_snapshot_index_value(value: &serde_json::Value) -> Option<&serde_json::Value> {
7138    value
7139        .get("indexes")
7140        .or_else(|| value.get("requiredIndexes"))
7141        .or_else(|| {
7142            value
7143                .get("metadata")
7144                .and_then(|metadata| metadata.get("indexes"))
7145        })
7146}
7147
7148fn convex_snapshot_declared_indexes(
7149    value: &serde_json::Value,
7150) -> Result<Option<Vec<ConvexRequiredIndex>>> {
7151    convex_snapshot_index_value(value)
7152        .map(|indexes| {
7153            serde_json::from_value::<Vec<ConvexRequiredIndex>>(indexes.clone())
7154                .context("parsing Convex snapshot index metadata")
7155        })
7156        .transpose()
7157}
7158
7159fn convex_snapshot_index_diagnostics(value: &serde_json::Value) -> Result<Vec<String>> {
7160    let required = convex_required_indexes();
7161    let Some(declared) = convex_snapshot_declared_indexes(value)? else {
7162        return Ok(vec![format!(
7163            "Convex snapshot index metadata is missing; required indexes not confirmed: {}",
7164            required
7165                .iter()
7166                .map(convex_required_index_label)
7167                .collect::<Vec<_>>()
7168                .join(", ")
7169        )]);
7170    };
7171    let declared = declared.into_iter().collect::<BTreeSet<_>>();
7172    let missing = required
7173        .iter()
7174        .filter(|index| !declared.contains(*index))
7175        .map(convex_required_index_label)
7176        .collect::<Vec<_>>();
7177    if missing.is_empty() {
7178        Ok(Vec::new())
7179    } else {
7180        Ok(vec![format!(
7181            "Convex snapshot is missing required index metadata: {}",
7182            missing.join(", ")
7183        )])
7184    }
7185}
7186
7187pub(crate) fn load_convex_projection_snapshot_value(
7188    snapshot_path: &Path,
7189) -> Result<(ConvexProjectionRows, serde_json::Value)> {
7190    let content = fs::read_to_string(snapshot_path).with_context(|| {
7191        format!(
7192            "reading Convex projection snapshot {}",
7193            snapshot_path.display()
7194        )
7195    })?;
7196    let value = serde_json::from_str::<serde_json::Value>(&content).with_context(|| {
7197        format!(
7198            "parsing Convex projection snapshot {}",
7199            snapshot_path.display()
7200        )
7201    })?;
7202    let rows = serde_json::from_value::<ConvexProjectionRows>(value.clone())
7203        .with_context(|| format!("parsing Convex projection rows {}", snapshot_path.display()))?;
7204    Ok((rows, value))
7205}
7206
7207pub(crate) fn append_sqlite_graph_doctor_checks(
7208    report: &mut GraphDbDoctorReport,
7209    root: &Path,
7210    scope: Option<&str>,
7211    graph_db: &Path,
7212) -> Option<substrate::SqliteReadOnlyConnection> {
7213    let rebuild = graph_db_rebuild_command(root, scope);
7214    let backup_rebuild = graph_db_backup_rebuild_command(root, scope, graph_db);
7215    if !graph_db.exists() {
7216        report.push_check(graph_db_doctor_check(
7217            "sqlite_graph_db_exists",
7218            vec![format!("graph.db is missing at {}", graph_db.display())],
7219            vec![rebuild],
7220        ));
7221        return None;
7222    }
7223    report.push_check(graph_db_doctor_check(
7224        "sqlite_graph_db_exists",
7225        Vec::new(),
7226        vec![rebuild.clone()],
7227    ));
7228
7229    let conn = match open_sqlite_graph_db_readonly(graph_db) {
7230        Ok(conn) => conn,
7231        Err(err) => {
7232            report.push_check(graph_db_doctor_check(
7233                "sqlite_graph_db_open",
7234                vec![err.to_string()],
7235                vec![backup_rebuild],
7236            ));
7237            return None;
7238        }
7239    };
7240    report.push_check(graph_db_doctor_check(
7241        "sqlite_graph_db_open",
7242        Vec::new(),
7243        vec![rebuild.clone()],
7244    ));
7245    if let Some(recovery) = conn.recovery() {
7246        report.push_check(GraphDbDoctorCheck {
7247            name: "sqlite_graph_db_read_recovery".to_string(),
7248            status: "recovered".to_string(),
7249            fail_closed: false,
7250            diagnostics: vec![graph_db_read_recovery_diagnostic(recovery)],
7251            repair_commands: Vec::new(),
7252        });
7253    }
7254
7255    let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
7256        .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
7257    report.push_check(graph_db_doctor_check(
7258        "sqlite_schema",
7259        schema_diagnostics,
7260        vec![backup_rebuild.clone()],
7261    ));
7262
7263    let metadata_diagnostics = sqlite_graph_projection_metadata_diagnostics(conn.conn(), scope)
7264        .unwrap_or_else(|err| {
7265            vec![format!(
7266                "graph projection metadata inspection failed: {err}"
7267            )]
7268        });
7269    report.push_check(graph_db_doctor_check(
7270        "sqlite_projection_metadata",
7271        metadata_diagnostics,
7272        vec![rebuild.clone()],
7273    ));
7274
7275    let duplicate_diagnostics = sqlite_graph_duplicate_diagnostics(conn.conn())
7276        .unwrap_or_else(|err| vec![format!("duplicate id inspection failed: {err}")]);
7277    report.push_check(graph_db_doctor_check(
7278        "sqlite_duplicate_ids",
7279        duplicate_diagnostics,
7280        vec![backup_rebuild.clone()],
7281    ));
7282
7283    let orphan_diagnostics = sqlite_graph_orphan_diagnostics(conn.conn())
7284        .unwrap_or_else(|err| vec![format!("orphan edge inspection failed: {err}")]);
7285    report.push_check(graph_db_doctor_check(
7286        "sqlite_orphan_edges",
7287        orphan_diagnostics,
7288        vec![rebuild.clone()],
7289    ));
7290
7291    let json_diagnostics = sqlite_graph_json_diagnostics(conn.conn())
7292        .unwrap_or_else(|err| vec![format!("graph row JSON inspection failed: {err}")]);
7293    report.push_check(graph_db_doctor_check(
7294        "sqlite_row_json",
7295        json_diagnostics,
7296        vec![backup_rebuild],
7297    ));
7298
7299    let tombstone_diagnostics =
7300        sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
7301            .unwrap_or_else(|err| {
7302                vec![format!(
7303                    "graph tombstone retention inspection failed: {err}"
7304                )]
7305            });
7306    report.push_check(GraphDbDoctorCheck {
7307        name: "sqlite_tombstone_retention".to_string(),
7308        status: if tombstone_diagnostics.is_empty() {
7309            "ok".to_string()
7310        } else {
7311            "warning".to_string()
7312        },
7313        fail_closed: false,
7314        diagnostics: tombstone_diagnostics,
7315        repair_commands: Vec::new(),
7316    });
7317    let compaction_check = match sqlite_graph_counts(conn.conn(), scope.unwrap_or("root")) {
7318        Ok(counts) => {
7319            let policy = graph_db_compaction_policy(root, scope, &counts, false);
7320            GraphDbDoctorCheck {
7321                name: "sqlite_compaction_policy".to_string(),
7322                status: policy.status.clone(),
7323                fail_closed: false,
7324                diagnostics: policy.proof,
7325                repair_commands: if policy.status == "recommended" {
7326                    policy.recommendations
7327                } else {
7328                    Vec::new()
7329                },
7330            }
7331        }
7332        Err(err) => GraphDbDoctorCheck {
7333            name: "sqlite_compaction_policy".to_string(),
7334            status: "warning".to_string(),
7335            fail_closed: false,
7336            diagnostics: vec![format!("graph compaction policy inspection failed: {err}")],
7337            repair_commands: Vec::new(),
7338        },
7339    };
7340    report.push_check(compaction_check);
7341
7342    Some(conn)
7343}
7344
7345pub(crate) fn append_convex_snapshot_doctor_checks(
7346    report: &mut GraphDbDoctorReport,
7347    root: &Path,
7348    scope: Option<&str>,
7349    local_rows: Option<&ConvexProjectionRows>,
7350    snapshot_path: Option<&Path>,
7351) {
7352    let repair = convex_refresh_command(root, scope);
7353    let Some(snapshot_path) = snapshot_path else {
7354        report.push_check(graph_db_doctor_check(
7355            "convex_snapshot_present",
7356            vec!["--backend convex-snapshot requires --convex-snapshot <rows.json>".to_string()],
7357            vec![format!(
7358                "tsift convex-sync {}{} --json > convex-rows.json",
7359                shell_quote(root.to_string_lossy().as_ref()),
7360                graph_db_scope_arg(scope)
7361            )],
7362        ));
7363        return;
7364    };
7365    report.push_check(graph_db_doctor_check(
7366        "convex_snapshot_present",
7367        Vec::new(),
7368        vec![repair.clone()],
7369    ));
7370
7371    let (snapshot, snapshot_value) = match load_convex_projection_snapshot_value(snapshot_path) {
7372        Ok(snapshot) => snapshot,
7373        Err(err) => {
7374            report.push_check(graph_db_doctor_check(
7375                "convex_snapshot_parse",
7376                vec![err.to_string()],
7377                vec![repair],
7378            ));
7379            return;
7380        }
7381    };
7382    report.push_check(graph_db_doctor_check(
7383        "convex_snapshot_parse",
7384        Vec::new(),
7385        vec![repair.clone()],
7386    ));
7387
7388    let row_diagnostics = convex_projection_row_diagnostics(&snapshot);
7389    report.push_check(graph_db_doctor_check(
7390        "convex_snapshot_rows",
7391        row_diagnostics,
7392        vec![repair.clone()],
7393    ));
7394
7395    let index_diagnostics = convex_snapshot_index_diagnostics(&snapshot_value)
7396        .unwrap_or_else(|err| vec![err.to_string()]);
7397    report.required_indexes = convex_required_indexes();
7398    report.push_check(graph_db_doctor_check(
7399        "convex_required_indexes",
7400        index_diagnostics,
7401        vec![
7402            "Add the indexes from examples/convex-graph/schema.ts, then redeploy the Convex app"
7403                .to_string(),
7404        ],
7405    ));
7406
7407    if let Some(local_rows) = local_rows {
7408        let freshness = convex_projection_freshness(local_rows, Some(&snapshot), scope);
7409        report.push_check(graph_db_doctor_check(
7410            "convex_projection_freshness",
7411            freshness.diagnostics,
7412            vec![repair],
7413        ));
7414    } else {
7415        report.push_check(graph_db_doctor_check(
7416            "convex_projection_freshness",
7417            vec![
7418                "local SQLite graph.db could not be read, so Convex freshness cannot be verified"
7419                    .to_string(),
7420            ],
7421            vec![graph_db_rebuild_command(root, scope)],
7422        ));
7423    }
7424}
7425
7426fn graph_db_convex_snapshot_doctor_command(
7427    root: &Path,
7428    scope: Option<&str>,
7429    snapshot_path: &Path,
7430) -> String {
7431    format!(
7432        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} doctor --json",
7433        shell_quote(root.to_string_lossy().as_ref()),
7434        graph_db_scope_arg(scope),
7435        shell_quote(snapshot_path.to_string_lossy().as_ref())
7436    )
7437}
7438
7439fn graph_db_convex_snapshot_read_command(
7440    root: &Path,
7441    scope: Option<&str>,
7442    snapshot_path: &Path,
7443) -> String {
7444    format!(
7445        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} schema --json",
7446        shell_quote(root.to_string_lossy().as_ref()),
7447        graph_db_scope_arg(scope),
7448        shell_quote(snapshot_path.to_string_lossy().as_ref())
7449    )
7450}
7451
7452fn convex_sync_snapshot_diff_command(
7453    root: &Path,
7454    scope: Option<&str>,
7455    snapshot_path: &Path,
7456) -> String {
7457    format!(
7458        "tsift convex-sync {}{} --snapshot {} --json",
7459        shell_quote(root.to_string_lossy().as_ref()),
7460        graph_db_scope_arg(scope),
7461        shell_quote(snapshot_path.to_string_lossy().as_ref())
7462    )
7463}
7464
7465pub(crate) struct GraphDbDriftInput<'a> {
7466    root: &'a Path,
7467    scope: Option<&'a str>,
7468    graph_db: &'a Path,
7469    snapshot_path: &'a Path,
7470    local: &'a ConvexProjectionRows,
7471    snapshot: &'a ConvexProjectionRows,
7472    snapshot_value: &'a serde_json::Value,
7473    warnings: Vec<String>,
7474}
7475
7476pub(crate) fn graph_db_drift_report(input: GraphDbDriftInput<'_>) -> GraphDbDriftReport {
7477    let GraphDbDriftInput {
7478        root,
7479        scope,
7480        graph_db,
7481        snapshot_path,
7482        local,
7483        snapshot,
7484        snapshot_value,
7485        warnings,
7486    } = input;
7487    let freshness = convex_projection_freshness(local, Some(snapshot), scope);
7488    let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
7489        convex_rows_diff(local, Some(snapshot));
7490    let row_diagnostics = convex_projection_row_diagnostics(snapshot);
7491    let index_diagnostics = convex_snapshot_index_diagnostics(snapshot_value)
7492        .unwrap_or_else(|err| vec![format!("Convex snapshot index metadata failed: {err}")]);
7493    let local_hash = freshness.local_hash.clone();
7494    let snapshot_hash = freshness.snapshot_hash.clone();
7495    let stale_nodes = freshness.stale_nodes.clone();
7496    let stale_edges = freshness.stale_edges.clone();
7497
7498    let duplicate_failures = row_diagnostics
7499        .iter()
7500        .filter(|diagnostic| diagnostic.contains("duplicate"))
7501        .count();
7502    let orphan_failures = row_diagnostics
7503        .iter()
7504        .filter(|diagnostic| diagnostic.contains("references missing"))
7505        .count();
7506    let missing_required_indexes = index_diagnostics.len();
7507    let stale_projection_metadata =
7508        usize::from(local_hash != snapshot_hash || snapshot_hash.is_none());
7509    let hard_failures = duplicate_failures + orphan_failures + missing_required_indexes;
7510    let has_drift = freshness.fail_closed
7511        || !node_upserts.is_empty()
7512        || !edge_upserts.is_empty()
7513        || !node_tombstones.is_empty()
7514        || !edge_tombstones.is_empty();
7515    let status = if hard_failures > 0 {
7516        "fail_closed"
7517    } else if has_drift {
7518        "drift"
7519    } else {
7520        "current"
7521    }
7522    .to_string();
7523
7524    let mut diagnostics = Vec::new();
7525    diagnostics.extend(row_diagnostics);
7526    diagnostics.extend(index_diagnostics);
7527    diagnostics.extend(freshness.diagnostics.clone());
7528    if has_drift {
7529        diagnostics.push(format!(
7530            "projection diff: {} node upsert(s), {} edge upsert(s), {} node tombstone(s), {} edge tombstone(s)",
7531            node_upserts.len(),
7532            edge_upserts.len(),
7533            node_tombstones.len(),
7534            edge_tombstones.len()
7535        ));
7536    }
7537
7538    let mut next_commands = vec![graph_db_convex_snapshot_doctor_command(
7539        root,
7540        scope,
7541        snapshot_path,
7542    )];
7543    if status == "current" {
7544        next_commands.push(graph_db_convex_snapshot_read_command(
7545            root,
7546            scope,
7547            snapshot_path,
7548        ));
7549    } else {
7550        next_commands.push(convex_sync_snapshot_diff_command(
7551            root,
7552            scope,
7553            snapshot_path,
7554        ));
7555        next_commands.push(convex_refresh_command(root, scope));
7556    }
7557
7558    GraphDbDriftReport {
7559        root: root.to_string_lossy().to_string(),
7560        scope: scope.map(str::to_string),
7561        graph_db: graph_db.to_string_lossy().to_string(),
7562        convex_snapshot: snapshot_path.to_string_lossy().to_string(),
7563        status: status.clone(),
7564        graph_reads_allowed: status == "current",
7565        projection_version: GRAPH_PROJECTION_VERSION.to_string(),
7566        local_hash,
7567        snapshot_hash,
7568        summary: GraphDbDriftSummary {
7569            node_upserts: node_upserts.len(),
7570            edge_upserts: edge_upserts.len(),
7571            node_tombstones: node_tombstones.len(),
7572            edge_tombstones: edge_tombstones.len(),
7573            stale_nodes: stale_nodes.len(),
7574            stale_edges: stale_edges.len(),
7575            stale_projection_metadata,
7576            duplicate_failures,
7577            orphan_failures,
7578            missing_required_indexes,
7579        },
7580        node_upserts: node_upserts
7581            .into_iter()
7582            .map(|row| row.external_id)
7583            .collect(),
7584        edge_upserts: edge_upserts.into_iter().map(|row| row.edge_key).collect(),
7585        node_tombstones,
7586        edge_tombstones,
7587        stale_nodes,
7588        stale_edges,
7589        diagnostics,
7590        next_commands,
7591        required_indexes: convex_required_indexes(),
7592        warnings,
7593    }
7594}
7595
7596pub(crate) fn print_graph_db_drift_human(report: &GraphDbDriftReport) {
7597    println!(
7598        "graph-db drift status: {} reads_allowed: {}",
7599        report.status, report.graph_reads_allowed
7600    );
7601    println!("graph_db: {}", report.graph_db);
7602    println!("convex_snapshot: {}", report.convex_snapshot);
7603    println!(
7604        "upserts: {} node(s), {} edge(s)",
7605        report.summary.node_upserts, report.summary.edge_upserts
7606    );
7607    println!(
7608        "tombstones: {} node(s), {} edge(s)",
7609        report.summary.node_tombstones, report.summary.edge_tombstones
7610    );
7611    for diagnostic in &report.diagnostics {
7612        println!("diagnostic: {diagnostic}");
7613    }
7614    for command in &report.next_commands {
7615        println!("next: {command}");
7616    }
7617}
7618
7619pub(crate) fn print_graph_db_doctor_human(report: &GraphDbDoctorReport) {
7620    println!(
7621        "graph-db doctor backend: {} status: {}",
7622        report.backend, report.status
7623    );
7624    println!("graph_db: {}", report.graph_db);
7625    if let Some(snapshot) = &report.convex_snapshot {
7626        println!("convex_snapshot: {snapshot}");
7627    }
7628    for check in &report.checks {
7629        println!("check: {} {}", check.name, check.status);
7630        for diagnostic in &check.diagnostics {
7631            println!("  diagnostic: {diagnostic}");
7632        }
7633    }
7634    for command in &report.repair_commands {
7635        println!("repair: {command}");
7636    }
7637}
7638
7639pub(crate) fn graph_db_operator_report_from_disk(
7640    root: &Path,
7641    scope: Option<&str>,
7642    graph_db: &Path,
7643    operation: &str,
7644    refresh: Option<GraphDbRefreshSummary>,
7645    warnings: Vec<String>,
7646) -> Result<GraphDbOperatorReport> {
7647    if !graph_db.exists() {
7648        let next_commands = graph_db_operator_next_commands(root, scope, true);
7649        let counts = GraphDbOperatorCounts {
7650            nodes: 0,
7651            edges: 0,
7652            tombstones: GraphDbTombstoneCounts {
7653                nodes: 0,
7654                edges: 0,
7655                total: 0,
7656            },
7657            file_size_bytes: None,
7658            freelist_bytes: None,
7659        };
7660        return Ok(GraphDbOperatorReport {
7661            root: root.to_string_lossy().to_string(),
7662            scope: scope.map(str::to_string),
7663            graph_db: graph_db.to_string_lossy().to_string(),
7664            operation: operation.to_string(),
7665            status: "missing".to_string(),
7666            materialized: false,
7667            freshness: GraphDbFreshnessReport {
7668                status: "missing".to_string(),
7669                fail_closed: true,
7670                projection_version: None,
7671                content_hash: None,
7672                source_watermark: None,
7673                diagnostics: vec![
7674                    "graph.db is missing; run graph-db refresh before trusting graph reads"
7675                        .to_string(),
7676                ],
7677            },
7678            readiness: graph_effectiveness_blocked(
7679                "graph_db_missing",
7680                vec![
7681                    "graph.db is missing; materialize the projection before relying on graph effectiveness".to_string(),
7682                ],
7683                next_commands.clone(),
7684            ),
7685            counts: counts.clone(),
7686            refresh,
7687            compaction: graph_db_compaction_policy(root, scope, &counts, false),
7688            recovery: None,
7689            next_commands,
7690            warnings,
7691        });
7692    }
7693
7694    let conn = open_sqlite_graph_db_readonly(graph_db)?;
7695    let recovery = conn.recovery();
7696    let mut warnings = warnings;
7697    if let Some(recovery) = recovery {
7698        warnings.push(graph_db_read_recovery_diagnostic(recovery));
7699    }
7700    let mut freshness = sqlite_graph_freshness_from_conn(conn.conn(), scope.unwrap_or("root"))?;
7701    let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
7702        .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
7703    if !schema_diagnostics.is_empty() {
7704        freshness.diagnostics.extend(schema_diagnostics);
7705        freshness.fail_closed = true;
7706        freshness.status = "stale".to_string();
7707    }
7708    let counts = sqlite_graph_counts(conn.conn(), scope.unwrap_or("root"))?;
7709    let semantic_row_count = sqlite_graph_semantic_node_count(conn.conn()).ok();
7710    warnings.extend(
7711        sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
7712            .unwrap_or_else(|err| {
7713                vec![format!(
7714                    "graph tombstone retention inspection failed: {err}"
7715                )]
7716            }),
7717    );
7718    let status = if freshness.fail_closed {
7719        "stale"
7720    } else {
7721        "current"
7722    }
7723    .to_string();
7724
7725    Ok(GraphDbOperatorReport {
7726        root: root.to_string_lossy().to_string(),
7727        scope: scope.map(str::to_string),
7728        graph_db: graph_db.to_string_lossy().to_string(),
7729        operation: operation.to_string(),
7730        status,
7731        materialized: true,
7732        freshness,
7733        readiness: graph_db_semantic_readiness(root, scope, semantic_row_count),
7734        compaction: graph_db_compaction_policy(root, scope, &counts, false),
7735        counts,
7736        refresh,
7737        recovery,
7738        next_commands: graph_db_operator_next_commands(root, scope, false),
7739        warnings,
7740    })
7741}
7742
7743fn print_graph_db_operator_human(report: &GraphDbOperatorReport) {
7744    println!(
7745        "graph-db {} status: {} materialized: {}",
7746        report.operation, report.status, report.materialized
7747    );
7748    println!("graph_db: {}", report.graph_db);
7749    println!(
7750        "projection: version={} hash={} watermark={}",
7751        report
7752            .freshness
7753            .projection_version
7754            .as_deref()
7755            .unwrap_or("<missing>"),
7756        report
7757            .freshness
7758            .content_hash
7759            .as_deref()
7760            .unwrap_or("<missing>"),
7761        report
7762            .freshness
7763            .source_watermark
7764            .as_deref()
7765            .unwrap_or("<missing>")
7766    );
7767    println!(
7768        "rows: {} node(s), {} edge(s), {} tombstone(s)",
7769        report.counts.nodes, report.counts.edges, report.counts.tombstones.total
7770    );
7771    println!(
7772        "readiness: {} reason: {} fail_closed: {}",
7773        report.readiness.status, report.readiness.reason, report.readiness.fail_closed
7774    );
7775    if let Some(file_size) = report.counts.file_size_bytes {
7776        println!(
7777            "storage: {} byte(s), {} free byte(s)",
7778            file_size,
7779            report.counts.freelist_bytes.unwrap_or(0)
7780        );
7781    }
7782    if let Some(refresh) = &report.refresh {
7783        println!(
7784            "refresh: {} tombstoned node(s), {} tombstoned edge(s)",
7785            refresh.tombstoned_nodes, refresh.tombstoned_edges
7786        );
7787        println!(
7788            "delta: {} node upsert(s), {} edge upsert(s), {} property row upsert(s), {} unchanged node(s), {} unchanged edge(s), {} unchanged property row(s), {} deleted property row(s), {} pruned tombstone(s)",
7789            refresh.upserted_nodes,
7790            refresh.upserted_edges,
7791            refresh.upserted_properties,
7792            refresh.unchanged_nodes,
7793            refresh.unchanged_edges,
7794            refresh.unchanged_properties,
7795            refresh.deleted_properties,
7796            refresh.pruned_tombstones
7797        );
7798    }
7799    println!(
7800        "compaction: {} tombstone_scan_rows={} live_rows={}",
7801        report.compaction.status,
7802        report.compaction.tombstone_scan_rows,
7803        report.compaction.live_rows
7804    );
7805    for proof in &report.compaction.proof {
7806        println!("compaction proof: {proof}");
7807    }
7808    if let Some(recovery) = report.recovery {
7809        println!("recovery: {}", graph_db_read_recovery_diagnostic(recovery));
7810    }
7811    for diagnostic in &report.freshness.diagnostics {
7812        println!("diagnostic: {diagnostic}");
7813    }
7814    for diagnostic in &report.readiness.diagnostics {
7815        println!("readiness diagnostic: {diagnostic}");
7816    }
7817    for warning in &report.warnings {
7818        println!("warning: {warning}");
7819    }
7820    for command in &report.readiness.next_commands {
7821        println!("readiness next: {command}");
7822    }
7823    for command in &report.next_commands {
7824        println!("next: {command}");
7825    }
7826}
7827
7828pub(crate) fn print_graph_db_operator_report(
7829    report: &GraphDbOperatorReport,
7830    format: OutputFormat,
7831) -> Result<()> {
7832    if format.json_output {
7833        print_json_or_envelope(
7834            report,
7835            &format,
7836            "graph-db",
7837            &report.operation,
7838            ToolEnvelopeSummary {
7839                text: format!(
7840                    "Graph DB {} status {} with {} node(s), {} edge(s), {} tombstone(s)",
7841                    report.operation,
7842                    report.status,
7843                    report.counts.nodes,
7844                    report.counts.edges,
7845                    report.counts.tombstones.total
7846                ),
7847                metrics: vec![
7848                    envelope_metric("operation", &report.operation),
7849                    envelope_metric("status", &report.status),
7850                    envelope_metric("nodes", report.counts.nodes),
7851                    envelope_metric("edges", report.counts.edges),
7852                    envelope_metric("tombstones", report.counts.tombstones.total),
7853                    envelope_metric("compaction", &report.compaction.status),
7854                    envelope_metric("readiness", &report.readiness.status),
7855                ],
7856            },
7857            false,
7858            report.next_commands.clone(),
7859        )
7860    } else {
7861        print_graph_db_operator_human(report);
7862        Ok(())
7863    }
7864}
7865
/// Returns `run` with any trailing note removed.
///
/// A note starts at the first occurrence of two spaces followed by `(`;
/// everything from that point on is dropped. Input without that marker is
/// returned unchanged.
fn status_run_command_without_notes(run: &str) -> &str {
    match run.find("  (") {
        Some(note_start) => &run[..note_start],
        None => run,
    }
}
7871
7872fn status_summarize_extract_command(run: &str) -> &str {
7873    let run = status_run_command_without_notes(run);
7874    run.split(" && ")
7875        .find(|command| command.contains("summarize --extract"))
7876        .unwrap_or(run)
7877}
7878
7879fn graph_db_status_summarize_command(report: &status::StatusReport) -> String {
7880    report
7881        .recommendations
7882        .run
7883        .as_deref()
7884        .filter(|command| command.contains("summarize --extract"))
7885        .map(status_summarize_extract_command)
7886        .unwrap_or("tsift summarize --extract .")
7887        .to_string()
7888}
7889
7890fn graph_db_semantic_rows_readiness(row_count: usize, source: &str) -> GraphEffectivenessReadiness {
7891    let mut readiness = graph_effectiveness_ready("semantic_rows_available");
7892    readiness.diagnostics.push(format!(
7893        "graph projection has {row_count} semantic_concept/semantic_entity row(s) from {source}; graph semantic rows are available"
7894    ));
7895    readiness
7896}
7897
7898fn graph_db_semantic_readiness(
7899    root: &Path,
7900    scope: Option<&str>,
7901    semantic_row_count: Option<usize>,
7902) -> GraphEffectivenessReadiness {
7903    if let Some(row_count) = semantic_row_count
7904        && row_count > 0
7905    {
7906        return graph_db_semantic_rows_readiness(row_count, "materialized graph projection");
7907    }
7908
7909    let report = match status::check_status(root) {
7910        Ok(report) => report,
7911        Err(err) => {
7912            return graph_effectiveness_blocked(
7913                "status_check_unavailable",
7914                vec![format!(
7915                    "semantic readiness could not inspect summary cache after graph-db refresh: {err:#}"
7916                )],
7917                vec![graph_db_refresh_command(root, scope)],
7918            );
7919        }
7920    };
7921
7922    match &report.summaries {
7923        status::SummaryStatus::Available {
7924            cached_files,
7925            total_indexed_files,
7926            coverage_pct,
7927            ..
7928        } => {
7929            let mut readiness = graph_effectiveness_ready("semantic_rows_available");
7930            readiness.diagnostics.push(format!(
7931                "summary cache has {cached_files}/{total_indexed_files} indexed file(s) cached ({coverage_pct}% coverage); graph semantic rows are available"
7932            ));
7933            readiness
7934        }
7935        status::SummaryStatus::None { .. } => {
7936            let summarize = graph_db_status_summarize_command(&report);
7937            let index_command = report
7938                .recommendations
7939                .run
7940                .as_deref()
7941                .filter(|cmd| cmd.contains("index"))
7942                .map(str::to_string);
7943            let mut repair = Vec::new();
7944            if let Some(cmd) = index_command {
7945                repair.push(cmd);
7946            }
7947            repair.push(summarize.clone());
7948            repair.push(graph_db_refresh_command(root, scope));
7949            graph_effectiveness_blocked(
7950                "summary_cache_empty",
7951                vec![format!(
7952                    "summary cache empty: graph-db materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
7953                    summarize,
7954                    root.display(),
7955                    graph_db_refresh_command(root, scope)
7956                )],
7957                repair,
7958            )
7959        }
7960        status::SummaryStatus::Unavailable => {
7961            let mut repair: Vec<String> = report
7962                .recommendations
7963                .run
7964                .clone()
7965                .into_iter()
7966                .collect();
7967            let summarize = "tsift summarize --extract .".to_string();
7968            repair.push(summarize);
7969            repair.push(graph_db_refresh_command(root, scope));
7970            graph_effectiveness_blocked(
7971                "summary_cache_unavailable",
7972                vec![
7973                    "summary cache unavailable because the source index is missing; build the index, extract summaries, and refresh the graph before relying on semantic graph evidence".to_string(),
7974                ],
7975                repair,
7976            )
7977        }
7978    }
7979}
7980
7981pub(crate) fn graph_db_operator_status_warnings(root: &Path, scope: Option<&str>) -> Vec<String> {
7982    let report = match status::check_status(root) {
7983        Ok(report) => report,
7984        Err(err) => {
7985            return vec![format!(
7986                "status check unavailable after graph-db refresh: {err:#}"
7987            )];
7988        }
7989    };
7990
7991    let summarize_run = if matches!(report.summaries, status::SummaryStatus::None { .. }) {
7992        Some(graph_db_status_summarize_command(&report))
7993    } else {
7994        None
7995    };
7996    let mut warnings = report.reminders;
7997    if matches!(report.summaries, status::SummaryStatus::None { .. }) {
7998        let run = summarize_run.unwrap_or_else(|| "tsift summarize --extract .".to_string());
7999        warnings.push(format!(
8000            "summary cache empty: graph-db refresh materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
8001            run,
8002            root.display(),
8003            graph_db_refresh_command(root, scope)
8004        ));
8005    }
8006    dedupe_preserve_order(warnings)
8007}
8008
8009pub(crate) fn print_graph_db_compaction_human(report: &GraphDbCompactionReport) {
8010    println!(
8011        "graph-db compact applied:{} pruned_tombstones:{} reclaimed:{} byte(s)",
8012        report.applied, report.pruned_tombstones, report.reclaimed_bytes
8013    );
8014    println!("graph_db: {}", report.graph_db);
8015    println!(
8016        "before: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
8017        report.counts_before.nodes,
8018        report.counts_before.edges,
8019        report.counts_before.tombstones.total,
8020        report.counts_before.file_size_bytes.unwrap_or(0),
8021        report.counts_before.freelist_bytes.unwrap_or(0)
8022    );
8023    println!(
8024        "after: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
8025        report.counts_after.nodes,
8026        report.counts_after.edges,
8027        report.counts_after.tombstones.total,
8028        report.counts_after.file_size_bytes.unwrap_or(0),
8029        report.counts_after.freelist_bytes.unwrap_or(0)
8030    );
8031    for proof in &report.compaction_after.proof {
8032        println!("proof: {proof}");
8033    }
8034    for warning in &report.warnings {
8035        println!("warning: {warning}");
8036    }
8037    for command in &report.next_commands {
8038        println!("next: {command}");
8039    }
8040}
8041
8042fn parse_graph_db_property_filters(raw: &[String]) -> Result<Vec<GraphDbPropertyFilter>> {
8043    raw.iter()
8044        .map(|value| {
8045            let (key, filter_value) = value
8046                .split_once('=')
8047                .with_context(|| format!("graph-db --property expects KEY=VALUE, got {value:?}"))?;
8048            let key = key.trim();
8049            let filter_value = filter_value.trim();
8050            if key.is_empty() || filter_value.is_empty() {
8051                bail!("graph-db --property expects non-empty KEY=VALUE, got {value:?}");
8052            }
8053            Ok(GraphDbPropertyFilter {
8054                key: key.to_string(),
8055                value: filter_value.to_string(),
8056            })
8057        })
8058        .collect()
8059}
8060
8061fn graph_db_query_options(
8062    cursor: Option<String>,
8063    limit: Option<usize>,
8064    property_filters: &[String],
8065) -> Result<GraphDbQueryOptions> {
8066    Ok(GraphDbQueryOptions {
8067        cursor,
8068        limit: limit.filter(|limit| *limit > 0),
8069        property_filters: parse_graph_db_property_filters(property_filters)?,
8070    })
8071}
8072
8073fn graph_db_query_options_for_store(options: &GraphDbQueryOptions) -> GraphQueryOptions {
8074    GraphQueryOptions {
8075        cursor: options.cursor.clone(),
8076        limit: options.limit,
8077        property_filters: options
8078            .property_filters
8079            .iter()
8080            .map(|filter| GraphPropertyFilter {
8081                key: filter.key.clone(),
8082                value: filter.value.clone(),
8083            })
8084            .collect(),
8085    }
8086}
8087
8088fn graph_db_page_report_from_store(
8089    page: GraphQueryPage,
8090    property_filters: Vec<GraphDbPropertyFilter>,
8091) -> GraphDbPageReport {
8092    GraphDbPageReport {
8093        cursor: page.cursor,
8094        limit: page.limit,
8095        next_cursor: page.next_cursor,
8096        returned_nodes: page.returned_nodes,
8097        returned_edges: page.returned_edges,
8098        truncated: page.truncated,
8099        property_filters,
8100        diagnostics: page.diagnostics,
8101    }
8102}
8103
8104fn graph_db_neighborhood_ranking_gate(
8105    ranked_neighbor_cap: usize,
8106) -> GraphDbNeighborhoodRankingGate {
8107    GraphDbNeighborhoodRankingGate {
8108        status: "held_default_order_unchanged".to_string(),
8109        ranked_output_default: false,
8110        default_order: "stable_node_id".to_string(),
8111        default_change_gate: "community_search_quality_metrics".to_string(),
8112        required_workloads: metric_digest::COMMUNITY_SEARCH_WORKLOADS
8113            .iter()
8114            .map(|workload| (*workload).to_string())
8115            .collect(),
8116        required_metrics: metric_digest::COMMUNITY_SEARCH_REQUIRED_METRICS
8117            .iter()
8118            .map(|metric| (*metric).to_string())
8119            .collect(),
8120        max_duration_regression_percent: metric_digest::COMMUNITY_MAX_DURATION_REGRESSION_PERCENT,
8121        min_handle_coverage_pct: metric_digest::COMMUNITY_MIN_HANDLE_COVERAGE_PCT,
8122        min_duplicate_name_precision: metric_digest::COMMUNITY_MIN_DUPLICATE_NAME_PRECISION,
8123        min_top_community_stability: metric_digest::COMMUNITY_MIN_TOP_COMMUNITY_STABILITY,
8124        diagnostics: vec![
8125            "ranked_neighbors is additive; neighborhood nodes remain ordered by stable node id for cursor pagination".to_string(),
8126            format!(
8127                "ranked_neighbors is score-capped at {ranked_neighbor_cap} entries so previews stay bounded while cursor pagination remains exhaustive"
8128            ),
8129            "changing the default neighborhood order requires the community-search gate to pass for every required workload".to_string(),
8130        ],
8131    }
8132}
8133
8134fn graph_db_ranked_neighbor_cap(limit: Option<usize>) -> usize {
8135    match limit {
8136        Some(0) | None => GRAPH_DB_RANKED_NEIGHBOR_CAP,
8137        Some(limit) => limit.clamp(1, GRAPH_DB_RANKED_NEIGHBOR_CAP),
8138    }
8139}
8140
/// Returns ranked neighbors of `center_id`, limited by `cap`.
///
/// Thin wrapper: all four arguments are forwarded unchanged to
/// `resolution::ranked_neighbors_capped`, which owns the ranking logic.
fn graph_db_ranked_neighbors(
    center_id: &str,
    nodes: &[SubstrateGraphNode],
    edges: &[SubstrateGraphEdge],
    cap: usize,
) -> Vec<GraphDbRankedNeighbor> {
    resolution::ranked_neighbors_capped(center_id, nodes, edges, cap)
}
8149
8150fn graph_db_ranked_neighborhood_comparison<S: GraphStore>(
8151    center_id: &str,
8152    depth: usize,
8153    edge_kind: Option<&str>,
8154    limit: Option<usize>,
8155    unranked_nodes: &[SubstrateGraphNode],
8156    unranked_edges: &[SubstrateGraphEdge],
8157    store: &S,
8158) -> Result<Option<GraphDbRankedNeighborhoodComparison>> {
8159    use std::time::Instant;
8160    let max_nodes = match limit {
8161        Some(0) | None => 200,
8162        Some(n) => n.clamp(10, 500),
8163    };
8164    let mut options = RankedNeighborhoodOptions::new(depth, max_nodes)
8165        .with_scoring(NeighborhoodScoring::EdgeKindWeighted);
8166    if let Some(kind) = edge_kind {
8167        options = options.with_edge_kind(kind);
8168    }
8169    let start = Instant::now();
8170    let result = store.ranked_neighborhood(center_id, &options)?;
8171    let latency = start.elapsed().as_micros();
8172    let Some(ranked) = result else {
8173        return Ok(None);
8174    };
8175    let unranked_ids: BTreeSet<_> = unranked_nodes.iter().map(|n| n.id.as_str()).collect();
8176    let ranked_ids: BTreeSet<_> = ranked.nodes.iter().map(|n| n.id.as_str()).collect();
8177    let overlap_count = ranked_ids.intersection(&unranked_ids).count();
8178    let overlap_pct = if unranked_ids.is_empty() || ranked_ids.is_empty() {
8179        0.0
8180    } else {
8181        (overlap_count as f64 / unranked_ids.len().max(ranked_ids.len()) as f64) * 100.0
8182    };
8183    let count_duplicates = |nodes: &[SubstrateGraphNode]| -> usize {
8184        let mut name_count = BTreeMap::<&str, usize>::new();
8185        for n in nodes {
8186            *name_count.entry(&n.label).or_default() += 1;
8187        }
8188        name_count.values().filter(|&&c| c > 1).count()
8189    };
8190    let count_handle_coverage = |nodes: &[SubstrateGraphNode]| -> f64 {
8191        if nodes.is_empty() {
8192            return 100.0;
8193        }
8194        let with_handle = nodes
8195            .iter()
8196            .filter(|n| n.properties.contains_key("handle") || n.properties.contains_key("ref_id"))
8197            .count();
8198        (with_handle as f64 / nodes.len() as f64) * 100.0
8199    };
8200    let useful_density = |nodes: &[SubstrateGraphNode], edges: &[SubstrateGraphEdge]| -> f64 {
8201        if nodes.is_empty() {
8202            return 0.0;
8203        }
8204        let semantic_kinds = [
8205            "semantic_concept",
8206            "semantic_entity",
8207            "symbol",
8208            "file",
8209            "source_handle",
8210        ];
8211        let useful = nodes
8212            .iter()
8213            .filter(|n| semantic_kinds.contains(&n.kind.as_str()))
8214            .count();
8215        let edge_diversity = edges.iter().map(|e| &e.kind).collect::<BTreeSet<_>>().len();
8216        let kind_diversity = nodes.iter().map(|n| &n.kind).collect::<BTreeSet<_>>().len();
8217        (useful as f64 * 0.5 + kind_diversity as f64 * 0.3 + edge_diversity as f64 * 0.2)
8218            / nodes.len() as f64
8219    };
8220    let community_truncation_summary = if ranked.pruned_count > 0 && !ranked.edges.is_empty() {
8221        let edge_pairs: Vec<(String, String)> = ranked
8222            .edges
8223            .iter()
8224            .map(|e| (e.from_id.clone(), e.to_id.clone()))
8225            .collect();
8226        let cr = tsift_graph::detect_communities(&edge_pairs);
8227        let kept_labels: BTreeSet<&str> = ranked.nodes.iter().map(|n| n.label.as_str()).collect();
8228        let mut fully_kept = 0usize;
8229        let mut partially_pruned = 0usize;
8230        let mut fully_pruned = 0usize;
8231        let mut pruned_kinds = BTreeSet::new();
8232        let mut pruned_labels = Vec::new();
8233        for comm in &cr.communities {
8234            let kept_in_comm: Vec<&str> = comm
8235                .members
8236                .iter()
8237                .filter(|m| kept_labels.contains(m.name.as_str()))
8238                .map(|m| m.name.as_str())
8239                .collect();
8240            if kept_in_comm.len() == comm.members.len() {
8241                fully_kept += 1;
8242            } else if kept_in_comm.is_empty() {
8243                fully_pruned += 1;
8244                for m in &comm.members {
8245                    if let Some(n) = ranked.nodes.iter().find(|n| n.label == m.name) {
8246                        pruned_kinds.insert(n.kind.clone());
8247                    }
8248                    pruned_labels.push(m.name.clone());
8249                }
8250            } else {
8251                partially_pruned += 1;
8252            }
8253        }
8254        pruned_labels.truncate(5);
8255        Some(CommunityTruncationSummary {
8256            total_communities: cr.communities.len(),
8257            fully_kept,
8258            partially_pruned,
8259            fully_pruned,
8260            pruned_community_kinds: pruned_kinds.into_iter().collect(),
8261            pruned_community_top_labels: pruned_labels,
8262        })
8263    } else {
8264        None
8265    };
8266    Ok(Some(GraphDbRankedNeighborhoodComparison {
8267        traversal_nodes: ranked.nodes.len(),
8268        traversal_edges: ranked.edges.len(),
8269        pruned_count: ranked.pruned_count,
8270        total_discovered: ranked.total_discovered,
8271        latency_micros: latency,
8272        overlap_with_unranked_pct: (overlap_pct * 100.0).round() / 100.0,
8273        useful_hit_density_ranked: (useful_density(&ranked.nodes, &ranked.edges) * 1000.0).round()
8274            / 1000.0,
8275        useful_hit_density_unranked: (useful_density(unranked_nodes, unranked_edges) * 1000.0)
8276            .round()
8277            / 1000.0,
8278        duplicate_name_count_ranked: count_duplicates(&ranked.nodes),
8279        duplicate_name_count_unranked: count_duplicates(unranked_nodes),
8280        handle_coverage_ranked_pct: (count_handle_coverage(&ranked.nodes) * 100.0).round() / 100.0,
8281        handle_coverage_unranked_pct: (count_handle_coverage(unranked_nodes) * 100.0).round()
8282            / 100.0,
8283        community_truncation_summary,
8284        diagnostics: vec![
8285            format!(
8286                "ranked_neighborhood traversed {} node(s), {} edge(s) with {} pruned of {} discovered in {}µs",
8287                ranked.nodes.len(),
8288                ranked.edges.len(),
8289                ranked.pruned_count,
8290                ranked.total_discovered,
8291                latency
8292            ),
8293            format!(
8294                "overlap with unranked BFS: {:.1}% ({} shared of {} unranked, {} ranked)",
8295                overlap_pct,
8296                overlap_count,
8297                unranked_ids.len(),
8298                ranked_ids.len()
8299            ),
8300            "comparison is diagnostic; promotion requires community-search quality gate to pass for every required workload".to_string(),
8301        ],
8302    }))
8303}
8304
/// Result of applying the output budget to a candidate subgraph; returned by
/// `graph_db_apply_output_budget` and its cursor-aware variant.
struct GraphDbBudgetedSubgraph {
    // Nodes in the budgeted result.
    nodes: Vec<SubstrateGraphNode>,
    // Edges in the budgeted result.
    edges: Vec<SubstrateGraphEdge>,
    // Budget accounting report that accompanies the result.
    report: GraphDbOutputBudgetReport,
    // NOTE(review): presumably true when candidates were dropped to fit the budget — confirm.
    truncated: bool,
    // NOTE(review): presumably the cursor for resuming after this page, if any — confirm.
    next_cursor: Option<String>,
}
8312
/// Token cap used when no usable limit is given.
const GRAPH_DB_OUTPUT_DEFAULT_TOKEN_CAP: usize = 6_000;
/// Lower bound for a limit-derived token cap.
const GRAPH_DB_OUTPUT_MIN_TOKEN_CAP: usize = 1_200;
/// Upper bound for a limit-derived token cap.
const GRAPH_DB_OUTPUT_MAX_TOKEN_CAP: usize = 12_000;

/// Derives the output token cap from a requested `limit`.
///
/// A missing or zero limit yields the default cap. Otherwise the limit is
/// multiplied by 320 (saturating) and clamped between the minimum and
/// maximum caps.
fn graph_db_output_token_cap(limit: Option<usize>) -> usize {
    let Some(requested) = limit.filter(|value| *value > 0) else {
        return GRAPH_DB_OUTPUT_DEFAULT_TOKEN_CAP;
    };
    let scaled = requested.saturating_mul(320);
    scaled.clamp(GRAPH_DB_OUTPUT_MIN_TOKEN_CAP, GRAPH_DB_OUTPUT_MAX_TOKEN_CAP)
}
8325
/// Returns how many nodes of `kind` may be selected for a given `limit`.
///
/// With a missing or zero limit, fixed per-kind defaults apply. With a
/// positive limit, the quota is the limit plus a per-kind headroom
/// (saturating).
fn graph_db_node_kind_quota(kind: &str, limit: Option<usize>) -> usize {
    match limit.filter(|requested| *requested > 0) {
        None => match kind {
            "source_handle" | "semantic_concept" | "semantic_entity" => 10,
            "file" | "symbol" | "route" => 12,
            // Worker kinds and everything else share the same default.
            _ => 8,
        },
        Some(base) => {
            let headroom = match kind {
                "source_handle" | "semantic_concept" | "semantic_entity" | "file" | "symbol"
                | "route" => 4,
                "worker_context" | "worker_result" => 2,
                _ => 1,
            };
            base.saturating_add(headroom)
        }
    }
}
8345
/// Returns how many edges of `kind` may be selected for a given `limit`.
///
/// With a missing or zero limit, fixed per-kind defaults apply. With a
/// positive limit, mention edges get three times the limit, relation/call/
/// define edges twice the limit, and all other kinds the limit plus two
/// (all saturating).
fn graph_db_edge_kind_quota(kind: &str, limit: Option<usize>) -> usize {
    let is_mention = matches!(kind, "mentions" | "mentions_concept" | "mentions_entity");
    let is_structural = matches!(kind, "semantic_relation" | "calls" | "defines");

    match limit.filter(|requested| *requested > 0) {
        None if is_mention => 24,
        None if is_structural => 20,
        None => 16,
        Some(base) if is_mention => base.saturating_mul(3),
        Some(base) if is_structural => base.saturating_mul(2),
        Some(base) => base.saturating_add(2),
    }
}
8361
8362fn graph_db_estimated_tokens<T: Serialize>(value: &T) -> usize {
8363    serde_json::to_vec(value)
8364        .map(|bytes| bytes.len().div_ceil(4).max(1))
8365        .unwrap_or(1)
8366}
8367
8368fn graph_db_node_search_text(node: &SubstrateGraphNode) -> String {
8369    let mut parts = vec![node.kind.clone(), node.label.clone()];
8370    for key in [
8371        "detail",
8372        "description",
8373        "source_ref",
8374        "path",
8375        "source_file",
8376        "source_symbol",
8377        "text_preview",
8378    ] {
8379        if let Some(value) = node.properties.get(key) {
8380            parts.push(value.clone());
8381        }
8382    }
8383    parts.join(" ")
8384}
8385
8386fn graph_db_semantic_scores_for_query(
8387    query: Option<&str>,
8388    nodes: &[SubstrateGraphNode],
8389) -> BTreeMap<String, f64> {
8390    let Some(query) = query.filter(|value| !value.trim().is_empty()) else {
8391        return BTreeMap::new();
8392    };
8393    let query_embedding = semantic_embedding(query);
8394    nodes
8395        .iter()
8396        .filter(|node| matches!(node.kind.as_str(), "semantic_concept" | "semantic_entity"))
8397        .filter_map(|node| {
8398            let embedding = node
8399                .properties
8400                .get("embedding")
8401                .and_then(|value| parse_semantic_embedding_property(value))?;
8402            Some((
8403                node.id.clone(),
8404                semantic_cosine(&query_embedding, &embedding),
8405            ))
8406        })
8407        .collect()
8408}
8409
8410fn graph_db_depth_by_id(
8411    origin_ids: &[String],
8412    edges: &[SubstrateGraphEdge],
8413) -> BTreeMap<String, usize> {
8414    let mut adjacency = BTreeMap::<String, Vec<String>>::new();
8415    for edge in edges {
8416        adjacency
8417            .entry(edge.from_id.clone())
8418            .or_default()
8419            .push(edge.to_id.clone());
8420        adjacency
8421            .entry(edge.to_id.clone())
8422            .or_default()
8423            .push(edge.from_id.clone());
8424    }
8425
8426    let mut depth_by_id = BTreeMap::<String, usize>::new();
8427    let mut queue = VecDeque::<String>::new();
8428    for origin in origin_ids {
8429        if depth_by_id.insert(origin.clone(), 0).is_none() {
8430            queue.push_back(origin.clone());
8431        }
8432    }
8433    while let Some(current) = queue.pop_front() {
8434        let depth = depth_by_id.get(&current).copied().unwrap_or(0);
8435        for next in adjacency.get(&current).into_iter().flatten() {
8436            if depth_by_id.contains_key(next) {
8437                continue;
8438            }
8439            depth_by_id.insert(next.clone(), depth.saturating_add(1));
8440            queue.push_back(next.clone());
8441        }
8442    }
8443    depth_by_id
8444}
8445
8446fn graph_db_source_covered_ids(
8447    nodes: &[SubstrateGraphNode],
8448    edges: &[SubstrateGraphEdge],
8449) -> BTreeSet<String> {
8450    let source_ids = nodes
8451        .iter()
8452        .filter(|node| node.kind == "source_handle")
8453        .map(|node| node.id.as_str())
8454        .collect::<BTreeSet<_>>();
8455    let mut covered = source_ids
8456        .iter()
8457        .map(|id| (*id).to_string())
8458        .collect::<BTreeSet<_>>();
8459    for edge in edges {
8460        if source_ids.contains(edge.from_id.as_str()) {
8461            covered.insert(edge.to_id.clone());
8462        }
8463        if source_ids.contains(edge.to_id.as_str()) {
8464            covered.insert(edge.from_id.clone());
8465        }
8466    }
8467    covered
8468}
8469
8470fn graph_db_recency_score(node: &SubstrateGraphNode) -> i64 {
8471    for key in [
8472        "observed_at_unix",
8473        "completed_at_unix",
8474        "created_at_unix",
8475        "started_at_unix",
8476    ] {
8477        if let Some(value) = node.properties.get(key)
8478            && let Ok(epoch) = value.parse::<i64>()
8479        {
8480            return epoch.div_euclid(86_400).clamp(0, 40_000);
8481        }
8482    }
8483    0
8484}
8485
/// Returns the base usefulness score for a node kind.
///
/// Known kinds map to fixed scores; any other kind scores 40.
fn graph_db_node_kind_score(kind: &str) -> i64 {
    const KIND_SCORES: &[(&str, i64)] = &[
        ("source_handle", 180),
        ("worker_context", 170),
        ("worker_result", 160),
        ("semantic_concept", 150),
        ("semantic_entity", 150),
        ("backlog", 130),
        ("job_packet", 130),
        ("symbol", 120),
        ("file", 110),
        ("route", 105),
        ("session", 90),
    ];
    const FALLBACK_SCORE: i64 = 40;

    KIND_SCORES
        .iter()
        .find(|(known, _)| *known == kind)
        .map_or(FALLBACK_SCORE, |(_, score)| *score)
}
8500
/// Returns the base usefulness score for an edge kind.
///
/// Known kinds map to fixed scores; any other kind scores 40.
fn graph_db_edge_kind_score(kind: &str) -> i64 {
    const KIND_SCORES: &[(&str, i64)] = &[
        ("mentions_concept", 180),
        ("mentions_entity", 180),
        ("semantic_relation", 170),
        ("mentions", 165),
        ("requests_context", 155),
        ("scopes_context", 155),
        ("scopes_source", 155),
        ("explains_result", 150),
        ("calls", 145),
        ("defines", 130),
        ("handled_by", 130),
        ("defines_route", 130),
        ("contains", 120),
        ("targets", 120),
        ("records_memory_source", 115),
        ("has_vector_handle", 115),
    ];
    const FALLBACK_SCORE: i64 = 40;

    KIND_SCORES
        .iter()
        .find(|(known, _)| *known == kind)
        .map_or(FALLBACK_SCORE, |(_, score)| *score)
}
8515
8516fn graph_db_node_usefulness_score(
8517    node: &SubstrateGraphNode,
8518    depth_by_id: &BTreeMap<String, usize>,
8519    semantic_scores: &BTreeMap<String, f64>,
8520    source_covered_ids: &BTreeSet<String>,
8521    origin_ids: &[String],
8522) -> i64 {
8523    if origin_ids.iter().any(|origin| origin == &node.id) {
8524        return 1_000_000;
8525    }
8526    let semantic = semantic_scores
8527        .get(&node.id)
8528        .map(|score| (score.max(0.0) * 1_000.0) as i64)
8529        .unwrap_or(0);
8530    let depth_penalty = depth_by_id
8531        .get(&node.id)
8532        .map(|depth| (*depth as i64).saturating_mul(55))
8533        .unwrap_or(180);
8534    let source_coverage = if source_covered_ids.contains(&node.id)
8535        || node.properties.contains_key("source_ref")
8536        || node.properties.contains_key("path")
8537    {
8538        120
8539    } else {
8540        0
8541    };
8542    graph_db_node_kind_score(&node.kind)
8543        + semantic
8544        + source_coverage
8545        + graph_db_recency_score(node).min(80)
8546        - depth_penalty
8547}
8548
8549fn graph_db_edge_usefulness_score(
8550    edge: &SubstrateGraphEdge,
8551    node_score_by_id: &BTreeMap<String, i64>,
8552    depth_by_id: &BTreeMap<String, usize>,
8553) -> i64 {
8554    let endpoint_score = node_score_by_id
8555        .get(&edge.from_id)
8556        .copied()
8557        .unwrap_or_default()
8558        .max(
8559            node_score_by_id
8560                .get(&edge.to_id)
8561                .copied()
8562                .unwrap_or_default(),
8563        );
8564    let depth_penalty = depth_by_id
8565        .get(&edge.from_id)
8566        .into_iter()
8567        .chain(depth_by_id.get(&edge.to_id))
8568        .min()
8569        .map(|depth| (*depth as i64).saturating_mul(35))
8570        .unwrap_or(140);
8571    graph_db_edge_kind_score(&edge.kind) + (endpoint_score / 8) - depth_penalty
8572}
8573
/// Records one dropped item in the budget drop tally.
///
/// The tally is keyed by `(item, kind, reason)`; the matching counter is
/// created at zero if needed and then incremented by one.
fn graph_db_push_drop(
    drops: &mut BTreeMap<(String, String, String), usize>,
    item: &str,
    kind: &str,
    reason: &str,
) {
    let key = (item.to_owned(), kind.to_owned(), reason.to_owned());
    let counter = drops.entry(key).or_insert(0);
    *counter += 1;
}
8584
8585fn graph_db_budget_drop_report(
8586    drops: BTreeMap<(String, String, String), usize>,
8587) -> Vec<GraphDbDroppedByBudget> {
8588    drops
8589        .into_iter()
8590        .map(|((item, kind, reason), dropped)| GraphDbDroppedByBudget {
8591            item,
8592            kind,
8593            reason,
8594            dropped,
8595        })
8596        .collect()
8597}
8598
8599fn graph_db_apply_output_budget(
8600    origin_ids: &[String],
8601    semantic_scores: &BTreeMap<String, f64>,
8602    nodes: Vec<SubstrateGraphNode>,
8603    edges: Vec<SubstrateGraphEdge>,
8604    limit: Option<usize>,
8605) -> GraphDbBudgetedSubgraph {
8606    graph_db_apply_output_budget_with_depths_and_cursor(
8607        origin_ids,
8608        semantic_scores,
8609        nodes,
8610        edges,
8611        limit,
8612        None,
8613        None,
8614    )
8615}
8616
8617fn graph_db_apply_output_budget_with_depths_and_cursor(
8618    origin_ids: &[String],
8619    semantic_scores: &BTreeMap<String, f64>,
8620    nodes: Vec<SubstrateGraphNode>,
8621    edges: Vec<SubstrateGraphEdge>,
8622    limit: Option<usize>,
8623    depth_overrides: Option<&BTreeMap<String, usize>>,
8624    cursor: Option<&str>,
8625) -> GraphDbBudgetedSubgraph {
8626    let max_tokens = graph_db_output_token_cap(limit);
8627    let candidate_nodes = nodes.len();
8628    let candidate_edges = edges.len();
8629    let mut depth_by_id = graph_db_depth_by_id(origin_ids, &edges);
8630    if let Some(depth_overrides) = depth_overrides {
8631        for (id, depth) in depth_overrides {
8632            depth_by_id
8633                .entry(id.clone())
8634                .and_modify(|current| *current = (*current).min(*depth))
8635                .or_insert(*depth);
8636        }
8637    }
8638    let source_covered_ids = graph_db_source_covered_ids(&nodes, &edges);
8639    let node_score_by_id = nodes
8640        .iter()
8641        .map(|node| {
8642            (
8643                node.id.clone(),
8644                graph_db_node_usefulness_score(
8645                    node,
8646                    &depth_by_id,
8647                    semantic_scores,
8648                    &source_covered_ids,
8649                    origin_ids,
8650                ),
8651            )
8652        })
8653        .collect::<BTreeMap<_, _>>();
8654
8655    let mut node_candidates = nodes.iter().collect::<Vec<_>>();
8656    node_candidates.sort_by(|left, right| {
8657        node_score_by_id
8658            .get(&right.id)
8659            .cmp(&node_score_by_id.get(&left.id))
8660            .then_with(|| left.kind.cmp(&right.kind))
8661            .then_with(|| left.label.cmp(&right.label))
8662            .then_with(|| left.id.cmp(&right.id))
8663    });
8664
8665    let cursor_skip = if let Some(cursor) = cursor {
8666        node_candidates
8667            .iter()
8668            .position(|node| node.id == cursor)
8669            .map(|pos| pos.saturating_add(1))
8670            .unwrap_or(0)
8671    } else {
8672        0
8673    };
8674    if cursor_skip > 0 {
8675        node_candidates = node_candidates.into_iter().skip(cursor_skip).collect();
8676    }
8677
8678    let mut selected_node_ids = BTreeSet::new();
8679    let mut selected_node_counts = BTreeMap::<String, usize>::new();
8680    let mut estimated_tokens = 0usize;
8681    let mut drops = BTreeMap::<(String, String, String), usize>::new();
8682    for node in &node_candidates {
8683        let kind_count = selected_node_counts
8684            .get(&node.kind)
8685            .copied()
8686            .unwrap_or_default();
8687        if !origin_ids.iter().any(|origin| origin == &node.id)
8688            && kind_count >= graph_db_node_kind_quota(&node.kind, limit)
8689        {
8690            graph_db_push_drop(&mut drops, "node", &node.kind, "per_kind_quota");
8691            continue;
8692        }
8693        let tokens = graph_db_estimated_tokens(node);
8694        if !origin_ids.iter().any(|origin| origin == &node.id)
8695            && estimated_tokens.saturating_add(tokens) > max_tokens
8696        {
8697            graph_db_push_drop(&mut drops, "node", &node.kind, "estimated_token_cap");
8698            continue;
8699        }
8700        selected_node_ids.insert(node.id.clone());
8701        *selected_node_counts.entry(node.kind.clone()).or_default() += 1;
8702        estimated_tokens = estimated_tokens.saturating_add(tokens);
8703    }
8704
8705    let has_remaining_candidates = node_candidates
8706        .iter()
8707        .any(|node| !selected_node_ids.contains(&node.id));
8708
8709    let mut selected_nodes = nodes
8710        .into_iter()
8711        .filter(|node| selected_node_ids.contains(&node.id))
8712        .collect::<Vec<_>>();
8713
8714    let mut edge_candidates = edges
8715        .iter()
8716        .filter(|edge| {
8717            selected_node_ids.contains(&edge.from_id) && selected_node_ids.contains(&edge.to_id)
8718        })
8719        .collect::<Vec<_>>();
8720    let edge_score_by_key = edge_candidates
8721        .iter()
8722        .map(|edge| {
8723            (
8724                graph_db_edge_key(edge),
8725                graph_db_edge_usefulness_score(edge, &node_score_by_id, &depth_by_id),
8726            )
8727        })
8728        .collect::<BTreeMap<_, _>>();
8729    edge_candidates.sort_by(|left, right| {
8730        edge_score_by_key
8731            .get(&graph_db_edge_key(right))
8732            .cmp(&edge_score_by_key.get(&graph_db_edge_key(left)))
8733            .then_with(|| left.kind.cmp(&right.kind))
8734            .then_with(|| left.from_id.cmp(&right.from_id))
8735            .then_with(|| left.to_id.cmp(&right.to_id))
8736    });
8737
8738    let endpoint_dropped_edges = edges
8739        .iter()
8740        .filter(|edge| {
8741            !selected_node_ids.contains(&edge.from_id) || !selected_node_ids.contains(&edge.to_id)
8742        })
8743        .count();
8744    if endpoint_dropped_edges > 0 {
8745        drops.insert(
8746            (
8747                "edge".to_string(),
8748                "*".to_string(),
8749                "endpoint_node_dropped".to_string(),
8750            ),
8751            endpoint_dropped_edges,
8752        );
8753    }
8754
8755    let mut selected_edge_ids = BTreeSet::new();
8756    let mut selected_edge_counts = BTreeMap::<String, usize>::new();
8757    for edge in edge_candidates {
8758        let kind_count = selected_edge_counts
8759            .get(&edge.kind)
8760            .copied()
8761            .unwrap_or_default();
8762        if kind_count >= graph_db_edge_kind_quota(&edge.kind, limit) {
8763            graph_db_push_drop(&mut drops, "edge", &edge.kind, "per_kind_quota");
8764            continue;
8765        }
8766        let tokens = graph_db_estimated_tokens(edge);
8767        if estimated_tokens.saturating_add(tokens) > max_tokens {
8768            graph_db_push_drop(&mut drops, "edge", &edge.kind, "estimated_token_cap");
8769            continue;
8770        }
8771        selected_edge_ids.insert(graph_db_edge_key(edge));
8772        *selected_edge_counts.entry(edge.kind.clone()).or_default() += 1;
8773        estimated_tokens = estimated_tokens.saturating_add(tokens);
8774    }
8775
8776    let selected_edges = edges
8777        .into_iter()
8778        .filter(|edge| selected_edge_ids.contains(&graph_db_edge_key(edge)))
8779        .collect::<Vec<_>>();
8780    let dropped_by_budget = graph_db_budget_drop_report(drops);
8781    let truncated = has_remaining_candidates;
8782    let next_cursor = if truncated {
8783        selected_nodes.last().map(|node| node.id.clone())
8784    } else {
8785        None
8786    };
8787    let mut diagnostics = vec![
8788        "budget ranking signals: semantic_match, edge_kind, depth, recency, source_handle_coverage"
8789            .to_string(),
8790        format!(
8791            "selected {} of {} candidate node(s) and {} of {} candidate edge(s) within estimated token cap {}",
8792            selected_nodes.len(),
8793            candidate_nodes,
8794            selected_edges.len(),
8795            candidate_edges,
8796            max_tokens
8797        ),
8798    ];
8799    if cursor.is_some() {
8800        diagnostics.push(format!(
8801            "cursor skipped {} previously returned candidate(s)",
8802            cursor_skip
8803        ));
8804    }
8805    if next_cursor.is_some() {
8806        diagnostics.push(
8807            "result was truncated; pass next_cursor as --cursor for the next page".to_string(),
8808        );
8809    }
8810    selected_nodes.shrink_to_fit();
8811
8812    GraphDbBudgetedSubgraph {
8813        nodes: selected_nodes,
8814        edges: selected_edges,
8815        report: GraphDbOutputBudgetReport {
8816            max_tokens,
8817            estimated_tokens,
8818            selected_nodes: selected_node_ids.len(),
8819            selected_edges: selected_edge_ids.len(),
8820            candidate_nodes,
8821            candidate_edges,
8822            dropped_by_budget,
8823            diagnostics,
8824        },
8825        truncated,
8826        next_cursor,
8827    }
8828}
8829
8830fn graph_db_edge_key(edge: &SubstrateGraphEdge) -> String {
8831    if edge.id.is_empty() {
8832        substrate::ConvexEdgeRow::stable_key(&edge.from_id, &edge.to_id, &edge.kind)
8833    } else {
8834        edge.id.clone()
8835    }
8836}
8837
8838fn graph_db_schema() -> GraphDbSchema {
8839    GraphDbSchema {
8840        contract_versions: vec![
8841            GraphDbSchemaContract {
8842                name: "graph_db_evidence",
8843                version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
8844                description: "graph-db evidence JSON packet including packet_id, projection hash, worker context, source handles, worker results, semantic rows, replay commands, and repair commands",
8845            },
8846            GraphDbSchemaContract {
8847                name: "worker_prompt_packet",
8848                version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
8849                description: "conflict-matrix worker prompt packet with owned scope, scheduler fields, stable graph handles, expected tests, expansion commands, token budget, semantic ranking reasons, worker feedback closure controls, and fail-closed prompt text",
8850            },
8851            GraphDbSchemaContract {
8852                name: "conflict_matrix",
8853                version: CONFLICT_MATRIX_CONTRACT_VERSION,
8854                description: "parallel-dispatch decision report keyed by graph evidence packets, scheduler block fields, hard file/symbol/test/config gates, and soft worker-feedback closure ranking",
8855            },
8856            GraphDbSchemaContract {
8857                name: "context_pack_graph_orchestration",
8858                version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
8859                description: "context-pack graph orchestration summary with projection freshness, evidence packet ids, ownership blocks, and follow-up graph commands",
8860            },
8861            GraphDbSchemaContract {
8862                name: "session_review_follow_up",
8863                version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
8864                description: "session-review next-context follow-up command contract for resumable digest/context-pack commands",
8865            },
8866            GraphDbSchemaContract {
8867                name: "dispatch_trace",
8868                version: DISPATCH_TRACE_CONTRACT_VERSION,
8869                description: "operator review trace linking backlog, job packets, worker results, source handles, semantic rows, scheduler fields, evidence packet ids, worker feedback closure controls, and worker prompt packets",
8870            },
8871            GraphDbSchemaContract {
8872                name: "dependency_dag",
8873                version: DEPENDENCY_DAG_CONTRACT_VERSION,
8874                description: "topological planning DAG for agent-doc backlog targets with replayable dependency edges, topo batches, and cycle diagnostics",
8875            },
8876        ],
8877        node_fields: vec![
8878            GraphDbSchemaField {
8879                name: "id",
8880                value_type: "string",
8881                description: "Stable provider-neutral node id",
8882            },
8883            GraphDbSchemaField {
8884                name: "kind",
8885                value_type: "string",
8886                description: "Application-defined node family such as file, symbol, or backlog",
8887            },
8888            GraphDbSchemaField {
8889                name: "label",
8890                value_type: "string",
8891                description: "Human-readable label",
8892            },
8893            GraphDbSchemaField {
8894                name: "properties",
8895                value_type: "object<string,string>",
8896                description: "Adapter-specific string properties",
8897            },
8898            GraphDbSchemaField {
8899                name: "provenance",
8900                value_type: "array",
8901                description: "Source system and source reference metadata",
8902            },
8903            GraphDbSchemaField {
8904                name: "freshness",
8905                value_type: "object|null",
8906                description: "Optional content hash and observed timestamp",
8907            },
8908        ],
8909        edge_fields: vec![
8910            GraphDbSchemaField {
8911                name: "id",
8912                value_type: "string",
8913                description: "Stable provider-neutral edge id derived from from_id, kind, and to_id",
8914            },
8915            GraphDbSchemaField {
8916                name: "from_id",
8917                value_type: "string",
8918                description: "Source node id",
8919            },
8920            GraphDbSchemaField {
8921                name: "to_id",
8922                value_type: "string",
8923                description: "Target node id",
8924            },
8925            GraphDbSchemaField {
8926                name: "kind",
8927                value_type: "string",
8928                description: "Application-defined edge relation",
8929            },
8930            GraphDbSchemaField {
8931                name: "properties",
8932                value_type: "object<string,string>",
8933                description: "Adapter-specific string properties",
8934            },
8935            GraphDbSchemaField {
8936                name: "provenance",
8937                value_type: "array",
8938                description: "Source system and source reference metadata",
8939            },
8940            GraphDbSchemaField {
8941                name: "freshness",
8942                value_type: "object|null",
8943                description: "Optional content hash and observed timestamp",
8944            },
8945        ],
8946        operations: vec![
8947            GraphDbSchemaOperation {
8948                command: "refresh",
8949                description: "Materialize .tsift/graph.db explicitly with delta upserts/deletes, row hash watermarks, tombstone pruning, projection metadata, row counts, and operator next commands",
8950            },
8951            GraphDbSchemaOperation {
8952                command: "status",
8953                description: "Inspect .tsift/graph.db freshness, projection metadata, row counts, tombstone counts, file-size impact, and operator next commands without refreshing",
8954            },
8955            GraphDbSchemaOperation {
8956                command: "doctor",
8957                description: "Validate graph.db or Convex snapshot health and return fail-closed repair diagnostics plus non-fatal SQLite tombstone-retention warnings",
8958            },
8959            GraphDbSchemaOperation {
8960                command: "drift",
8961                description: "Compare local SQLite projection rows with a Convex snapshot and return upsert, tombstone, metadata, duplicate, orphan, and next-command diagnostics",
8962            },
8963            GraphDbSchemaOperation {
8964                command: "compact [--apply] [--prune-tombstones --confirmed-convex-reconciled]",
8965                description: "Return or apply the post-reconciliation SQLite graph compaction policy, including WAL checkpoint/VACUUM proof and guarded tombstone pruning",
8966            },
8967            GraphDbSchemaOperation {
8968                command: "backend-eval [--candidate duckdb-duckpgq|falkordb|ladybug|kuzu|surrealdb] [--target ID] [--full-projection]",
8969                description: "Benchmark experimental read-only GraphStore backend prototypes against SQLite on bounded real, optional full-project, and synthetic projections across refresh/status/path tiers/evidence/conflict-matrix/dispatch-trace and emit promotion hold/eligibility gates",
8970            },
8971            GraphDbSchemaOperation {
8972                command: "evidence <target> [--depth N] [--limit N]",
8973                description: "Return a bounded versioned graph-db handoff packet for a backlog id or job packet handle, including packet_id, projection hash, worker_context rows, source_handle rows, worker_result rows, semantic_concept/entity rows, shortest paths, replay commands, repair commands, and next commands",
8974            },
8975            GraphDbSchemaOperation {
8976                command: "related <phrase> [--kind concept|entity|all] [--depth N] [--seed-limit N] [--limit N]",
8977                description: "Resolve a natural-language phrase to cached semantic concept/entity seed nodes, then return an incident/outgoing GraphStore neighborhood around those seeds for general knowledge retrieval without changing stable neighborhood pagination defaults",
8978            },
8979            GraphDbSchemaOperation {
8980                command: "dispatch-trace [target...] --path <session> [--format json|html]",
8981                description: "Export a compact graph-backed dispatch trace with evidence packet ids, worker-result feedback closure summaries, graph links, and conflict-matrix worker prompt packets",
8982            },
8983            GraphDbSchemaOperation {
8984                command: "dependency-dag [target...] --path <session>",
8985                description: "Extract a versioned agent-doc dependency DAG from backlog ids, explicit depends-on text, shared file/symbol/test/config evidence, semantic overlap, and worker-result follow-up ids",
8986            },
8987            GraphDbSchemaOperation {
8988                command: "schema",
8989                description: "Return record and operation schemas",
8990            },
8991            GraphDbSchemaOperation {
8992                command: "node <id>",
8993                description: "Return one node by stable id",
8994            },
8995            GraphDbSchemaOperation {
8996                command: "edge <id>",
8997                description: "Return one edge by stable edge id",
8998            },
8999            GraphDbSchemaOperation {
9000                command: "edges [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
9001                description: "Return edge records ordered by stable edge id with SQLite-pushed edge-property filtering and cursor pagination",
9002            },
9003            GraphDbSchemaOperation {
9004                command: "incident <id> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
9005                description: "Return incoming and outgoing edges incident to one node, ordered by stable edge id with optional kind and edge-property filters",
9006            },
9007            GraphDbSchemaOperation {
9008                command: "kind <kind> [--property KEY=VALUE] [--cursor ID] [--limit N]",
9009                description: "Return nodes of one kind ordered by id with SQLite-pushed property filtering/cursor pagination and query-plan diagnostics",
9010            },
9011            GraphDbSchemaOperation {
9012                command: "neighborhood <id> --depth <n> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor ID] [--limit N]",
9013                description: "Return a directed outgoing subgraph around a node using batched SQLite recursive traversal plus pushed filters/paging when available; JSON also includes additive ranked_neighbors while default nodes remain stable-id ordered",
9014            },
9015            GraphDbSchemaOperation {
9016                command: "path <from> <to> [--edge-kind <kind>] [--max-hops N]",
9017                description: "Return the shortest directed path by node id, optionally bounded by hop count",
9018            },
9019        ],
9020    }
9021}
9022
9023pub(crate) fn sqlite_graph_freshness(
9024    store: &SqliteGraphStore,
9025    scope: &str,
9026) -> Result<GraphDbFreshnessReport> {
9027    let version = store.projection_version(scope)?;
9028    let Some(version) = version else {
9029        return Ok(GraphDbFreshnessReport {
9030            status: "missing".to_string(),
9031            fail_closed: true,
9032            projection_version: None,
9033            content_hash: None,
9034            source_watermark: None,
9035            diagnostics: vec![
9036                "graph projection metadata is missing; rebuild the graph before trusting reads"
9037                    .to_string(),
9038            ],
9039        });
9040    };
9041    let mut diagnostics = Vec::new();
9042    let fail_closed =
9043        version.projection_version != GRAPH_PROJECTION_VERSION || version.content_hash.is_none();
9044    if version.projection_version != GRAPH_PROJECTION_VERSION {
9045        diagnostics.push(format!(
9046            "projection version mismatch: expected {} got {}",
9047            GRAPH_PROJECTION_VERSION, version.projection_version
9048        ));
9049    }
9050    if version.content_hash.is_none() {
9051        diagnostics.push("projection content hash is missing".to_string());
9052    }
9053    Ok(GraphDbFreshnessReport {
9054        status: if fail_closed { "stale" } else { "current" }.to_string(),
9055        fail_closed,
9056        projection_version: Some(version.projection_version),
9057        content_hash: version.content_hash,
9058        source_watermark: version.source_watermark,
9059        diagnostics,
9060    })
9061}
9062
9063pub(crate) fn convex_graph_freshness(
9064    local: &ConvexProjectionRows,
9065    snapshot: &ConvexProjectionRows,
9066    scope: Option<&str>,
9067) -> GraphDbFreshnessReport {
9068    let freshness = convex_projection_freshness(local, Some(snapshot), scope);
9069    GraphDbFreshnessReport {
9070        status: freshness.status,
9071        fail_closed: freshness.fail_closed,
9072        projection_version: Some(GRAPH_PROJECTION_VERSION.to_string()),
9073        content_hash: freshness.snapshot_hash,
9074        source_watermark: None,
9075        diagnostics: freshness.diagnostics,
9076    }
9077}
9078
9079pub(crate) fn tokensave_graph_freshness(store: &TokensaveDb) -> Result<GraphDbFreshnessReport> {
9080    let (nodes, edges) = store.graph_counts()?;
9081    let files = store.file_count()?;
9082    Ok(GraphDbFreshnessReport {
9083        status: "current".to_string(),
9084        fail_closed: false,
9085        projection_version: Some("tokensave-readonly".to_string()),
9086        content_hash: None,
9087        source_watermark: Some(store.db_path().to_string_lossy().to_string()),
9088        diagnostics: vec![format!(
9089            "tokensave read-only adapter opened {} node(s), {} edge(s), {} file(s)",
9090            nodes, edges, files
9091        )],
9092    })
9093}
9094
9095pub(crate) fn append_tokensave_graph_doctor_checks(report: &mut GraphDbDoctorReport, root: &Path) {
9096    match TokensaveDb::discover(root) {
9097        Ok(Some(store)) => {
9098            report.push_check(GraphDbDoctorCheck {
9099                name: "tokensave_db_open".to_string(),
9100                status: "ok".to_string(),
9101                fail_closed: false,
9102                diagnostics: vec![format!(
9103                    "opened tokensave database at {}",
9104                    store.db_path().display()
9105                )],
9106                repair_commands: Vec::new(),
9107            });
9108            match (store.node_count(), store.edge_count(), store.file_count()) {
9109                (Ok(nodes), Ok(edges), Ok(files)) => {
9110                    report.push_check(GraphDbDoctorCheck {
9111                        name: "tokensave_counts".to_string(),
9112                        status: "ok".to_string(),
9113                        fail_closed: false,
9114                        diagnostics: vec![format!(
9115                            "tokensave contains {} node(s), {} edge(s), {} file(s)",
9116                            nodes, edges, files
9117                        )],
9118                        repair_commands: Vec::new(),
9119                    });
9120                }
9121                (nodes, edges, files) => {
9122                    report.push_check(graph_db_doctor_check(
9123                        "tokensave_counts",
9124                        vec![format!(
9125                            "tokensave count inspection failed: nodes={:?} edges={:?} files={:?}",
9126                            nodes.err(),
9127                            edges.err(),
9128                            files.err()
9129                        )],
9130                        Vec::new(),
9131                    ));
9132                }
9133            }
9134        }
9135        Ok(None) => report.push_check(graph_db_doctor_check(
9136            "tokensave_db_exists",
9137            vec![format!(
9138                "tokensave database is missing at {}",
9139                root.join(".tokensave").join("tokensave.db").display()
9140            )],
9141            Vec::new(),
9142        )),
9143        Err(err) => report.push_check(graph_db_doctor_check(
9144            "tokensave_db_open",
9145            vec![err.to_string()],
9146            Vec::new(),
9147        )),
9148    }
9149}
9150
9151pub(crate) fn graph_db_resolve_evidence_target(
9152    store: &impl GraphStore,
9153    target: &str,
9154) -> Result<Option<SubstrateGraphNode>> {
9155    store.resolve_evidence_target(
9156        target,
9157        &[
9158            "backlog",
9159            "job_packet",
9160            "worker_result",
9161            "worker_context",
9162            "source_handle",
9163        ],
9164    )
9165}
9166
9167fn graph_db_reachable_nodes_by_kind(
9168    store: &impl GraphStore,
9169    from_id: &str,
9170    kind: &str,
9171    depth: usize,
9172    limit: usize,
9173) -> Result<Vec<(SubstrateGraphNode, substrate::GraphPath)>> {
9174    store.reachable_nodes_by_kind(from_id, kind, depth, limit)
9175}
9176
9177fn graph_db_evidence_completed_queue_drift_warnings(
9178    store: &impl GraphStore,
9179    target: &SubstrateGraphNode,
9180    worker_results: &[SubstrateGraphNode],
9181) -> Result<Vec<String>> {
9182    let ref_id = target.properties.get("ref_id").map(String::as_str);
9183    let has_completed_result = worker_results.iter().any(|node| {
9184        node.properties.get("status").map(String::as_str) == Some("completed")
9185            && node.properties.get("ref_id").map(String::as_str) == ref_id
9186    });
9187    if !has_completed_result {
9188        return Ok(Vec::new());
9189    }
9190    let active_jobs = store
9191        .nodes_by_kind("job_packet")?
9192        .into_iter()
9193        .filter(|node| {
9194            node.properties.get("ref_id").map(String::as_str) == ref_id
9195                && node.label.starts_with("do #")
9196        })
9197        .collect::<Vec<_>>();
9198    if active_jobs.is_empty() {
9199        return Ok(Vec::new());
9200    }
9201    let repair = match (target.properties.get("path"), ref_id) {
9202        (Some(path), Some(id)) => format!(
9203            "repair with `agent-doc write --commit {} --done {}` or the next `agent-doc finalize --done {}` closeout",
9204            shell_quote(path),
9205            shell_quote(id),
9206            shell_quote(id)
9207        ),
9208        _ => {
9209            "repair by marking the queue item done/reaping it in the agent-doc session".to_string()
9210        }
9211    };
9212    Ok(vec![format!(
9213        "queue-head drift: target {} has {} active queued do packet(s) but already has a completed worker_result; {repair}; do not redispatch or reactivate the completed item",
9214        target.label,
9215        active_jobs.len()
9216    )])
9217}
9218
9219fn graph_db_evidence_next_commands(
9220    root: &Path,
9221    scope: Option<&str>,
9222    target: &SubstrateGraphNode,
9223    worker_context: &[SubstrateGraphNode],
9224    source_handles: &[SubstrateGraphNode],
9225    worker_results: &[SubstrateGraphNode],
9226    semantic_related: &[SubstrateGraphNode],
9227) -> Vec<String> {
9228    let mut commands = BTreeSet::new();
9229    if let Some(expand) = target.properties.get("expand") {
9230        commands.insert(expand.clone());
9231    }
9232    for worker in worker_context {
9233        if let Some(expand) = worker.properties.get("expand") {
9234            commands.insert(expand.clone());
9235        }
9236    }
9237    for source in source_handles {
9238        if let Some(expand) = source.properties.get("expand") {
9239            commands.insert(expand.clone());
9240        }
9241    }
9242    for result in worker_results {
9243        if let Some(expand) = result.properties.get("expand") {
9244            commands.insert(expand.clone());
9245        }
9246    }
9247    for semantic in semantic_related {
9248        if let Some(expand) = semantic.properties.get("expand") {
9249            commands.insert(expand.clone());
9250        }
9251    }
9252    commands.insert(format!(
9253        "tsift graph-db --path {}{} status --json",
9254        shell_quote(root.to_string_lossy().as_ref()),
9255        graph_db_scope_arg(scope)
9256    ));
9257    commands.insert(format!(
9258        "tsift graph-db --path {}{} doctor --json",
9259        shell_quote(root.to_string_lossy().as_ref()),
9260        graph_db_scope_arg(scope)
9261    ));
9262    commands.into_iter().collect()
9263}
9264
9265fn graph_db_repair_commands(root: &Path, scope: Option<&str>) -> Vec<String> {
9266    vec![
9267        format!(
9268            "tsift graph-db --path {}{} refresh --json",
9269            shell_quote(root.to_string_lossy().as_ref()),
9270            graph_db_scope_arg(scope)
9271        ),
9272        format!(
9273            "tsift graph-db --path {}{} doctor --json",
9274            shell_quote(root.to_string_lossy().as_ref()),
9275            graph_db_scope_arg(scope)
9276        ),
9277    ]
9278}
9279
9280fn graph_db_evidence_replay_commands(
9281    root: &Path,
9282    scope: Option<&str>,
9283    target: &str,
9284    depth: usize,
9285    limit: usize,
9286) -> Vec<String> {
9287    vec![
9288        format!(
9289            "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
9290            shell_quote(root.to_string_lossy().as_ref()),
9291            graph_db_scope_arg(scope),
9292            shell_quote(target),
9293            depth,
9294            limit
9295        ),
9296        format!(
9297            "tsift conflict-matrix --path {} {} --json",
9298            shell_quote(root.to_string_lossy().as_ref()),
9299            shell_quote(target)
9300        ),
9301    ]
9302}
9303
9304fn graph_db_evidence_packet_id(
9305    target: &str,
9306    target_node: &SubstrateGraphNode,
9307    freshness: &GraphDbFreshnessReport,
9308) -> String {
9309    stable_handle(
9310        "gevd",
9311        &format!(
9312            "{}:{}:{}:{}",
9313            GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
9314            target,
9315            target_node.id,
9316            freshness.content_hash.as_deref().unwrap_or("no-hash")
9317        ),
9318    )
9319}
9320
9321pub(crate) fn graph_db_evidence_report_from_store<S: GraphStore>(
9322    input: GraphDbEvidenceInput<'_, S>,
9323) -> Result<GraphDbEvidenceReport> {
9324    let GraphDbEvidenceInput {
9325        root,
9326        scope,
9327        backend,
9328        target,
9329        depth,
9330        limit,
9331        cursor,
9332        store,
9333        freshness,
9334        mut warnings,
9335    } = input;
9336    let repair_commands = graph_db_repair_commands(root, scope);
9337    if freshness.fail_closed {
9338        bail!(
9339            "graph database evidence failed closed for {} backend: {}; repair: {}",
9340            backend,
9341            freshness.diagnostics.join("; "),
9342            repair_commands.join("; ")
9343        );
9344    }
9345    let semantic_readiness = graph_db_semantic_readiness(
9346        root,
9347        scope,
9348        graph_store_semantic_node_count(store).ok(),
9349    );
9350    if semantic_readiness.fail_closed {
9351        warnings.push(format!(
9352            "graph evidence semantic readiness blocked: {} — {}",
9353            semantic_readiness.reason,
9354            semantic_readiness.diagnostics.join("; ")
9355        ));
9356        warnings.push(format!(
9357            "repair: {}",
9358            semantic_readiness.next_commands.join("; then ")
9359        ));
9360    }
9361    let target_node = graph_db_resolve_evidence_target(store, target)?
9362        .with_context(|| format!("graph-db evidence target not found: {target}"))?;
9363    let max_rows = if limit == 0 { usize::MAX } else { limit };
9364    let mut reachable = store.reachable_nodes_by_kinds(
9365        &target_node.id,
9366        &[
9367            "worker_context",
9368            "source_handle",
9369            "worker_result",
9370            "semantic_concept",
9371            "semantic_entity",
9372        ],
9373        depth,
9374        max_rows,
9375    )?;
9376    let worker_paths = reachable.remove("worker_context").unwrap_or_default();
9377    let source_paths = reachable.remove("source_handle").unwrap_or_default();
9378    let worker_result_paths = reachable.remove("worker_result").unwrap_or_default();
9379    let mut semantic_paths = reachable.remove("semantic_concept").unwrap_or_default();
9380    semantic_paths.extend(reachable.remove("semantic_entity").unwrap_or_default());
9381    semantic_paths.sort_by(|(left_node, left_path), (right_node, right_path)| {
9382        left_path
9383            .hops
9384            .cmp(&right_path.hops)
9385            .then(left_node.kind.cmp(&right_node.kind))
9386            .then(left_node.label.cmp(&right_node.label))
9387            .then(left_node.id.cmp(&right_node.id))
9388    });
9389    if max_rows != usize::MAX && semantic_paths.len() > max_rows {
9390        semantic_paths.truncate(max_rows);
9391    }
9392
9393    let evidence_nodes = worker_paths
9394        .iter()
9395        .chain(source_paths.iter())
9396        .chain(worker_result_paths.iter())
9397        .chain(semantic_paths.iter())
9398        .map(|(node, _)| node.clone())
9399        .collect::<Vec<_>>();
9400    let evidence_depth_by_id = worker_paths
9401        .iter()
9402        .chain(source_paths.iter())
9403        .chain(worker_result_paths.iter())
9404        .chain(semantic_paths.iter())
9405        .map(|(node, path)| (node.id.clone(), path.hops))
9406        .collect::<BTreeMap<_, _>>();
9407    let target_query = graph_db_node_search_text(&target_node);
9408    let semantic_scores = graph_db_semantic_scores_for_query(Some(&target_query), &evidence_nodes);
9409    let budgeted = graph_db_apply_output_budget_with_depths_and_cursor(
9410        std::slice::from_ref(&target_node.id),
9411        &semantic_scores,
9412        evidence_nodes,
9413        Vec::new(),
9414        Some(limit),
9415        Some(&evidence_depth_by_id),
9416        cursor,
9417    );
9418    let output_budget = budgeted.report;
9419    let truncated = budgeted.truncated;
9420    let next_cursor = budgeted.next_cursor;
9421    let retained_evidence_ids = budgeted
9422        .nodes
9423        .iter()
9424        .map(|node| node.id.as_str())
9425        .collect::<BTreeSet<_>>();
9426    let worker_context = worker_paths
9427        .iter()
9428        .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9429        .map(|(node, _)| node.clone())
9430        .collect::<Vec<_>>();
9431    let source_handles = source_paths
9432        .iter()
9433        .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9434        .map(|(node, _)| node.clone())
9435        .collect::<Vec<_>>();
9436    let worker_results = worker_result_paths
9437        .iter()
9438        .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9439        .map(|(node, _)| node.clone())
9440        .collect::<Vec<_>>();
9441    let semantic_related = semantic_paths
9442        .iter()
9443        .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9444        .map(|(node, _)| node.clone())
9445        .collect::<Vec<_>>();
9446    warnings.extend(graph_db_evidence_completed_queue_drift_warnings(
9447        store,
9448        &target_node,
9449        &worker_results,
9450    )?);
9451    if worker_context.is_empty()
9452        && source_handles.is_empty()
9453        && worker_results.is_empty()
9454        && semantic_related.is_empty()
9455    {
9456        warnings.push(format!(
9457            "graph-db evidence target {} resolved to a {} node but has no projection-linked context rows; add source/file tokens to the backlog text or rerun graph-db refresh after the session document is indexed",
9458            target, target_node.kind
9459        ));
9460    }
9461    let shortest_paths = worker_paths
9462        .iter()
9463        .chain(source_paths.iter())
9464        .chain(worker_result_paths.iter())
9465        .chain(semantic_paths.iter())
9466        .filter(|(node, _)| retained_evidence_ids.contains(node.id.as_str()))
9467        .map(|(node, path)| GraphDbEvidencePath {
9468            to: node.id.clone(),
9469            kind: node.kind.clone(),
9470            label: node.label.clone(),
9471            path: Some(path.clone()),
9472            expand: node.properties.get("expand").cloned(),
9473        })
9474        .collect::<Vec<_>>();
9475    let next_commands = graph_db_evidence_next_commands(
9476        root,
9477        scope,
9478        &target_node,
9479        &worker_context,
9480        &source_handles,
9481        &worker_results,
9482        &semantic_related,
9483    );
9484    let replay_commands = graph_db_evidence_replay_commands(root, scope, target, depth, limit);
9485    let packet_id = graph_db_evidence_packet_id(target, &target_node, &freshness);
9486    let projection_hash = freshness.content_hash.clone();
9487
9488    Ok(GraphDbEvidenceReport {
9489        root: root.to_string_lossy().to_string(),
9490        scope: scope.map(str::to_string),
9491        backend: backend.to_string(),
9492        contract_version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION.to_string(),
9493        target: target.to_string(),
9494        packet_id,
9495        projection_hash,
9496        freshness,
9497        target_node: target_node.into(),
9498        worker_context: worker_context.into_iter().map(Into::into).collect(),
9499        source_handles: source_handles.into_iter().map(Into::into).collect(),
9500        worker_results: worker_results.into_iter().map(Into::into).collect(),
9501        semantic_related: semantic_related.into_iter().map(Into::into).collect(),
9502        shortest_paths,
9503        output_budget: Some(output_budget),
9504        truncated,
9505        next_cursor,
9506        next_commands,
9507        replay_commands,
9508        repair_commands,
9509        fixture_coverage: GraphDbFixtureCoverage {
9510            test: "graph_db_evidence_packet_covers_backlog_job_worker_context_and_source_handles"
9511                .to_string(),
9512            fixture: "tests/graph_db_conformance.rs::graph_db_project".to_string(),
9513            assertions: vec![
9514                "backlog id and job packet handle resolve to graph nodes".to_string(),
9515                "worker_context rows are reachable from queued work".to_string(),
9516                "source_handle rows are reachable through bounded shortest paths".to_string(),
9517                "worker_result rows are reachable from completed or blocked work".to_string(),
9518            ],
9519        },
9520        warnings,
9521    })
9522}
9523
9524fn print_graph_db_evidence_human(report: &GraphDbEvidenceReport) {
9525    println!(
9526        "graph-db evidence backend: {} target: {} [{}] packet:{}",
9527        report.backend, report.target_node.id, report.target_node.kind, report.packet_id
9528    );
9529    let page_info = if report.truncated {
9530        let cursor = report.next_cursor.as_deref().unwrap_or("?");
9531        format!(" (truncated, next_cursor: {cursor})")
9532    } else {
9533        String::new()
9534    };
9535    println!(
9536        "evidence: {} worker_context row(s), {} source_handle row(s), {} worker_result row(s), {} semantic row(s), {} path(s){page_info}",
9537        report.worker_context.len(),
9538        report.source_handles.len(),
9539        report.worker_results.len(),
9540        report.semantic_related.len(),
9541        report.shortest_paths.len()
9542    );
9543    for path in &report.shortest_paths {
9544        if let Some(graph_path) = &path.path {
9545            println!(
9546                "path: {} hop(s) {}",
9547                graph_path.hops,
9548                graph_path.nodes.join(" -> ")
9549            );
9550        }
9551    }
9552    for command in &report.next_commands {
9553        println!("next: {command}");
9554    }
9555    for warning in &report.warnings {
9556        println!("warning: {warning}");
9557    }
9558}
9559
9560pub(crate) fn print_graph_db_evidence_report(
9561    report: &GraphDbEvidenceReport,
9562    format: OutputFormat,
9563) -> Result<()> {
9564    if format.json_output {
9565        let page_info = if report.truncated {
9566            let cursor = report.next_cursor.as_deref().unwrap_or("?");
9567            format!(" (truncated, next_cursor: {cursor})")
9568        } else {
9569            String::new()
9570        };
9571        print_json_or_envelope(
9572            report,
9573            &format,
9574            "graph-db",
9575            "evidence",
9576            ToolEnvelopeSummary {
9577                text: format!(
9578                    "Graph DB evidence for {} returned {} worker context row(s), {} source handle(s), {} worker result row(s), {} semantic row(s), and {} shortest path(s){page_info}",
9579                    report.target,
9580                    report.worker_context.len(),
9581                    report.source_handles.len(),
9582                    report.worker_results.len(),
9583                    report.semantic_related.len(),
9584                    report.shortest_paths.len()
9585                ),
9586                metrics: vec![
9587                    envelope_metric("backend", &report.backend),
9588                    envelope_metric("worker_context", report.worker_context.len()),
9589                    envelope_metric("source_handles", report.source_handles.len()),
9590                    envelope_metric("worker_results", report.worker_results.len()),
9591                    envelope_metric("semantic_related", report.semantic_related.len()),
9592                    envelope_metric("paths", report.shortest_paths.len()),
9593                ],
9594            },
9595            report.truncated,
9596            report.next_commands.clone(),
9597        )
9598    } else {
9599        print_graph_db_evidence_human(report);
9600        Ok(())
9601    }
9602}
9603
9604pub(crate) fn graph_db_report_from_store(
9605    root: &Path,
9606    scope: Option<&str>,
9607    backend: &str,
9608    query: GraphDbQuery,
9609    store: &impl GraphStore,
9610    freshness: GraphDbFreshnessReport,
9611    warnings: Vec<String>,
9612) -> Result<GraphDbReport> {
9613    if freshness.fail_closed {
9614        bail!(
9615            "graph database read failed closed for {} backend: {}",
9616            backend,
9617            freshness.diagnostics.join("; ")
9618        );
9619    }
9620    let mut report = GraphDbReport {
9621        root: root.to_string_lossy().to_string(),
9622        scope: scope.map(str::to_string),
9623        backend: backend.to_string(),
9624        query: format!("{query:?}"),
9625        freshness,
9626        readiness: None,
9627        schema: None,
9628        node: None,
9629        edge: None,
9630        nodes: Vec::new(),
9631        edges: Vec::new(),
9632        ranked_neighbors: Vec::new(),
9633        semantic_related: Vec::new(),
9634        neighborhood_ranking_gate: None,
9635        ranked_neighborhood_comparison: None,
9636        knowledge_retrieval: None,
9637        output_budget: None,
9638        path: None,
9639        page: None,
9640        warnings,
9641    };
9642
9643    match query {
9644        GraphDbQuery::Refresh => {
9645            bail!("graph-db refresh must be handled by the refresh command path");
9646        }
9647        GraphDbQuery::Status => {
9648            bail!("graph-db status must be handled by the status command path");
9649        }
9650        GraphDbQuery::Doctor => {
9651            bail!("graph-db doctor must be handled by the doctor command path");
9652        }
9653        GraphDbQuery::Drift => {
9654            bail!("graph-db drift must be handled by the drift command path");
9655        }
9656        GraphDbQuery::Compact { .. } => {
9657            bail!("graph-db compact must be handled by the compact command path");
9658        }
9659        GraphDbQuery::BackendEval { .. } => {
9660            bail!("graph-db backend-eval must be handled by the benchmark command path");
9661        }
9662        GraphDbQuery::Evidence { .. } => {
9663            bail!("graph-db evidence must be handled by the evidence command path");
9664        }
9665        GraphDbQuery::Related {
9666            query,
9667            kind,
9668            depth,
9669            seed_limit,
9670            limit,
9671        } => {
9672            let semantic =
9673                semantic_related_report_from_store(root, scope, &query, seed_limit, kind, store)?;
9674            let SemanticRelatedReport {
9675                items,
9676                warnings: semantic_warnings,
9677                ..
9678            } = semantic;
9679            let readiness = graph_db_semantic_readiness(
9680                root,
9681                scope,
9682                (!items.is_empty()).then_some(items.len()),
9683            );
9684            report.warnings.extend(semantic_warnings);
9685            let seed_ids = items
9686                .iter()
9687                .map(|item| item.handle.clone())
9688                .collect::<Vec<_>>();
9689            let semantic_scores = items
9690                .iter()
9691                .map(|item| (item.handle.clone(), item.score))
9692                .collect::<BTreeMap<_, _>>();
9693            let subgraph = graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit)?;
9694            let seed_count = seed_ids.len();
9695            let mut diagnostics = subgraph.diagnostics;
9696            let budgeted = graph_db_apply_output_budget(
9697                &seed_ids,
9698                &semantic_scores,
9699                subgraph.nodes,
9700                subgraph.edges,
9701                Some(limit),
9702            );
9703            let budget_report = budgeted.report;
9704            let dropped_by_budget = !budget_report.dropped_by_budget.is_empty();
9705            diagnostics.extend(budget_report.diagnostics.clone());
9706            diagnostics.extend(readiness.diagnostics.clone());
9707
9708            report.readiness = Some(readiness);
9709            report.semantic_related = items;
9710            if let Some(seed_id) = seed_ids.first() {
9711                let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(Some(limit));
9712                report.ranked_neighbors = graph_db_ranked_neighbors(
9713                    seed_id,
9714                    &budgeted.nodes,
9715                    &budgeted.edges,
9716                    ranked_neighbor_cap,
9717                );
9718                report.neighborhood_ranking_gate =
9719                    Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
9720            }
9721            report.nodes = budgeted.nodes.into_iter().map(Into::into).collect();
9722            report.edges = budgeted.edges.into_iter().map(Into::into).collect();
9723            report.knowledge_retrieval = Some(GraphDbKnowledgeRetrieval {
9724                mode: "semantic_seeded_neighborhood".to_string(),
9725                query,
9726                seed_kind: semantic_related_kind_name(kind).to_string(),
9727                seed_limit,
9728                seed_count,
9729                depth,
9730                limit,
9731                node_count: report.nodes.len(),
9732                edge_count: report.edges.len(),
9733                truncated: subgraph.truncated || dropped_by_budget,
9734                traversal: "incident_plus_outgoing_edges".to_string(),
9735                freshness_boundary:
9736                    "semantic rows must come from refreshed summary or tsift-memory graph records"
9737                        .to_string(),
9738                privacy_boundary:
9739                    "GraphStore stores substrate records only; user consent, deletion policy, persona policy, and LiveKit session state stay in the avatar/agent adapter"
9740                        .to_string(),
9741                diagnostics,
9742            });
9743            report.output_budget = Some(budget_report);
9744        }
9745        GraphDbQuery::Schema => {
9746            report.schema = Some(graph_db_schema());
9747        }
9748        GraphDbQuery::Node { id } => {
9749            report.node = store.node(&id)?.map(Into::into);
9750        }
9751        GraphDbQuery::Edge { id } => {
9752            report.edge = store.edge(&id)?.map(Into::into);
9753        }
9754        GraphDbQuery::Edges {
9755            edge_kind,
9756            cursor,
9757            limit,
9758            property_filters,
9759        } => {
9760            let options = graph_db_query_options(cursor, limit, &property_filters)?;
9761            let paged = store.paged_edges(
9762                edge_kind.as_deref(),
9763                graph_db_query_options_for_store(&options),
9764            )?;
9765            report.edges = paged.edges.into_iter().map(Into::into).collect();
9766            report.page = Some(graph_db_page_report_from_store(
9767                paged.page,
9768                options.property_filters,
9769            ));
9770        }
9771        GraphDbQuery::Incident {
9772            id,
9773            edge_kind,
9774            cursor,
9775            limit,
9776            property_filters,
9777        } => {
9778            let options = graph_db_query_options(cursor, limit, &property_filters)?;
9779            let paged = store.paged_incident_edges(
9780                &id,
9781                edge_kind.as_deref(),
9782                graph_db_query_options_for_store(&options),
9783            )?;
9784            report.edges = paged.edges.into_iter().map(Into::into).collect();
9785            report.page = Some(graph_db_page_report_from_store(
9786                paged.page,
9787                options.property_filters,
9788            ));
9789        }
9790        GraphDbQuery::Kind {
9791            kind,
9792            cursor,
9793            limit,
9794            property_filters,
9795        } => {
9796            let options = graph_db_query_options(cursor, limit, &property_filters)?;
9797            let paged =
9798                store.paged_nodes_by_kind(&kind, graph_db_query_options_for_store(&options))?;
9799            report.nodes = paged.nodes.into_iter().map(Into::into).collect();
9800            report.edges = paged.edges.into_iter().map(Into::into).collect();
9801            report.page = Some(graph_db_page_report_from_store(
9802                paged.page,
9803                options.property_filters,
9804            ));
9805        }
9806        GraphDbQuery::Neighborhood {
9807            id,
9808            depth,
9809            edge_kind,
9810            cursor,
9811            limit,
9812            property_filters,
9813        } => {
9814            let options = graph_db_query_options(cursor, limit, &property_filters)?;
9815            if let Some(paged) = store.paged_neighborhood(
9816                &id,
9817                depth,
9818                edge_kind.as_deref(),
9819                graph_db_query_options_for_store(&options),
9820            )? {
9821                let budgeted = graph_db_apply_output_budget(
9822                    std::slice::from_ref(&id),
9823                    &BTreeMap::new(),
9824                    paged.nodes,
9825                    paged.edges,
9826                    options.limit,
9827                );
9828                let budget_report = budgeted.report;
9829                let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(options.limit);
9830                let ranked_neighbors = graph_db_ranked_neighbors(
9831                    &id,
9832                    &budgeted.nodes,
9833                    &budgeted.edges,
9834                    ranked_neighbor_cap,
9835                );
9836                let comparison = graph_db_ranked_neighborhood_comparison(
9837                    &id,
9838                    depth,
9839                    edge_kind.as_deref(),
9840                    options.limit,
9841                    &budgeted.nodes,
9842                    &budgeted.edges,
9843                    store,
9844                )?;
9845                report.nodes = budgeted.nodes.into_iter().map(Into::into).collect();
9846                report.edges = budgeted.edges.into_iter().map(Into::into).collect();
9847                report.ranked_neighbors = ranked_neighbors;
9848                report.neighborhood_ranking_gate =
9849                    Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
9850                let mut page =
9851                    graph_db_page_report_from_store(paged.page, options.property_filters);
9852                page.returned_nodes = report.nodes.len();
9853                page.returned_edges = report.edges.len();
9854                page.truncated |= !budget_report.dropped_by_budget.is_empty();
9855                page.diagnostics.extend(budget_report.diagnostics.clone());
9856                report.page = Some(page);
9857                report.output_budget = Some(budget_report);
9858                if let Some(comparison) = comparison {
9859                    report.ranked_neighborhood_comparison = Some(comparison);
9860                }
9861            }
9862        }
9863        GraphDbQuery::Path {
9864            from,
9865            to,
9866            edge_kind,
9867            max_hops,
9868        } => {
9869            report.path =
9870                store.shortest_path_with_max_hops(&from, &to, edge_kind.as_deref(), max_hops)?;
9871            if let Some(max_hops) = max_hops
9872                && report.path.is_none()
9873            {
9874                report.warnings.push(format!(
9875                    "no directed path found within --max-hops {}",
9876                    max_hops
9877                ));
9878            }
9879        }
9880        GraphDbQuery::Map { .. } => {
9881            bail!("graph-db map must be handled by the map command path");
9882        }
9883    }
9884    Ok(report)
9885}
9886
9887pub(crate) fn print_graph_db_human(report: &GraphDbReport, compact: bool) {
9888    if compact {
9889        println!(
9890            "graph-db backend:{} query:{} nodes:{} edges:{} freshness:{}",
9891            report.backend,
9892            report.query,
9893            report.nodes.len() + usize::from(report.node.is_some()),
9894            report.edges.len() + usize::from(report.edge.is_some()),
9895            report.freshness.status
9896        );
9897        return;
9898    }
9899    println!("graph-db backend: {}", report.backend);
9900    println!("freshness: {}", report.freshness.status);
9901    if let Some(readiness) = &report.readiness {
9902        println!(
9903            "readiness: {} reason: {} fail_closed: {}",
9904            readiness.status, readiness.reason, readiness.fail_closed
9905        );
9906        for diagnostic in &readiness.diagnostics {
9907            println!("readiness diagnostic: {diagnostic}");
9908        }
9909        for command in &readiness.next_commands {
9910            println!("readiness next: {command}");
9911        }
9912    }
9913    if let Some(schema) = &report.schema {
9914        println!(
9915            "schema: {} node fields, {} edge fields, {} operations",
9916            schema.node_fields.len(),
9917            schema.edge_fields.len(),
9918            schema.operations.len()
9919        );
9920    }
9921    if let Some(node) = &report.node {
9922        println!("node: {} [{}] {}", node.id, node.kind, node.label);
9923    }
9924    if let Some(edge) = &report.edge {
9925        let edge_full: SubstrateGraphEdge = edge.into();
9926        println!(
9927            "edge: {} {} -{}-> {}",
9928            graph_db_edge_key(&edge_full),
9929            edge.from_id,
9930            edge.kind,
9931            edge.to_id
9932        );
9933    }
9934    if let Some(knowledge) = &report.knowledge_retrieval {
9935        println!(
9936            "knowledge_retrieval: {} seeds:{} depth:{} traversal:{}",
9937            knowledge.mode, knowledge.seed_count, knowledge.depth, knowledge.traversal
9938        );
9939    }
9940    for item in &report.semantic_related {
9941        println!(
9942            "semantic_seed: {:.3} [{}] {} ({})",
9943            item.score, item.kind, item.label, item.handle
9944        );
9945    }
9946    for node in &report.nodes {
9947        println!("node: {} [{}] {}", node.id, node.kind, node.label);
9948    }
9949    for edge in &report.edges {
9950        let edge_full: SubstrateGraphEdge = edge.into();
9951        println!(
9952            "edge: {} {} -{}-> {}",
9953            graph_db_edge_key(&edge_full),
9954            edge.from_id,
9955            edge.kind,
9956            edge.to_id
9957        );
9958    }
9959    for neighbor in &report.ranked_neighbors {
9960        println!(
9961            "ranked_neighbor: #{} score:{} depth:{} {} [{}] {}",
9962            neighbor.rank,
9963            neighbor.score,
9964            neighbor
9965                .depth
9966                .map(|depth| depth.to_string())
9967                .unwrap_or_else(|| "unknown".to_string()),
9968            neighbor.node_id,
9969            neighbor.kind,
9970            neighbor.label
9971        );
9972    }
9973    if let Some(gate) = &report.neighborhood_ranking_gate {
9974        println!(
9975            "neighborhood_ranking_gate: {} default_order:{} ranked_output_default:{}",
9976            gate.status, gate.default_order, gate.ranked_output_default
9977        );
9978    }
9979    if let Some(path) = &report.path {
9980        println!("path: {} hop(s) {}", path.hops, path.nodes.join(" -> "));
9981    }
9982    if let Some(page) = &report.page {
9983        if let Some(next_cursor) = &page.next_cursor {
9984            println!("next_cursor: {next_cursor}");
9985        }
9986        for diagnostic in &page.diagnostics {
9987            println!("page: {diagnostic}");
9988        }
9989    }
9990    for warning in &report.warnings {
9991        println!("warning: {warning}");
9992    }
9993}
9994
9995pub(crate) fn graph_db_backend_eval_phase_timing(
9996    name: &str,
9997    duration_micros: u128,
9998    detail: &str,
9999) -> GraphDbBackendEvalPhaseTiming {
10000    GraphDbBackendEvalPhaseTiming {
10001        name: name.to_string(),
10002        duration_micros,
10003        detail: detail.to_string(),
10004    }
10005}
10006
10007pub(crate) fn graph_db_backend_eval_timed_phase<T>(
10008    phases: &mut Vec<GraphDbBackendEvalPhaseTiming>,
10009    name: &str,
10010    detail: &str,
10011    run: impl FnOnce() -> Result<T>,
10012) -> Result<T> {
10013    let started = Instant::now();
10014    let result = run();
10015    phases.push(graph_db_backend_eval_phase_timing(
10016        name,
10017        started.elapsed().as_micros(),
10018        detail,
10019    ));
10020    result
10021}
10022
10023pub(crate) fn graph_db_backend_eval_refresh_total_micros(
10024    phases: &[GraphDbBackendEvalPhaseTiming],
10025) -> u128 {
10026    phases
10027        .iter()
10028        .filter(|phase| phase.name != "conflict_matrix_preparation")
10029        .map(|phase| phase.duration_micros)
10030        .sum()
10031}
10032
10033pub(crate) fn graph_db_backend_eval_cached_refresh(
10034    root: &Path,
10035    scope: Option<&str>,
10036    source_watermark: Option<&str>,
10037) -> Result<
10038    Option<(
10039        TraversalGraphBuild,
10040        SqliteProjectionRefresh,
10041        Vec<GraphDbBackendEvalPhaseTiming>,
10042    )>,
10043> {
10044    let Some(source_watermark) = source_watermark else {
10045        return Ok(None);
10046    };
10047    let graph_db = graph_substrate_db_path(root, scope);
10048    if !graph_db.exists() {
10049        return Ok(None);
10050    }
10051
10052    let started = Instant::now();
10053    let store = match SqliteGraphStore::open_read_only_resilient(&graph_db) {
10054        Ok(store) => store,
10055        Err(_) => return Ok(None),
10056    };
10057    if store.has_user_triggers().unwrap_or(true) {
10058        return Ok(None);
10059    }
10060    let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
10061    if freshness.fail_closed || freshness.source_watermark.as_deref() != Some(source_watermark) {
10062        return Ok(None);
10063    }
10064
10065    let phases = vec![
10066        graph_db_backend_eval_phase_timing(
10067            "source_graph_build",
10068            started.elapsed().as_micros(),
10069            "reused current graph.db projection because the source watermark matched; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
10070        ),
10071        graph_db_backend_eval_phase_timing(
10072            "projection_rows",
10073            0,
10074            "reused cached provider-neutral projection rows from graph.db",
10075        ),
10076        graph_db_backend_eval_phase_timing(
10077            "sqlite_open",
10078            0,
10079            "reused existing graph.db projection without opening a write transaction",
10080        ),
10081    ];
10082    let refresh = SqliteProjectionRefresh {
10083        scope: scope.unwrap_or("root").to_string(),
10084        projection_version: freshness
10085            .projection_version
10086            .unwrap_or_else(|| GRAPH_PROJECTION_VERSION.to_string()),
10087        source_watermark: Some(source_watermark.to_string()),
10088        tombstoned_nodes: Vec::new(),
10089        tombstoned_edges: Vec::new(),
10090        upserted_nodes: 0,
10091        upserted_edges: 0,
10092        unchanged_nodes: 0,
10093        unchanged_edges: 0,
10094        upserted_properties: 0,
10095        unchanged_properties: 0,
10096        deleted_properties: 0,
10097        deleted_nodes: 0,
10098        deleted_edges: 0,
10099        pruned_tombstones: 0,
10100        file_size_bytes_before: None,
10101        file_size_bytes_after: None,
10102        phase_timings: Vec::new(),
10103    };
10104    Ok(Some((TraversalGraphBuild::default(), refresh, phases)))
10105}
10106
10107pub(crate) fn graph_db_backend_eval_reused_cached_projection(
10108    phases: &[GraphDbBackendEvalPhaseTiming],
10109) -> bool {
10110    phases.iter().any(|phase| {
10111        phase.name == "source_graph_build"
10112            && phase.detail.contains("reused current graph.db projection")
10113    })
10114}
10115
10116pub(crate) fn graph_db_backend_eval_update_source_watermark(
10117    root: &Path,
10118    path_hint: &Path,
10119    scope: Option<&str>,
10120) -> Result<()> {
10121    let Some(source_watermark) = traversal_source_watermark(root, path_hint, scope, false)? else {
10122        return Ok(());
10123    };
10124    let graph_db = graph_substrate_db_path(root, scope);
10125    let mut store = SqliteGraphStore::open(&graph_db)?;
10126    store.update_projection_source_watermark(scope.unwrap_or("root"), Some(source_watermark))?;
10127    Ok(())
10128}
10129
10130pub(crate) fn graph_db_backend_eval_refresh_with_profile(
10131    root: &Path,
10132    path_hint: &Path,
10133    scope: Option<&str>,
10134) -> Result<(
10135    TraversalGraphBuild,
10136    SqliteProjectionRefresh,
10137    Vec<GraphDbBackendEvalPhaseTiming>,
10138)> {
10139    let source_watermark = traversal_source_watermark(root, path_hint, scope, false)?;
10140    if let Some(cached) =
10141        graph_db_backend_eval_cached_refresh(root, scope, source_watermark.as_deref())?
10142    {
10143        return Ok(cached);
10144    }
10145
10146    let mut phases = Vec::new();
10147    let source_graph_detail = if hinted_markdown_file(root, path_hint).is_some() {
10148        "bounded session projection: index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads; skips global call-edge materialization because full-projection is the complete-call-graph regression guard"
10149    } else {
10150        "index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads when summaries are cached"
10151    };
10152    let source_graph = graph_db_backend_eval_timed_phase(
10153        &mut phases,
10154        "source_graph_build",
10155        source_graph_detail,
10156        || build_traversal_graph_source_with_options(root, path_hint, scope, false),
10157    )?;
10158    let projection = graph_db_backend_eval_timed_phase(
10159        &mut phases,
10160        "projection_rows",
10161        "provider-neutral GraphStore node/edge row construction before SQLite persistence",
10162        || traversal_projection_from_graph(root, scope, &source_graph),
10163    )?;
10164    let graph_db = graph_substrate_db_path(root, scope);
10165    let mut store = graph_db_backend_eval_timed_phase(
10166        &mut phases,
10167        "sqlite_open",
10168        "open the local SQLite graph.db with WAL and busy-timeout settings",
10169        || SqliteGraphStore::open(&graph_db),
10170    )?;
10171    let refreshed_source_watermark = traversal_source_watermark(root, path_hint, scope, false)
10172        .ok()
10173        .flatten();
10174    let refresh = store.replace_projection_with_version(
10175        scope.unwrap_or("root"),
10176        &projection,
10177        Some(GRAPH_PROJECTION_VERSION),
10178        refreshed_source_watermark
10179            .or(source_watermark)
10180            .or_else(|| graph_projection_content_hash(&projection)),
10181    )?;
10182    phases.extend(
10183        refresh
10184            .phase_timings
10185            .iter()
10186            .map(|phase| GraphDbBackendEvalPhaseTiming {
10187                name: phase.name.clone(),
10188                duration_micros: phase.duration_micros,
10189                detail: phase.detail.clone(),
10190            }),
10191    );
10192    Ok((source_graph, refresh, phases))
10193}
10194
/// Directory under `root` that holds the backend-eval disk caches.
fn graph_db_backend_eval_disk_cache_dir(root: &Path) -> PathBuf {
    let mut cache_dir = root.to_path_buf();
    cache_dir.push(".tsift/backend-eval-cache");
    cache_dir
}
10198
10199fn graph_db_backend_eval_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
10200    graph_db_backend_eval_disk_cache_dir(root)
10201        .join(kind)
10202        .join(format!("{key}.json.gz"))
10203}
10204
10205fn graph_db_backend_eval_legacy_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
10206    graph_db_backend_eval_disk_cache_dir(root)
10207        .join(kind)
10208        .join(format!("{key}.json"))
10209}
10210
/// Timing breakdown collected while loading one entry from the backend-eval disk cache.
#[derive(Clone, Default)]
struct GraphDbBackendEvalDiskCacheReadProfile {
    /// Microseconds spent reading cache bytes from disk.
    file_read_micros: u128,
    /// Microseconds spent gunzipping the compressed cache bytes.
    gzip_decode_micros: u128,
    /// Microseconds spent in `serde_json` deserialization.
    serde_decode_micros: u128,
    /// `true` when the value was loaded from the legacy uncompressed `.json` entry.
    legacy: bool,
}
10218
10219fn graph_db_backend_eval_read_disk_cache<T: for<'de> Deserialize<'de>>(
10220    root: &Path,
10221    kind: &str,
10222    key: &str,
10223) -> Option<(T, u64, u64, GraphDbBackendEvalDiskCacheReadProfile)> {
10224    let mut profile = GraphDbBackendEvalDiskCacheReadProfile::default();
10225    let path = graph_db_backend_eval_disk_cache_path(root, kind, key);
10226    let read_started = Instant::now();
10227    let read_result = fs::read(&path);
10228    profile.file_read_micros = read_started.elapsed().as_micros();
10229    if let Ok(bytes) = read_result {
10230        let decode_started = Instant::now();
10231        let mut decoder = GzDecoder::new(bytes.as_slice());
10232        let mut decoded = Vec::new();
10233        let decode_ok = decoder.read_to_end(&mut decoded).is_ok();
10234        profile.gzip_decode_micros = decode_started.elapsed().as_micros();
10235        if decode_ok {
10236            let serde_started = Instant::now();
10237            let parsed: Option<T> = serde_json::from_slice(&decoded).ok();
10238            profile.serde_decode_micros = serde_started.elapsed().as_micros();
10239            if let Some(value) = parsed {
10240                return Some((value, bytes.len() as u64, decoded.len() as u64, profile));
10241            }
10242        }
10243    }
10244
10245    let legacy_path = graph_db_backend_eval_legacy_disk_cache_path(root, kind, key);
10246    let legacy_started = Instant::now();
10247    let bytes = fs::read(legacy_path).ok()?;
10248    profile.file_read_micros = profile
10249        .file_read_micros
10250        .saturating_add(legacy_started.elapsed().as_micros());
10251    let serde_started = Instant::now();
10252    let value = serde_json::from_slice(&bytes).ok()?;
10253    profile.serde_decode_micros = profile
10254        .serde_decode_micros
10255        .saturating_add(serde_started.elapsed().as_micros());
10256    profile.legacy = true;
10257    Some((value, bytes.len() as u64, bytes.len() as u64, profile))
10258}
10259
/// Timing breakdown collected while storing one entry in the backend-eval disk cache.
#[derive(Clone, Default)]
struct GraphDbBackendEvalDiskCacheWriteProfile {
    /// Microseconds spent in `serde_json` serialization.
    serde_encode_micros: u128,
    /// Microseconds spent gzip-compressing the serialized payload.
    gzip_encode_micros: u128,
    /// Microseconds spent writing the compressed bytes to disk.
    file_write_micros: u128,
}
10266
10267fn graph_db_backend_eval_write_disk_cache<T: Serialize>(
10268    root: &Path,
10269    kind: &str,
10270    key: &str,
10271    value: &T,
10272) -> Option<(u64, u64, GraphDbBackendEvalDiskCacheWriteProfile)> {
10273    let mut profile = GraphDbBackendEvalDiskCacheWriteProfile::default();
10274    let path = graph_db_backend_eval_disk_cache_path(root, kind, key);
10275    let parent = path.parent()?;
10276    if fs::create_dir_all(parent).is_err() {
10277        return None;
10278    }
10279    let serde_started = Instant::now();
10280    let bytes = serde_json::to_vec(value).ok()?;
10281    profile.serde_encode_micros = serde_started.elapsed().as_micros();
10282    let gzip_started = Instant::now();
10283    let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
10284    if encoder.write_all(&bytes).is_err() {
10285        return None;
10286    }
10287    let encoded = encoder.finish().ok()?;
10288    profile.gzip_encode_micros = gzip_started.elapsed().as_micros();
10289    let write_started = Instant::now();
10290    if fs::write(&path, &encoded).is_err() {
10291        return None;
10292    }
10293    profile.file_write_micros = write_started.elapsed().as_micros();
10294    Some((encoded.len() as u64, bytes.len() as u64, profile))
10295}
10296
10297fn graph_db_backend_eval_prune_disk_cache(root: &Path, kind: &str, keep_key: &str) -> (usize, u64) {
10298    let dir = graph_db_backend_eval_disk_cache_dir(root).join(kind);
10299    let Ok(entries) = fs::read_dir(dir) else {
10300        return (0, 0);
10301    };
10302    let keep_name = format!("{keep_key}.json.gz");
10303    let mut pruned_files = 0usize;
10304    let mut pruned_bytes = 0u64;
10305    for entry in entries.flatten() {
10306        let path = entry.path();
10307        if !path.is_file() {
10308            continue;
10309        }
10310        let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
10311            continue;
10312        };
10313        if name == keep_name {
10314            continue;
10315        }
10316        let is_backend_eval_cache = name.ends_with(".json") || name.ends_with(".json.gz");
10317        if !is_backend_eval_cache {
10318            continue;
10319        }
10320        let bytes = entry.metadata().map(|metadata| metadata.len()).unwrap_or(0);
10321        if fs::remove_file(&path).is_ok() {
10322            pruned_files += 1;
10323            pruned_bytes += bytes;
10324        }
10325    }
10326    (pruned_files, pruned_bytes)
10327}
10328
10329fn graph_db_backend_eval_full_projection_raw_watermark_rows(
10330    root: &Path,
10331    source_root: &Path,
10332) -> Result<Vec<GraphDbBackendEvalRawSourceWatermarkRow>> {
10333    let mut rows = Vec::new();
10334    let mut entries = walk::walk_files(source_root)?;
10335    entries.sort_by(|left, right| left.path.cmp(&right.path));
10336    for entry in entries {
10337        if traversal_path_is_generated_artifact(root, source_root, &entry.path) {
10338            continue;
10339        }
10340        if traversal_path_is_session_markdown(root, source_root, &entry.path) {
10341            continue;
10342        }
10343        let bytes = fs::read(&entry.path)
10344            .with_context(|| format!("reading source input {}", entry.path.display()))?;
10345        rows.push(GraphDbBackendEvalRawSourceWatermarkRow {
10346            path: traversal_watermark_path(root, &entry.path),
10347            bytes: bytes.len() as u64,
10348            content_hash: content_hash(&bytes)?,
10349        });
10350    }
10351    Ok(rows)
10352}
10353
10354fn graph_db_backend_eval_full_projection_source_watermark(
10355    root: &Path,
10356    scope: Option<&str>,
10357) -> Result<GraphDbBackendEvalFullProjectionSourceWatermark> {
10358    let path_hint = root;
10359    let mut detail_parts = Vec::new();
10360    let mut parts = vec![
10361        format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
10362        format!("cache_version:{GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION}"),
10363        "watermark_kind:stable_full_projection_inputs".to_string(),
10364        format!("scope:{}", scope.unwrap_or("root")),
10365        format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
10366    ];
10367
10368    let gate = prepare_agent_doc_index_gate(root, path_hint, scope, "full-projection cache key");
10369    match gate.db_path.as_ref().filter(|db_path| db_path.exists()) {
10370        Some(db_path) => {
10371            let db = index::IndexDb::open_read_only_resilient(db_path)?;
10372            parts.push("index_mode:indexed".to_string());
10373            detail_parts.push("mode=indexed".to_string());
10374            parts.push(format!(
10375                "index_source_root:{}",
10376                traversal_watermark_path(root, &gate.source_root)
10377            ));
10378
10379            let symbols = db
10380                .all_symbols()?
10381                .into_iter()
10382                .filter(|symbol| {
10383                    !traversal_path_is_generated_artifact(
10384                        root,
10385                        &gate.source_root,
10386                        Path::new(&symbol.file),
10387                    ) && !traversal_path_is_session_markdown(
10388                        root,
10389                        &gate.source_root,
10390                        Path::new(&symbol.file),
10391                    )
10392                })
10393                .collect::<Vec<_>>();
10394            let symbols_hash = content_hash(&symbols)?;
10395            detail_parts.push(format!("symbols={symbols_hash}"));
10396            parts.push(format!("index_symbols:{symbols_hash}"));
10397
10398            let edges = db
10399                .all_stored_edges()?
10400                .into_iter()
10401                .filter(|edge| {
10402                    !traversal_path_is_generated_artifact(
10403                        root,
10404                        &gate.source_root,
10405                        Path::new(&edge.caller_file),
10406                    ) && !traversal_path_is_session_markdown(
10407                        root,
10408                        &gate.source_root,
10409                        Path::new(&edge.caller_file),
10410                    )
10411                })
10412                .collect::<Vec<_>>();
10413            let edges_hash = content_hash(&edges)?;
10414            detail_parts.push(format!("call_edges={edges_hash}"));
10415            parts.push(format!("index_call_edges:{edges_hash}"));
10416
10417            let routes = db
10418                .all_routes()?
10419                .into_iter()
10420                .filter(|route| {
10421                    !traversal_path_is_generated_artifact(
10422                        root,
10423                        &gate.source_root,
10424                        Path::new(&route.file),
10425                    ) && !traversal_path_is_session_markdown(
10426                        root,
10427                        &gate.source_root,
10428                        Path::new(&route.file),
10429                    )
10430                })
10431                .collect::<Vec<_>>();
10432            let routes_hash = content_hash(&routes)?;
10433            detail_parts.push(format!("routes={routes_hash}"));
10434            parts.push(format!("index_routes:{routes_hash}"));
10435        }
10436        None => {
10437            parts.push("index_mode:raw_fallback".to_string());
10438            detail_parts.push("mode=raw_fallback".to_string());
10439            parts.push(format!(
10440                "raw_source_root:{}",
10441                traversal_watermark_path(root, &gate.source_root)
10442            ));
10443            let raw_rows =
10444                graph_db_backend_eval_full_projection_raw_watermark_rows(root, &gate.source_root)?;
10445            let raw_hash = content_hash(&raw_rows)?;
10446            detail_parts.push(format!("raw_source_files={raw_hash}"));
10447            parts.push(format!("raw_source_files:{raw_hash}"));
10448        }
10449    }
10450
10451    parts.push("agent_doc_session_markdown:bounded_real_dataset_only".to_string());
10452    detail_parts.push("session_markdown=bounded_real_dataset_only".to_string());
10453    let summaries_start = parts.len();
10454    push_traversal_summaries_watermark_part(root, &mut parts)?;
10455    let summaries_hash = content_hash(&parts[summaries_start..].to_vec())?;
10456    detail_parts.push(format!("summaries={summaries_hash}"));
10457    let value = content_hash(&parts)?;
10458    detail_parts.push(format!("watermark={value}"));
10459    Ok(GraphDbBackendEvalFullProjectionSourceWatermark {
10460        value,
10461        detail: detail_parts.join(" "),
10462    })
10463}
10464
10465fn graph_db_backend_eval_full_projection_cache_key(
10466    root: &Path,
10467    scope: Option<&str>,
10468) -> Result<(String, String, String)> {
10469    let source_watermark = graph_db_backend_eval_full_projection_source_watermark(root, scope)?;
10470    let key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
10471        root,
10472        scope,
10473        &source_watermark.value,
10474    )?;
10475    Ok((source_watermark.value, key, source_watermark.detail))
10476}
10477
10478fn graph_db_backend_eval_full_projection_cache_key_for_watermark(
10479    root: &Path,
10480    scope: Option<&str>,
10481    source_watermark: &str,
10482) -> Result<String> {
10483    content_hash(&serde_json::json!({
10484    "version": GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION,
10485    "root": root.display().to_string(),
10486    "scope": scope.unwrap_or("root"),
10487    "source_watermark": source_watermark,
10488    }))
10489}
10490
10491pub(crate) fn graph_db_backend_eval_full_projection_with_profile(
10492    root: &Path,
10493    scope: Option<&str>,
10494) -> Result<(
10495    GraphProjection,
10496    Vec<String>,
10497    Vec<GraphDbBackendEvalPhaseTiming>,
10498    GraphDbBackendEvalFullProjectionCacheStats,
10499)> {
10500    let (source_watermark, key, source_watermark_detail) =
10501        graph_db_backend_eval_full_projection_cache_key(root, scope)?;
10502    let lookup_started = Instant::now();
10503    if let Some((cached, disk_bytes, json_bytes, read_profile)) =
10504        graph_db_backend_eval_read_disk_cache::<GraphDbBackendEvalFullProjectionCache>(
10505            root,
10506            "full_projection",
10507            &key,
10508        )
10509        && cached.version == GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION
10510        && cached.key == key
10511        && cached.source_watermark == source_watermark
10512    {
10513        let lookup_overhead_micros = lookup_started
10514            .elapsed()
10515            .as_micros()
10516            .saturating_sub(read_profile.file_read_micros)
10517            .saturating_sub(read_profile.gzip_decode_micros)
10518            .saturating_sub(read_profile.serde_decode_micros);
10519        let prune_started = Instant::now();
10520        let (pruned_files, pruned_bytes) =
10521            graph_db_backend_eval_prune_disk_cache(root, "full_projection", &key);
10522        let prune_micros = prune_started.elapsed().as_micros();
10523        let cache_stats = GraphDbBackendEvalFullProjectionCacheStats {
10524            hit: true,
10525            disk_bytes,
10526            json_bytes,
10527            pruned_files,
10528            pruned_bytes,
10529        };
10530        let read_detail_suffix = if read_profile.legacy {
10531            " (legacy uncompressed cache path)"
10532        } else {
10533            ""
10534        };
10535        return Ok((
10536            cached.projection,
10537            cached.warnings,
10538            vec![
10539                graph_db_backend_eval_phase_timing(
10540                    "full_projection.cache_lookup",
10541                    lookup_overhead_micros,
10542                    &format!(
10543                        "watermark/version check overhead around the cache load phases; {source_watermark_detail}"
10544                    ),
10545                ),
10546                graph_db_backend_eval_phase_timing(
10547                    "full_projection.cache.file_read",
10548                    read_profile.file_read_micros,
10549                    &format!(
10550                        "read compressed cache bytes from .tsift/backend-eval-cache{read_detail_suffix}"
10551                    ),
10552                ),
10553                graph_db_backend_eval_phase_timing(
10554                    "full_projection.cache.gzip_decode",
10555                    read_profile.gzip_decode_micros,
10556                    "gunzip the compressed projection cache bytes",
10557                ),
10558                graph_db_backend_eval_phase_timing(
10559                    "full_projection.cache.serde_decode",
10560                    read_profile.serde_decode_micros,
10561                    "serde_json deserialize the decoded projection cache payload",
10562                ),
10563                graph_db_backend_eval_phase_timing(
10564                    "full_projection.cache.prune",
10565                    prune_micros,
10566                    "prune sibling cache files older than the current key",
10567                ),
10568                graph_db_backend_eval_phase_timing(
10569                    "full_projection.source_graph_build",
10570                    0,
10571                    "reused cached full-project source graph; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
10572                ),
10573                graph_db_backend_eval_phase_timing(
10574                    "full_projection.projection_rows",
10575                    0,
10576                    "reused cached provider-neutral full-project projection rows",
10577                ),
10578            ],
10579            cache_stats,
10580        ));
10581    }
10582
10583    let mut cache_stats = GraphDbBackendEvalFullProjectionCacheStats::default();
10584    let mut phases = vec![graph_db_backend_eval_phase_timing(
10585        "full_projection.cache_lookup",
10586        lookup_started.elapsed().as_micros(),
10587        &format!(
10588            "no full-project projection cache entry matched the source watermark; {source_watermark_detail}"
10589        ),
10590    )];
10591    let full_source = graph_db_backend_eval_timed_phase(
10592        &mut phases,
10593        "full_projection.source_graph_build",
10594        "opt-in full-project source graph build; uses the project root as the path hint so bounded session projections cannot hide full-graph regressions",
10595        || build_traversal_graph_source_with_options(root, root, scope, false),
10596    )?;
10597    let projection = graph_db_backend_eval_timed_phase(
10598        &mut phases,
10599        "full_projection.projection_rows",
10600        "provider-neutral row construction for the opt-in full-project projection dataset",
10601        || traversal_projection_from_graph(root, scope, &full_source),
10602    )?;
10603    let warnings = full_source.warnings;
10604    let refreshed_source_watermark =
10605        graph_db_backend_eval_full_projection_source_watermark(root, scope)
10606            .map(|watermark| watermark.value)
10607            .unwrap_or_else(|_| source_watermark.clone());
10608    let write_key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
10609        root,
10610        scope,
10611        &refreshed_source_watermark,
10612    )?;
10613    let cache = GraphDbBackendEvalFullProjectionCache {
10614        version: GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION.to_string(),
10615        key: write_key.clone(),
10616        source_watermark: refreshed_source_watermark,
10617        projection: projection.clone(),
10618        warnings: warnings.clone(),
10619    };
10620    if let Some((disk_bytes, json_bytes, write_profile)) =
10621        graph_db_backend_eval_write_disk_cache(root, "full_projection", &write_key, &cache)
10622    {
10623        cache_stats.disk_bytes = disk_bytes;
10624        cache_stats.json_bytes = json_bytes;
10625        phases.push(graph_db_backend_eval_phase_timing(
10626            "full_projection.cache.serde_encode",
10627            write_profile.serde_encode_micros,
10628            "serde_json serialize the projection cache payload before compression",
10629        ));
10630        phases.push(graph_db_backend_eval_phase_timing(
10631            "full_projection.cache.gzip_encode",
10632            write_profile.gzip_encode_micros,
10633            "gzip-compress the serialized projection cache payload",
10634        ));
10635        phases.push(graph_db_backend_eval_phase_timing(
10636            "full_projection.cache.file_write",
10637            write_profile.file_write_micros,
10638            "write the compressed projection cache bytes to .tsift/backend-eval-cache",
10639        ));
10640    }
10641    let prune_started = Instant::now();
10642    let (pruned_files, pruned_bytes) =
10643        graph_db_backend_eval_prune_disk_cache(root, "full_projection", &write_key);
10644    phases.push(graph_db_backend_eval_phase_timing(
10645        "full_projection.cache.prune",
10646        prune_started.elapsed().as_micros(),
10647        "prune sibling cache files older than the current key",
10648    ));
10649    cache_stats.pruned_files = pruned_files;
10650    cache_stats.pruned_bytes = pruned_bytes;
10651    Ok((projection, warnings, phases, cache_stats))
10652}
10653
10654fn graph_db_backend_eval_timed(
10655    name: &str,
10656    run: impl FnOnce() -> Result<(Option<usize>, serde_json::Value)>,
10657) -> (
10658    GraphDbBackendEvalOperation,
10659    Option<GraphDbBackendEvalSignature>,
10660) {
10661    let started = Instant::now();
10662    match run() {
10663        Ok((rows, value)) => (
10664            GraphDbBackendEvalOperation {
10665                name: name.to_string(),
10666                supported: true,
10667                status: "ok".to_string(),
10668                duration_micros: started.elapsed().as_micros(),
10669                rows,
10670                error: None,
10671            },
10672            Some(GraphDbBackendEvalSignature {
10673                operation: name.to_string(),
10674                value,
10675            }),
10676        ),
10677        Err(err) => (
10678            GraphDbBackendEvalOperation {
10679                name: name.to_string(),
10680                supported: false,
10681                status: "error".to_string(),
10682                duration_micros: started.elapsed().as_micros(),
10683                rows: None,
10684                error: Some(format!("{err:#}")),
10685            },
10686            None,
10687        ),
10688    }
10689}
10690
10691fn graph_db_backend_eval_parity(
10692    sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
10693    candidate_signatures: &[GraphDbBackendEvalSignature],
10694) -> GraphDbBackendEvalParity {
10695    let Some(sqlite_signatures) = sqlite_signatures else {
10696        return GraphDbBackendEvalParity {
10697            matches_sqlite: true,
10698            diagnostics: Vec::new(),
10699        };
10700    };
10701    let sqlite = sqlite_signatures
10702        .iter()
10703        .map(|signature| (signature.operation.as_str(), &signature.value))
10704        .collect::<BTreeMap<_, _>>();
10705    let candidate = candidate_signatures
10706        .iter()
10707        .map(|signature| (signature.operation.as_str(), &signature.value))
10708        .collect::<BTreeMap<_, _>>();
10709    let mut diagnostics = Vec::new();
10710    for (operation, sqlite_value) in sqlite {
10711        match candidate.get(operation) {
10712            Some(candidate_value) if *candidate_value == sqlite_value => {}
10713            Some(_) => diagnostics.push(format!("{operation} output differed from SQLite")),
10714            None => diagnostics.push(format!(
10715                "{operation} did not complete for candidate backend"
10716            )),
10717        }
10718    }
10719    GraphDbBackendEvalParity {
10720        matches_sqlite: diagnostics.is_empty(),
10721        diagnostics,
10722    }
10723}
10724
10725pub(crate) fn graph_db_backend_eval_targets(
10726    store: &impl GraphStore,
10727    requested: &[String],
10728) -> Result<Vec<String>> {
10729    let requested = requested
10730        .iter()
10731        .filter_map(|target| normalize_conflict_target(target))
10732        .collect::<Vec<_>>();
10733    if !requested.is_empty() {
10734        return Ok(requested);
10735    }
10736
10737    for kind in ["backlog", "job_packet"] {
10738        let nodes = store.nodes_by_kind(kind)?;
10739        if let Some(node) = nodes.first() {
10740            if let Some(ref_id) = node.properties.get("ref_id") {
10741                return Ok(vec![ref_id.clone()]);
10742            }
10743            return Ok(vec![node.id.clone()]);
10744        }
10745    }
10746    Ok(Vec::new())
10747}
10748
10749fn graph_db_backend_eval_path_targets(
10750    store: &impl GraphStore,
10751    max_hops: usize,
10752) -> Result<Option<(String, String, usize)>> {
10753    let synthetic_from = "gsym-synthetic-0000";
10754    let synthetic_to = format!("gsym-synthetic-{max_hops:04}");
10755    if store.node(synthetic_from)?.is_some() && store.node(&synthetic_to)?.is_some() {
10756        let outgoing = store.outgoing_edges(synthetic_from, None)?;
10757        if outgoing.len() > 1
10758            && let Some(edge) = outgoing.first()
10759        {
10760            return Ok(Some((
10761                edge.from_id.clone(),
10762                edge.to_id.clone(),
10763                GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
10764            )));
10765        }
10766        return Ok(Some((synthetic_from.to_string(), synthetic_to, max_hops)));
10767    }
10768
10769    Ok(store.sample_edge(None)?.map(|edge| {
10770        (
10771            edge.from_id,
10772            edge.to_id,
10773            GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
10774        )
10775    }))
10776}
10777
10778fn graph_db_backend_eval_path_operation<S: GraphStore>(
10779    store: &S,
10780    configured_max_hops: usize,
10781) -> (
10782    GraphDbBackendEvalOperation,
10783    Option<GraphDbBackendEvalSignature>,
10784) {
10785    let operation_name = if configured_max_hops == GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
10786        "path_max_hops".to_string()
10787    } else {
10788        format!("path_max_hops_{configured_max_hops}")
10789    };
10790    graph_db_backend_eval_timed(&operation_name, || {
10791        let (from, to, effective_max_hops) =
10792            graph_db_backend_eval_path_targets(store, configured_max_hops)?
10793                .context("backend-eval path probe requires at least one traversable edge")?;
10794        let path = store.shortest_path_with_max_hops(&from, &to, None, Some(effective_max_hops))?;
10795        let warning = if configured_max_hops > GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
10796            Some(format!(
10797                "{configured_max_hops}-hop tier is measured only; keep user-facing defaults at {} until repeated samples and SQLite query-plan checks pass",
10798                GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS
10799            ))
10800        } else if path.is_none() && effective_max_hops == configured_max_hops {
10801            Some(format!(
10802                "path probe truncated at {configured_max_hops} hops before a route was found"
10803            ))
10804        } else {
10805            None
10806        };
10807        Ok((
10808            path.as_ref().map(|path| path.nodes.len()),
10809            serde_json::json!({
10810                "from": from,
10811                "to": to,
10812                "configured_max_hops": configured_max_hops,
10813                "effective_max_hops": effective_max_hops,
10814                "hops": path.as_ref().map(|path| path.hops),
10815                "nodes": path.as_ref().map(|path| &path.nodes),
10816                "found": path.is_some(),
10817                "warning": warning,
10818            }),
10819        ))
10820    })
10821}
10822
10823fn graph_db_backend_eval_neighborhood_operation<S: GraphStore>(
10824    store: &S,
10825    depth: usize,
10826    limit: usize,
10827) -> (
10828    GraphDbBackendEvalOperation,
10829    Option<GraphDbBackendEvalSignature>,
10830) {
10831    graph_db_backend_eval_timed("neighborhood", || {
10832        let edge = match store.sample_edge(Some("calls"))? {
10833            Some(edge) => edge,
10834            None => store.sample_edge(None)?.context(
10835                "backend-eval neighborhood probe requires at least one traversable edge",
10836            )?,
10837        };
10838        let page = store
10839            .paged_neighborhood(
10840                &edge.from_id,
10841                depth,
10842                Some(&edge.kind),
10843                GraphQueryOptions {
10844                    limit: Some(limit.max(1)),
10845                    ..GraphQueryOptions::default()
10846                },
10847            )?
10848            .with_context(|| {
10849                format!(
10850                    "backend-eval neighborhood target not found: {}",
10851                    edge.from_id
10852                )
10853            })?;
10854        Ok((
10855            Some(page.nodes.len() + page.edges.len()),
10856            serde_json::json!({
10857                "center": edge.from_id,
10858                "kind": edge.kind,
10859                "depth": depth,
10860                "limit": limit.max(1),
10861                "node_ids": page.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10862                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10863                "truncated": page.page.truncated,
10864            }),
10865        ))
10866    })
10867}
10868
10869fn graph_db_backend_eval_related_operation<S: GraphStore>(
10870    root: &Path,
10871    scope: Option<&str>,
10872    store: &S,
10873    depth: usize,
10874    limit: usize,
10875) -> (
10876    GraphDbBackendEvalOperation,
10877    Option<GraphDbBackendEvalSignature>,
10878) {
10879    graph_db_backend_eval_timed("related", || {
10880        let query = "backend evaluation";
10881        let semantic = semantic_related_report_from_store(
10882            root,
10883            scope,
10884            query,
10885            3,
10886            SemanticRelatedKind::All,
10887            store,
10888        )?;
10889        let seed_ids = semantic
10890            .items
10891            .iter()
10892            .map(|item| item.handle.clone())
10893            .collect::<Vec<_>>();
10894        let subgraph =
10895            graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit.max(1))?;
10896        Ok((
10897            Some(subgraph.nodes.len() + subgraph.edges.len()),
10898            serde_json::json!({
10899                "query": query,
10900                "seed_ids": seed_ids,
10901                "node_ids": subgraph.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10902                "edge_ids": subgraph.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
10903                "truncated": subgraph.truncated,
10904                "warnings": semantic.warnings,
10905                "diagnostics": subgraph.diagnostics,
10906            }),
10907        ))
10908    })
10909}
10910
10911fn graph_db_backend_eval_evidence_signature(report: &GraphDbEvidenceReport) -> serde_json::Value {
10912    serde_json::json!({
10913        "target": report.target,
10914        "target_node_id": report.target_node.id,
10915        "target_kind": report.target_node.kind,
10916        "worker_context": report.worker_context.iter().map(|node| &node.id).collect::<Vec<_>>(),
10917        "source_handles": report.source_handles.iter().map(|node| &node.id).collect::<Vec<_>>(),
10918        "worker_results": report.worker_results.iter().map(|node| &node.id).collect::<Vec<_>>(),
10919        "semantic_related": report.semantic_related.iter().map(|node| &node.id).collect::<Vec<_>>(),
10920        "path_count": report.shortest_paths.len(),
10921    })
10922}
10923
10924fn graph_db_backend_eval_target_resolution_signature(
10925    resolved: &[(String, SubstrateGraphNode)],
10926) -> serde_json::Value {
10927    serde_json::json!({
10928        "targets": resolved.iter().map(|(target, node)| {
10929            serde_json::json!({
10930                "target": target,
10931                "target_node_id": node.id,
10932                "target_kind": node.kind,
10933                "target_label": node.label,
10934            })
10935        }).collect::<Vec<_>>(),
10936    })
10937}
10938
10939fn graph_db_backend_eval_conflict_signature(report: &ConflictMatrixReport) -> serde_json::Value {
10940    serde_json::json!({
10941        "targets": report.targets,
10942        "can_parallel": report.can_parallel,
10943        "fail_closed": report.fail_closed,
10944        "cross_target_parallel_safe": report.cross_target_parallel_safe,
10945        "per_target_fail_closed": report.per_target_fail_closed.iter().map(|target| &target.target).collect::<Vec<_>>(),
10946        "candidates": report.candidates.iter().map(|candidate| {
10947            serde_json::json!({
10948                "target": candidate.target,
10949                "risk": conflict_risk_label(candidate.risk),
10950                "owned_files": candidate.owned_files,
10951                "owned_symbols": candidate.owned_symbols,
10952                "source_handles": candidate.source_handles.iter().map(|handle| &handle.handle).collect::<Vec<_>>(),
10953                "previously_completed": candidate.previously_completed,
10954                "parallel_safe": candidate.parallel_safe,
10955            })
10956        }).collect::<Vec<_>>(),
10957        "conflicts": report.conflicts.iter().map(|pair| {
10958            serde_json::json!({
10959                "left": pair.left,
10960                "right": pair.right,
10961                "risk": conflict_risk_label(pair.risk),
10962            })
10963        }).collect::<Vec<_>>(),
10964    })
10965}
10966
10967fn graph_db_backend_eval_dispatch_signature(report: &DispatchTraceReport) -> serde_json::Value {
10968    serde_json::json!({
10969        "targets": report.targets,
10970        "node_ids": report.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
10971        "edge_keys": report.edges.iter().map(|e| graph_db_edge_key(&SubstrateGraphEdge::from(e))).collect::<Vec<_>>(),
10972        "evidence_packet_ids": report.evidence_packet_ids,
10973        "worker_prompt_targets": report.worker_prompt_packets.iter().map(|packet| &packet.target).collect::<Vec<_>>(),
10974        "truncated": report.truncated,
10975    })
10976}
10977
10978fn graph_db_backend_eval_edge_scan_probe(
10979    store: &impl GraphStore,
10980) -> Result<(SubstrateGraphEdge, Vec<GraphPropertyFilter>)> {
10981    if let Some((edge, filter)) = store.sample_edge_with_property()? {
10982        return Ok((edge, vec![filter]));
10983    }
10984    let edge = store
10985        .sample_edge(None)?
10986        .context("backend-eval edge scan requires at least one edge")?;
10987    Ok((edge, Vec::new()))
10988}
10989
/// Runs the full backend-eval operation sequence against `store` and returns
/// the backend report together with the collected per-operation signatures.
///
/// The caller supplies the already-measured `refresh_operation` (and its
/// optional signature); this function then appends, in order: `status`,
/// `edge_lookup`, `edge_property_scan`, `incident_edges`, the neighborhood
/// probe, the related probe, one path probe per configured hop setting,
/// `evidence_target_resolution`, `evidence`, `conflict_matrix`, and
/// `dispatch_trace`.
///
/// Later steps reuse state produced by earlier ones through captured
/// `Option`s (`evidence_for_report`, `conflict_for_trace`,
/// `graph_snapshot_for_trace`), so the statement order below is significant.
///
/// `total_micros` is the sum of every operation's duration, including the
/// supplied refresh operation. `parity` is computed from `sqlite_signatures`
/// and the signatures gathered here.
// The argument list mirrors everything the report needs; the lint is silenced
// rather than bundling the arguments into a one-off struct.
#[allow(clippy::too_many_arguments)]
fn graph_db_backend_eval_report_for_store<S: GraphStore>(
    backend: &str,
    adapter: &str,
    read_only: bool,
    root: &Path,
    path: &Path,
    scope: Option<&str>,
    targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
    store: &S,
    freshness: GraphDbFreshnessReport,
    refresh_operation: GraphDbBackendEvalOperation,
    refresh_signature: Option<GraphDbBackendEvalSignature>,
    sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
    extra_warnings: Vec<String>,
    prepared: &ConflictMatrixPreparedInputs,
    projection_load: &str,
    lock_behavior: &str,
    install_portability: &str,
) -> (
    GraphDbBackendEvalBackendReport,
    Vec<GraphDbBackendEvalSignature>,
) {
    // Seed both lists with the refresh step measured by the caller.
    let mut operations = vec![refresh_operation];
    let mut signatures = refresh_signature.into_iter().collect::<Vec<_>>();

    // status: node/edge counts plus the freshness status.
    let (operation, signature) = graph_db_backend_eval_timed("status", || {
        let (nodes, edges) = store.graph_counts()?;
        Ok((
            Some(nodes + edges),
            serde_json::json!({
                "freshness": freshness.status,
                "nodes": nodes,
                "edges": edges,
            }),
        ))
    });
    operations.push(operation);
    // `signature` is an `Option`; extending appends it only when present.
    signatures.extend(signature);

    // edge_lookup: sample any edge, then fetch it back by its key.
    let (operation, signature) = graph_db_backend_eval_timed("edge_lookup", || {
        let edge = store
            .sample_edge(None)?
            .context("backend-eval edge lookup requires at least one edge")?;
        let edge_id = graph_db_edge_key(&edge);
        let found = store
            .edge(&edge_id)?
            .with_context(|| format!("backend-eval edge lookup missed {edge_id}"))?;
        Ok((
            Some(1),
            serde_json::json!({
                "edge_id": edge_id,
                "from_id": found.from_id,
                "to_id": found.to_id,
                "kind": found.kind,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // edge_property_scan: page edges of the probe edge's kind, filtered by the
    // probe's property filter when one is available.
    let (operation, signature) = graph_db_backend_eval_timed("edge_property_scan", || {
        let (edge, filters) = graph_db_backend_eval_edge_scan_probe(store)?;
        let page = store.paged_edges(
            Some(&edge.kind),
            GraphQueryOptions {
                limit: Some(limit.max(1)),
                property_filters: filters.clone(),
                ..GraphQueryOptions::default()
            },
        )?;
        Ok((
            Some(page.edges.len()),
            serde_json::json!({
                "kind": edge.kind,
                "filters": filters.iter().map(|filter| format!("{}={}", filter.key, filter.value)).collect::<Vec<_>>(),
                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
                "truncated": page.page.truncated,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // incident_edges: page edges of the sampled kind incident to the sampled
    // edge's source node.
    let (operation, signature) = graph_db_backend_eval_timed("incident_edges", || {
        let edge = store
            .sample_edge(None)?
            .context("backend-eval incident edge scan requires at least one edge")?;
        let page = store.paged_incident_edges(
            &edge.from_id,
            Some(&edge.kind),
            GraphQueryOptions {
                limit: Some(limit.max(1)),
                ..GraphQueryOptions::default()
            },
        )?;
        Ok((
            Some(page.edges.len()),
            serde_json::json!({
                "node_id": edge.from_id,
                "kind": edge.kind,
                "edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
                "truncated": page.page.truncated,
            }),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    let (operation, signature) = graph_db_backend_eval_neighborhood_operation(store, depth, limit);
    operations.push(operation);
    signatures.extend(signature);

    let (operation, signature) =
        graph_db_backend_eval_related_operation(root, scope, store, depth, limit);
    operations.push(operation);
    signatures.extend(signature);

    // Path probes: the default hop setting first, then each extended setting.
    for configured_max_hops in std::iter::once(GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS)
        .chain(GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS)
    {
        let (operation, signature) =
            graph_db_backend_eval_path_operation(store, configured_max_hops);
        operations.push(operation);
        signatures.extend(signature);
    }

    // evidence_target_resolution: every requested target must resolve to a
    // node; the first unresolved target fails the whole step.
    let (operation, signature) = graph_db_backend_eval_timed("evidence_target_resolution", || {
        let resolved = targets
            .iter()
            .map(|target| {
                let node = graph_db_resolve_evidence_target(store, target)?
                    .with_context(|| format!("backend-eval target not found: {target}"))?;
                Ok((target.clone(), node))
            })
            .collect::<Result<Vec<_>>>()?;
        let signature = graph_db_backend_eval_target_resolution_signature(&resolved);
        Ok((Some(resolved.len()), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    // Hand-off slots: filled by one step and taken by a later one.
    let mut evidence_for_report = None;
    let mut graph_snapshot_for_trace = None;
    let (operation, signature) = graph_db_backend_eval_timed("evidence", || {
        let resolved_targets =
            resolve_conflict_matrix_targets(store, targets, &prepared.context_pack)?;
        let evidence = collect_conflict_matrix_evidence_packets(
            root,
            scope,
            backend,
            &resolved_targets,
            depth,
            limit,
            store,
            freshness.clone(),
        )?;
        // Only the first packet's report feeds the signature; the row count
        // below still covers every packet.
        let report = &evidence
            .first()
            .context("backend-eval evidence requires at least one target")?
            .report;
        let rows = evidence
            .iter()
            .map(|entry| {
                entry.report.worker_context.len()
                    + entry.report.source_handles.len()
                    + entry.report.worker_results.len()
                    + entry.report.semantic_related.len()
            })
            .sum();
        let signature = graph_db_backend_eval_evidence_signature(report);
        // Stash the evidence so the conflict-matrix step can reuse it.
        evidence_for_report = Some((resolved_targets, evidence));
        Ok((Some(rows), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    let mut conflict_for_trace = None;
    let (operation, signature) = graph_db_backend_eval_timed("conflict_matrix", || {
        // Reuse the evidence gathered above when that step succeeded;
        // otherwise prepare the graph inputs from scratch.
        let graph_prepared = if let Some((targets, evidence)) = evidence_for_report.take() {
            let graph =
                conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
            let shared_preparation =
                conflict_matrix_shared_preparation_summary(&graph, &evidence, "memory_reuse");
            ConflictMatrixGraphPreparedInputs {
                targets,
                graph,
                evidence,
                shared_preparation,
            }
        } else {
            prepare_conflict_matrix_graph_orchestration(
                root,
                scope,
                backend,
                targets,
                prepared,
                depth,
                limit,
                store,
                freshness.clone(),
            )?
        };
        let report = build_conflict_matrix_report_from_prepared_graph(
            root,
            path,
            scope,
            depth,
            limit,
            impact_limit,
            freshness.clone(),
            extra_warnings.clone(),
            prepared,
            &graph_prepared,
        )?;
        let signature = graph_db_backend_eval_conflict_signature(&report);
        let rows = report.candidates.len() + report.conflicts.len();
        // Stash the report and graph snapshot for the dispatch-trace step.
        conflict_for_trace = Some(report);
        graph_snapshot_for_trace = Some(graph_prepared.graph);
        Ok((Some(rows), signature))
    });
    operations.push(operation);
    signatures.extend(signature);

    // dispatch_trace: needs both values stashed by the conflict-matrix step
    // and errors out if either is missing.
    let (operation, signature) = graph_db_backend_eval_timed("dispatch_trace", || {
        let conflict = conflict_for_trace
            .take()
            .context("backend-eval dispatch-trace requires a completed conflict-matrix report")?;
        let graph = graph_snapshot_for_trace
            .take()
            .context("backend-eval dispatch-trace requires conflict-matrix graph preparation")?;
        let report = build_dispatch_trace_report_from_conflict_snapshot(
            root,
            scope,
            conflict,
            graph.nodes,
            graph.edges,
            depth,
            limit,
            Vec::new(),
        )?;
        Ok((
            Some(report.nodes.len() + report.edges.len()),
            graph_db_backend_eval_dispatch_signature(&report),
        ))
    });
    operations.push(operation);
    signatures.extend(signature);

    // Total time covers every recorded operation, refresh included.
    let total_micros = operations
        .iter()
        .map(|operation| operation.duration_micros)
        .sum();
    let parity = graph_db_backend_eval_parity(sqlite_signatures, &signatures);
    (
        GraphDbBackendEvalBackendReport {
            backend: backend.to_string(),
            adapter: adapter.to_string(),
            read_only,
            projection_load: projection_load.to_string(),
            operations,
            total_micros,
            parity,
            lock_behavior: lock_behavior.to_string(),
            install_portability: install_portability.to_string(),
        },
        signatures,
    )
}
11262
11263pub(crate) fn graph_db_backend_eval_refresh_operation(
11264    duration_micros: u128,
11265    rows: usize,
11266    value: serde_json::Value,
11267) -> (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature) {
11268    (
11269        GraphDbBackendEvalOperation {
11270            name: "refresh".to_string(),
11271            supported: true,
11272            status: "ok".to_string(),
11273            duration_micros,
11274            rows: Some(rows),
11275            error: None,
11276        },
11277        GraphDbBackendEvalSignature {
11278            operation: "refresh".to_string(),
11279            value,
11280        },
11281    )
11282}
11283
11284pub(crate) fn graph_db_backend_eval_synthetic_projection(
11285    nodes: usize,
11286    fanout: usize,
11287) -> GraphProjection {
11288    let nodes = nodes.max(12);
11289    let symbol_count = nodes.saturating_sub(9).max(1);
11290    let source = GraphProvenance::new("backend-eval", "synthetic");
11291    let mut projection_nodes = vec![
11292        SubstrateGraphNode::new(
11293            "projection:tsift-traversal:synthetic",
11294            GRAPH_PROJECTION_META_KIND,
11295            "synthetic projection",
11296        )
11297        .with_property("projection_version", GRAPH_PROJECTION_VERSION)
11298        .with_property(
11299            "content_hash",
11300            format!("synthetic-{nodes}-{fanout}-{symbol_count}"),
11301        )
11302        .with_provenance(source.clone()),
11303        SubstrateGraphNode::new("gses-synthetic", "session", "synthetic session")
11304            .with_property("ref_id", "synthetic-session"),
11305        SubstrateGraphNode::new("gbak-synthetic", "backlog", "#synthetic")
11306            .with_property("ref_id", "synthetic")
11307            .with_property("path", "tasks/software/synthetic.md")
11308            .with_property("line", "1")
11309            .with_property(
11310                "expand",
11311                "tsift source-read tasks/software/synthetic.md --start 1 --lines 40",
11312            ),
11313        SubstrateGraphNode::new("gjob-synthetic", "job_packet", "do #synthetic")
11314            .with_property("ref_id", "synthetic"),
11315        SubstrateGraphNode::new("gwctx-synthetic", "worker_context", "synthetic context")
11316            .with_property("target", "synthetic")
11317            .with_property("summary", "Synthetic worker owns synthetic.rs")
11318            .with_property(
11319                "expand",
11320                "tsift source-read synthetic.rs --start 1 --lines 80",
11321            ),
11322        SubstrateGraphNode::new("gsrc-synthetic", "source_handle", "synthetic.rs:1-80")
11323            .with_property("file", "synthetic.rs")
11324            .with_property("start", "1")
11325            .with_property("end", "80")
11326            .with_property(
11327                "expand",
11328                "tsift source-read synthetic.rs --start 1 --lines 80",
11329            ),
11330        SubstrateGraphNode::new("gfil-synthetic", "file", "synthetic.rs")
11331            .with_property("path", "synthetic.rs"),
11332        SubstrateGraphNode::new("gsem-synthetic", "semantic_concept", "backend evaluation")
11333            .with_property("handle", "gsem-synthetic")
11334            .with_property("label", "backend evaluation")
11335            .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
11336            .with_property(
11337                "embedding",
11338                semantic_embedding_property("backend evaluation"),
11339            ),
11340        SubstrateGraphNode::new("gwres-synthetic", "worker_result", "completed #synthetic")
11341            .with_property("ref_id", "synthetic")
11342            .with_property("status", "completed")
11343            .with_property("touched_files", "synthetic.rs")
11344            .with_property("expected_tests", "cargo test --test graph_db_conformance"),
11345    ];
11346    for idx in 0..symbol_count {
11347        projection_nodes.push(
11348            SubstrateGraphNode::new(
11349                format!("gsym-synthetic-{idx:04}"),
11350                "symbol",
11351                format!("synthetic_symbol_{idx:04}"),
11352            )
11353            .with_property("ref_id", format!("synthetic_symbol_{idx:04}"))
11354            .with_property("path", "synthetic.rs")
11355            .with_property("line", (idx + 1).to_string()),
11356        );
11357    }
11358
11359    let mut projection_edges = vec![
11360        SubstrateGraphEdge::new("gses-synthetic", "gbak-synthetic", "contains"),
11361        SubstrateGraphEdge::new("gses-synthetic", "gjob-synthetic", "queues"),
11362        SubstrateGraphEdge::new("gbak-synthetic", "gwctx-synthetic", "has_context"),
11363        SubstrateGraphEdge::new("gjob-synthetic", "gwctx-synthetic", "has_context"),
11364        SubstrateGraphEdge::new("gwctx-synthetic", "gsrc-synthetic", "uses_source"),
11365        SubstrateGraphEdge::new("gbak-synthetic", "gwres-synthetic", "has_worker_result"),
11366        SubstrateGraphEdge::new("gbak-synthetic", "gsem-synthetic", "mentions_concept"),
11367        SubstrateGraphEdge::new("gsrc-synthetic", "gfil-synthetic", "reads_file"),
11368        SubstrateGraphEdge::new("gfil-synthetic", "gsym-synthetic-0000", "defines"),
11369    ];
11370    for idx in 0..symbol_count {
11371        let from = format!("gsym-synthetic-{idx:04}");
11372        for offset in 1..=fanout.max(1).min(symbol_count) {
11373            let to_idx = (idx + offset) % symbol_count;
11374            if to_idx != idx {
11375                projection_edges.push(SubstrateGraphEdge::new(
11376                    from.clone(),
11377                    format!("gsym-synthetic-{to_idx:04}"),
11378                    "calls",
11379                ));
11380            }
11381        }
11382    }
11383
11384    GraphProjection {
11385        nodes: projection_nodes,
11386        edges: projection_edges
11387            .into_iter()
11388            .map(|edge| {
11389                edge.with_property("dataset", "synthetic")
11390                    .with_provenance(source.clone())
11391            })
11392            .collect(),
11393    }
11394}
11395
11396pub(crate) fn graph_db_backend_eval_promotion(
11397    datasets: &[GraphDbBackendEvalDataset],
11398    candidates: &[GraphDbExperimentalBackend],
11399) -> Vec<GraphDbBackendPromotionDecision> {
11400    let mut decisions = Vec::new();
11401    for candidate in candidates {
11402        let mut reasons = Vec::new();
11403        let mut faster_everywhere = true;
11404        let mut parity_everywhere = true;
11405        for dataset in datasets {
11406            let Some(sqlite_report) = dataset
11407                .backends
11408                .iter()
11409                .find(|backend| backend.backend == "sqlite")
11410            else {
11411                parity_everywhere = false;
11412                faster_everywhere = false;
11413                reasons.push(format!(
11414                    "{} dataset is missing SQLite baseline",
11415                    dataset.name
11416                ));
11417                continue;
11418            };
11419            let sqlite_total = sqlite_report.total_micros;
11420            let Some(candidate_report) = dataset
11421                .backends
11422                .iter()
11423                .find(|backend| backend.backend == candidate.name())
11424            else {
11425                parity_everywhere = false;
11426                reasons.push(format!("{} dataset did not run", dataset.name));
11427                continue;
11428            };
11429            if !candidate_report.parity.matches_sqlite {
11430                parity_everywhere = false;
11431                reasons.push(format!("{} parity differed from SQLite", dataset.name));
11432            }
11433            if candidate_report.total_micros >= sqlite_total {
11434                faster_everywhere = false;
11435                reasons.push(format!(
11436                    "{} total {}us did not beat SQLite {}us",
11437                    dataset.name, candidate_report.total_micros, sqlite_total
11438                ));
11439            }
11440            let sqlite_operations = sqlite_report
11441                .operations
11442                .iter()
11443                .map(|operation| (operation.name.as_str(), operation.duration_micros))
11444                .collect::<BTreeMap<_, _>>();
11445            for operation in &candidate_report.operations {
11446                if let Some(sqlite_duration) = sqlite_operations.get(operation.name.as_str())
11447                    && operation.duration_micros >= *sqlite_duration
11448                {
11449                    faster_everywhere = false;
11450                    reasons.push(format!(
11451                        "{} {} operation {}us did not beat SQLite {}us",
11452                        dataset.name, operation.name, operation.duration_micros, sqlite_duration
11453                    ));
11454                }
11455            }
11456            if candidate_report
11457                .operations
11458                .iter()
11459                .any(|operation| operation.status != "ok")
11460            {
11461                parity_everywhere = false;
11462                reasons.push(format!("{} has failed benchmark operations", dataset.name));
11463            }
11464        }
11465        let decision = if let Some(reason) = candidate.prototype_hold_reason() {
11466            reasons.push(reason.to_string());
11467            reasons.push(
11468                "current bounded prototype timings are benchmark evidence, not a backend switch approval"
11469                    .to_string(),
11470            );
11471            "hold"
11472        } else if parity_everywhere && faster_everywhere {
11473            reasons.push(
11474                "prototype gate passed; production promotion still requires the real engine adapter to preserve SQLite's bundled install and multi-process lock behavior"
11475                    .to_string(),
11476            );
11477            "eligible"
11478        } else {
11479            reasons.push(
11480                "production promotion requires SQLite parity plus lower total time for every measured operation on every dataset without worse lock behavior or install portability"
11481                    .to_string(),
11482            );
11483            "hold"
11484        };
11485        decisions.push(GraphDbBackendPromotionDecision {
11486            backend: candidate.name().to_string(),
11487            decision: decision.to_string(),
11488            reasons: dedupe_preserve_order(reasons),
11489            gate: candidate.promotion_gate(),
11490        });
11491    }
11492    decisions
11493}
11494
11495pub(crate) fn graph_db_backend_eval_metrics(
11496    datasets: &[GraphDbBackendEvalDataset],
11497) -> BTreeMap<String, f64> {
11498    let mut metrics = BTreeMap::new();
11499    for dataset in datasets {
11500        let graph_rows = graph_db_backend_eval_graph_rows(dataset);
11501        metrics.insert(format!("{}.nodes", dataset.name), dataset.nodes as f64);
11502        metrics.insert(format!("{}.edges", dataset.name), dataset.edges as f64);
11503        metrics.insert(format!("{}.graph_rows", dataset.name), graph_rows as f64);
11504        for backend in &dataset.backends {
11505            let prefix = format!("{}.{}", dataset.name, backend.backend.replace('-', "_"));
11506            metrics.insert(
11507                format!("{prefix}.total_duration_micros"),
11508                backend.total_micros as f64,
11509            );
11510            append_graph_db_backend_eval_normalized_duration_metric(
11511                &mut metrics,
11512                &format!("{prefix}.total_duration_micros_per_1k_graph_rows"),
11513                backend.total_micros,
11514                graph_rows,
11515            );
11516            for operation in &backend.operations {
11517                metrics.insert(
11518                    format!("{prefix}.{}.duration_micros", operation.name),
11519                    operation.duration_micros as f64,
11520                );
11521                append_graph_db_backend_eval_normalized_duration_metric(
11522                    &mut metrics,
11523                    &format!(
11524                        "{prefix}.{}.duration_micros_per_1k_graph_rows",
11525                        operation.name
11526                    ),
11527                    operation.duration_micros,
11528                    graph_rows,
11529                );
11530                if let Some(rows) = operation.rows {
11531                    metrics.insert(format!("{prefix}.{}.rows", operation.name), rows as f64);
11532                }
11533            }
11534        }
11535    }
11536    metrics
11537}
11538
11539pub(crate) fn graph_db_backend_eval_graph_rows(dataset: &GraphDbBackendEvalDataset) -> usize {
11540    dataset.nodes + dataset.edges
11541}
11542
11543pub(crate) fn append_graph_db_backend_eval_normalized_duration_metric(
11544    metrics: &mut BTreeMap<String, f64>,
11545    key: &str,
11546    duration_micros: u128,
11547    graph_rows: usize,
11548) {
11549    if graph_rows == 0 {
11550        return;
11551    }
11552    metrics.insert(
11553        key.to_string(),
11554        duration_micros as f64 / graph_rows as f64 * GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT,
11555    );
11556}
11557
11558pub(crate) fn append_graph_db_backend_eval_phase_metrics(
11559    metrics: &mut BTreeMap<String, f64>,
11560    dataset: &str,
11561    graph_rows: usize,
11562    phases: &[GraphDbBackendEvalPhaseTiming],
11563) {
11564    for phase in phases {
11565        metrics.insert(
11566            format!("{dataset}.refresh_phase.{}.duration_micros", phase.name),
11567            phase.duration_micros as f64,
11568        );
11569        append_graph_db_backend_eval_normalized_duration_metric(
11570            metrics,
11571            &format!(
11572                "{dataset}.refresh_phase.{}.duration_micros_per_1k_graph_rows",
11573                phase.name
11574            ),
11575            phase.duration_micros,
11576            graph_rows,
11577        );
11578    }
11579}
11580
11581fn graph_db_backend_eval_base_command(
11582    root: &Path,
11583    scope: Option<&str>,
11584    full_projection: bool,
11585) -> String {
11586    let full_projection_arg = if full_projection {
11587        " --full-projection"
11588    } else {
11589        ""
11590    };
11591    format!(
11592        "tsift graph-db --path {}{} --json backend-eval{}",
11593        shell_quote(root.to_string_lossy().as_ref()),
11594        graph_db_scope_arg(scope),
11595        full_projection_arg
11596    )
11597}
11598
11599pub(crate) fn graph_db_backend_eval_metric_digest_command(
11600    root: &Path,
11601    scope: Option<&str>,
11602    full_projection: bool,
11603) -> String {
11604    format!(
11605        "{} | tsift metric-digest --baseline fixtures/graph-db-performance-history.json",
11606        graph_db_backend_eval_base_command(root, scope, full_projection)
11607    )
11608}
11609
11610fn graph_db_backend_eval_repeated_sample_command(
11611    root: &Path,
11612    scope: Option<&str>,
11613    full_projection: bool,
11614) -> String {
11615    format!(
11616        "for sample in 1 2 3; do {}; done | tsift metric-digest --baseline fixtures/graph-db-performance-history.json",
11617        graph_db_backend_eval_base_command(root, scope, full_projection)
11618    )
11619}
11620
11621fn graph_db_backend_eval_hop_cap_promotion_gate() -> GraphDbHopCapPromotionGate {
11622    let mut required_metrics = Vec::new();
11623    for workload in perf_gate::HOP_CAP_REQUIRED_WORKLOADS {
11624        required_metrics.push(format!("{workload}.sqlite.path_max_hops.duration_micros"));
11625        required_metrics.push(format!("{workload}.sqlite.path_max_hops.rows"));
11626        for hops in perf_gate::HOP_CAP_CANDIDATE_TIERS {
11627            required_metrics.push(format!(
11628                "{workload}.sqlite.path_max_hops_{hops}.duration_micros"
11629            ));
11630            required_metrics.push(format!("{workload}.sqlite.path_max_hops_{hops}.rows"));
11631        }
11632    }
11633    GraphDbHopCapPromotionGate {
11634        status: "hold_64_default_until_gate_passes".to_string(),
11635        current_default_hops: perf_gate::HOP_CAP_CURRENT_DEFAULT,
11636        candidate_hop_tiers: perf_gate::HOP_CAP_CANDIDATE_TIERS.to_vec(),
11637        required_backend: perf_gate::BASELINE_BACKEND.to_string(),
11638        required_workloads: perf_gate::HOP_CAP_REQUIRED_WORKLOADS
11639            .iter()
11640            .map(|workload| (*workload).to_string())
11641            .collect(),
11642        required_metrics,
11643        allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
11644        minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
11645        decision_rule:
11646            "keep 64 as the user-facing default until each candidate tier has repeated real, full_projection, and synthetic_deep_chain SQLite samples within the latency-regression budget and returning useful path rows; full_projection samples are binding only after a cold populate leg proves a cache-hit leg"
11647                .to_string(),
11648    }
11649}
11650
11651fn graph_db_backend_eval_backend_adapter_spike_gate() -> GraphDbBackendAdapterSpikeGate {
11652    let candidate_backends = [
11653        GraphDbExperimentalBackend::Falkordb,
11654        GraphDbExperimentalBackend::Kuzu,
11655        GraphDbExperimentalBackend::Surrealdb,
11656    ]
11657    .into_iter()
11658    .map(|backend| GraphDbBackendAdapterSpikeCandidate {
11659        backend: backend.name().to_string(),
11660        adapter_label: backend.adapter_label().to_string(),
11661        projection_load: backend.projection_load().to_string(),
11662        lock_behavior: backend.lock_behavior().to_string(),
11663        install_portability: backend.install_portability().to_string(),
11664    })
11665    .collect();
11666
11667    GraphDbBackendAdapterSpikeGate {
11668        status: "hold_real_optional_adapter_required".to_string(),
11669        candidate_backends,
11670        required_workloads: perf_gate::GATE_WORKLOAD_PREFIXES
11671            .iter()
11672            .map(|workload| (*workload).to_string())
11673            .collect(),
11674        required_checks: vec![
11675            "real_optional_adapter_behind_graphstore_without_default_build_dependency".to_string(),
11676            "projection_load_writes_provider_neutral_rows_without_sqlite_row_replay".to_string(),
11677            "freshness_and_full_parity_match_sqlite_on_every_graphstore_operation".to_string(),
11678            "lock_semantics_match_or_beat_sqlite_for_writer_and_read_only_workflows".to_string(),
11679            "install_portability_preserves_cargo_build_install_without_external_service_or_native_toolchain"
11680                .to_string(),
11681            "full_projection_cache_hit_sample_before_backend_or_hop_cap_changes".to_string(),
11682            "beats_sqlite_on_every_required_workload_and_metric_in_backend_eval".to_string(),
11683        ],
11684        decision_rule:
11685            "do not promote a read-only prototype; FalkorDB, Kuzu, or SurrealDB can only advance after a real optional adapter proves projection writes/load, lock semantics, install portability, full parity, and faster-than-SQLite results across every required workload"
11686                .to_string(),
11687        evidence_plan: "plans/gback-evidence.md".to_string(),
11688    }
11689}
11690
11691pub(crate) fn graph_db_backend_eval_performance_gate(
11692    root: &Path,
11693    scope: Option<&str>,
11694    full_projection: bool,
11695) -> GraphDbBackendEvalPerformanceGate {
11696    let mut required_metrics = vec![
11697        "real.sqlite.refresh.duration_micros".to_string(),
11698        "real.sqlite.refresh.duration_micros_per_1k_graph_rows".to_string(),
11699        "real.sqlite.edge_lookup.duration_micros_per_1k_graph_rows".to_string(),
11700        "real.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows".to_string(),
11701        "real.sqlite.incident_edges.duration_micros_per_1k_graph_rows".to_string(),
11702        "real.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11703        "real.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows".to_string(),
11704        "real.sqlite.evidence.duration_micros_per_1k_graph_rows".to_string(),
11705        "real.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11706        "real.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows".to_string(),
11707        "real.refresh_phase.sqlite_delta_write.duration_micros".to_string(),
11708        "real.refresh_phase.sqlite_property_row_staging.duration_micros".to_string(),
11709        "real.refresh_phase.sqlite_edge_property_row_staging.duration_micros".to_string(),
11710        "real.sqlite.conflict_matrix.duration_micros".to_string(),
11711        "real.sqlite.dispatch_trace.duration_micros".to_string(),
11712        "real.sqlite.path_max_hops.duration_micros".to_string(),
11713        "real.sqlite.path_max_hops_128.duration_micros".to_string(),
11714        "real.sqlite.path_max_hops_256.duration_micros".to_string(),
11715        "real.sqlite.path_max_hops_512.duration_micros".to_string(),
11716        "real.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows".to_string(),
11717        "real.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows".to_string(),
11718        "real.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows".to_string(),
11719        "synthetic_high_degree.sqlite.total_duration_micros".to_string(),
11720        "synthetic_high_degree.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11721        "synthetic_high_degree.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11722        "synthetic_high_degree.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows"
11723            .to_string(),
11724        "synthetic_high_degree.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows"
11725            .to_string(),
11726        "synthetic_deep_chain.sqlite.incident_edges.duration_micros_per_1k_graph_rows".to_string(),
11727        "synthetic_deep_chain.sqlite.neighborhood.duration_micros_per_1k_graph_rows".to_string(),
11728        "synthetic_deep_chain.sqlite.path_max_hops.duration_micros".to_string(),
11729        "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros".to_string(),
11730        "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros".to_string(),
11731        "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros".to_string(),
11732        "synthetic_deep_chain.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows"
11733            .to_string(),
11734        "synthetic_deep_chain.sqlite.path_max_hops.duration_micros_per_1k_graph_rows".to_string(),
11735        "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows"
11736            .to_string(),
11737        "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows"
11738            .to_string(),
11739        "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows"
11740            .to_string(),
11741    ];
11742    if full_projection {
11743        required_metrics.extend([
11744            "full_projection.cache.hit".to_string(),
11745            "full_projection.cache.disk_bytes".to_string(),
11746            "full_projection.cache.compression_ratio".to_string(),
11747            "full_projection.refresh_phase.cache_lookup.duration_micros".to_string(),
11748            "full_projection.sqlite.total_duration_micros_per_1k_graph_rows".to_string(),
11749            "full_projection.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows"
11750                .to_string(),
11751            "full_projection.refresh_phase.projection_rows.duration_micros_per_1k_graph_rows"
11752                .to_string(),
11753            "full_projection.sqlite.sqlite_delta_write.duration_micros".to_string(),
11754            "full_projection.sqlite.sqlite_node_staging.duration_micros".to_string(),
11755            "full_projection.sqlite.post_write_reads.duration_micros".to_string(),
11756            "full_projection.sqlite.neighborhood.duration_micros".to_string(),
11757            "full_projection.sqlite.evidence_target_resolution.duration_micros".to_string(),
11758            "full_projection.sqlite.evidence.duration_micros".to_string(),
11759            "full_projection.sqlite.path_max_hops.duration_micros".to_string(),
11760            "full_projection.sqlite.path_max_hops_128.duration_micros".to_string(),
11761            "full_projection.sqlite.path_max_hops_256.duration_micros".to_string(),
11762            "full_projection.sqlite.path_max_hops_512.duration_micros".to_string(),
11763            "full_projection.sqlite.conflict_matrix.duration_micros".to_string(),
11764            "full_projection.sqlite.dispatch_trace.duration_micros".to_string(),
11765        ]);
11766    }
11767    GraphDbBackendEvalPerformanceGate {
11768        baseline_fixture: "fixtures/graph-db-performance-history.json".to_string(),
11769        ci_profile: "synthetic_high_degree + synthetic_deep_chain metrics are CI-safe and bounded"
11770            .to_string(),
11771        opt_in_real_profile:
11772            "pass --full-projection to add the full-project dataset when checking for large projection regressions"
11773                .to_string(),
11774        full_projection_cache_hit_gate: if full_projection {
11775            "binding full_projection performance evidence requires a cold populate leg followed by cache-leg samples with full_projection.cache.hit=1; cache-miss samples are diagnostics, not backend or hop-cap promotion proof"
11776                .to_string()
11777        } else {
11778            "not evaluated until --full-projection is enabled".to_string()
11779        },
11780        allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
11781        minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
11782        normalized_metric_unit: "duration_micros_per_1k_graph_rows".to_string(),
11783        required_metrics,
11784        digest_command: graph_db_backend_eval_metric_digest_command(root, scope, full_projection),
11785        repeated_sample_command: graph_db_backend_eval_repeated_sample_command(
11786            root,
11787            scope,
11788            full_projection,
11789        ),
11790        hop_cap_promotion: graph_db_backend_eval_hop_cap_promotion_gate(),
11791        backend_adapter_spike: graph_db_backend_eval_backend_adapter_spike_gate(),
11792    }
11793}
11794
11795#[cfg(feature = "backend-surrealdb")]
/// Sanitises `value` so it can be used as a single path component.
///
/// ASCII alphanumerics, `-`, `_` and `.` are kept; every other character
/// (including path separators and non-ASCII text) becomes `_`.
///
/// Three results would still not behave as an ordinary component when joined
/// onto a directory: the empty string adds no level at all, `.` refers to the
/// directory itself, and `..` climbs to its parent. Those are rewritten to
/// underscores (`""` -> `"_"`, `"."` -> `"_"`, `".."` -> `"__"`) so the result
/// always names a real child entry.
fn graph_db_backend_eval_path_segment(value: &str) -> String {
    let segment: String = value
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
                ch
            } else {
                '_'
            }
        })
        .collect();
    match segment.as_str() {
        "" => "_".to_string(),
        // Same length as the input keeps "." and ".." distinguishable.
        "." | ".." => "_".repeat(segment.len()),
        _ => segment,
    }
}
11808
/// Returns the on-disk location of the SurrealDB backend-eval store:
/// `<root>/.tsift/backend-eval-cache/surrealdb/<scope>/<dataset>/surrealkv`.
///
/// `scope` falls back to `"root"` when absent; both `scope` and `dataset` are
/// passed through `graph_db_backend_eval_path_segment` before being appended.
#[cfg(feature = "backend-surrealdb")]
fn graph_db_backend_eval_surrealdb_store_path(
    root: &Path,
    scope: Option<&str>,
    dataset: &str,
) -> PathBuf {
    let scope_segment = graph_db_backend_eval_path_segment(scope.unwrap_or("root"));
    let dataset_segment = graph_db_backend_eval_path_segment(dataset);

    let mut store_path = root.join(".tsift/backend-eval-cache/surrealdb");
    store_path.push(scope_segment);
    store_path.push(dataset_segment);
    store_path.push("surrealkv");
    store_path
}
11820
/// Borrowed inputs for a graph-db backend-eval run.
///
/// NOTE(review): the code that consumes this struct is outside this view, so
/// the field notes below are inferred from names and types — confirm against
/// the caller.
pub(crate) struct GraphDbBackendEvalOptions<'a> {
    // Filesystem path the evaluation is run against.
    path: &'a Path,
    // Optional scope; presumably the same value forwarded as `scope` elsewhere.
    scope: Option<&'a str>,
    // Candidate backend names as strings (presumably still unparsed).
    candidates: &'a [String],
    // Target identifiers to evaluate.
    targets: &'a [String],
    // Whether the full-projection dataset is included.
    full_projection: bool,
}
11828
/// Evaluates one named dataset against the SQLite baseline and every
/// requested experimental backend.
///
/// The SQLite report is produced first from the already-refreshed
/// `sqlite_store`; its signatures are then handed to each candidate's report
/// call (presumably as the parity reference — confirm in
/// `graph_db_backend_eval_report_for_store`). Each candidate store is built
/// from `sqlite_rows`, and the time spent building it plus reading its graph
/// counts is recorded as that candidate's refresh operation.
///
/// Returns the dataset name, `targets.len()`, the SQLite node/edge counts and
/// the per-backend reports (SQLite first, then candidates in input order).
///
/// # Errors
///
/// Propagates failures from `graph_counts`, from building a candidate store
/// (`SurrealdbGraphStore::open_or_refresh` /
/// `ExperimentalReadOnlyGraphStore::from_rows`) and from
/// `sqlite_graph_freshness`.
#[allow(clippy::too_many_arguments)]
pub(crate) fn graph_db_backend_eval_dataset(
    name: &str,
    root: &Path,
    path: &Path,
    scope: Option<&str>,
    targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
    candidates: &[GraphDbExperimentalBackend],
    sqlite_store: &SqliteGraphStore,
    sqlite_freshness: GraphDbFreshnessReport,
    sqlite_refresh: (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature),
    sqlite_rows: ConvexProjectionRows,
    extra_warnings: Vec<String>,
    prepared: &ConflictMatrixPreparedInputs,
) -> Result<GraphDbBackendEvalDataset> {
    // Dataset-level node/edge counts always come from the SQLite store.
    let (nodes, edges) = sqlite_store.graph_counts()?;
    let (sqlite_operation, sqlite_signature) = sqlite_refresh;
    // Baseline report: no reference signatures are supplied (`None`) because
    // this call is what produces `sqlite_signatures`.
    let (sqlite_report, sqlite_signatures) = graph_db_backend_eval_report_for_store(
        "sqlite",
        "SQLite GraphStore correctness baseline",
        false,
        root,
        path,
        scope,
        targets,
        depth,
        limit,
        impact_limit,
        sqlite_store,
        sqlite_freshness,
        sqlite_operation,
        Some(sqlite_signature),
        None,
        extra_warnings.clone(),
        prepared,
        "SQLite refresh writes provider-neutral projection rows into graph.db transactionally",
        "SQLite WAL correctness store; refresh uses one transactional writer and read-only queries use snapshot recovery",
        "bundled rusqlite baseline; no external service or runtime required",
    );

    let mut backends = vec![sqlite_report];
    for candidate in candidates {
        // With the `backend-surrealdb` feature, SurrealDB goes through the
        // real adapter and skips the generic read-only path via `continue`.
        #[cfg(feature = "backend-surrealdb")]
        if *candidate == GraphDbExperimentalBackend::Surrealdb {
            // Timer starts before the store is opened so the refresh duration
            // covers open/refresh and the count query.
            let started = Instant::now();
            let store_path = graph_db_backend_eval_surrealdb_store_path(root, scope, name);
            let (store, warm_start) =
                SurrealdbGraphStore::open_or_refresh(&store_path, &sqlite_rows)?;
            let (candidate_nodes, candidate_edges) = store.graph_counts()?;
            let rows = candidate_nodes + candidate_edges;
            let mut refresh_meta = serde_json::json!({
                "nodes": candidate_nodes,
                "edges": candidate_edges,
            });
            // Only a cache hit is annotated; other outcomes leave the key absent.
            if warm_start == tsift_surrealdb::WarmStartOutcome::CacheHit {
                refresh_meta["warm_start"] = serde_json::json!("cache_hit");
            }
            let refresh = graph_db_backend_eval_refresh_operation(
                started.elapsed().as_micros(),
                rows,
                refresh_meta,
            );
            // Freshness is read from the SQLite store, not from the candidate.
            let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
            let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
                candidate.name(),
                "SurrealDB SurrealKV optional adapter spike",
                false,
                root,
                path,
                scope,
                targets,
                depth,
                limit,
                impact_limit,
                &store,
                freshness,
                refresh.0,
                Some(refresh.1),
                Some(&sqlite_signatures),
                extra_warnings.clone(),
                prepared,
                "provider-neutral rows written into an embedded/file-backed SurrealDB SurrealKV store through the optional tsift-surrealdb adapter; warm-start reuses existing store when row hash matches",
                "embedded/file-backed writer through SurrealDB SurrealKV rewrites backend-eval rows before read-only measurements; promotion still requires multi-process/read-only contention samples",
                "feature-gated optional tsift-surrealdb crate; default cargo build/install does not pull SurrealDB into the dependency graph",
            );
            backends.push(candidate_report);
            continue;
        }
        // Generic path: a read-only store built from the SQLite projection rows.
        let started = Instant::now();
        let store = ExperimentalReadOnlyGraphStore::from_rows(*candidate, &sqlite_rows)?;
        let (candidate_nodes, candidate_edges) = store.graph_counts()?;
        let rows = candidate_nodes + candidate_edges;
        let refresh = graph_db_backend_eval_refresh_operation(
            started.elapsed().as_micros(),
            rows,
            serde_json::json!({
                "nodes": candidate_nodes,
                "edges": candidate_edges,
            }),
        );
        // Freshness is read from the SQLite store, not from the candidate.
        let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
        // NOTE(review): the third argument is `true` only on this path;
        // presumably it marks a read-only prototype — confirm in the callee.
        let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
            candidate.name(),
            candidate.adapter_label(),
            true,
            root,
            path,
            scope,
            targets,
            depth,
            limit,
            impact_limit,
            &store,
            freshness,
            refresh.0,
            Some(refresh.1),
            Some(&sqlite_signatures),
            extra_warnings.clone(),
            prepared,
            candidate.projection_load(),
            candidate.lock_behavior(),
            candidate.install_portability(),
        );
        backends.push(candidate_report);
    }

    Ok(GraphDbBackendEvalDataset {
        name: name.to_string(),
        target_count: targets.len(),
        nodes,
        edges,
        backends,
    })
}
11966
11967pub(crate) fn print_graph_db_backend_eval_human(report: &GraphDbBackendEvalReport) {
11968    println!(
11969        "graph-db backend-eval baseline:{} candidates:{}",
11970        report.baseline_backend,
11971        report.candidates.join(", ")
11972    );
11973    for phase in &report.phase_timings {
11974        println!(
11975            "phase:{} {}us {}",
11976            phase.name, phase.duration_micros, phase.detail
11977        );
11978    }
11979    for dataset in &report.datasets {
11980        println!(
11981            "dataset:{} targets:{} rows:{}",
11982            dataset.name,
11983            dataset.target_count,
11984            dataset.nodes + dataset.edges
11985        );
11986        for backend in &dataset.backends {
11987            println!(
11988                "  backend:{} total:{}us parity:{}",
11989                backend.backend, backend.total_micros, backend.parity.matches_sqlite
11990            );
11991            println!("    projection-load: {}", backend.projection_load);
11992            println!("    lock-behavior: {}", backend.lock_behavior);
11993            println!("    install-portability: {}", backend.install_portability);
11994            for operation in &backend.operations {
11995                println!(
11996                    "    {} {} {}us",
11997                    operation.name, operation.status, operation.duration_micros
11998                );
11999            }
12000            for diagnostic in &backend.parity.diagnostics {
12001                println!("    parity: {diagnostic}");
12002            }
12003        }
12004    }
12005    for decision in &report.promotion {
12006        println!("promotion {}: {}", decision.backend, decision.decision);
12007        println!("  gate: {}", decision.gate.status);
12008        for reason in &decision.reasons {
12009            println!("  reason: {reason}");
12010        }
12011        for check in &decision.gate.required_checks {
12012            println!("  check: {check}");
12013        }
12014    }
12015    println!("metric-digest: {}", report.metric_digest_command);
12016    println!(
12017        "repeat-samples: {}",
12018        report.performance_gate.repeated_sample_command
12019    );
12020}
12021
12022fn traversal_expand_command(root: &Path, handle: &str) -> String {
12023    format!(
12024        "tsift traverse {} --path {} --depth 1 --limit 50",
12025        shell_quote(handle),
12026        shell_quote(root.to_string_lossy().as_ref())
12027    )
12028}
12029
12030fn traversal_file_node(root: &Path, file: &str) -> TraversalNode {
12031    let display = relativize(file, root);
12032    let handle = stable_handle("gfil", &format!("file:{display}"));
12033    TraversalNode {
12034        handle: handle.clone(),
12035        kind: "file".to_string(),
12036        label: display.clone(),
12037        ref_id: Some(display.clone()),
12038        path: Some(display),
12039        line: None,
12040        detail: None,
12041        properties: BTreeMap::new(),
12042        expand: traversal_expand_command(root, &handle),
12043    }
12044}
12045
12046fn traversal_raw_source_file_node(root: &Path, file: &str) -> TraversalNode {
12047    let mut node = traversal_file_node(root, file);
12048    if let Some(path) = node.path.clone() {
12049        node.detail = Some("raw source fallback; graph evidence unavailable".to_string());
12050        node.expand = source_read_command(root, &path, 1, 80);
12051    }
12052    node
12053}
12054
12055fn traversal_symbol_node(root: &Path, symbol: &index::StoredSymbol) -> TraversalNode {
12056    let file = relativize(&symbol.file, root);
12057    let key = format!("symbol:{file}:{}:{}", symbol.line, symbol.name);
12058    let handle = stable_handle("gsym", &key);
12059    TraversalNode {
12060        handle: handle.clone(),
12061        kind: "symbol".to_string(),
12062        label: symbol.name.clone(),
12063        ref_id: Some(symbol.name.clone()),
12064        path: Some(file),
12065        line: Some(symbol.line),
12066        detail: Some(format!("{} {}", symbol.language, symbol.kind)),
12067        properties: BTreeMap::new(),
12068        expand: traversal_expand_command(root, &handle),
12069    }
12070}
12071
12072fn traversal_ast_span_expand_command(
12073    root: &Path,
12074    file: &str,
12075    symbol: &index::StoredSymbol,
12076    span: &AstSpanPreview,
12077) -> String {
12078    if symbol.language == "markdown" {
12079        markdown_ast_command(root, file, Some(&span.handle))
12080    } else {
12081        let line_count = span
12082            .end_line
12083            .saturating_sub(span.start_line)
12084            .saturating_add(1)
12085            .max(1);
12086        source_read_command(root, file, span.start_line, line_count)
12087    }
12088}
12089
12090fn traversal_ast_span_node(
12091    root: &Path,
12092    symbol: &index::StoredSymbol,
12093    source: &[u8],
12094    symbols: &[index::StoredSymbol],
12095) -> Option<(TraversalNode, TraversalAstSpanIndexEntry)> {
12096    let span = stored_symbol_ast_span(symbol, source, symbols, usize::MAX)?;
12097    let file = relativize(&symbol.file, root);
12098    let mut properties = BTreeMap::new();
12099    properties.insert("layer".to_string(), "ast_navigation".to_string());
12100    properties.insert("language".to_string(), symbol.language.clone());
12101    properties.insert("symbol_kind".to_string(), symbol.kind.clone());
12102    properties.insert("node_kind".to_string(), span.node_kind.clone());
12103    properties.insert("start_byte".to_string(), span.start_byte.to_string());
12104    properties.insert("end_byte".to_string(), span.end_byte.to_string());
12105    properties.insert("end_line".to_string(), span.end_line.to_string());
12106    if let Some(body_start_byte) = span.body_start_byte {
12107        properties.insert("body_start_byte".to_string(), body_start_byte.to_string());
12108    }
12109    if let Some(body_end_byte) = span.body_end_byte {
12110        properties.insert("body_end_byte".to_string(), body_end_byte.to_string());
12111    }
12112    if let Some(body_start_line) = span.body_start_line {
12113        properties.insert("body_start_line".to_string(), body_start_line.to_string());
12114    }
12115    if let Some(body_end_line) = span.body_end_line {
12116        properties.insert("body_end_line".to_string(), body_end_line.to_string());
12117    }
12118    if let Some(parent_handle) = &span.parent_handle {
12119        properties.insert("parent_handle".to_string(), parent_handle.clone());
12120    }
12121    if !span.child_handles.is_empty() {
12122        properties.insert("child_handles".to_string(), span.child_handles.join(","));
12123    }
12124    if let Some(parent_module) = &symbol.parent_module {
12125        properties.insert("parent_module".to_string(), parent_module.clone());
12126    }
12127    if let Some(markdown) = &span.markdown {
12128        properties.insert(
12129            "markdown_block_kind".to_string(),
12130            markdown_ast_block_kind(&symbol.kind),
12131        );
12132        if let Some(heading_level) = markdown.heading_level {
12133            properties.insert("heading_level".to_string(), heading_level.to_string());
12134        }
12135        if !markdown.section_path.is_empty() {
12136            properties.insert(
12137                "section_path".to_string(),
12138                markdown.section_path.join(" > "),
12139            );
12140        }
12141        if let Some(section_handle) = &markdown.section_handle {
12142            properties.insert("section_handle".to_string(), section_handle.clone());
12143        }
12144        if let Some(list_depth) = markdown.list_depth {
12145            properties.insert("list_depth".to_string(), list_depth.to_string());
12146        }
12147        if let Some(fence_language) = &markdown.fence_language {
12148            properties.insert("fence_language".to_string(), fence_language.clone());
12149        }
12150    }
12151
12152    let line = i64::try_from(span.start_line).unwrap_or(i64::MAX);
12153    let node = TraversalNode {
12154        handle: span.handle.clone(),
12155        kind: "ast_span".to_string(),
12156        label: symbol.name.clone(),
12157        ref_id: Some(symbol.name.clone()),
12158        path: Some(file.clone()),
12159        line: Some(line),
12160        detail: Some(format!("{} {} AST span", symbol.language, symbol.kind)),
12161        properties,
12162        expand: traversal_ast_span_expand_command(root, &file, symbol, &span),
12163    };
12164    let entry = TraversalAstSpanIndexEntry {
12165        handle: span.handle,
12166        symbol_handle: String::new(),
12167        file_handle: None,
12168        file,
12169        name: symbol.name.clone(),
12170        kind: symbol.kind.clone(),
12171        language: symbol.language.clone(),
12172        node_kind: span.node_kind,
12173        start_byte: span.start_byte,
12174        end_byte: span.end_byte,
12175        parent_module: symbol.parent_module.clone(),
12176        markdown: span.markdown,
12177    };
12178    Some((node, entry))
12179}
12180
12181fn traversal_unresolved_symbol_node(root: &Path, name: &str) -> TraversalNode {
12182    let handle = stable_handle("gsym", &format!("symbol:{name}"));
12183    TraversalNode {
12184        handle: handle.clone(),
12185        kind: "symbol".to_string(),
12186        label: name.to_string(),
12187        ref_id: Some(name.to_string()),
12188        path: None,
12189        line: None,
12190        detail: Some("unresolved call target".to_string()),
12191        properties: BTreeMap::new(),
12192        expand: traversal_expand_command(root, &handle),
12193    }
12194}
12195
12196fn traversal_route_node(root: &Path, route: &index::StoredRoute) -> TraversalNode {
12197    let file = relativize(&route.file, root);
12198    let method = route.method.as_deref().unwrap_or("any");
12199    let key = format!(
12200        "route:{file}:{}:{}:{}",
12201        route.line, method, route.route_path
12202    );
12203    let handle = stable_handle("grte", &key);
12204    TraversalNode {
12205        handle: handle.clone(),
12206        kind: "route".to_string(),
12207        label: format!("{} {}", method.to_uppercase(), route.route_path),
12208        ref_id: Some(route.route_path.clone()),
12209        path: Some(file),
12210        line: Some(route.line),
12211        detail: Some(format!(
12212            "{} route handled by {}",
12213            route.framework, route.handler_name
12214        )),
12215        properties: BTreeMap::new(),
12216        expand: traversal_expand_command(root, &handle),
12217    }
12218}
12219
12220fn traversal_cargo_workspace_node(
12221    root: &Path,
12222    workspace: &multiplicity::CargoWorkspaceInfo,
12223) -> TraversalNode {
12224    let manifest = relativize_pathbuf(&workspace.manifest_path, root)
12225        .to_string_lossy()
12226        .replace('\\', "/");
12227    let workspace_root = relativize_pathbuf(&workspace.workspace_root, root)
12228        .to_string_lossy()
12229        .replace('\\', "/");
12230    let handle = stable_handle("gcwk", &format!("cargo-workspace:{manifest}"));
12231    let mut properties = BTreeMap::new();
12232    properties.insert("layer".to_string(), "cargo_workspace".to_string());
12233    properties.insert("workspace_root".to_string(), workspace_root.clone());
12234    properties.insert("members".to_string(), workspace.members.join(","));
12235    properties.insert(
12236        "default_members".to_string(),
12237        workspace.default_members.join(","),
12238    );
12239    TraversalNode {
12240        handle: handle.clone(),
12241        kind: "cargo_workspace".to_string(),
12242        label: if workspace_root.is_empty() {
12243            "root cargo workspace".to_string()
12244        } else {
12245            workspace_root
12246        },
12247        ref_id: Some(workspace.id.clone()),
12248        path: Some(manifest),
12249        line: None,
12250        detail: Some("Cargo workspace manifest".to_string()),
12251        properties,
12252        expand: traversal_expand_command(root, &handle),
12253    }
12254}
12255
12256fn traversal_cargo_package_node(
12257    root: &Path,
12258    package: &multiplicity::CargoPackageInfo,
12259) -> TraversalNode {
12260    let manifest = relativize_pathbuf(&package.manifest_path, root)
12261        .to_string_lossy()
12262        .replace('\\', "/");
12263    let package_root = relativize_pathbuf(&package.package_root, root)
12264        .to_string_lossy()
12265        .replace('\\', "/");
12266    let workspace_root = relativize_pathbuf(&package.workspace_root, root)
12267        .to_string_lossy()
12268        .replace('\\', "/");
12269    let handle = stable_handle(
12270        "gcpk",
12271        &format!("cargo-package:{manifest}:{}", package.name),
12272    );
12273    let mut properties = BTreeMap::new();
12274    properties.insert("layer".to_string(), "cargo_package".to_string());
12275    properties.insert("package_name".to_string(), package.name.clone());
12276    properties.insert(
12277        "normalized_name".to_string(),
12278        package.normalized_name.clone(),
12279    );
12280    properties.insert("package_root".to_string(), package_root.clone());
12281    properties.insert("workspace_root".to_string(), workspace_root);
12282    properties.insert("features".to_string(), package.features.join(","));
12283    properties.insert("targets".to_string(), package.targets.join(","));
12284    properties.insert(
12285        "dependencies".to_string(),
12286        package
12287            .dependencies
12288            .iter()
12289            .map(|dependency| format!("{}:{}", dependency.kind, dependency.name))
12290            .collect::<Vec<_>>()
12291            .join(","),
12292    );
12293    TraversalNode {
12294        handle: handle.clone(),
12295        kind: "cargo_package".to_string(),
12296        label: package.name.clone(),
12297        ref_id: Some(package.scope_id.clone()),
12298        path: Some(manifest),
12299        line: None,
12300        detail: Some(format!(
12301            "Cargo package in {}",
12302            if package_root.is_empty() {
12303                "."
12304            } else {
12305                package_root.as_str()
12306            }
12307        )),
12308        properties,
12309        expand: traversal_expand_command(root, &handle),
12310    }
12311}
12312
12313fn traversal_session_node(
12314    root: &Path,
12315    markdown_path: &Path,
12316    session_id: Option<&str>,
12317) -> TraversalNode {
12318    let display = relativize_pathbuf(markdown_path, root)
12319        .to_string_lossy()
12320        .replace('\\', "/");
12321    let handle = stable_handle("gses", &format!("session:{display}"));
12322    TraversalNode {
12323        handle: handle.clone(),
12324        kind: "session".to_string(),
12325        label: session_id.unwrap_or(&display).to_string(),
12326        ref_id: session_id.map(str::to_string),
12327        path: Some(display),
12328        line: None,
12329        detail: Some("agent-doc session artifact".to_string()),
12330        properties: BTreeMap::new(),
12331        expand: traversal_expand_command(root, &handle),
12332    }
12333}
12334
12335fn traversal_backlog_node(
12336    root: &Path,
12337    markdown_path: &Path,
12338    id: &str,
12339    text: &str,
12340    line: i64,
12341) -> TraversalNode {
12342    let display = relativize_pathbuf(markdown_path, root)
12343        .to_string_lossy()
12344        .replace('\\', "/");
12345    let handle = stable_handle("gbak", &format!("backlog:{display}:#{id}"));
12346    TraversalNode {
12347        handle: handle.clone(),
12348        kind: "backlog".to_string(),
12349        label: format!("#{id}"),
12350        ref_id: Some(id.to_string()),
12351        path: Some(display),
12352        line: Some(line),
12353        detail: Some(text.to_string()),
12354        properties: BTreeMap::new(),
12355        expand: traversal_expand_command(root, &handle),
12356    }
12357}
12358
12359fn traversal_job_packet_node(
12360    root: &Path,
12361    markdown_path: &Path,
12362    label: &str,
12363    ref_id: Option<&str>,
12364    detail: &str,
12365    line: i64,
12366) -> TraversalNode {
12367    let display = relativize_pathbuf(markdown_path, root)
12368        .to_string_lossy()
12369        .replace('\\', "/");
12370    let handle = stable_handle("gjob", &format!("job:{display}:{line}:{label}"));
12371    TraversalNode {
12372        handle: handle.clone(),
12373        kind: "job_packet".to_string(),
12374        label: label.to_string(),
12375        ref_id: ref_id.map(str::to_string),
12376        path: Some(display),
12377        line: Some(line),
12378        detail: Some(detail.to_string()),
12379        properties: BTreeMap::new(),
12380        expand: traversal_expand_command(root, &handle),
12381    }
12382}
12383
/// One worker outcome extracted from a single line of session Markdown.
#[derive(Debug, Clone)]
struct ParsedWorkerResult {
    /// Reference id the result applies to.
    id: String,
    /// Outcome keyword for the result (for example `completed` or `blocked`).
    status: String,
    /// Paths mentioned on the line that were matched to known files.
    touched_files: Vec<String>,
    /// Code spans from the line that were recognised as test commands.
    tests: Vec<String>,
    /// Other reference ids on the line that are follow-ups, not results.
    follow_up_ids: Vec<String>,
}
12392
12393fn traversal_worker_result_node(
12394    root: &Path,
12395    markdown_path: &Path,
12396    parsed: &ParsedWorkerResult,
12397    line_text: &str,
12398    line: i64,
12399) -> TraversalNode {
12400    let display = relativize_pathbuf(markdown_path, root)
12401        .to_string_lossy()
12402        .replace('\\', "/");
12403    let handle = stable_handle(
12404        "wres",
12405        &format!(
12406            "worker-result:{display}:{}:{}:{}",
12407            parsed.id, parsed.status, line
12408        ),
12409    );
12410    let mut properties = BTreeMap::new();
12411    properties.insert("status".to_string(), parsed.status.clone());
12412    if !parsed.touched_files.is_empty() {
12413        properties.insert("touched_files".to_string(), parsed.touched_files.join(","));
12414    }
12415    if !parsed.tests.is_empty() {
12416        properties.insert("expected_tests".to_string(), parsed.tests.join(" && "));
12417    }
12418    if !parsed.follow_up_ids.is_empty() {
12419        properties.insert("follow_up_ids".to_string(), parsed.follow_up_ids.join(","));
12420    }
12421    TraversalNode {
12422        handle: handle.clone(),
12423        kind: "worker_result".to_string(),
12424        label: format!("{} #{}", parsed.status, parsed.id),
12425        ref_id: Some(parsed.id.clone()),
12426        path: Some(display),
12427        line: Some(line),
12428        detail: Some(line_text.trim().to_string()),
12429        properties,
12430        expand: traversal_expand_command(root, &handle),
12431    }
12432}
12433
/// Splits `input` into a set of lowercase search tokens.
///
/// Every character that is not an ASCII letter or digit (including `_` and
/// `-`) acts as a separator. Tokens shorter than three bytes are discarded.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for word in input.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if word.len() >= 3 {
            tokens.insert(word.to_ascii_lowercase());
        }
    }
    tokens
}
12443
12444fn traversal_ast_span_contains(
12445    parent: &TraversalAstSpanIndexEntry,
12446    child: &TraversalAstSpanIndexEntry,
12447) -> bool {
12448    parent.handle != child.handle
12449        && parent.file == child.file
12450        && parent.start_byte <= child.start_byte
12451        && parent.end_byte >= child.end_byte
12452}
12453
12454fn traversal_ast_parent_handle<'a>(
12455    entry: &TraversalAstSpanIndexEntry,
12456    entries: &'a [TraversalAstSpanIndexEntry],
12457) -> Option<&'a str> {
12458    entries
12459        .iter()
12460        .filter(|candidate| traversal_ast_span_contains(candidate, entry))
12461        .min_by_key(|candidate| {
12462            (
12463                candidate.end_byte.saturating_sub(candidate.start_byte),
12464                candidate.start_byte,
12465                candidate.end_byte,
12466                candidate.kind.as_str(),
12467                candidate.name.as_str(),
12468                candidate.node_kind.as_str(),
12469            )
12470        })
12471        .map(|candidate| candidate.handle.as_str())
12472}
12473
12474fn traversal_ast_enclosing_module_handle<'a>(
12475    entry: &TraversalAstSpanIndexEntry,
12476    entries_by_handle: &'a BTreeMap<String, TraversalAstSpanIndexEntry>,
12477    parent_by_handle: &BTreeMap<String, String>,
12478) -> Option<&'a str> {
12479    let mut current = parent_by_handle.get(&entry.handle);
12480    while let Some(handle) = current {
12481        let Some(parent) = entries_by_handle.get(handle) else {
12482            break;
12483        };
12484        if matches!(parent.kind.as_str(), "module" | "mod")
12485            || entry
12486                .parent_module
12487                .as_deref()
12488                .is_some_and(|module| module == parent.name)
12489        {
12490            return Some(parent.handle.as_str());
12491        }
12492        current = parent_by_handle.get(&parent.handle);
12493    }
12494    None
12495}
12496
12497fn link_ast_navigation_edges(
12498    graph: &mut TraversalGraphBuild,
12499    entries: &[TraversalAstSpanIndexEntry],
12500) {
12501    let mut entries_by_file = BTreeMap::<String, Vec<TraversalAstSpanIndexEntry>>::new();
12502    let entries_by_handle = entries
12503        .iter()
12504        .map(|entry| (entry.handle.clone(), entry.clone()))
12505        .collect::<BTreeMap<_, _>>();
12506    let mut parent_by_handle = BTreeMap::<String, String>::new();
12507    let mut children_by_parent = BTreeMap::<Option<String>, Vec<TraversalAstSpanIndexEntry>>::new();
12508
12509    for entry in entries {
12510        entries_by_file
12511            .entry(entry.file.clone())
12512            .or_default()
12513            .push(entry.clone());
12514    }
12515
12516    for file_entries in entries_by_file.values() {
12517        for entry in file_entries {
12518            let parent = traversal_ast_parent_handle(entry, file_entries).map(str::to_string);
12519            if let Some(parent) = &parent {
12520                parent_by_handle.insert(entry.handle.clone(), parent.clone());
12521            }
12522            let sibling_key = parent.clone().or_else(|| entry.file_handle.clone());
12523            children_by_parent
12524                .entry(sibling_key)
12525                .or_default()
12526                .push(entry.clone());
12527        }
12528    }
12529
12530    for entry in entries {
12531        let parent = parent_by_handle.get(&entry.handle);
12532        if let Some(parent) = parent {
12533            graph.add_edge(
12534                parent,
12535                &entry.handle,
12536                "contains",
12537                Some("AST parent contains child span".to_string()),
12538                1,
12539            );
12540            graph.add_edge(
12541                parent,
12542                &entry.handle,
12543                "child",
12544                Some("AST child span".to_string()),
12545                1,
12546            );
12547            graph.add_edge(
12548                &entry.handle,
12549                parent,
12550                "parent",
12551                Some("AST parent span".to_string()),
12552                1,
12553            );
12554        } else if let Some(file_handle) = &entry.file_handle {
12555            graph.add_edge(
12556                file_handle,
12557                &entry.handle,
12558                "contains",
12559                Some("file contains top-level AST span".to_string()),
12560                1,
12561            );
12562        }
12563
12564        if let Some(module_handle) =
12565            traversal_ast_enclosing_module_handle(entry, &entries_by_handle, &parent_by_handle)
12566        {
12567            graph.add_edge(
12568                &entry.handle,
12569                module_handle,
12570                "enclosing_module",
12571                Some("nearest enclosing module AST span".to_string()),
12572                1,
12573            );
12574        }
12575
12576        if entry.language == "markdown"
12577            && let Some(markdown) = &entry.markdown
12578            && let Some(section_handle) = &markdown.section_handle
12579            && section_handle != &entry.handle
12580        {
12581            graph.add_edge(
12582                section_handle,
12583                &entry.handle,
12584                "contains_markdown_block",
12585                Some("Markdown section contains block".to_string()),
12586                1,
12587            );
12588            graph.add_edge(
12589                &entry.handle,
12590                section_handle,
12591                "enclosing_section",
12592                Some("Markdown enclosing section".to_string()),
12593                1,
12594            );
12595        }
12596    }
12597
12598    for siblings in children_by_parent.values_mut() {
12599        siblings.sort_by(|left, right| {
12600            left.start_byte
12601                .cmp(&right.start_byte)
12602                .then(left.end_byte.cmp(&right.end_byte))
12603                .then(left.kind.cmp(&right.kind))
12604                .then(left.name.cmp(&right.name))
12605                .then(left.node_kind.cmp(&right.node_kind))
12606                .then(left.handle.cmp(&right.handle))
12607        });
12608        for pair in siblings.windows(2) {
12609            let previous = &pair[0];
12610            let next = &pair[1];
12611            graph.add_edge(
12612                &previous.handle,
12613                &next.handle,
12614                "next_sibling",
12615                Some("next AST sibling span".to_string()),
12616                1,
12617            );
12618            graph.add_edge(
12619                &next.handle,
12620                &previous.handle,
12621                "previous_sibling",
12622                Some("previous AST sibling span".to_string()),
12623                1,
12624            );
12625        }
12626    }
12627}
12628
12629fn traversal_markdown_embedded_symbol_node(
12630    root: &Path,
12631    entry: &TraversalAstSpanIndexEntry,
12632    markdown: &MarkdownSpanMetadata,
12633    embedded: &MarkdownEmbeddedSymbol,
12634) -> TraversalNode {
12635    let mut properties = BTreeMap::new();
12636    properties.insert("layer".to_string(), "embedded_code".to_string());
12637    properties.insert("embedded".to_string(), "true".to_string());
12638    properties.insert("language".to_string(), embedded.language.clone());
12639    properties.insert("symbol_kind".to_string(), embedded.kind.clone());
12640    properties.insert("node_kind".to_string(), embedded.node_kind.clone());
12641    properties.insert("start_byte".to_string(), embedded.start_byte.to_string());
12642    properties.insert("end_byte".to_string(), embedded.end_byte.to_string());
12643    properties.insert("end_line".to_string(), embedded.end_line.to_string());
12644    properties.insert("markdown_block_handle".to_string(), entry.handle.clone());
12645    properties.insert(
12646        "markdown_block_kind".to_string(),
12647        markdown_ast_block_kind(&entry.kind),
12648    );
12649    if let Some(body_start_byte) = embedded.body_start_byte {
12650        properties.insert("body_start_byte".to_string(), body_start_byte.to_string());
12651    }
12652    if let Some(body_end_byte) = embedded.body_end_byte {
12653        properties.insert("body_end_byte".to_string(), body_end_byte.to_string());
12654    }
12655    if let Some(body_start_line) = embedded.body_start_line {
12656        properties.insert("body_start_line".to_string(), body_start_line.to_string());
12657    }
12658    if let Some(body_end_line) = embedded.body_end_line {
12659        properties.insert("body_end_line".to_string(), body_end_line.to_string());
12660    }
12661    if let Some(fence_language) = &markdown.fence_language {
12662        properties.insert("fence_language".to_string(), fence_language.clone());
12663    }
12664    if !markdown.section_path.is_empty() {
12665        properties.insert(
12666            "section_path".to_string(),
12667            markdown.section_path.join(" > "),
12668        );
12669    }
12670    if let Some(section_handle) = &markdown.section_handle {
12671        properties.insert("section_handle".to_string(), section_handle.clone());
12672    }
12673    let line_count = embedded
12674        .end_line
12675        .saturating_sub(embedded.start_line)
12676        .saturating_add(1)
12677        .max(1);
12678    TraversalNode {
12679        handle: embedded.handle.clone(),
12680        kind: "ast_span".to_string(),
12681        label: embedded.name.clone(),
12682        ref_id: Some(embedded.name.clone()),
12683        path: Some(entry.file.clone()),
12684        line: Some(i64::try_from(embedded.start_line).unwrap_or(i64::MAX)),
12685        detail: Some(format!(
12686            "{} {} embedded in Markdown fence",
12687            embedded.language, embedded.kind
12688        )),
12689        properties,
12690        expand: source_read_command(root, &entry.file, embedded.start_line, line_count),
12691    }
12692}
12693
12694fn link_markdown_embedded_code_edges(
12695    graph: &mut TraversalGraphBuild,
12696    root: &Path,
12697    entries: &[TraversalAstSpanIndexEntry],
12698) {
12699    for entry in entries {
12700        let Some(markdown) = &entry.markdown else {
12701            continue;
12702        };
12703        for embedded in &markdown.embedded_symbols {
12704            let node = traversal_markdown_embedded_symbol_node(root, entry, markdown, embedded);
12705            graph.add_node(node);
12706            graph.add_edge(
12707                &entry.handle,
12708                &embedded.handle,
12709                "contains",
12710                Some("Markdown fence contains embedded AST symbol".to_string()),
12711                1,
12712            );
12713            graph.add_edge(
12714                &entry.handle,
12715                &embedded.handle,
12716                "child",
12717                Some("embedded code symbol".to_string()),
12718                1,
12719            );
12720            graph.add_edge(
12721                &entry.handle,
12722                &embedded.handle,
12723                "contains_embedded_symbol",
12724                Some("Markdown fence contains embedded code symbol".to_string()),
12725                1,
12726            );
12727            graph.add_edge(
12728                &embedded.handle,
12729                &entry.handle,
12730                "parent",
12731                Some("Markdown fence parent span".to_string()),
12732                1,
12733            );
12734            graph.add_edge(
12735                &embedded.handle,
12736                &entry.handle,
12737                "embedded_in_fence",
12738                Some("embedded code symbol belongs to Markdown fence".to_string()),
12739                1,
12740            );
12741            if let Some(section_handle) = &markdown.section_handle
12742                && section_handle != &entry.handle
12743            {
12744                graph.add_edge(
12745                    section_handle,
12746                    &embedded.handle,
12747                    "contains_embedded_code",
12748                    Some("Markdown section contains embedded code symbol".to_string()),
12749                    1,
12750                );
12751                graph.add_edge(
12752                    &embedded.handle,
12753                    section_handle,
12754                    "enclosing_section",
12755                    Some("Markdown enclosing section".to_string()),
12756                    1,
12757                );
12758            }
12759        }
12760    }
12761}
12762
12763fn traversal_node_tokens(node: &TraversalNode) -> BTreeSet<String> {
12764    let mut tokens = traversal_tokens(&node.label);
12765    if let Some(ref_id) = &node.ref_id {
12766        tokens.extend(traversal_tokens(ref_id));
12767    }
12768    if let Some(path) = &node.path {
12769        tokens.extend(traversal_tokens(path));
12770    }
12771    if let Some(detail) = &node.detail {
12772        tokens.extend(traversal_tokens(detail));
12773    }
12774    tokens
12775}
12776
/// Extracts the value of the first `agent_doc_session:` line in `content`.
///
/// Lines are trimmed before matching, and a line whose value is empty after
/// trimming is skipped in favour of later lines.
fn parse_agent_doc_session_id(content: &str) -> Option<String> {
    for line in content.lines() {
        if let Some(rest) = line.trim().strip_prefix("agent_doc_session:") {
            let value = rest.trim();
            if !value.is_empty() {
                return Some(value.to_string());
            }
        }
    }
    None
}
12787
/// Parses a backlog list item into `(id, text)`.
///
/// The trimmed line must start with `- [` and contain a `[#id]` marker. The
/// id is the trimmed text between `[#` and the next `]`; the text is
/// everything after that `]`, trimmed. Returns `None` when the line is not a
/// list item, has no marker, or the id is empty.
fn parse_backlog_line(line: &str) -> Option<(String, String)> {
    let item = line.trim();
    if !item.starts_with("- [") {
        return None;
    }
    let (_, after_marker) = item.split_once("[#")?;
    let (raw_id, raw_text) = after_marker.split_once(']')?;
    let id = raw_id.trim();
    if id.is_empty() {
        None
    } else {
        Some((id.to_string(), raw_text.trim().to_string()))
    }
}
12804
/// Returns the argument of a `dispatch <value>` or `preset <value>` line.
///
/// The line is trimmed first; the value is whatever follows the keyword,
/// trimmed. Returns `None` when neither keyword leads the line or the value
/// is empty.
fn parse_queue_dispatch_line(line: &str) -> Option<String> {
    let command = line.trim();
    for keyword in ["dispatch ", "preset "] {
        if let Some(rest) = command.strip_prefix(keyword) {
            let value = rest.trim();
            if !value.is_empty() {
                return Some(value.to_string());
            }
        }
    }
    None
}
12815
/// Extracts the id from a queue line of the form `- do [#id] …`.
///
/// Returns `None` when the trimmed line does not start with `- do [#`, has
/// no closing `]`, or the id is empty after trimming.
fn parse_queue_do_line(line: &str) -> Option<String> {
    let after_marker = line.trim().strip_prefix("- do [#")?;
    let (raw_id, _) = after_marker.split_once(']')?;
    let id = raw_id.trim();
    if id.is_empty() {
        None
    } else {
        Some(id.to_string())
    }
}
12823
/// Returns the trimmed, non-empty text of each backtick-delimited span.
///
/// The input is split on every backtick and each odd-numbered segment is
/// treated as span content, so a trailing segment after an unmatched
/// backtick is also reported.
fn markdown_code_spans(input: &str) -> Vec<String> {
    let mut spans = Vec::new();
    let mut inside_span = false;
    for segment in input.split('`') {
        if inside_span {
            let text = segment.trim();
            if !text.is_empty() {
                spans.push(text.to_string());
            }
        }
        inside_span = !inside_span;
    }
    spans
}
12833
/// Records `entry_index` under every token of `tokens` in the inverted index.
fn push_traversal_token_index(
    index: &mut HashMap<String, Vec<usize>>,
    tokens: &BTreeSet<String>,
    entry_index: usize,
) {
    for token in tokens {
        match index.get_mut(token) {
            Some(postings) => postings.push(entry_index),
            None => {
                index.insert(token.clone(), vec![entry_index]);
            }
        }
    }
}
12843
12844impl<'a> TraversalCodeLookup<'a> {
12845    fn new(
12846        symbols: &'a [TraversalSymbolIndexEntry],
12847        files: &'a [TraversalFileIndexEntry],
12848        routes: &'a [TraversalRouteIndexEntry],
12849        multiplicities: &'a [TraversalMultiplicityIndexEntry],
12850    ) -> Self {
12851        let mut symbol_index = HashMap::new();
12852        for (idx, entry) in symbols.iter().enumerate() {
12853            push_traversal_token_index(&mut symbol_index, &entry.tokens, idx);
12854        }
12855        let mut file_index = HashMap::new();
12856        let mut file_path_index = HashMap::new();
12857        for (idx, entry) in files.iter().enumerate() {
12858            push_traversal_token_index(&mut file_index, &entry.tokens, idx);
12859            if let Some(path) = entry.node.path.as_ref() {
12860                file_path_index.insert(path.clone(), path.clone());
12861            }
12862        }
12863        let mut route_index = HashMap::new();
12864        for (idx, entry) in routes.iter().enumerate() {
12865            push_traversal_token_index(&mut route_index, &entry.tokens, idx);
12866        }
12867        let mut multiplicity_index = HashMap::new();
12868        for (idx, entry) in multiplicities.iter().enumerate() {
12869            push_traversal_token_index(&mut multiplicity_index, &entry.tokens, idx);
12870        }
12871        Self {
12872            symbols,
12873            files,
12874            routes,
12875            multiplicities,
12876            symbol_index,
12877            file_index,
12878            route_index,
12879            multiplicity_index,
12880            file_path_index,
12881        }
12882    }
12883
12884    fn touched_files_for_line(&self, line: &str) -> Vec<String> {
12885        let mut touched_files = BTreeSet::new();
12886        for candidate in markdown_code_spans(line)
12887            .into_iter()
12888            .chain(line.split_whitespace().map(str::to_string))
12889        {
12890            for path in traversal_path_candidates(&candidate) {
12891                if let Some(file) = self.file_path_index.get(&path) {
12892                    touched_files.insert(file.clone());
12893                }
12894            }
12895        }
12896        touched_files.into_iter().collect()
12897    }
12898}
12899
/// Expands a word or code span into the path spellings worth looking up.
///
/// Surrounding quotes, brackets, and sentence punctuation are stripped first.
/// A trailing `.` is treated as punctuation, but a leading `.` is kept so
/// dot-directories (`.github/…`) and `./relative` paths survive. Returns an
/// empty list when nothing is left.
///
/// The candidates are, in order:
/// 1. the stripped text itself;
/// 2. the text with a trailing `:line` removed, and then with a further
///    `:line` removed, so both `path:line` and `path:line:col` yield `path`;
/// 3. each of the above without a leading `./`.
fn traversal_path_candidates(candidate: &str) -> Vec<String> {
    // Characters that wrap or punctuate a path mention on either side.
    const WRAPPERS: &[char] = &[
        '`', '"', '\'', ',', ';', '!', '?', '(', ')', '[', ']', '{', '}',
    ];
    let trimmed = candidate
        .trim_start_matches(WRAPPERS)
        .trim_end_matches(|ch: char| ch == '.' || WRAPPERS.contains(&ch));
    if trimmed.is_empty() {
        return Vec::new();
    }

    let mut candidates = vec![trimmed.to_string()];

    // Peel at most two numeric suffixes: `:col` and then `:line`.
    let mut current = trimmed;
    for _ in 0..2 {
        let Some((path, suffix)) = current.rsplit_once(':') else {
            break;
        };
        if path.is_empty() || !suffix.chars().all(|ch| ch.is_ascii_digit()) {
            break;
        }
        candidates.push(path.to_string());
        current = path;
    }

    // Offer `./`-less spellings as well, for matching against relative paths.
    let without_dot_slash: Vec<String> = candidates
        .iter()
        .filter_map(|path| path.strip_prefix("./"))
        .filter(|path| !path.is_empty())
        .map(str::to_string)
        .collect();
    candidates.extend(without_dot_slash);
    candidates
}
12919
12920fn parse_worker_result_line(
12921    line: &str,
12922    lookup: &TraversalCodeLookup<'_>,
12923) -> Vec<ParsedWorkerResult> {
12924    if line.trim_start().starts_with("- [") {
12925        return Vec::new();
12926    }
12927    let lower = line.to_ascii_lowercase();
12928    let status =
12929        if lower.contains("completed") || lower.contains("code-complete") || lower.contains("done")
12930        {
12931            "completed"
12932        } else if lower.contains("blocked") || lower.contains("externally blocked") {
12933            "blocked"
12934        } else {
12935            return Vec::new();
12936        };
12937    let result_prefix_end = ["follow-up", "follow up", "next:"]
12938        .iter()
12939        .filter_map(|marker| lower.find(marker))
12940        .min()
12941        .unwrap_or(line.len());
12942    let ids = extract_conflict_target_refs(&line[..result_prefix_end]);
12943    if ids.is_empty() {
12944        return Vec::new();
12945    }
12946    let result_ids = ids.iter().cloned().collect::<BTreeSet<_>>();
12947    let all_ids = extract_conflict_target_refs(line);
12948
12949    let touched_files = lookup.touched_files_for_line(line);
12950    let tests = markdown_code_spans(line)
12951        .into_iter()
12952        .filter(|span| span.to_ascii_lowercase().contains("test"))
12953        .collect::<Vec<_>>();
12954
12955    ids.iter()
12956        .map(|id| ParsedWorkerResult {
12957            id: id.clone(),
12958            status: status.to_string(),
12959            touched_files: touched_files.clone(),
12960            tests: tests.clone(),
12961            follow_up_ids: all_ids
12962                .iter()
12963                .filter(|other| *other != id && !result_ids.contains(*other))
12964                .cloned()
12965                .collect(),
12966        })
12967        .collect()
12968}
12969
/// Resolves `path_hint` to a Markdown file, if it names one.
///
/// A relative hint is resolved against `root`. The result is `Some` only
/// when the resolved path has the `md` extension and is an existing file.
fn hinted_markdown_file(root: &Path, path_hint: &Path) -> Option<PathBuf> {
    let candidate = if path_hint.is_absolute() {
        PathBuf::from(path_hint)
    } else {
        root.join(path_hint)
    };
    let has_md_extension = matches!(
        candidate.extension().and_then(|ext| ext.to_str()),
        Some("md")
    );
    // The extension is checked first so the filesystem is only consulted for
    // plausible candidates.
    if has_md_extension && candidate.is_file() {
        Some(candidate)
    } else {
        None
    }
}
12981
12982fn traversal_markdown_content_looks_like_session(content: &str) -> bool {
12983    parse_agent_doc_session_id(content).is_some()
12984        || content.contains("<!-- agent:exchange")
12985        || content.contains("<!-- agent:backlog")
12986        || content.contains("## Backlog")
12987}
12988
12989fn traversal_path_is_session_markdown(root: &Path, source_root: &Path, path: &Path) -> bool {
12990    let candidate = if path.is_absolute() {
12991        path.to_path_buf()
12992    } else {
12993        source_root.join(path)
12994    };
12995    if !candidate.starts_with(source_root) && !candidate.starts_with(root) {
12996        return false;
12997    }
12998    if !matches!(
12999        candidate.extension().and_then(|ext| ext.to_str()),
13000        Some("md" | "mdx")
13001    ) {
13002        return false;
13003    }
13004    fs::read_to_string(&candidate)
13005        .map(|content| traversal_markdown_content_looks_like_session(&content))
13006        .unwrap_or(false)
13007}
13008
13009fn markdown_files_for_traversal(root: &Path, path_hint: &Path) -> Result<Vec<PathBuf>> {
13010    if let Some(hinted_path) = hinted_markdown_file(root, path_hint) {
13011        return Ok(vec![hinted_path]);
13012    }
13013    let mut files = Vec::new();
13014    let walker = ignore::WalkBuilder::new(root)
13015        .hidden(true)
13016        .git_ignore(true)
13017        .git_global(true)
13018        .git_exclude(true)
13019        .build();
13020    for result in walker {
13021        let entry =
13022            result.with_context(|| format!("walking markdown files under {}", root.display()))?;
13023        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
13024            continue;
13025        }
13026        if traversal_path_is_generated_artifact(root, root, entry.path()) {
13027            continue;
13028        }
13029        if entry.path().extension().and_then(|ext| ext.to_str()) == Some("md") {
13030            files.push(entry.path().to_path_buf());
13031        }
13032    }
13033    files.sort();
13034    Ok(files)
13035}
13036
/// Renders `path` for use in a watermark: relative to `root` when it lies
/// under `root`, unchanged otherwise, always with forward slashes.
fn traversal_watermark_path(root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().replace('\\', "/")
}
13043
13044fn push_traversal_metadata_watermark_part(
13045    root: &Path,
13046    path: &Path,
13047    label: &str,
13048    parts: &mut Vec<String>,
13049) {
13050    let display = traversal_watermark_path(root, path);
13051    match fs::metadata(path) {
13052        Ok(metadata) => {
13053            let (secs, nanos) = metadata
13054                .modified()
13055                .ok()
13056                .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
13057                .map(|duration| (duration.as_secs(), duration.subsec_nanos()))
13058                .unwrap_or((0, 0));
13059            parts.push(format!(
13060                "{label}:{display}:len={}:mtime={secs}.{nanos}",
13061                metadata.len()
13062            ));
13063        }
13064        Err(_) => parts.push(format!("{label}:{display}:missing")),
13065    }
13066}
13067
13068#[derive(Serialize)]
13069struct TraversalSummaryWatermarkRow<'a> {
13070    symbol_name: &'a str,
13071    file_path: &'a str,
13072    entities: &'a Option<Vec<summarize::Entity>>,
13073    relationships: &'a Option<Vec<summarize::Relationship>>,
13074    concept_labels: &'a Option<Vec<String>>,
13075}
13076
13077fn push_traversal_summaries_watermark_part(root: &Path, parts: &mut Vec<String>) -> Result<()> {
13078    let summaries_db = root.join(".tsift/summaries.db");
13079    if !summaries_db.exists() {
13080        parts.push("summaries_db:absent".to_string());
13081        return Ok(());
13082    }
13083
13084    match summarize::SummaryDb::open_read_only_resilient(&summaries_db)
13085        .and_then(|summary_db| summary_db.all())
13086    {
13087        Ok(summaries) => {
13088            let rows = summaries
13089                .iter()
13090                .map(|summary| TraversalSummaryWatermarkRow {
13091                    symbol_name: &summary.symbol_name,
13092                    file_path: &summary.file_path,
13093                    entities: &summary.entities,
13094                    relationships: &summary.relationships,
13095                    concept_labels: &summary.concept_labels,
13096                })
13097                .collect::<Vec<_>>();
13098            parts.push(format!(
13099                "summaries_db:rows={}:semantic_hash={}",
13100                rows.len(),
13101                content_hash(&rows)?
13102            ));
13103        }
13104        Err(_) => {
13105            push_traversal_metadata_watermark_part(
13106                root,
13107                &summaries_db,
13108                "summaries_db_unreadable",
13109                parts,
13110            );
13111        }
13112    }
13113    Ok(())
13114}
13115
/// Test-only shim that forwards to
/// `resolution::relative_path_is_generated_artifact`.
#[cfg(test)]
fn traversal_relative_path_is_generated_artifact(relative: &str) -> bool {
    resolution::relative_path_is_generated_artifact(relative)
}
13120
13121fn traversal_path_is_generated_artifact(root: &Path, source_root: &Path, path: &Path) -> bool {
13122    resolution::path_is_generated_artifact(root, source_root, path)
13123}
13124
13125fn traversal_index_snapshot_part_is_generated(root: &Path, source_root: &Path, part: &str) -> bool {
13126    resolution::index_snapshot_part_is_generated(root, source_root, part)
13127}
13128
13129pub(crate) fn traversal_source_watermark(
13130    root: &Path,
13131    path_hint: &Path,
13132    scope: Option<&str>,
13133    session_only: bool,
13134) -> Result<Option<String>> {
13135    let mut parts = vec![
13136        format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
13137        format!("scope:{}", scope.unwrap_or("root")),
13138        format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
13139        format!("session_only:{session_only}"),
13140    ];
13141
13142    if !session_only || hinted_markdown_file(root, path_hint).is_none() {
13143        let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
13144            Ok(targets) => targets,
13145            Err(_) => return Ok(None),
13146        };
13147        let Some(target) = targets.into_iter().next() else {
13148            return Ok(None);
13149        };
13150        let db = match index::IndexDb::open_read_only_resilient(&target.db_path) {
13151            Ok(db) => db,
13152            Err(_) => return Ok(None),
13153        };
13154        parts.push(format!("index_label:{}", target.label));
13155        parts.push(format!(
13156            "index_scope:{}",
13157            target.scope_name.as_deref().unwrap_or("root")
13158        ));
13159        parts.push(format!(
13160            "index_source_root:{}",
13161            traversal_watermark_path(root, &target.source_root)
13162        ));
13163        let mut snapshot_rows = 0usize;
13164        for part in db.source_snapshot_parts()? {
13165            if traversal_index_snapshot_part_is_generated(root, &target.source_root, &part) {
13166                continue;
13167            }
13168            snapshot_rows += 1;
13169            parts.push(format!("index_snapshot:{part}"));
13170        }
13171        parts.push(format!("index_snapshot_rows:{snapshot_rows}"));
13172    }
13173
13174    let markdown_files = markdown_files_for_traversal(root, path_hint)?;
13175    parts.push(format!("markdown_count:{}", markdown_files.len()));
13176    for markdown_path in markdown_files {
13177        push_traversal_metadata_watermark_part(root, &markdown_path, "markdown", &mut parts);
13178    }
13179
13180    push_traversal_summaries_watermark_part(root, &mut parts)?;
13181
13182    Ok(Some(content_hash(&parts)?))
13183}
13184
13185fn ranked_symbol_matches<'a>(
13186    query_tokens: &BTreeSet<String>,
13187    entries: &'a [TraversalSymbolIndexEntry],
13188    index: &HashMap<String, Vec<usize>>,
13189) -> Vec<(usize, &'a TraversalSymbolIndexEntry)> {
13190    let mut scores = BTreeMap::<usize, usize>::new();
13191    for token in query_tokens {
13192        if let Some(indices) = index.get(token) {
13193            for idx in indices {
13194                *scores.entry(*idx).or_default() += 1;
13195            }
13196        }
13197    }
13198    let mut matches = scores
13199        .into_iter()
13200        .map(|(idx, score)| (score, &entries[idx]))
13201        .collect::<Vec<_>>();
13202    matches.sort_by(|(left_score, left), (right_score, right)| {
13203        right_score
13204            .cmp(left_score)
13205            .then_with(|| left.node.label.cmp(&right.node.label))
13206            .then_with(|| left.handle.cmp(&right.handle))
13207    });
13208    matches
13209}
13210
13211fn ranked_file_matches<'a>(
13212    query_tokens: &BTreeSet<String>,
13213    entries: &'a [TraversalFileIndexEntry],
13214    index: &HashMap<String, Vec<usize>>,
13215) -> Vec<(usize, &'a TraversalFileIndexEntry)> {
13216    let mut scores = BTreeMap::<usize, usize>::new();
13217    for token in query_tokens {
13218        if let Some(indices) = index.get(token) {
13219            for idx in indices {
13220                *scores.entry(*idx).or_default() += 1;
13221            }
13222        }
13223    }
13224    let mut matches = scores
13225        .into_iter()
13226        .map(|(idx, score)| (score, &entries[idx]))
13227        .collect::<Vec<_>>();
13228    matches.sort_by(|(left_score, left), (right_score, right)| {
13229        right_score
13230            .cmp(left_score)
13231            .then_with(|| left.node.label.cmp(&right.node.label))
13232            .then_with(|| left.handle.cmp(&right.handle))
13233    });
13234    matches
13235}
13236
13237fn ranked_route_matches<'a>(
13238    query_tokens: &BTreeSet<String>,
13239    entries: &'a [TraversalRouteIndexEntry],
13240    index: &HashMap<String, Vec<usize>>,
13241) -> Vec<(usize, &'a TraversalRouteIndexEntry)> {
13242    let mut scores = BTreeMap::<usize, usize>::new();
13243    for token in query_tokens {
13244        if let Some(indices) = index.get(token) {
13245            for idx in indices {
13246                *scores.entry(*idx).or_default() += 1;
13247            }
13248        }
13249    }
13250    let mut matches = scores
13251        .into_iter()
13252        .map(|(idx, score)| (score, &entries[idx]))
13253        .collect::<Vec<_>>();
13254    matches.sort_by(|(left_score, left), (right_score, right)| {
13255        right_score
13256            .cmp(left_score)
13257            .then_with(|| left.node.label.cmp(&right.node.label))
13258            .then_with(|| left.handle.cmp(&right.handle))
13259    });
13260    matches
13261}
13262
13263fn ranked_multiplicity_matches<'a>(
13264    query_tokens: &BTreeSet<String>,
13265    entries: &'a [TraversalMultiplicityIndexEntry],
13266    index: &HashMap<String, Vec<usize>>,
13267) -> Vec<(usize, &'a TraversalMultiplicityIndexEntry)> {
13268    let mut scores = BTreeMap::<usize, usize>::new();
13269    for token in query_tokens {
13270        if let Some(indices) = index.get(token) {
13271            for idx in indices {
13272                *scores.entry(*idx).or_default() += 1;
13273            }
13274        }
13275    }
13276    let mut matches = scores
13277        .into_iter()
13278        .map(|(idx, score)| (score, &entries[idx]))
13279        .collect::<Vec<_>>();
13280    matches.sort_by(|(left_score, left), (right_score, right)| {
13281        right_score
13282            .cmp(left_score)
13283            .then_with(|| left.node.kind.cmp(&right.node.kind))
13284            .then_with(|| left.node.label.cmp(&right.node.label))
13285            .then_with(|| left.handle.cmp(&right.handle))
13286    });
13287    matches
13288}
13289
13290fn link_backlog_to_code_nodes(
13291    graph: &mut TraversalGraphBuild,
13292    backlog: &TraversalNode,
13293    text: &str,
13294    lookup: &TraversalCodeLookup<'_>,
13295    limit: usize,
13296) {
13297    let mut query_tokens = traversal_tokens(text);
13298    if let Some(ref_id) = &backlog.ref_id {
13299        query_tokens.extend(traversal_tokens(ref_id));
13300    }
13301    if query_tokens.is_empty() {
13302        return;
13303    }
13304
13305    for (score, entry) in ranked_symbol_matches(&query_tokens, lookup.symbols, &lookup.symbol_index)
13306        .into_iter()
13307        .take(limit)
13308    {
13309        graph.add_edge(
13310            &backlog.handle,
13311            &entry.handle,
13312            "mentions",
13313            Some("backlog text matches symbol tokens".to_string()),
13314            score,
13315        );
13316    }
13317
13318    for (score, entry) in ranked_file_matches(&query_tokens, lookup.files, &lookup.file_index)
13319        .into_iter()
13320        .take(limit.min(5))
13321    {
13322        graph.add_edge(
13323            &backlog.handle,
13324            &entry.handle,
13325            "mentions",
13326            Some("backlog text matches file tokens".to_string()),
13327            score,
13328        );
13329    }
13330
13331    for (score, entry) in ranked_route_matches(&query_tokens, lookup.routes, &lookup.route_index)
13332        .into_iter()
13333        .take(limit.min(5))
13334    {
13335        graph.add_edge(
13336            &backlog.handle,
13337            &entry.handle,
13338            "mentions",
13339            Some("backlog text matches route tokens".to_string()),
13340            score,
13341        );
13342    }
13343
13344    for (score, entry) in ranked_multiplicity_matches(
13345        &query_tokens,
13346        lookup.multiplicities,
13347        &lookup.multiplicity_index,
13348    )
13349    .into_iter()
13350    .take(limit.min(5))
13351    {
13352        graph.add_edge(
13353            &backlog.handle,
13354            &entry.handle,
13355            "mentions",
13356            Some("backlog text matches multiplicity tokens".to_string()),
13357            score,
13358        );
13359    }
13360}
13361
13362fn load_agent_doc_traversal_nodes(
13363    root: &Path,
13364    path_hint: &Path,
13365    graph: &mut TraversalGraphBuild,
13366    lookup: &TraversalCodeLookup<'_>,
13367) -> Result<()> {
13368    for markdown_path in markdown_files_for_traversal(root, path_hint)? {
13369        let content = match fs::read_to_string(&markdown_path) {
13370            Ok(content) => content,
13371            Err(err) => {
13372                graph.warnings.push(format!(
13373                    "session artifact unavailable: {}: {err}",
13374                    markdown_path.display()
13375                ));
13376                continue;
13377            }
13378        };
13379        if !traversal_markdown_content_looks_like_session(&content) {
13380            continue;
13381        }
13382
13383        let session_id = parse_agent_doc_session_id(&content);
13384        let session = traversal_session_node(root, &markdown_path, session_id.as_deref());
13385        graph.add_node(session.clone());
13386        let lines = content.lines().collect::<Vec<_>>();
13387        let mut backlog_by_id = BTreeMap::<String, TraversalNode>::new();
13388        for (idx, line) in lines.iter().enumerate() {
13389            let Some((id, text)) = parse_backlog_line(line) else {
13390                continue;
13391            };
13392            let backlog = traversal_backlog_node(root, &markdown_path, &id, &text, idx as i64 + 1);
13393            graph.add_node(backlog.clone());
13394            backlog_by_id.insert(id.clone(), backlog.clone());
13395            graph.add_edge(
13396                &session.handle,
13397                &backlog.handle,
13398                "contains",
13399                Some("session backlog item".to_string()),
13400                1,
13401            );
13402            link_backlog_to_code_nodes(graph, &backlog, &text, lookup, 8);
13403        }
13404
13405        let mut in_queue = false;
13406        let mut job_by_id = BTreeMap::<String, TraversalNode>::new();
13407        for (idx, line) in lines.iter().enumerate() {
13408            let trimmed = line.trim();
13409            if trimmed.starts_with("<!-- agent:queue") {
13410                in_queue = true;
13411                continue;
13412            }
13413            if trimmed.starts_with("<!-- /agent:queue") {
13414                in_queue = false;
13415                continue;
13416            }
13417            if !in_queue {
13418                continue;
13419            }
13420            if let Some(dispatch) = parse_queue_dispatch_line(line) {
13421                let dispatch_ref = dispatch.strip_prefix('#').unwrap_or(dispatch.as_str());
13422                let node = traversal_job_packet_node(
13423                    root,
13424                    &markdown_path,
13425                    &format!("dispatch {dispatch}"),
13426                    Some(dispatch_ref),
13427                    "agent-doc dispatch preset",
13428                    idx as i64 + 1,
13429                );
13430                graph.add_node(node.clone());
13431                graph.add_edge(
13432                    &session.handle,
13433                    &node.handle,
13434                    "contains",
13435                    Some("session queued dispatch".to_string()),
13436                    1,
13437                );
13438                continue;
13439            }
13440            if let Some(id) = parse_queue_do_line(line) {
13441                let detail = backlog_by_id
13442                    .get(&id)
13443                    .and_then(|node| node.detail.clone())
13444                    .unwrap_or_else(|| "queued backlog item".to_string());
13445                let node = traversal_job_packet_node(
13446                    root,
13447                    &markdown_path,
13448                    &format!("do #{id}"),
13449                    Some(&id),
13450                    &detail,
13451                    idx as i64 + 1,
13452                );
13453                graph.add_node(node.clone());
13454                graph.add_edge(
13455                    &session.handle,
13456                    &node.handle,
13457                    "contains",
13458                    Some("session queued job packet".to_string()),
13459                    1,
13460                );
13461                if let Some(backlog) = backlog_by_id.get(&id) {
13462                    graph.add_edge(
13463                        &node.handle,
13464                        &backlog.handle,
13465                        "targets",
13466                        Some("queued backlog item".to_string()),
13467                        1,
13468                    );
13469                }
13470                job_by_id.insert(id, node);
13471            }
13472        }
13473
13474        let mut seen_results = BTreeSet::<(String, String, i64)>::new();
13475        for (idx, line) in lines.iter().enumerate() {
13476            for parsed in parse_worker_result_line(line, lookup) {
13477                let line_no = idx as i64 + 1;
13478                if !seen_results.insert((parsed.id.clone(), parsed.status.clone(), line_no)) {
13479                    continue;
13480                }
13481                let result =
13482                    traversal_worker_result_node(root, &markdown_path, &parsed, line, line_no);
13483                graph.add_node(result.clone());
13484                graph.add_edge(
13485                    &session.handle,
13486                    &result.handle,
13487                    "contains",
13488                    Some("session worker result".to_string()),
13489                    1,
13490                );
13491                if let Some(backlog) = backlog_by_id.get(&parsed.id) {
13492                    graph.add_edge(
13493                        &backlog.handle,
13494                        &result.handle,
13495                        "has_result",
13496                        Some(format!("worker result {}", parsed.status)),
13497                        1,
13498                    );
13499                }
13500                if let Some(job) = job_by_id.get(&parsed.id) {
13501                    graph.add_edge(
13502                        &job.handle,
13503                        &result.handle,
13504                        "has_result",
13505                        Some(format!("queued worker result {}", parsed.status)),
13506                        1,
13507                    );
13508                }
13509                let mut result_text = line.to_string();
13510                if !parsed.touched_files.is_empty() {
13511                    result_text.push(' ');
13512                    result_text.push_str(&parsed.touched_files.join(" "));
13513                }
13514                link_backlog_to_code_nodes(graph, &result, &result_text, lookup, 8);
13515            }
13516        }
13517    }
13518    Ok(())
13519}
13520
/// Outcome of preparing the code index before a packet is built.
#[derive(Clone, Debug)]
struct AgentDocIndexGate {
    /// Index database to read from; `None` when the index could not be
    /// resolved, inspected or refreshed.
    db_path: Option<PathBuf>,
    /// Source root of the resolved index target, or the fallback source root
    /// when no target was resolved.
    source_root: PathBuf,
    /// Human-readable notes about refreshes and fallbacks.
    diagnostics: Vec<String>,
}
13527
/// Lookup key for the in-process index gate cache: one entry per distinct
/// combination of root, path hint, scope and packet label.
#[derive(Clone, PartialEq, Eq, Hash)]
struct AgentDocIndexGateCacheKey {
    /// Repository root the gate was prepared for.
    root: PathBuf,
    /// Path hint passed to the gate preparation.
    path_hint: PathBuf,
    /// Optional explicit scope name.
    scope: Option<String>,
    /// Label of the packet the gate was prepared for.
    packet_label: String,
}
13535
13536fn agent_doc_index_gate_cache() -> &'static std::sync::Mutex<
13537    std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>,
13538> {
13539    static CACHE: std::sync::OnceLock<
13540        std::sync::Mutex<std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>>,
13541    > = std::sync::OnceLock::new();
13542    CACHE.get_or_init(|| std::sync::Mutex::new(std::collections::HashMap::new()))
13543}
13544
13545fn prepare_agent_doc_index_gate_cached(
13546    root: &Path,
13547    path_hint: &Path,
13548    scope: Option<&str>,
13549    packet_label: &str,
13550) -> (AgentDocIndexGate, String) {
13551    let key = AgentDocIndexGateCacheKey {
13552        root: root.to_path_buf(),
13553        path_hint: path_hint.to_path_buf(),
13554        scope: scope.map(str::to_string),
13555        packet_label: packet_label.to_string(),
13556    };
13557    if let Ok(cache) = agent_doc_index_gate_cache().lock()
13558        && let Some(cached) = cache.get(&key)
13559    {
13560        return (
13561            cached.clone(),
13562            "reused from in-process index gate cache by root/path_hint/scope key".to_string(),
13563        );
13564    }
13565    let gate = prepare_agent_doc_index_gate(root, path_hint, scope, packet_label);
13566    if let Ok(mut cache) = agent_doc_index_gate_cache().lock() {
13567        cache.insert(key, gate.clone());
13568    }
13569    (
13570        gate,
13571        "fresh inspection/refresh — cache miss on this preparation key".to_string(),
13572    )
13573}
13574
13575fn index_reason_for_state(state: SearchIndexState) -> Option<RebuildSearchReason> {
13576    match state {
13577        SearchIndexState::Fresh => None,
13578        SearchIndexState::Missing => Some(RebuildSearchReason::Missing),
13579        SearchIndexState::Stale { stale_files } => Some(RebuildSearchReason::Stale { stale_files }),
13580    }
13581}
13582
13583fn index_reason_detail(target: &SearchIndexTarget, reason: RebuildSearchReason) -> String {
13584    rebuild_search_target_detail(&RebuildSearchTarget {
13585        label: target.label.clone(),
13586        reason,
13587        reindex_cmd: target.reindex_cmd.clone(),
13588    })
13589}
13590
13591fn index_refresh_diagnostic(
13592    target: &SearchIndexTarget,
13593    reason: RebuildSearchReason,
13594    summary: &index::IndexSummary,
13595    packet_label: &str,
13596) -> String {
13597    let changed = summary.new + summary.modified + summary.deleted;
13598    format!(
13599        "index refreshed: {}; updated {} changed file{} before {}",
13600        index_reason_detail(target, reason),
13601        changed,
13602        if changed == 1 { "" } else { "s" },
13603        packet_label
13604    )
13605}
13606
13607fn index_refresh_fallback_diagnostic(
13608    target: &SearchIndexTarget,
13609    reason: RebuildSearchReason,
13610    err: &anyhow::Error,
13611    packet_label: &str,
13612) -> String {
13613    format!(
13614        "{}; could not refresh before {}: {err:#}; falling back to raw source file nodes",
13615        index_reason_detail(target, reason),
13616        packet_label
13617    )
13618}
13619
13620fn graph_fallback_source_root(root: &Path, path_hint: &Path, scope: Option<&str>) -> PathBuf {
13621    if let Some(scope_name) = scope
13622        && let Ok(Some(scope)) = config::Config::find_submodule(root, scope_name)
13623    {
13624        return scope.source_root;
13625    }
13626    if let Some(scope_name) = scope
13627        && let Ok(Some(package)) = multiplicity::find_cargo_package(root, scope_name)
13628    {
13629        return package.package_root;
13630    }
13631    if let Ok(Some(scope)) = config::Config::infer_submodule_from_path(root, path_hint) {
13632        return scope.source_root;
13633    }
13634    if let Ok(Some(package)) = multiplicity::infer_cargo_package_from_path(root, path_hint) {
13635        return package.package_root;
13636    }
13637    if let Ok(Some(scope)) = infer_agent_doc_task_submodule(root, path_hint) {
13638        return scope.source_root;
13639    }
13640    root.to_path_buf()
13641}
13642
13643fn prepare_agent_doc_index_gate(
13644    root: &Path,
13645    path_hint: &Path,
13646    scope: Option<&str>,
13647    packet_label: &str,
13648) -> AgentDocIndexGate {
13649    let fallback_source_root = graph_fallback_source_root(root, path_hint, scope);
13650    let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
13651        Ok(targets) => targets,
13652        Err(err) => {
13653            return AgentDocIndexGate {
13654                db_path: None,
13655                source_root: fallback_source_root,
13656                diagnostics: vec![format!(
13657                    "code index unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
13658                )],
13659            };
13660        }
13661    };
13662    let Some(target) = targets.into_iter().next() else {
13663        return AgentDocIndexGate {
13664            db_path: None,
13665            source_root: fallback_source_root,
13666            diagnostics: vec![format!(
13667                "code index unavailable before {packet_label}: no index target resolved; falling back to raw source file nodes"
13668            )],
13669        };
13670    };
13671
13672    let state = match inspect_search_index(&target) {
13673        Ok(state) => state,
13674        Err(err) => {
13675            return AgentDocIndexGate {
13676                db_path: None,
13677                source_root: target.source_root,
13678                diagnostics: vec![format!(
13679                    "code index freshness unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
13680                )],
13681            };
13682        }
13683    };
13684
13685    let Some(reason) = index_reason_for_state(state) else {
13686        return AgentDocIndexGate {
13687            db_path: Some(target.db_path),
13688            source_root: target.source_root,
13689            diagnostics: Vec::new(),
13690        };
13691    };
13692
13693    match apply_search_index_update(root, &target) {
13694        Ok(summary) => {
13695            // #gdbgatecold: the index was just rewritten, so any cached
13696            // pre-refresh inspection result for this scope (held by the
13697            // active lazily-backed `InspectScopeGuard`) is stale. Invalidate
13698            // the scope epoch so the next `inspect_read_only` re-reads the
13699            // fresh index.
13700            index::inspect_scope_invalidate_all();
13701            let diagnostics = vec![index_refresh_diagnostic(
13702                &target,
13703                reason,
13704                &summary,
13705                packet_label,
13706            )];
13707            AgentDocIndexGate {
13708                db_path: Some(target.db_path),
13709                source_root: target.source_root,
13710                diagnostics,
13711            }
13712        }
13713        Err(err) => {
13714            let diagnostics = vec![index_refresh_fallback_diagnostic(
13715                &target,
13716                reason,
13717                &err,
13718                packet_label,
13719            )];
13720            AgentDocIndexGate {
13721                db_path: None,
13722                source_root: target.source_root,
13723                diagnostics,
13724            }
13725        }
13726    }
13727}
13728
13729fn add_raw_source_file_nodes(
13730    root: &Path,
13731    source_root: &Path,
13732    graph: &mut TraversalGraphBuild,
13733    file_entries: &mut Vec<TraversalFileIndexEntry>,
13734) -> Result<()> {
13735    let mut entries = walk::walk_files(source_root)?;
13736    entries.sort_by(|left, right| left.path.cmp(&right.path));
13737    for entry in entries {
13738        let file = entry.path.to_string_lossy();
13739        let node = traversal_raw_source_file_node(root, file.as_ref());
13740        let entry = TraversalFileIndexEntry {
13741            handle: node.handle.clone(),
13742            tokens: traversal_node_tokens(&node),
13743            node: node.clone(),
13744        };
13745        graph.add_node(node);
13746        file_entries.push(entry);
13747    }
13748    Ok(())
13749}
13750
/// Returns `true` when `path` is `scope_root` itself or lies beneath it.
///
/// Both arguments are `/`-separated relative paths. An empty `scope_root`
/// contains every path. Matching is component-aware: `crates/ab` is not
/// inside `crates/a`.
fn relative_path_inside_scope(path: &str, scope_root: &str) -> bool {
    scope_root.is_empty()
        || path
            .strip_prefix(scope_root)
            // Whatever follows the scope prefix must be nothing at all or
            // start a new path component.
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
}
13757
/// Resolves the on-disk location of an indexed symbol's file.
///
/// Absolute paths are returned unchanged. Relative paths are resolved against
/// `source_root` when that candidate exists on disk, and against `root`
/// otherwise.
fn traversal_symbol_source_path(root: &Path, source_root: &Path, file: &str) -> PathBuf {
    let recorded = Path::new(file);
    if recorded.is_absolute() {
        return recorded.to_path_buf();
    }
    Some(source_root.join(recorded))
        .filter(|candidate| candidate.exists())
        .unwrap_or_else(|| root.join(recorded))
}
13770
/// Extracts the leading path segment of a Rust `use` / `extern crate` line.
///
/// This is a line-based heuristic, not a parser. It recognises:
/// - `use foo::...;` and `extern crate foo;`
/// - any visibility prefix: `pub`, `pub(crate)`, `pub(super)`, `pub(in path)`
/// - a leading `::` on the imported path (`use ::foo::Bar;`)
///
/// Returns the first path segment (`foo` above), or `None` when the line is
/// not an import or the segment is empty. Segments such as `crate`, `self`
/// or `std` are returned as-is; callers decide which names are meaningful.
fn cargo_import_alias_from_line(line: &str) -> Option<String> {
    let mut rest = line.trim();

    // Strip an optional visibility qualifier. Restricted visibilities such as
    // `pub(crate)` are as common as bare `pub` and reference crates equally.
    if let Some(after_pub) = rest.strip_prefix("pub") {
        let after_visibility = match after_pub.strip_prefix('(') {
            Some(restriction) => restriction.split_once(')')?.1,
            None => after_pub,
        };
        let declaration = after_visibility.trim_start();
        // Whitespace must separate the qualifier from the keyword; otherwise
        // this is an identifier that merely starts with `pub`.
        if declaration.len() == after_visibility.len() {
            return None;
        }
        rest = declaration;
    }

    let path = rest
        .strip_prefix("use ")
        .or_else(|| rest.strip_prefix("extern crate "))?
        .trim_start();
    // `use ::foo::Bar;` names crate `foo` through an absolute path.
    let path = path.strip_prefix("::").unwrap_or(path);

    let alias = path
        .split([':', ';', ' ', '\t'])
        .next()
        .unwrap_or_default()
        .trim();
    (!alias.is_empty()).then(|| alias.to_string())
}
13784
13785fn cargo_import_aliases(package: &multiplicity::CargoPackageInfo) -> Result<BTreeSet<String>> {
13786    let mut aliases = BTreeSet::new();
13787    for entry in walk::walk_files(&package.package_root)? {
13788        if entry.path.extension().and_then(|ext| ext.to_str()) != Some("rs") {
13789            continue;
13790        }
13791        let content = fs::read_to_string(&entry.path)
13792            .with_context(|| format!("reading Rust source {}", entry.path.display()))?;
13793        aliases.extend(content.lines().filter_map(cargo_import_alias_from_line));
13794    }
13795    Ok(aliases)
13796}
13797
13798fn load_multiplicity_traversal_nodes(
13799    root: &Path,
13800    source_root: &Path,
13801    graph: &mut TraversalGraphBuild,
13802    file_handle_by_path: &HashMap<String, String>,
13803    multiplicity_entries: &mut Vec<TraversalMultiplicityIndexEntry>,
13804) -> Result<()> {
13805    let inventory = multiplicity::discover_cargo_inventory(source_root)?;
13806    let mut workspace_handle_by_root = BTreeMap::<String, String>::new();
13807    for workspace in &inventory.workspaces {
13808        let node = traversal_cargo_workspace_node(root, workspace);
13809        workspace_handle_by_root.insert(workspace.relative_root.clone(), node.handle.clone());
13810        multiplicity_entries.push(TraversalMultiplicityIndexEntry {
13811            handle: node.handle.clone(),
13812            tokens: traversal_node_tokens(&node),
13813            node: node.clone(),
13814        });
13815        graph.add_node(node);
13816    }
13817
13818    let mut package_handle_by_name = BTreeMap::<String, Vec<String>>::new();
13819    let mut package_nodes = Vec::new();
13820    for package in &inventory.packages {
13821        let node = traversal_cargo_package_node(root, package);
13822        package_handle_by_name
13823            .entry(package.name.clone())
13824            .or_default()
13825            .push(node.handle.clone());
13826        package_handle_by_name
13827            .entry(package.normalized_name.clone())
13828            .or_default()
13829            .push(node.handle.clone());
13830        multiplicity_entries.push(TraversalMultiplicityIndexEntry {
13831            handle: node.handle.clone(),
13832            tokens: traversal_node_tokens(&node),
13833            node: node.clone(),
13834        });
13835        graph.add_node(node.clone());
13836        package_nodes.push((package, node));
13837    }
13838
13839    for (package, node) in &package_nodes {
13840        if let Some(workspace_handle) =
13841            workspace_handle_by_root.get(&package.relative_workspace_root)
13842        {
13843            graph.add_edge(
13844                workspace_handle,
13845                &node.handle,
13846                "contains_package",
13847                Some("Cargo workspace member package".to_string()),
13848                1,
13849            );
13850        }
13851        let package_root = relativize_pathbuf(&package.package_root, root)
13852            .to_string_lossy()
13853            .replace('\\', "/");
13854        for (file, handle) in file_handle_by_path {
13855            if relative_path_inside_scope(file, &package_root) {
13856                graph.add_edge(
13857                    &node.handle,
13858                    handle,
13859                    "owns_file",
13860                    Some("Cargo package owns source file".to_string()),
13861                    1,
13862                );
13863            }
13864        }
13865        for dependency in &package.dependencies {
13866            if let Some(handles) = package_handle_by_name.get(&dependency.name)
13867                && handles.len() == 1
13868            {
13869                graph.add_edge(
13870                    &node.handle,
13871                    &handles[0],
13872                    "declares_dependency",
13873                    Some(format!("{} Cargo dependency", dependency.kind)),
13874                    1,
13875                );
13876            }
13877        }
13878        for alias in cargo_import_aliases(package)? {
13879            if let Some(handles) = package_handle_by_name.get(&alias)
13880                && handles.len() == 1
13881                && handles[0] != node.handle
13882            {
13883                graph.add_edge(
13884                    &node.handle,
13885                    &handles[0],
13886                    "uses_crate",
13887                    Some("Rust use/extern crate reference".to_string()),
13888                    1,
13889                );
13890                graph.add_edge(
13891                    &node.handle,
13892                    &handles[0],
13893                    "imports",
13894                    Some("Rust use/extern crate import".to_string()),
13895                    1,
13896                );
13897            }
13898        }
13899    }
13900
13901    Ok(())
13902}
13903
13904fn build_traversal_graph_source_with_options(
13905    root: &Path,
13906    path_hint: &Path,
13907    scope: Option<&str>,
13908    session_only: bool,
13909) -> Result<TraversalGraphBuild> {
13910    let mut graph = TraversalGraphBuild::default();
13911    let mut symbol_entries = Vec::new();
13912    let mut file_entries = Vec::new();
13913    let mut route_entries = Vec::new();
13914    let mut multiplicity_entries = Vec::new();
13915    let mut file_handle_by_path = HashMap::<String, String>::new();
13916    let bounded_session_projection = hinted_markdown_file(root, path_hint).is_some();
13917    if !session_only || hinted_markdown_file(root, path_hint).is_none() {
13918        let (gate, _cache_detail) =
13919            prepare_agent_doc_index_gate_cached(root, path_hint, scope, "graph traversal packet");
13920        graph.warnings.extend(gate.diagnostics);
13921        let gate_source_root = gate.source_root.clone();
13922
13923        match gate.db_path {
13924            Some(db_path) if db_path.exists() => {
13925                let db = index::IndexDb::open_read_only_resilient(&db_path)?;
13926                let file_paths = db.file_paths()?;
13927                for file in file_paths {
13928                    if traversal_path_is_generated_artifact(
13929                        root,
13930                        &gate_source_root,
13931                        Path::new(&file),
13932                    ) {
13933                        continue;
13934                    }
13935                    let node = traversal_file_node(root, &file);
13936                    let entry = TraversalFileIndexEntry {
13937                        handle: node.handle.clone(),
13938                        tokens: traversal_node_tokens(&node),
13939                        node: node.clone(),
13940                    };
13941                    if let Some(path) = entry.node.path.as_ref() {
13942                        file_handle_by_path.insert(path.clone(), entry.handle.clone());
13943                    }
13944                    graph.add_node(node);
13945                    file_entries.push(entry);
13946                }
13947
13948                let symbols = db.all_symbols()?;
13949                let mut symbol_by_file_name_line = HashMap::new();
13950                let mut span_by_file_name_line = HashMap::new();
13951                let mut first_symbol_by_name = BTreeMap::<String, String>::new();
13952                let mut first_span_by_name = BTreeMap::<String, String>::new();
13953                let mut ast_entries = Vec::<TraversalAstSpanIndexEntry>::new();
13954                let mut source_by_file = HashMap::<String, Option<Vec<u8>>>::new();
13955                for symbol in symbols.iter().filter(|symbol| {
13956                    !traversal_path_is_generated_artifact(
13957                        root,
13958                        &gate_source_root,
13959                        Path::new(&symbol.file),
13960                    )
13961                }) {
13962                    let node = traversal_symbol_node(root, symbol);
13963                    let file = relativize(&symbol.file, root);
13964                    symbol_by_file_name_line.insert(
13965                        format!("{file}:{}:{}", symbol.line, symbol.name),
13966                        node.handle.clone(),
13967                    );
13968                    first_symbol_by_name
13969                        .entry(symbol.name.clone())
13970                        .or_insert_with(|| node.handle.clone());
13971                    let entry = TraversalSymbolIndexEntry {
13972                        handle: node.handle.clone(),
13973                        tokens: traversal_node_tokens(&node),
13974                        node: node.clone(),
13975                    };
13976                    graph.add_node(node.clone());
13977                    if let Some(file_handle) = file_handle_by_path.get(&file) {
13978                        graph.add_edge(
13979                            file_handle,
13980                            &node.handle,
13981                            "defines",
13982                            Some("file defines symbol".to_string()),
13983                            1,
13984                        );
13985                    }
13986                    if !source_by_file.contains_key(&symbol.file) {
13987                        let source_path =
13988                            traversal_symbol_source_path(root, &gate_source_root, &symbol.file);
13989                        source_by_file.insert(symbol.file.clone(), fs::read(source_path).ok());
13990                    }
13991                    if let Some(Some(source)) = source_by_file.get(&symbol.file)
13992                        && let Some((ast_node, mut ast_entry)) =
13993                            traversal_ast_span_node(root, symbol, source, &symbols)
13994                    {
13995                        ast_entry.symbol_handle = node.handle.clone();
13996                        ast_entry.file_handle = file_handle_by_path.get(&file).cloned();
13997                        span_by_file_name_line.insert(
13998                            format!("{file}:{}:{}", symbol.line, symbol.name),
13999                            ast_node.handle.clone(),
14000                        );
14001                        first_span_by_name
14002                            .entry(symbol.name.clone())
14003                            .or_insert_with(|| ast_node.handle.clone());
14004                        graph.add_node(ast_node.clone());
14005                        graph.add_edge(
14006                            &node.handle,
14007                            &ast_node.handle,
14008                            "has_ast_span",
14009                            Some("symbol projects to indexed AST span".to_string()),
14010                            1,
14011                        );
14012                        graph.add_edge(
14013                            &ast_node.handle,
14014                            &node.handle,
14015                            "represents_symbol",
14016                            Some("AST span represents indexed symbol".to_string()),
14017                            1,
14018                        );
14019                        ast_entries.push(ast_entry);
14020                    }
14021                    symbol_entries.push(entry);
14022                }
14023                link_ast_navigation_edges(&mut graph, &ast_entries);
14024                link_markdown_embedded_code_edges(&mut graph, root, &ast_entries);
14025
14026                if !bounded_session_projection {
14027                    for edge in db.all_stored_edges()? {
14028                        if traversal_path_is_generated_artifact(
14029                            root,
14030                            &gate_source_root,
14031                            Path::new(&edge.caller_file),
14032                        ) {
14033                            continue;
14034                        }
14035                        let caller_file = relativize(&edge.caller_file, root);
14036                        let caller_key =
14037                            format!("{caller_file}:{}:{}", edge.caller_line, edge.caller_name);
14038                        let Some(caller_handle) =
14039                            symbol_by_file_name_line.get(&caller_key).cloned()
14040                        else {
14041                            continue;
14042                        };
14043                        let callee_handle = if let Some(handle) =
14044                            first_symbol_by_name.get(&edge.callee_name)
14045                        {
14046                            handle.clone()
14047                        } else {
14048                            let node = traversal_unresolved_symbol_node(root, &edge.callee_name);
14049                            let handle = node.handle.clone();
14050                            graph.add_node(node);
14051                            handle
14052                        };
14053                        graph.add_edge(
14054                            &caller_handle,
14055                            &callee_handle,
14056                            "calls",
14057                            Some(format!("call site {}:{}", caller_file, edge.call_site_line)),
14058                            1,
14059                        );
14060                        if let Some(caller_span) = span_by_file_name_line.get(&caller_key)
14061                            && let Some(callee_span) = first_span_by_name.get(&edge.callee_name)
14062                        {
14063                            graph.add_edge(
14064                                caller_span,
14065                                callee_span,
14066                                "calls",
14067                                Some(format!(
14068                                    "AST call site {}:{}",
14069                                    caller_file, edge.call_site_line
14070                                )),
14071                                1,
14072                            );
14073                        }
14074                    }
14075                }
14076
14077                for route in db.all_routes()? {
14078                    if traversal_path_is_generated_artifact(
14079                        root,
14080                        &gate_source_root,
14081                        Path::new(&route.file),
14082                    ) {
14083                        continue;
14084                    }
14085                    let node = traversal_route_node(root, &route);
14086                    let entry = TraversalRouteIndexEntry {
14087                        handle: node.handle.clone(),
14088                        tokens: traversal_node_tokens(&node),
14089                        node: node.clone(),
14090                    };
14091                    graph.add_node(node.clone());
14092                    if let Some(path) = node.path.as_ref()
14093                        && let Some(file_handle) = file_handle_by_path.get(path)
14094                    {
14095                        graph.add_edge(
14096                            file_handle,
14097                            &node.handle,
14098                            "defines_route",
14099                            Some("file declares route".to_string()),
14100                            1,
14101                        );
14102                    }
14103                    let handler_handle =
14104                        if let Some(handle) = first_symbol_by_name.get(&route.handler_name) {
14105                            handle.clone()
14106                        } else {
14107                            let node = traversal_unresolved_symbol_node(root, &route.handler_name);
14108                            let handle = node.handle.clone();
14109                            graph.add_node(node);
14110                            handle
14111                        };
14112                    graph.add_edge(
14113                        &entry.handle,
14114                        &handler_handle,
14115                        "handled_by",
14116                        Some("route handler reference".to_string()),
14117                        1,
14118                    );
14119                    if let Some(handler_span) = first_span_by_name.get(&route.handler_name) {
14120                        graph.add_edge(
14121                            &entry.handle,
14122                            handler_span,
14123                            "handled_by",
14124                            Some("route handler AST span".to_string()),
14125                            1,
14126                        );
14127                        graph.add_edge(
14128                            handler_span,
14129                            &entry.handle,
14130                            "handles_route",
14131                            Some("AST span handles route".to_string()),
14132                            1,
14133                        );
14134                    }
14135                    route_entries.push(entry);
14136                }
14137            }
14138            _ => {
14139                add_raw_source_file_nodes(root, &gate_source_root, &mut graph, &mut file_entries)
14140                    .with_context(|| {
14141                    format!(
14142                        "loading raw source fallback nodes from {}",
14143                        gate_source_root.display()
14144                    )
14145                })?;
14146                for entry in &file_entries {
14147                    if let Some(path) = entry.node.path.as_ref() {
14148                        file_handle_by_path.insert(path.clone(), entry.handle.clone());
14149                    }
14150                }
14151            }
14152        }
14153        load_multiplicity_traversal_nodes(
14154            root,
14155            &gate_source_root,
14156            &mut graph,
14157            &file_handle_by_path,
14158            &mut multiplicity_entries,
14159        )?;
14160    }
14161
14162    let code_lookup = TraversalCodeLookup::new(
14163        &symbol_entries,
14164        &file_entries,
14165        &route_entries,
14166        &multiplicity_entries,
14167    );
14168    load_agent_doc_traversal_nodes(root, path_hint, &mut graph, &code_lookup)?;
14169    Ok(graph)
14170}
14171
/// Test-only shorthand for [`build_traversal_graph_source_with_options`]
/// with `session_only` turned off.
#[cfg(test)]
fn build_traversal_graph_source(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<TraversalGraphBuild> {
    let session_only = false;
    build_traversal_graph_source_with_options(root, path_hint, scope, session_only)
}
14180
14181pub(crate) fn write_traversal_graph_store_with_options(
14182    root: &Path,
14183    path_hint: &Path,
14184    scope: Option<&str>,
14185    session_only: bool,
14186) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
14187    let source_graph =
14188        build_traversal_graph_source_with_options(root, path_hint, scope, session_only)?;
14189    let projection = traversal_projection_from_graph(root, scope, &source_graph)?;
14190    let graph_db = graph_substrate_db_path(root, scope);
14191    let mut store = SqliteGraphStore::open(&graph_db)?;
14192    let source_watermark = traversal_source_watermark(root, path_hint, scope, session_only)
14193        .ok()
14194        .flatten()
14195        .or_else(|| graph_projection_content_hash(&projection));
14196    let refresh = store.replace_projection_with_version(
14197        scope.unwrap_or("root"),
14198        &projection,
14199        Some(GRAPH_PROJECTION_VERSION),
14200        source_watermark,
14201    )?;
14202    Ok((source_graph, refresh))
14203}
14204
14205pub(crate) fn write_traversal_graph_store(
14206    root: &Path,
14207    path_hint: &Path,
14208    scope: Option<&str>,
14209) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
14210    write_traversal_graph_store_with_options(root, path_hint, scope, false)
14211}
14212
14213fn refresh_traversal_graph_store_with_options(
14214    root: &Path,
14215    path_hint: &Path,
14216    scope: Option<&str>,
14217    session_only: bool,
14218) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
14219    let (source_graph, refresh) =
14220        write_traversal_graph_store_with_options(root, path_hint, scope, session_only)?;
14221    let graph_db = graph_substrate_db_path(root, scope);
14222    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
14223    let mut graph = traversal_graph_from_store(root, &store)?;
14224    graph.warnings = source_graph.warnings;
14225    Ok((graph, refresh))
14226}
14227
14228fn refresh_traversal_graph_store(
14229    root: &Path,
14230    path_hint: &Path,
14231    scope: Option<&str>,
14232) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
14233    refresh_traversal_graph_store_with_options(root, path_hint, scope, false)
14234}
14235
14236pub(crate) fn build_traversal_graph(
14237    root: &Path,
14238    path_hint: &Path,
14239    scope: Option<&str>,
14240) -> Result<TraversalGraphBuild> {
14241    let (graph, _refresh) = refresh_traversal_graph_store(root, path_hint, scope)?;
14242    Ok(graph)
14243}
14244
/// Maps a node kind to its tie-break priority when resolving a query
/// (lower sorts first).
///
/// Known kinds get their position in the table below; every other kind gets
/// the value one past the last known entry (12).
fn traversal_query_kind_priority(kind: &str) -> usize {
    // Order matters: the index of each entry is its priority.
    const KINDS_BY_PRIORITY: [&str; 12] = [
        "backlog",
        "job_packet",
        "worker_result",
        "symbol",
        "ast_span",
        "file",
        "route",
        "cargo_package",
        "cargo_workspace",
        "session",
        "semantic_concept",
        "semantic_entity",
    ];
    KINDS_BY_PRIORITY
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(KINDS_BY_PRIORITY.len())
}
14262
14263fn traversal_node_match_rank(node: &TraversalNode, query: &str) -> Option<(usize, usize, String)> {
14264    let trimmed = query.trim();
14265    if trimmed.is_empty() {
14266        return None;
14267    }
14268    let kind_priority = traversal_query_kind_priority(&node.kind);
14269    if node.handle == trimmed {
14270        return Some((0, kind_priority, node.handle.clone()));
14271    }
14272    if node.path.as_deref() == Some(trimmed) {
14273        let path_priority = if node.kind == "file" {
14274            0
14275        } else {
14276            kind_priority.saturating_add(1)
14277        };
14278        return Some((1, path_priority, node.handle.clone()));
14279    }
14280    let normalized_backlog = trimmed.trim_start_matches('#');
14281    if node.ref_id.as_deref() == Some(trimmed) || node.ref_id.as_deref() == Some(normalized_backlog)
14282    {
14283        return Some((2, kind_priority, node.handle.clone()));
14284    }
14285    if node.label == trimmed || (node.kind == "symbol" && node.label == normalized_backlog) {
14286        return Some((3, kind_priority, node.handle.clone()));
14287    }
14288    None
14289}
14290
14291fn resolve_traversal_node<'a>(
14292    graph: &'a TraversalGraphBuild,
14293    query: &str,
14294) -> Option<&'a TraversalNode> {
14295    graph
14296        .nodes
14297        .values()
14298        .filter_map(|node| traversal_node_match_rank(node, query).map(|rank| (rank, node)))
14299        .min_by(|(left_rank, _), (right_rank, _)| left_rank.cmp(right_rank))
14300        .map(|(_, node)| node)
14301}
14302
14303fn traversal_adjacency(edges: &[TraversalEdge]) -> BTreeMap<String, Vec<String>> {
14304    let mut adj = BTreeMap::<String, BTreeSet<String>>::new();
14305    for edge in edges {
14306        adj.entry(edge.from.clone())
14307            .or_default()
14308            .insert(edge.to.clone());
14309        adj.entry(edge.to.clone())
14310            .or_default()
14311            .insert(edge.from.clone());
14312    }
14313    adj.into_iter()
14314        .map(|(node, neighbors)| (node, neighbors.into_iter().collect()))
14315        .collect()
14316}
14317
14318fn traversal_shortest_handles(
14319    edges: &[TraversalEdge],
14320    from: &str,
14321    to: &str,
14322) -> Option<Vec<String>> {
14323    if from == to {
14324        return Some(vec![from.to_string()]);
14325    }
14326    let adj = traversal_adjacency(edges);
14327    if !adj.contains_key(from) || !adj.contains_key(to) {
14328        return None;
14329    }
14330    let mut visited = BTreeSet::new();
14331    let mut queue = VecDeque::new();
14332    let mut parent = BTreeMap::<String, String>::new();
14333    visited.insert(from.to_string());
14334    queue.push_back(from.to_string());
14335    while let Some(current) = queue.pop_front() {
14336        if let Some(neighbors) = adj.get(&current) {
14337            for neighbor in neighbors {
14338                if visited.insert(neighbor.clone()) {
14339                    parent.insert(neighbor.clone(), current.clone());
14340                    if neighbor == to {
14341                        let mut path = vec![to.to_string()];
14342                        let mut cursor = to.to_string();
14343                        while let Some(prev) = parent.get(&cursor) {
14344                            path.push(prev.clone());
14345                            cursor = prev.clone();
14346                        }
14347                        path.reverse();
14348                        return Some(path);
14349                    }
14350                    queue.push_back(neighbor.clone());
14351                }
14352            }
14353        }
14354    }
14355    None
14356}
14357
14358fn traversal_scored_neighbors(edges: &[TraversalEdge], current: &str) -> Vec<String> {
14359    let mut best_score_by_neighbor = BTreeMap::<String, usize>::new();
14360    for edge in edges {
14361        let neighbor = if edge.from == current {
14362            edge.to.as_str()
14363        } else if edge.to == current {
14364            edge.from.as_str()
14365        } else {
14366            continue;
14367        };
14368        let score = traversal_relation_score(edge, current);
14369        best_score_by_neighbor
14370            .entry(neighbor.to_string())
14371            .and_modify(|best| *best = (*best).max(score))
14372            .or_insert(score);
14373    }
14374    let mut ranked = best_score_by_neighbor.into_iter().collect::<Vec<_>>();
14375    ranked.sort_by(|(left_handle, left_score), (right_handle, right_score)| {
14376        right_score
14377            .cmp(left_score)
14378            .then_with(|| left_handle.cmp(right_handle))
14379    });
14380    ranked.into_iter().map(|(handle, _)| handle).collect()
14381}
14382
14383fn traversal_neighborhood_handles(
14384    edges: &[TraversalEdge],
14385    origin: &str,
14386    depth: usize,
14387    limit: usize,
14388) -> BTreeSet<String> {
14389    let mut seen = BTreeSet::new();
14390    let mut queue = VecDeque::new();
14391    seen.insert(origin.to_string());
14392    queue.push_back((origin.to_string(), 0usize));
14393    while let Some((current, current_depth)) = queue.pop_front() {
14394        if current_depth >= depth {
14395            continue;
14396        }
14397        for neighbor in traversal_scored_neighbors(edges, &current) {
14398            if limit > 0 && seen.len() >= limit {
14399                return seen;
14400            }
14401            if seen.insert(neighbor.clone()) {
14402                queue.push_back((neighbor, current_depth + 1));
14403            }
14404        }
14405    }
14406    seen
14407}
14408
14409fn traversal_edges_between(
14410    handles: &BTreeSet<String>,
14411    edges: &[TraversalEdge],
14412) -> Vec<TraversalEdge> {
14413    edges
14414        .iter()
14415        .filter(|edge| handles.contains(&edge.from) && handles.contains(&edge.to))
14416        .cloned()
14417        .collect()
14418}
14419
14420fn traversal_path_edges(path: &[String], edges: &[TraversalEdge]) -> Vec<TraversalEdge> {
14421    let mut result = Vec::new();
14422    for pair in path.windows(2) {
14423        if let Some(edge) = edges.iter().find(|edge| {
14424            (edge.from == pair[0] && edge.to == pair[1])
14425                || (edge.from == pair[1] && edge.to == pair[0])
14426        }) {
14427            result.push(edge.clone());
14428        }
14429    }
14430    result
14431}
14432
14433fn sorted_traversal_nodes<'a>(
14434    nodes: impl IntoIterator<Item = &'a TraversalNode>,
14435) -> Vec<TraversalNode> {
14436    let mut nodes = nodes.into_iter().cloned().collect::<Vec<_>>();
14437    nodes.sort_by(|left, right| {
14438        left.kind
14439            .cmp(&right.kind)
14440            .then_with(|| left.label.cmp(&right.label))
14441            .then_with(|| left.path.cmp(&right.path))
14442            .then_with(|| left.handle.cmp(&right.handle))
14443    });
14444    nodes
14445}
14446
14447fn traversal_relation_score(edge: &TraversalEdge, origin: &str) -> usize {
14448    let base = match edge.relation.as_str() {
14449        "mentions" => 100,
14450        "contains" => 80,
14451        "parent" | "child" | "has_ast_span" | "represents_symbol" => 78,
14452        "contains_embedded_symbol" | "embedded_in_fence" => 77,
14453        "contains_markdown_block"
14454        | "contains_embedded_code"
14455        | "enclosing_module"
14456        | "enclosing_section" => 76,
14457        "calls" => {
14458            if edge.from == origin {
14459                70
14460            } else {
14461                65
14462            }
14463        }
14464        "handled_by" | "handles_route" => 68,
14465        "defines_route" => 62,
14466        "imports" => 62,
14467        "previous_sibling" | "next_sibling" => 54,
14468        "mentions_concept" | "mentions_entity" => 66,
14469        "semantic_relation" => 64,
14470        "tagged_concept" | "related_concept" => 58,
14471        "defines" => {
14472            if edge.from == origin {
14473                60
14474            } else {
14475                55
14476            }
14477        }
14478        _ => 10,
14479    };
14480    base + edge.weight
14481}
14482
14483fn traversal_recommendation_reason(edge: &TraversalEdge, origin: &str) -> String {
14484    match edge.relation.as_str() {
14485        "mentions" => "matched from backlog/session text".to_string(),
14486        "contains" => "contained in the selected session artifact".to_string(),
14487        "has_ast_span" => "indexed AST span for the selected symbol".to_string(),
14488        "represents_symbol" => "indexed symbol represented by the selected AST span".to_string(),
14489        "parent" => "parent AST span".to_string(),
14490        "child" => "child AST span".to_string(),
14491        "previous_sibling" => "previous AST sibling".to_string(),
14492        "next_sibling" => "next AST sibling".to_string(),
14493        "contains_markdown_block" => "Markdown section block".to_string(),
14494        "contains_embedded_symbol" => "embedded code symbol in Markdown fence".to_string(),
14495        "embedded_in_fence" => "Markdown fence containing the embedded symbol".to_string(),
14496        "contains_embedded_code" => "embedded code symbol in Markdown section".to_string(),
14497        "enclosing_module" => "nearest enclosing module".to_string(),
14498        "enclosing_section" => "nearest enclosing Markdown section".to_string(),
14499        "defines" if edge.from == origin => "symbol defined in selected file".to_string(),
14500        "defines" => "file that defines the selected symbol".to_string(),
14501        "defines_route" if edge.from == origin => "route declared in selected file".to_string(),
14502        "defines_route" => "file that declares the selected route".to_string(),
14503        "handled_by" if edge.from == origin => "handler for the selected route".to_string(),
14504        "handled_by" => "route handled by the selected symbol".to_string(),
14505        "handles_route" => "route handled by the selected AST span".to_string(),
14506        "imports" => "import dependency from the selected package".to_string(),
14507        "mentions_concept" => "cached summary concept for the selected source".to_string(),
14508        "mentions_entity" => "cached summary entity for the selected source".to_string(),
14509        "semantic_relation" => "LLM-extracted semantic relationship".to_string(),
14510        "tagged_concept" => "concept label attached to the selected entity".to_string(),
14511        "related_concept" => "co-occurring cached summary concept".to_string(),
14512        "calls" if edge.from == origin => "callee from the selected symbol".to_string(),
14513        "calls" => "caller of the selected symbol".to_string(),
14514        other => format!("connected by {other}"),
14515    }
14516}
14517
14518fn traversal_recommendations(
14519    graph: &TraversalGraphBuild,
14520    origin: Option<&str>,
14521    shortest_path: Option<&[String]>,
14522    limit: usize,
14523) -> Vec<TraversalRecommendation> {
14524    let Some(origin) = origin else {
14525        return Vec::new();
14526    };
14527    let mut recommendations = Vec::new();
14528    let mut seen = BTreeSet::new();
14529
14530    if let Some(path) = shortest_path
14531        && path.len() > 1
14532        && path.first().is_some_and(|handle| handle == origin)
14533        && let Some(next) = graph.nodes.get(&path[1])
14534    {
14535        seen.insert(next.handle.clone());
14536        recommendations.push(TraversalRecommendation {
14537            handle: next.handle.clone(),
14538            kind: next.kind.clone(),
14539            label: next.label.clone(),
14540            reason: "next hop on shortest path".to_string(),
14541            score: 1_000,
14542            expand: next.expand.clone(),
14543        });
14544    }
14545
14546    let mut candidates = graph
14547        .edges
14548        .iter()
14549        .filter_map(|edge| {
14550            let neighbor = if edge.from == origin {
14551                edge.to.as_str()
14552            } else if edge.to == origin {
14553                edge.from.as_str()
14554            } else {
14555                return None;
14556            };
14557            let node = graph.nodes.get(neighbor)?;
14558            Some((traversal_relation_score(edge, origin), edge, node))
14559        })
14560        .collect::<Vec<_>>();
14561    candidates.sort_by(|(left_score, _, left), (right_score, _, right)| {
14562        right_score
14563            .cmp(left_score)
14564            .then_with(|| left.kind.cmp(&right.kind))
14565            .then_with(|| left.label.cmp(&right.label))
14566            .then_with(|| left.handle.cmp(&right.handle))
14567    });
14568
14569    let max = if limit == 0 { usize::MAX } else { limit };
14570    for (score, edge, node) in candidates {
14571        if recommendations.len() >= max {
14572            break;
14573        }
14574        if seen.insert(node.handle.clone()) {
14575            recommendations.push(TraversalRecommendation {
14576                handle: node.handle.clone(),
14577                kind: node.kind.clone(),
14578                label: node.label.clone(),
14579                reason: traversal_recommendation_reason(edge, origin),
14580                score,
14581                expand: node.expand.clone(),
14582            });
14583        }
14584    }
14585
14586    recommendations
14587}
14588
14589fn exploration_budget_for_counts(nodes: usize, edges: usize) -> ExplorationBudget {
14590    let scale = nodes.saturating_add(edges);
14591    if scale <= 80 {
14592        ExplorationBudget {
14593            project_size: "small".to_string(),
14594            max_source_windows: 8,
14595            lines_per_window: 96,
14596            relationship_limit: 40,
14597        }
14598    } else if scale <= 800 {
14599        ExplorationBudget {
14600            project_size: "medium".to_string(),
14601            max_source_windows: 6,
14602            lines_per_window: 80,
14603            relationship_limit: 32,
14604        }
14605    } else {
14606        ExplorationBudget {
14607            project_size: "large".to_string(),
14608            max_source_windows: 4,
14609            lines_per_window: 64,
14610            relationship_limit: 24,
14611        }
14612    }
14613}
14614
14615fn exploration_node_label(node: &TraversalNode) -> String {
14616    format!("{}:{}", node.kind, node.label)
14617}
14618
14619fn exploration_source_window_for_node(
14620    root: &Path,
14621    node: &TraversalNode,
14622    budget: &ExplorationBudget,
14623) -> Option<ExplorationSourceWindow> {
14624    let file = node.path.as_ref()?;
14625    let anchor = node
14626        .line
14627        .and_then(|line| usize::try_from(line).ok())
14628        .and_then(|line| line.checked_add(1))
14629        .unwrap_or(1);
14630    let context_before = budget.lines_per_window / 3;
14631    let start = anchor.saturating_sub(context_before).max(1);
14632    let end = start
14633        .saturating_add(budget.lines_per_window)
14634        .saturating_sub(1);
14635    let handle = stable_handle("xwin", &format!("{file}:{start}:{end}:{}", node.handle));
14636    Some(ExplorationSourceWindow {
14637        handle,
14638        file: file.clone(),
14639        start,
14640        end,
14641        reason: format!("cluster around {}", exploration_node_label(node)),
14642        expand: source_read_command(root, file, start, budget.lines_per_window),
14643    })
14644}
14645
14646fn build_exploration_packet(
14647    root: &Path,
14648    totals: &TraversalTotals,
14649    selected_nodes: &[TraversalNode],
14650    selected_edges: &[TraversalEdge],
14651) -> ExplorationPacket {
14652    let budget = exploration_budget_for_counts(totals.nodes, totals.edges);
14653    let node_by_handle = selected_nodes
14654        .iter()
14655        .map(|node| (node.handle.as_str(), node))
14656        .collect::<BTreeMap<_, _>>();
14657    let relationship_map = selected_edges
14658        .iter()
14659        .take(budget.relationship_limit)
14660        .filter_map(|edge| {
14661            let from = node_by_handle.get(edge.from.as_str())?;
14662            let to = node_by_handle.get(edge.to.as_str())?;
14663            Some(ExplorationRelation {
14664                from: exploration_node_label(from),
14665                relation: edge.relation.clone(),
14666                to: exploration_node_label(to),
14667                label: edge.label.clone(),
14668            })
14669        })
14670        .collect::<Vec<_>>();
14671
14672    let mut seen_windows = BTreeSet::new();
14673    let mut source_windows = Vec::new();
14674    for node in selected_nodes {
14675        if source_windows.len() >= budget.max_source_windows {
14676            break;
14677        }
14678        let Some(window) = exploration_source_window_for_node(root, node, &budget) else {
14679            continue;
14680        };
14681        let key = (window.file.clone(), window.start, window.end);
14682        if seen_windows.insert(key) {
14683            source_windows.push(window);
14684        }
14685    }
14686
14687    ExplorationPacket {
14688        budget,
14689        relationship_map,
14690        source_windows,
14691        worker_context: Vec::new(),
14692        no_reread_guidance:
14693            "Use the source_windows expand commands for line-numbered context; avoid whole-file reads unless the needed line is outside every listed window."
14694                .to_string(),
14695    }
14696}
14697
14698pub(crate) fn traversal_report(
14699    root: &Path,
14700    scope: Option<&str>,
14701    graph: TraversalGraphBuild,
14702    query: Option<&str>,
14703    target: Option<&str>,
14704    depth: usize,
14705    limit: usize,
14706) -> Result<TraversalReport> {
14707    let totals = TraversalTotals {
14708        nodes: graph.nodes.len(),
14709        edges: graph.edges.len(),
14710    };
14711    let origin_node = query.and_then(|value| resolve_traversal_node(&graph, value));
14712    let target_node = target.and_then(|value| resolve_traversal_node(&graph, value));
14713    if let Some(query) = query
14714        && origin_node.is_none()
14715    {
14716        bail!("traversal node not found: {}", query);
14717    }
14718    if let Some(target) = target
14719        && target_node.is_none()
14720    {
14721        bail!("traversal target not found: {}", target);
14722    }
14723
14724    let (mode, selected_nodes, selected_edges, shortest_path) =
14725        if let (Some(origin), Some(target)) = (origin_node, target_node) {
14726            if let Some(handles) =
14727                traversal_shortest_handles(&graph.edges, &origin.handle, &target.handle)
14728            {
14729                let handle_set = handles.iter().cloned().collect::<BTreeSet<_>>();
14730                let nodes = handles
14731                    .iter()
14732                    .filter_map(|handle| graph.nodes.get(handle).cloned())
14733                    .collect::<Vec<_>>();
14734                let edges = traversal_path_edges(&handles, &graph.edges);
14735                let path = TraversalPathReport {
14736                    from: origin.clone(),
14737                    to: target.clone(),
14738                    hops: handles.len().saturating_sub(1),
14739                    nodes: nodes.clone(),
14740                    edges: edges.clone(),
14741                };
14742                (
14743                    "path".to_string(),
14744                    nodes,
14745                    traversal_edges_between(&handle_set, &graph.edges),
14746                    Some(path),
14747                )
14748            } else {
14749                (
14750                    "path".to_string(),
14751                    vec![origin.clone(), target.clone()],
14752                    Vec::new(),
14753                    None,
14754                )
14755            }
14756        } else if let Some(origin) = origin_node {
14757            let handles =
14758                traversal_neighborhood_handles(&graph.edges, &origin.handle, depth, limit);
14759            let nodes =
14760                sorted_traversal_nodes(handles.iter().filter_map(|handle| graph.nodes.get(handle)));
14761            let edges = traversal_edges_between(&handles, &graph.edges);
14762            ("neighborhood".to_string(), nodes, edges, None)
14763        } else {
14764            let mut nodes = sorted_traversal_nodes(graph.nodes.values());
14765            let truncated_nodes = limit > 0 && nodes.len() > limit;
14766            if truncated_nodes {
14767                nodes.truncate(limit);
14768            }
14769            let handles = nodes
14770                .iter()
14771                .map(|node| node.handle.clone())
14772                .collect::<BTreeSet<_>>();
14773            let mut edges = traversal_edges_between(&handles, &graph.edges);
14774            let truncated_edges = limit > 0 && edges.len() > limit;
14775            if truncated_edges {
14776                edges.truncate(limit);
14777            }
14778            ("export".to_string(), nodes, edges, None)
14779        };
14780
14781    let shortest_handles = shortest_path.as_ref().map(|path| {
14782        path.nodes
14783            .iter()
14784            .map(|node| node.handle.clone())
14785            .collect::<Vec<_>>()
14786    });
14787    let recommendations = traversal_recommendations(
14788        &graph,
14789        origin_node.map(|node| node.handle.as_str()),
14790        shortest_handles.as_deref(),
14791        if limit == 0 { 10 } else { limit.min(10) },
14792    );
14793    let exploration = build_exploration_packet(root, &totals, &selected_nodes, &selected_edges);
14794    let truncated = selected_nodes.len() < totals.nodes || selected_edges.len() < totals.edges;
14795
14796    Ok(TraversalReport {
14797        root: root.to_string_lossy().to_string(),
14798        scope: scope.map(str::to_string),
14799        mode,
14800        totals,
14801        query: query.map(str::to_string),
14802        target: target.map(str::to_string),
14803        nodes: selected_nodes,
14804        edges: selected_edges,
14805        shortest_path,
14806        recommendations,
14807        exploration,
14808        truncated,
14809        warnings: graph.warnings,
14810    })
14811}
14812
/// Returns `input` with `&`, `<`, `>`, `"` and `'` replaced by their HTML entities
/// (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`); all other characters are copied as-is.
fn html_escape(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
14821
14822pub(crate) fn traversal_report_html(report: &TraversalReport) -> Result<String> {
14823    let json = serde_json::to_string(report)?.replace("</", "<\\/");
14824    let mut html = String::new();
14825    html.push_str(
14826        "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift traversal graph</title>",
14827    );
14828    html.push_str(
14829        r#"<style>
14830:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#ffffff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e;--semantic:#9a3412}
14831@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf;--semantic:#fb923c}}
14832*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.toolbar{display:flex;gap:8px;align-items:center}.toolbar input{min-width:220px;border:1px solid var(--line);border-radius:6px;background:var(--panel);color:var(--text);padding:8px 10px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 320px;gap:14px;min-height:650px}.graph-panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.graph-panel{position:relative}.legend{position:absolute;left:12px;top:12px;display:flex;flex-wrap:wrap;gap:6px;max-width:calc(100% - 24px)}.legend span{font-size:12px;background:color-mix(in srgb,var(--panel) 86%,transparent);border:1px solid var(--line);border-radius:999px;padding:4px 8px}.side{padding:14px;overflow:auto}.side h2{font-size:15px;margin:0 0 8px}.selected{border-top:1px solid var(--line);margin-top:12px;padding-top:12px}.list{display:grid;gap:8px}.row{border:1px solid 
var(--line);border-radius:6px;padding:8px;cursor:pointer}.row:hover{border-color:var(--accent)}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted)}svg{width:100%;height:650px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.edge.semantic{stroke:var(--semantic);stroke-width:1.8}.node{stroke:var(--panel);stroke-width:2;cursor:pointer}.node.semantic{stroke:var(--semantic);stroke-width:2.5}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text);pointer-events:none}.hidden{display:none}@media(max-width:900px){.top{display:block}.toolbar{margin-top:12px}.layout{grid-template-columns:1fr}.side{max-height:360px}svg{height:560px}}
14833</style>"#,
14834    );
14835    html.push_str("</head><body>");
14836    html.push_str("<div class=\"page\">");
14837    html.push_str(&format!(
14838        "<header class=\"top\"><div><h1>tsift traversal graph</h1><div class=\"meta\">mode <code>{}</code> | nodes <code>{}</code>/<code>{}</code> | edges <code>{}</code>/<code>{}</code></div></div><div class=\"toolbar\"><input id=\"filter\" type=\"search\" placeholder=\"Filter nodes\"></div></header>",
14839        html_escape(&report.mode),
14840        report.nodes.len(),
14841        report.totals.nodes,
14842        report.edges.len(),
14843        report.totals.edges
14844    ));
14845    html.push_str(
14846        r#"<main class="layout"><section class="graph-panel"><div id="legend" class="legend"></div><svg id="graph-canvas" role="img" aria-label="Traversal graph"></svg></section><aside class="side"><h2>Nodes</h2><div id="node-list" class="list"></div><div id="selected" class="selected"></div></aside></main>"#,
14847    );
14848    html.push_str("<script id=\"graph-data\" type=\"application/json\">");
14849    html.push_str(&json);
14850    html.push_str(
14851        r##"</script><script>
14852const report = JSON.parse(document.getElementById("graph-data").textContent);
14853const svg = document.getElementById("graph-canvas");
14854const list = document.getElementById("node-list");
14855const selected = document.getElementById("selected");
14856const filter = document.getElementById("filter");
14857const legend = document.getElementById("legend");
14858const nodes = report.nodes.map((node, index) => ({...node, index}));
14859const nodeByHandle = new Map(nodes.map(node => [node.handle, node]));
14860const edges = report.edges.filter(edge => nodeByHandle.has(edge.from) && nodeByHandle.has(edge.to));
14861const colorByKind = new Map([
14862  ["file", "#2563eb"], ["symbol", "#16a34a"], ["route", "#7c3aed"],
14863  ["session", "#0891b2"], ["backlog", "#dc2626"], ["job_packet", "#ea580c"],
14864  ["semantic_concept", "#9a3412"], ["semantic_entity", "#b45309"],
14865  ["source_handle", "#64748b"], ["worker_context", "#475569"], ["worker_result", "#15803d"]
14866]);
14867function color(kind){ return colorByKind.get(kind) || "#6b7280"; }
14868function isSemantic(edge){ return edge.relation.includes("concept") || edge.relation.includes("entity") || edge.relation.includes("semantic"); }
14869function text(value){ return value == null ? "" : String(value); }
14870function matches(node, query){
14871  if (!query) return true;
14872  const haystack = [node.kind,node.label,node.handle,node.ref_id,node.path,node.detail].map(text).join(" ").toLowerCase();
14873  return haystack.includes(query);
14874}
14875function layout(){
14876  const rect = svg.getBoundingClientRect();
14877  const width = rect.width || 900;
14878  const height = rect.height || 650;
14879  const cx = width / 2;
14880  const cy = height / 2;
14881  const kinds = [...new Set(nodes.map(node => node.kind))].sort();
14882  const counts = new Map();
14883  for (const node of nodes) counts.set(node.kind, (counts.get(node.kind) || 0) + 1);
14884  const offsets = new Map();
14885  for (const node of nodes) {
14886    const group = kinds.indexOf(node.kind);
14887    const index = offsets.get(node.kind) || 0;
14888    offsets.set(node.kind, index + 1);
14889    const groupCount = counts.get(node.kind) || 1;
14890    const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
14891    const angle = (Math.PI * 2 * index / Math.max(groupCount, 1)) + (group * 0.47);
14892    node.x = cx + Math.cos(angle) * ring;
14893    node.y = cy + Math.sin(angle) * ring;
14894  }
14895}
14896function draw(){
14897  const query = filter.value.trim().toLowerCase();
14898  const visible = new Set(nodes.filter(node => matches(node, query)).map(node => node.handle));
14899  svg.innerHTML = "";
14900  for (const edge of edges) {
14901    if (!visible.has(edge.from) || !visible.has(edge.to)) continue;
14902    const from = nodeByHandle.get(edge.from);
14903    const to = nodeByHandle.get(edge.to);
14904    const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
14905    line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
14906    line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
14907    line.setAttribute("class", "edge" + (isSemantic(edge) ? " semantic" : ""));
14908    line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.relation + (edge.label ? ": " + edge.label : "");
14909    svg.appendChild(line);
14910  }
14911  for (const node of nodes) {
14912    if (!visible.has(node.handle)) continue;
14913    const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
14914    circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
14915    circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
14916    circle.setAttribute("fill", color(node.kind));
14917    circle.setAttribute("class", "node" + (node.kind.startsWith("semantic_") ? " semantic" : ""));
14918    circle.addEventListener("click", () => selectNode(node));
14919    circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
14920    svg.appendChild(circle);
14921    const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
14922    label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
14923    label.setAttribute("class", "node-label");
14924    label.textContent = node.label.length > 34 ? node.label.slice(0, 31) + "..." : node.label;
14925    svg.appendChild(label);
14926  }
14927  renderList(query);
14928}
14929function renderLegend(){
14930  const kinds = [...new Set(nodes.map(node => node.kind))].sort();
14931  legend.innerHTML = kinds.map(kind => `<span><b style="color:${color(kind)}">&#9679;</b> ${kind}</span>`).join("");
14932}
14933function renderList(query){
14934  const rows = nodes.filter(node => matches(node, query)).slice(0, 120);
14935  list.innerHTML = rows.map(node => `<div class="row" data-handle="${node.handle}"><div class="kind">${node.kind}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${node.handle}</div></div>`).join("");
14936  for (const row of list.querySelectorAll(".row")) {
14937    row.addEventListener("click", () => selectNode(nodeByHandle.get(row.dataset.handle)));
14938  }
14939}
14940function selectNode(node){
14941  const adjacent = edges.filter(edge => edge.from === node.handle || edge.to === node.handle).slice(0, 20);
14942  selected.innerHTML = `<h2>${escapeHtml(node.label)}</h2><div class="kind">${node.kind}</div><p class="handle">${node.handle}</p>${node.path ? `<p>${escapeHtml(node.path)}${node.line != null ? ":" + node.line : ""}</p>` : ""}${node.detail ? `<p>${escapeHtml(node.detail)}</p>` : ""}<p><code>${escapeHtml(node.expand)}</code></p><h2>Edges</h2><div class="list">${adjacent.map(edge => `<div class="row"><div class="kind">${edge.relation}</div><div>${escapeHtml(edge.from)} -> ${escapeHtml(edge.to)}</div>${edge.label ? `<div>${escapeHtml(edge.label)}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No visible edges.</div>"}</div>`;
14943}
14944function escapeHtml(value){
14945  return text(value).replace(/[&<>"']/g, ch => ({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;","'":"&#39;"}[ch]));
14946}
14947filter.addEventListener("input", draw);
14948window.addEventListener("resize", () => { layout(); draw(); });
14949renderLegend();
14950layout();
14951draw();
14952if (nodes.length) selectNode(nodes[0]);
14953</script></div></body></html>"##,
14954    );
14955    Ok(html)
14956}
14957
14958fn semantic_related_report_from_store(
14959    root: &Path,
14960    scope: Option<&str>,
14961    query: &str,
14962    limit: usize,
14963    kind: SemanticRelatedKind,
14964    store: &impl GraphStore,
14965) -> Result<SemanticRelatedReport> {
14966    if query.trim().is_empty() {
14967        bail!("semantic query cannot be empty");
14968    }
14969
14970    let query_embedding = semantic_embedding(query);
14971    let node_kinds: &[&str] = match kind {
14972        SemanticRelatedKind::Concept => &["semantic_concept"],
14973        SemanticRelatedKind::Entity => &["semantic_entity"],
14974        SemanticRelatedKind::All => &["semantic_concept", "semantic_entity"],
14975    };
14976
14977    let mut items = Vec::new();
14978    for node_kind in node_kinds {
14979        for node in store.nodes_by_kind(node_kind)? {
14980            let Some(embedding) = node
14981                .properties
14982                .get("embedding")
14983                .and_then(|value| parse_semantic_embedding_property(value))
14984            else {
14985                continue;
14986            };
14987            let score = semantic_cosine(&query_embedding, &embedding);
14988            items.push(SemanticRelatedItem {
14989                handle: node
14990                    .properties
14991                    .get("handle")
14992                    .cloned()
14993                    .unwrap_or_else(|| node.id.clone()),
14994                kind: node.kind,
14995                label: node.label,
14996                score,
14997                file_path: node
14998                    .properties
14999                    .get("source_file")
15000                    .or_else(|| node.properties.get("path"))
15001                    .cloned(),
15002                source_symbol: node.properties.get("source_symbol").cloned(),
15003                detail: node
15004                    .properties
15005                    .get("description")
15006                    .or_else(|| node.properties.get("detail"))
15007                    .cloned(),
15008                expand: node
15009                    .properties
15010                    .get("expand")
15011                    .cloned()
15012                    .unwrap_or_else(|| traversal_expand_command(root, &node.id)),
15013            });
15014        }
15015    }
15016
15017    items.sort_by(|left, right| {
15018        right
15019            .score
15020            .partial_cmp(&left.score)
15021            .unwrap_or(Ordering::Equal)
15022            .then_with(|| left.kind.cmp(&right.kind))
15023            .then_with(|| left.label.cmp(&right.label))
15024            .then_with(|| left.handle.cmp(&right.handle))
15025    });
15026    if limit > 0 && items.len() > limit {
15027        items.truncate(limit);
15028    }
15029
15030    let mut warnings = Vec::new();
15031    if items.is_empty() {
15032        warnings.push(
15033            "no semantic graph rows found; run `tsift summarize --extract <path>` first"
15034                .to_string(),
15035        );
15036    }
15037
15038    Ok(SemanticRelatedReport {
15039        root: root.to_string_lossy().to_string(),
15040        scope: scope.map(str::to_string),
15041        query: query.to_string(),
15042        embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
15043        count: items.len(),
15044        items,
15045        warnings,
15046    })
15047}
15048
15049fn graph_store_semantic_node_count(store: &impl GraphStore) -> Result<usize> {
15050    Ok(store.nodes_by_kind("semantic_concept")?.len()
15051        + store.nodes_by_kind("semantic_entity")?.len())
15052}
15053
15054fn graph_db_semantic_edge_scan_cap(limit: usize) -> usize {
15055    if limit == 0 {
15056        return 0;
15057    }
15058    limit.saturating_mul(4).clamp(
15059        GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP,
15060        GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP,
15061    )
15062}
15063
/// Maximum number of nodes semantic-seeded expansion may discover.
///
/// A `limit` of `0` means unbounded (`usize::MAX`). Otherwise the cap is three times the
/// limit (saturating), but never less than `seed_count`.
fn graph_db_semantic_node_discovery_cap(seed_count: usize, limit: usize) -> usize {
    if limit == 0 {
        return usize::MAX;
    }
    // `limit * 3` (saturating) is always at least `limit`, so only the seed count can
    // raise the cap any further.
    let scaled = limit.saturating_mul(3);
    scaled.max(seed_count)
}
15070
15071fn graph_db_semantic_edge_other_id<'a>(
15072    edge: &'a SubstrateGraphEdge,
15073    current_id: &str,
15074) -> Option<&'a str> {
15075    if edge.from_id == current_id {
15076        Some(edge.to_id.as_str())
15077    } else if edge.to_id == current_id {
15078        Some(edge.from_id.as_str())
15079    } else {
15080        None
15081    }
15082}
15083
15084fn graph_db_semantic_edge_score(edge: &SubstrateGraphEdge, current_id: &str) -> i64 {
15085    let mut score = resolution::edge_kind_rank_score(&edge.kind).saturating_mul(10);
15086    score += if edge.from_id == current_id { 8 } else { 4 };
15087    score += match edge.kind.as_str() {
15088        "mentions_concept" | "mentions_entity" | "tagged_concept" | "tagged_entity"
15089        | "related_concept" => 30,
15090        "semantic_relation" => 28,
15091        "calls" => 24,
15092        "mentions" => 22,
15093        "requests_context" | "scopes_context" | "scopes_source" | "explains_result" => 18,
15094        "defines" | "contains" | "belongs_to" => 12,
15095        _ => 0,
15096    };
15097    score
15098}
15099
15100fn graph_db_semantic_seeded_neighborhood(
15101    store: &impl GraphStore,
15102    seed_ids: &[String],
15103    depth: usize,
15104    limit: usize,
15105) -> Result<GraphDbSemanticSeededSubgraph> {
15106    let seed_rank = seed_ids
15107        .iter()
15108        .enumerate()
15109        .map(|(idx, seed)| (seed.clone(), idx))
15110        .collect::<BTreeMap<_, _>>();
15111    let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
15112    let mut edges = BTreeMap::<String, SubstrateGraphEdge>::new();
15113    let mut node_score_by_id = BTreeMap::<String, i64>::new();
15114    let mut queue = VecDeque::<(String, usize)>::new();
15115    let mut seen_at_depth = BTreeMap::<String, usize>::new();
15116    let edge_scan_cap = graph_db_semantic_edge_scan_cap(limit);
15117    let node_discovery_cap = graph_db_semantic_node_discovery_cap(seed_ids.len(), limit);
15118    let mut skipped_by_edge_cap = 0usize;
15119    let mut skipped_by_node_cap = 0usize;
15120    let mut diagnostics = vec![
15121        "semantic-seeded retrieval uses phrase similarity to pick graph seeds".to_string(),
15122        "seed expansion traverses both outgoing and incident edges so code, markdown, conversation, and memory adapters can link into semantic rows without reversing their edge direction".to_string(),
15123        format!(
15124            "seed expansion ranks incident/outgoing edges before caps; per-node edge scan cap={} node discovery cap={}",
15125            if edge_scan_cap == 0 {
15126                "unbounded".to_string()
15127            } else {
15128                edge_scan_cap.to_string()
15129            },
15130            if node_discovery_cap == usize::MAX {
15131                "unbounded".to_string()
15132            } else {
15133                node_discovery_cap.to_string()
15134            }
15135        ),
15136    ];
15137
15138    for (idx, seed_id) in seed_ids.iter().enumerate() {
15139        if let Some(node) = store.node(seed_id)? {
15140            nodes.entry(seed_id.clone()).or_insert(node);
15141            node_score_by_id
15142                .entry(seed_id.clone())
15143                .or_insert(1_000_000i64.saturating_sub(idx as i64));
15144            queue.push_back((seed_id.clone(), 0));
15145            seen_at_depth.entry(seed_id.clone()).or_insert(0);
15146        } else {
15147            diagnostics.push(format!(
15148                "semantic seed {seed_id} was not present in the graph store"
15149            ));
15150        }
15151    }
15152
15153    while let Some((current_id, current_depth)) = queue.pop_front() {
15154        if current_depth >= depth {
15155            continue;
15156        }
15157
15158        let mut expansion_edges_by_key = BTreeMap::<String, SubstrateGraphEdge>::new();
15159        for edge in store.outgoing_edges(&current_id, None)? {
15160            expansion_edges_by_key
15161                .entry(graph_db_edge_key(&edge))
15162                .or_insert(edge);
15163        }
15164        for edge in store.incident_edges(&current_id, None)? {
15165            expansion_edges_by_key
15166                .entry(graph_db_edge_key(&edge))
15167                .or_insert(edge);
15168        }
15169        let mut expansion_edges = expansion_edges_by_key.into_values().collect::<Vec<_>>();
15170        expansion_edges.sort_by(|left, right| {
15171            graph_db_semantic_edge_score(right, &current_id)
15172                .cmp(&graph_db_semantic_edge_score(left, &current_id))
15173                .then_with(|| graph_db_edge_key(left).cmp(&graph_db_edge_key(right)))
15174        });
15175        if edge_scan_cap > 0 && expansion_edges.len() > edge_scan_cap {
15176            skipped_by_edge_cap += expansion_edges.len() - edge_scan_cap;
15177            expansion_edges.truncate(edge_scan_cap);
15178        }
15179
15180        for edge in expansion_edges {
15181            let Some(other_id) = graph_db_semantic_edge_other_id(&edge, &current_id) else {
15182                continue;
15183            };
15184            let other_known = nodes.contains_key(other_id);
15185            if !other_known && nodes.len() >= node_discovery_cap {
15186                skipped_by_node_cap += 1;
15187                continue;
15188            }
15189            let other_id = other_id.to_string();
15190            let edge_score = graph_db_semantic_edge_score(&edge, &current_id)
15191                .saturating_add((depth.saturating_sub(current_depth) as i64).saturating_mul(5));
15192            node_score_by_id
15193                .entry(other_id.clone())
15194                .and_modify(|score| *score = (*score).max(edge_score))
15195                .or_insert(edge_score);
15196            let edge_key = graph_db_edge_key(&edge);
15197            edges.entry(edge_key).or_insert_with(|| edge.clone());
15198            if let std::collections::btree_map::Entry::Vacant(entry) = nodes.entry(other_id.clone())
15199                && let Some(node) = store.node(&other_id)?
15200            {
15201                entry.insert(node);
15202            }
15203            if !nodes.contains_key(&other_id) {
15204                continue;
15205            }
15206            let next_depth = current_depth + 1;
15207            let should_queue = seen_at_depth
15208                .get(&other_id)
15209                .is_none_or(|seen_depth| next_depth < *seen_depth);
15210            if should_queue {
15211                seen_at_depth.insert(other_id.clone(), next_depth);
15212                queue.push_back((other_id, next_depth));
15213            }
15214        }
15215    }
15216
15217    if skipped_by_edge_cap > 0 {
15218        diagnostics.push(format!(
15219            "semantic-seeded expansion skipped {skipped_by_edge_cap} lower-scoring incident/outgoing edge(s) after per-node caps"
15220        ));
15221    }
15222    if skipped_by_node_cap > 0 {
15223        diagnostics.push(format!(
15224            "semantic-seeded expansion skipped {skipped_by_node_cap} lower-scoring node discovery edge(s) after the discovery cap"
15225        ));
15226    }
15227
15228    let mut nodes = nodes.into_values().collect::<Vec<_>>();
15229    nodes.sort_by(|left, right| {
15230        seed_rank
15231            .get(&left.id)
15232            .copied()
15233            .unwrap_or(usize::MAX)
15234            .cmp(&seed_rank.get(&right.id).copied().unwrap_or(usize::MAX))
15235            .then_with(|| {
15236                node_score_by_id
15237                    .get(&right.id)
15238                    .copied()
15239                    .unwrap_or_default()
15240                    .cmp(&node_score_by_id.get(&left.id).copied().unwrap_or_default())
15241            })
15242            .then(left.id.cmp(&right.id))
15243    });
15244
15245    let before_limit = nodes.len();
15246    let truncated = limit > 0 && nodes.len() > limit;
15247    if truncated {
15248        nodes.truncate(limit);
15249        diagnostics.push(format!(
15250            "semantic-seeded neighborhood truncated from {before_limit} to {limit} node(s)"
15251        ));
15252    }
15253
15254    let node_ids = nodes
15255        .iter()
15256        .map(|node| node.id.as_str())
15257        .collect::<BTreeSet<_>>();
15258    let mut edges = edges
15259        .into_values()
15260        .filter(|edge| {
15261            node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
15262        })
15263        .collect::<Vec<_>>();
15264    edges.sort_by_key(graph_db_edge_key);
15265
15266    Ok(GraphDbSemanticSeededSubgraph {
15267        nodes,
15268        edges,
15269        truncated,
15270        diagnostics,
15271    })
15272}
15273
15274#[allow(clippy::too_many_arguments)]
15275fn cmd_semantic_related(
15276    query: &str,
15277    path: &Path,
15278    scope: Option<&str>,
15279    limit: usize,
15280    kind: SemanticRelatedKind,
15281    json_output: bool,
15282    compact: bool,
15283    pretty: bool,
15284    terse: bool,
15285    schema: bool,
15286) -> Result<()> {
15287    let root = lint::resolve_project_root_or_canonical_path(path)?;
15288    write_traversal_graph_store(&root, path, scope)?;
15289    let graph_db = graph_substrate_db_path(&root, scope);
15290    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
15291    let mut report = semantic_related_report_from_store(&root, scope, query, limit, kind, &store)?;
15292    if let Some(recovery) = store.read_only_recovery() {
15293        report
15294            .warnings
15295            .push(graph_db_read_recovery_diagnostic(recovery));
15296    }
15297
15298    if json_output {
15299        println!("{}", to_json_schema(&report, pretty, terse, false, schema)?);
15300    } else if compact {
15301        for item in &report.items {
15302            println!(
15303                "{:.3}\t{}\t{}\t{}",
15304                item.score, item.kind, item.label, item.handle
15305            );
15306        }
15307        for warning in &report.warnings {
15308            eprintln!("warning: {warning}");
15309        }
15310    } else {
15311        println!(
15312            "Related semantic graph rows for {:?} ({})",
15313            report.query, report.embedding_model
15314        );
15315        for item in &report.items {
15316            println!(
15317                "  {:.3} [{}] {} ({})",
15318                item.score, item.kind, item.label, item.handle
15319            );
15320            if let Some(detail) = &item.detail {
15321                println!("      {}", detail);
15322            }
15323            if let Some(file_path) = &item.file_path {
15324                println!("      file: {}", file_path);
15325            }
15326            println!("      expand: {}", item.expand);
15327        }
15328        for warning in &report.warnings {
15329            eprintln!("warning: {warning}");
15330        }
15331    }
15332
15333    Ok(())
15334}
15335
/// One numbered line of source text; `SourceReadReport::preview` holds a list of these.
#[derive(Serialize)]
struct SourceLinePreview {
    /// Line number of `text`. NOTE(review): presumably 1-based — confirm with the producer.
    line: usize,
    /// Content of that line.
    text: String,
}
15341
/// Serializable description of a line range within a file (used as
/// `SourceReadReport::range`).
///
/// NOTE(review): the field meanings below are inferred from the names; the code that
/// fills them in is not visible here — confirm against the producer.
#[derive(Serialize)]
pub(crate) struct SourceRangePreview {
    /// First line of the range (presumably inclusive).
    start: usize,
    /// Last line of the range (presumably inclusive).
    end: usize,
    /// Presumably the total number of lines in the file the range was taken from.
    total_lines: usize,
    /// Presumably set when file content exists before `start` that is not shown.
    truncated_before: bool,
    /// Presumably set when file content exists after `end` that is not shown.
    truncated_after: bool,
}
15350
/// Follow-up command strings attached to a source read (`SourceReadReport::expand`).
///
/// Every optional command is left out of the serialized output when it is `None`;
/// `file` is always present.
///
/// NOTE(review): what each command does is inferred from its name — confirm against the
/// code that builds this struct.
#[derive(Serialize)]
struct SourceExpandCommands {
    /// Presumably a command showing the lines before the current range.
    #[serde(skip_serializing_if = "Option::is_none")]
    before: Option<String>,
    /// Presumably a command showing the lines after the current range.
    #[serde(skip_serializing_if = "Option::is_none")]
    after: Option<String>,
    /// Presumably a command showing the full body of the enclosing symbol.
    #[serde(skip_serializing_if = "Option::is_none")]
    body: Option<String>,
    /// Presumably a command reading the whole file.
    file: String,
    /// Presumably a command producing a Markdown AST view; only set for some files.
    #[serde(skip_serializing_if = "Option::is_none")]
    markdown_ast: Option<String>,
}
15363
/// Serializable reference to a symbol, listed in `SourceReadReport::symbols`.
///
/// Optional fields are omitted from the serialized output when they are `None`.
#[derive(Serialize)]
struct SourceSymbolRef {
    /// Handle string identifying the symbol.
    handle: String,
    /// Symbol name.
    name: String,
    /// Symbol kind as a string (the set of possible values is not visible here).
    kind: String,
    /// Language of the symbol as a string.
    language: String,
    /// File the symbol belongs to.
    file: String,
    /// Line of the symbol. NOTE(review): presumably its first line, 1-based — confirm.
    line: usize,
    /// Presumably the last line of the symbol, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    end_line: Option<usize>,
    /// Symbol signature text, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    signature: Option<String>,
    /// AST span preview for the symbol, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    span: Option<AstSpanPreview>,
    /// Command string for expanding this symbol.
    expand: String,
}
15380
15381#[derive(Serialize)]
15382struct SourceSummaryRef {
15383    handle: String,
15384    symbol_name: String,
15385    file_path: String,
15386    summary: String,
15387    expand: String,
15388}
15389
15390#[derive(Serialize)]
15391struct SourceReadReport {
15392    handle: String,
15393    root: String,
15394    file: String,
15395    range: SourceRangePreview,
15396    preview: Vec<SourceLinePreview>,
15397    symbols: Vec<SourceSymbolRef>,
15398    summaries: Vec<SourceSummaryRef>,
15399    #[serde(skip_serializing_if = "Option::is_none")]
15400    markdown: Option<SourceReadMarkdownProjection>,
15401    expand: SourceExpandCommands,
15402    #[serde(skip_serializing_if = "Vec::is_empty", default)]
15403    warnings: Vec<String>,
15404}
15405
15406#[derive(Serialize)]
15407struct SymbolReadTarget {
15408    handle: String,
15409    name: String,
15410    kind: String,
15411    language: String,
15412    file: String,
15413    line: usize,
15414    #[serde(skip_serializing_if = "Option::is_none")]
15415    end_line: Option<usize>,
15416    #[serde(skip_serializing_if = "Option::is_none")]
15417    signature: Option<String>,
15418    #[serde(skip_serializing_if = "Option::is_none")]
15419    parent_module: Option<String>,
15420    #[serde(skip_serializing_if = "Option::is_none")]
15421    visibility: Option<String>,
15422    #[serde(skip_serializing_if = "Option::is_none")]
15423    span: Option<AstSpanPreview>,
15424}
15425
15426#[derive(Serialize)]
15427struct SymbolReadExpandCommands {
15428    source_window: String,
15429    #[serde(skip_serializing_if = "Option::is_none")]
15430    body: Option<String>,
15431    file: String,
15432    explain: String,
15433    callers: String,
15434    callees: String,
15435    #[serde(skip_serializing_if = "Option::is_none")]
15436    markdown_ast: Option<String>,
15437}
15438
15439#[derive(Serialize)]
15440struct SymbolReadReport {
15441    handle: String,
15442    root: String,
15443    query: String,
15444    symbol: SymbolReadTarget,
15445    range: SourceRangePreview,
15446    body: Vec<SourceLinePreview>,
15447    child_symbols: Vec<SourceSymbolRef>,
15448    summaries: Vec<SourceSummaryRef>,
15449    expand: SymbolReadExpandCommands,
15450    #[serde(skip_serializing_if = "Vec::is_empty", default)]
15451    warnings: Vec<String>,
15452}
15453
/// One Markdown node as produced by `markdown_ast_extract_raw_nodes`: its two
/// handles, naming/kind strings and byte offsets into the source buffer.
///
/// The body offsets are optional because the extracted symbol may not carry
/// them.
#[derive(Clone)]
pub(crate) struct MarkdownAstRawNode {
    handle: String,
    span_handle: String,
    name: String,
    kind: String,
    block_kind: String,
    node_kind: String,
    start_byte: usize,
    end_byte: usize,
    body_start_byte: Option<usize>,
    body_end_byte: Option<usize>,
}
15467
15468#[derive(Clone)]
15469pub(crate) struct MarkdownAstProjection {
15470    source_hash: String,
15471    nodes: Vec<MarkdownAstRawNode>,
15472    parse_duration_micros: u128,
15473    cache_hit: bool,
15474}
15475
15476#[derive(Clone)]
15477struct MarkdownAstCacheEntry {
15478    source_hash: String,
15479    nodes: Vec<MarkdownAstRawNode>,
15480    parse_duration_micros: u128,
15481}
15482
15483static MARKDOWN_AST_CACHE: OnceLock<Mutex<HashMap<String, MarkdownAstCacheEntry>>> =
15484    OnceLock::new();
15485
15486#[derive(Serialize, Clone)]
15487struct MarkdownAstNodeMetadata {
15488    #[serde(skip_serializing_if = "Option::is_none")]
15489    heading_level: Option<usize>,
15490    #[serde(skip_serializing_if = "Vec::is_empty", default)]
15491    section_path: Vec<String>,
15492    #[serde(skip_serializing_if = "Option::is_none")]
15493    section_handle: Option<String>,
15494    #[serde(skip_serializing_if = "Option::is_none")]
15495    list_depth: Option<usize>,
15496    #[serde(skip_serializing_if = "Option::is_none")]
15497    list_marker: Option<String>,
15498    #[serde(skip_serializing_if = "Option::is_none")]
15499    list_order: Option<usize>,
15500    #[serde(skip_serializing_if = "Option::is_none")]
15501    fence_language: Option<String>,
15502    #[serde(skip_serializing_if = "Option::is_none")]
15503    fence_marker: Option<String>,
15504    #[serde(skip_serializing_if = "Vec::is_empty", default)]
15505    embedded_symbols: Vec<MarkdownEmbeddedSymbol>,
15506}
15507
15508#[derive(Serialize, Clone)]
15509struct MarkdownAstNodeExpand {
15510    source_window: String,
15511    source_body: String,
15512    symbol_read: String,
15513    edit_intents: String,
15514}
15515
15516#[derive(Serialize, Clone)]
15517struct MarkdownAstCacheReport {
15518    source_hash: String,
15519    cache_hit: bool,
15520    parse_duration_micros: u128,
15521    node_count: usize,
15522    section_count: usize,
15523    list_item_count: usize,
15524    code_block_count: usize,
15525}
15526
15527#[derive(Serialize, Clone)]
15528struct MarkdownAstPhaseTiming {
15529    name: String,
15530    duration_micros: u128,
15531    detail: String,
15532}
15533
15534#[derive(Serialize, Clone)]
15535struct MarkdownAstOutlineEntry {
15536    handle: String,
15537    span_handle: String,
15538    name: String,
15539    kind: String,
15540    block_kind: String,
15541    line: usize,
15542    end_line: usize,
15543    #[serde(skip_serializing_if = "Vec::is_empty", default)]
15544    section_path: Vec<String>,
15545    child_count: usize,
15546    expand: String,
15547}
15548
15549#[derive(Serialize, Clone)]
15550struct MarkdownAstProjectionPreview {
15551    mode: String,
15552    total_nodes: usize,
15553    returned_nodes: usize,
15554    omitted_nodes: usize,
15555    selected_node: Option<String>,
15556    cache: MarkdownAstCacheReport,
15557    outline: Vec<MarkdownAstOutlineEntry>,
15558    phase_timings: Vec<MarkdownAstPhaseTiming>,
15559}
15560
15561#[derive(Serialize)]
15562struct SourceReadMarkdownProjection {
15563    handle: String,
15564    mode: String,
15565    total_nodes: usize,
15566    visible_nodes: usize,
15567    outline: Vec<MarkdownAstOutlineEntry>,
15568    expand: String,
15569}
15570
15571#[derive(Serialize, Clone)]
15572struct SourceByteRangePreview {
15573    start: usize,
15574    end: usize,
15575}
15576
15577#[derive(Serialize, Clone)]
15578struct MarkdownAstNode {
15579    handle: String,
15580    span_handle: String,
15581    name: String,
15582    kind: String,
15583    block_kind: String,
15584    node_kind: String,
15585    line: usize,
15586    end_line: usize,
15587    byte_span: SourceByteRangePreview,
15588    #[serde(skip_serializing_if = "Option::is_none")]
15589    body_byte_span: Option<SourceByteRangePreview>,
15590    parent_handle: Option<String>,
15591    #[serde(skip_serializing_if = "Vec::is_empty", default)]
15592    child_handles: Vec<String>,
15593    metadata: MarkdownAstNodeMetadata,
15594    expand: MarkdownAstNodeExpand,
15595}
15596
15597#[derive(Serialize)]
15598struct MarkdownAstExpandCommands {
15599    file: String,
15600    source_read: String,
15601    edit_intents: String,
15602}
15603
15604#[derive(Serialize)]
15605struct MarkdownAstReport {
15606    handle: String,
15607    root: String,
15608    file: String,
15609    range: SourceRangePreview,
15610    projection: MarkdownAstProjectionPreview,
15611    nodes: Vec<MarkdownAstNode>,
15612    expand: MarkdownAstExpandCommands,
15613    #[serde(skip_serializing_if = "Vec::is_empty", default)]
15614    warnings: Vec<String>,
15615}
15616
15617pub(crate) fn resolve_source_file(root: &Path, file: &Path) -> Result<PathBuf> {
15618    let candidate = if file.is_absolute() {
15619        file.to_path_buf()
15620    } else {
15621        root.join(file)
15622    };
15623    let canonical = candidate
15624        .canonicalize()
15625        .with_context(|| format!("canonicalizing source file {}", candidate.display()))?;
15626    if !canonical.is_file() {
15627        bail!("source file is not a regular file: {}", canonical.display());
15628    }
15629    let canonical_root = root
15630        .canonicalize()
15631        .with_context(|| format!("canonicalizing project root {}", root.display()))?;
15632    if !canonical.starts_with(&canonical_root) {
15633        bail!(
15634            "source file {} is outside project root {}",
15635            canonical.display(),
15636            canonical_root.display()
15637        );
15638    }
15639    Ok(canonical)
15640}
15641
15642pub(crate) fn source_read_command(root: &Path, file: &str, start: usize, lines: usize) -> String {
15643    format!(
15644        "tsift source-read {} --path {} --start {} --lines {} --budget normal",
15645        shell_quote(file),
15646        shell_quote(&root.to_string_lossy()),
15647        start,
15648        lines
15649    )
15650}
15651
15652pub(crate) fn source_symbol_read_command(root: &Path, symbol: &str, file: &str) -> String {
15653    format!(
15654        "tsift --envelope symbol-read {} --path {} --file {} --budget normal",
15655        shell_quote(symbol),
15656        shell_quote(&root.to_string_lossy()),
15657        shell_quote(file)
15658    )
15659}
15660
15661fn source_symbol_expand_command(root: &Path, symbol: &str) -> String {
15662    format!(
15663        "tsift --envelope explain {} --path {} --budget normal",
15664        shell_quote(symbol),
15665        shell_quote(&root.to_string_lossy())
15666    )
15667}
15668
15669fn source_symbol_graph_command(root: &Path, symbol: &str, relation: &str) -> String {
15670    format!(
15671        "tsift graph {} --path {} --{} --json",
15672        shell_quote(symbol),
15673        shell_quote(&root.to_string_lossy()),
15674        relation
15675    )
15676}
15677
15678fn source_summary_expand_command(root: &Path, symbol: &str) -> String {
15679    format!(
15680        "tsift summarize {} --path {} --json",
15681        shell_quote(symbol),
15682        shell_quote(&root.to_string_lossy())
15683    )
15684}
15685
15686pub(crate) fn markdown_ast_command(root: &Path, file: &str, node: Option<&str>) -> String {
15687    let mut command = format!(
15688        "tsift --envelope markdown-ast {} --path {} --budget normal",
15689        shell_quote(file),
15690        shell_quote(&root.to_string_lossy())
15691    );
15692    if let Some(node) = node {
15693        command.push_str(" --node ");
15694        command.push_str(&shell_quote(node));
15695    }
15696    command
15697}
15698
15699fn markdown_edit_intents_command(root: &Path) -> String {
15700    format!(
15701        "tsift --envelope edit-intents --path {} --budget normal",
15702        shell_quote(&root.to_string_lossy())
15703    )
15704}
15705
15706pub(crate) fn source_symbol_line(symbol: &index::StoredSymbol) -> usize {
15707    usize::try_from(symbol.line)
15708        .ok()
15709        .and_then(|line| line.checked_add(1))
15710        .unwrap_or(1)
15711}
15712
15713fn source_symbol_end_line(symbol: &index::StoredSymbol) -> Option<usize> {
15714    symbol
15715        .end_line
15716        .and_then(|line| usize::try_from(line).ok())
15717        .and_then(|line| line.checked_add(1))
15718}
15719
/// Converts an optional signed byte offset into `usize`.
///
/// Returns `None` when the input is `None` or does not fit in `usize`
/// (negative values included).
fn symbol_span_byte(value: Option<i64>) -> Option<usize> {
    let raw = value?;
    usize::try_from(raw).ok()
}
15723
/// Returns the 1-based line number containing byte offset `byte`.
///
/// The result is one plus the number of `\n` bytes strictly before the offset.
/// Offsets past the end of `source` are clamped to its length.
fn source_line_for_byte(source: &[u8], byte: usize) -> usize {
    let prefix_len = byte.min(source.len());
    let mut line = 1usize;
    for &value in &source[..prefix_len] {
        if value == b'\n' {
            line = line.saturating_add(1);
        }
    }
    line
}
15732
15733fn source_line_for_end_byte(source: &[u8], end_byte: usize) -> usize {
15734    source_line_for_byte(source, end_byte.saturating_sub(1))
15735}
15736
15737fn ast_span_handle(
15738    file: &str,
15739    name: &str,
15740    kind: &str,
15741    start_byte: usize,
15742    end_byte: usize,
15743) -> String {
15744    stable_handle(
15745        "span",
15746        &format!("{file}:{kind}:{name}:{start_byte}:{end_byte}"),
15747    )
15748}
15749
15750pub(crate) fn stored_symbol_span_bounds(symbol: &index::StoredSymbol) -> Option<(usize, usize)> {
15751    Some((
15752        symbol_span_byte(symbol.start_byte)?,
15753        symbol_span_byte(symbol.end_byte)?,
15754    ))
15755}
15756
15757pub(crate) fn symbol_hit_span_bounds(symbol: &index::SymbolHit) -> Option<(usize, usize)> {
15758    Some((
15759        symbol_span_byte(symbol.start_byte)?,
15760        symbol_span_byte(symbol.end_byte)?,
15761    ))
15762}
15763
15764pub(crate) fn stored_symbol_span_handle(symbol: &index::StoredSymbol) -> Option<String> {
15765    let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
15766    Some(ast_span_handle(
15767        &symbol.file,
15768        &symbol.name,
15769        &symbol.kind,
15770        start_byte,
15771        end_byte,
15772    ))
15773}
15774
15775fn same_stored_symbol_span(left: &index::StoredSymbol, right: &index::StoredSymbol) -> bool {
15776    left.file == right.file
15777        && left.name == right.name
15778        && left.kind == right.kind
15779        && stored_symbol_span_bounds(left) == stored_symbol_span_bounds(right)
15780}
15781
15782fn stored_symbol_parent_span_handle(
15783    symbol: &index::StoredSymbol,
15784    symbols: &[index::StoredSymbol],
15785) -> Option<String> {
15786    let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
15787    symbols
15788        .iter()
15789        .filter(|candidate| {
15790            if candidate.file != symbol.file || same_stored_symbol_span(candidate, symbol) {
15791                return false;
15792            }
15793            let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
15794            else {
15795                return false;
15796            };
15797            candidate_start <= start_byte && candidate_end >= end_byte
15798        })
15799        .min_by_key(|candidate| {
15800            stored_symbol_span_bounds(candidate)
15801                .map(|(start, end)| end.saturating_sub(start))
15802                .unwrap_or(usize::MAX)
15803        })
15804        .and_then(stored_symbol_span_handle)
15805}
15806
15807fn stored_symbol_child_span_handles(
15808    symbol: &index::StoredSymbol,
15809    symbols: &[index::StoredSymbol],
15810    limit: usize,
15811) -> Vec<String> {
15812    let Some((start_byte, end_byte)) = stored_symbol_span_bounds(symbol) else {
15813        return Vec::new();
15814    };
15815    symbols
15816        .iter()
15817        .filter(|candidate| {
15818            if candidate.file != symbol.file || same_stored_symbol_span(candidate, symbol) {
15819                return false;
15820            }
15821            let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
15822            else {
15823                return false;
15824            };
15825            candidate_start >= start_byte && candidate_end <= end_byte
15826        })
15827        .take(limit)
15828        .filter_map(stored_symbol_span_handle)
15829        .collect()
15830}
15831
/// Counts the leading `#` characters of the text starting at `start_byte`.
///
/// Only the text from `start_byte` up to the next `\n` is inspected, after
/// trimming leading whitespace. Returns the count when it is between 1 and 6,
/// otherwise `None`. Text that is not valid UTF-8 is treated as empty.
fn markdown_heading_level(source: &[u8], start_byte: usize) -> Option<usize> {
    let tail = &source[start_byte.min(source.len())..];
    let first_line = tail
        .split(|value| *value == b'\n')
        .next()
        .unwrap_or(tail);
    let text = std::str::from_utf8(first_line).unwrap_or("");
    let hashes = text
        .trim_start()
        .chars()
        .take_while(|ch| *ch == '#')
        .count();
    match hashes {
        1..=6 => Some(hashes),
        _ => None,
    }
}
15844
/// Estimates list nesting depth from the bytes between the start of the line
/// and `start_byte`.
///
/// Each space counts as 1 and each tab as 4; any other byte counts as 0. The
/// summed width is divided by two (integer division).
fn markdown_list_depth(source: &[u8], start_byte: usize) -> usize {
    let cursor = start_byte.min(source.len());
    let line_start = match source[..cursor].iter().rposition(|value| *value == b'\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    let mut width = 0usize;
    for byte in &source[line_start..cursor] {
        width += match byte {
            b'\t' => 4,
            b' ' => 1,
            _ => 0,
        };
    }
    width / 2
}
15862
15863fn markdown_enclosing_heading_symbols<'a>(
15864    file: &str,
15865    start_byte: usize,
15866    end_byte: usize,
15867    symbols: &'a [index::StoredSymbol],
15868) -> Vec<&'a index::StoredSymbol> {
15869    let mut headings = symbols
15870        .iter()
15871        .filter(|candidate| candidate.file == file && candidate.kind == "heading")
15872        .filter(|candidate| {
15873            let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
15874            else {
15875                return false;
15876            };
15877            candidate_start <= start_byte && candidate_end >= end_byte
15878        })
15879        .collect::<Vec<_>>();
15880    headings.sort_by(|left, right| {
15881        stored_symbol_span_bounds(left)
15882            .map(|(start, _)| start)
15883            .unwrap_or(usize::MAX)
15884            .cmp(
15885                &stored_symbol_span_bounds(right)
15886                    .map(|(start, _)| start)
15887                    .unwrap_or(usize::MAX),
15888            )
15889            .then(left.name.cmp(&right.name))
15890    });
15891    headings
15892}
15893
15894fn markdown_stored_symbol_metadata(
15895    symbol: &index::StoredSymbol,
15896    source: &[u8],
15897    symbols: &[index::StoredSymbol],
15898) -> Option<MarkdownSpanMetadata> {
15899    if symbol.language != "markdown" {
15900        return None;
15901    }
15902    let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
15903    let section_symbols =
15904        markdown_enclosing_heading_symbols(&symbol.file, start_byte, end_byte, symbols);
15905    let section_path = section_symbols
15906        .iter()
15907        .map(|heading| heading.name.clone())
15908        .collect::<Vec<_>>();
15909    let section_handle = section_symbols
15910        .last()
15911        .and_then(|heading| stored_symbol_span_handle(heading));
15912    let heading_level = (symbol.kind == "heading")
15913        .then(|| markdown_heading_level(source, start_byte))
15914        .flatten();
15915    let list_depth = (symbol.kind == "list_item").then(|| markdown_list_depth(source, start_byte));
15916    let fence_language = (symbol.kind == "code_block").then(|| symbol.name.clone());
15917    let embedded_symbols = if symbol.kind == "code_block" {
15918        markdown_embedded_symbols(
15919            &symbol.file,
15920            source,
15921            symbol_span_byte(symbol.body_start_byte),
15922            symbol_span_byte(symbol.body_end_byte),
15923            fence_language.as_deref(),
15924        )
15925    } else {
15926        Vec::new()
15927    };
15928
15929    (heading_level.is_some()
15930        || !section_path.is_empty()
15931        || section_handle.is_some()
15932        || list_depth.is_some()
15933        || fence_language.is_some()
15934        || !embedded_symbols.is_empty())
15935    .then_some(MarkdownSpanMetadata {
15936        heading_level,
15937        section_path,
15938        section_handle,
15939        list_depth,
15940        fence_language,
15941        embedded_symbols,
15942    })
15943}
15944
15945fn markdown_symbol_hit_metadata(
15946    symbol: &index::SymbolHit,
15947    source: &[u8],
15948    start_byte: usize,
15949) -> Option<MarkdownSpanMetadata> {
15950    if symbol.language != "markdown" {
15951        return None;
15952    }
15953    let heading_level = (symbol.kind == "heading")
15954        .then(|| markdown_heading_level(source, start_byte))
15955        .flatten();
15956    let list_depth = (symbol.kind == "list_item").then(|| markdown_list_depth(source, start_byte));
15957    let fence_language = (symbol.kind == "code_block").then(|| symbol.name.clone());
15958    let embedded_symbols = if symbol.kind == "code_block" {
15959        markdown_embedded_symbols(
15960            &symbol.file,
15961            source,
15962            symbol_span_byte(symbol.body_start_byte),
15963            symbol_span_byte(symbol.body_end_byte),
15964            fence_language.as_deref(),
15965        )
15966    } else {
15967        Vec::new()
15968    };
15969    (heading_level.is_some()
15970        || list_depth.is_some()
15971        || fence_language.is_some()
15972        || !embedded_symbols.is_empty())
15973    .then_some(MarkdownSpanMetadata {
15974        heading_level,
15975        section_path: Vec::new(),
15976        section_handle: None,
15977        list_depth,
15978        fence_language,
15979        embedded_symbols,
15980    })
15981}
15982
/// Reports whether `path` has an `md` or `mdx` extension, compared
/// ASCII-case-insensitively.
///
/// Paths without an extension, or whose extension is not valid UTF-8, yield
/// `false`.
fn is_markdown_path(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    extension.eq_ignore_ascii_case("md") || extension.eq_ignore_ascii_case("mdx")
}
15989
/// Maps a symbol kind to the block-kind label used for Markdown AST nodes.
///
/// `"heading"` becomes `"section"` and `"code_block"` becomes
/// `"fenced_code_block"`; every other kind (including `"list_item"`) is
/// returned unchanged.
fn markdown_ast_block_kind(kind: &str) -> String {
    let label = if kind == "heading" {
        "section"
    } else if kind == "code_block" {
        "fenced_code_block"
    } else {
        kind
    };
    label.to_string()
}
15999
/// Normalises a fence info string into a lowercase language key.
///
/// Only the first token is considered, where tokens are separated by
/// whitespace *or commas*, so rustdoc-style info strings such as
/// `rust,no_run` resolve to `rust`. Surrounding backticks and quotes are
/// removed, the token is lowercased, and leading `language-` / `lang-`
/// prefixes are stripped (case-insensitively, because lowercasing happens
/// first).
///
/// Returns `None` when nothing is left after normalisation.
fn markdown_embedded_language_key(language: &str) -> Option<String> {
    let is_quote = |ch: char| matches!(ch, '`' | '"' | '\'');
    let token = language
        .split(|ch: char| ch.is_whitespace() || ch == ',')
        .find(|part| !part.is_empty())?;
    // Lowercase before prefix stripping so `Language-Rust` is handled too.
    let lowered = token.trim_matches(is_quote).to_ascii_lowercase();
    let key = lowered
        .trim_start_matches("language-")
        .trim_start_matches("lang-")
        // Quotes may also sit behind the prefix, e.g. `language-'rust'`.
        .trim_matches(is_quote);
    (!key.is_empty()).then(|| key.to_string())
}
16012
16013fn markdown_embedded_lang(language: &str) -> Option<graph::Lang> {
16014    let key = markdown_embedded_language_key(language)?;
16015    let extension = match key.as_str() {
16016        "rust" => "rs",
16017        "python" => "py",
16018        "typescript" => "ts",
16019        "javascript" => "js",
16020        "kotlin" => "kt",
16021        "shell" | "sh" | "zsh" => "bash",
16022        other => other,
16023    };
16024    let lang = graph::Lang::from_extension(extension)?;
16025    (lang.name() != "markdown").then_some(lang)
16026}
16027
16028fn markdown_embedded_ast_span_handle(
16029    file: &str,
16030    language: &str,
16031    name: &str,
16032    kind: &str,
16033    start_byte: usize,
16034    end_byte: usize,
16035) -> String {
16036    stable_handle(
16037        "span",
16038        &format!("{file}:embedded:{language}:{kind}:{name}:{start_byte}:{end_byte}"),
16039    )
16040}
16041
16042fn markdown_embedded_symbols(
16043    file: &str,
16044    source: &[u8],
16045    body_start_byte: Option<usize>,
16046    body_end_byte: Option<usize>,
16047    fence_language: Option<&str>,
16048) -> Vec<MarkdownEmbeddedSymbol> {
16049    let Some(fence_language) = fence_language else {
16050        return Vec::new();
16051    };
16052    let Some(lang) = markdown_embedded_lang(fence_language) else {
16053        return Vec::new();
16054    };
16055    let Some((body_start_byte, body_end_byte)) = body_start_byte.zip(body_end_byte) else {
16056        return Vec::new();
16057    };
16058    let Some(body) = source.get(body_start_byte.min(source.len())..body_end_byte.min(source.len()))
16059    else {
16060        return Vec::new();
16061    };
16062    if body.is_empty() {
16063        return Vec::new();
16064    }
16065
16066    let Ok(symbols) = lang.extract_symbols(body) else {
16067        return Vec::new();
16068    };
16069    let language = lang.name().to_string();
16070    symbols
16071        .into_iter()
16072        .map(|symbol| {
16073            let start_byte = body_start_byte.saturating_add(symbol.start_byte);
16074            let end_byte = body_start_byte.saturating_add(symbol.end_byte);
16075            let body_start = symbol
16076                .body_start_byte
16077                .map(|byte| body_start_byte.saturating_add(byte));
16078            let body_end = symbol
16079                .body_end_byte
16080                .map(|byte| body_start_byte.saturating_add(byte));
16081            let start_line = source_line_for_byte(source, start_byte);
16082            let end_line = source_line_for_end_byte(source, end_byte).max(start_line);
16083            MarkdownEmbeddedSymbol {
16084                handle: markdown_embedded_ast_span_handle(
16085                    file,
16086                    &language,
16087                    &symbol.name,
16088                    &symbol.kind,
16089                    start_byte,
16090                    end_byte,
16091                ),
16092                name: symbol.name,
16093                kind: symbol.kind,
16094                language: language.clone(),
16095                node_kind: symbol.node_kind,
16096                start_byte,
16097                end_byte,
16098                start_line,
16099                end_line,
16100                body_start_byte: body_start,
16101                body_end_byte: body_end,
16102                body_start_line: body_start.map(|byte| source_line_for_byte(source, byte)),
16103                body_end_line: body_end.map(|byte| source_line_for_end_byte(source, byte)),
16104            }
16105        })
16106        .collect()
16107}
16108
/// Returns the full line of `source` that contains byte offset `start_byte`,
/// without its trailing newline.
///
/// The offset is clamped to the buffer length. When the offset sits exactly on
/// a `\n`, the line ending at that newline is returned. A line that is not
/// valid UTF-8 yields the empty string.
fn markdown_source_line(source: &[u8], start_byte: usize) -> &str {
    let cursor = start_byte.min(source.len());
    let (before, after) = source.split_at(cursor);
    let line_start = match before.iter().rposition(|value| *value == b'\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    let line_end = match after.iter().position(|value| *value == b'\n') {
        Some(offset) => cursor + offset,
        None => source.len(),
    };
    std::str::from_utf8(&source[line_start..line_end]).unwrap_or("")
}
16123
16124fn markdown_list_attributes(source: &[u8], start_byte: usize) -> (Option<String>, Option<usize>) {
16125    let line = markdown_source_line(source, start_byte);
16126    let trimmed = line.trim_start();
16127    for marker in ["-", "*", "+"] {
16128        if trimmed
16129            .strip_prefix(marker)
16130            .and_then(|rest| rest.strip_prefix(' '))
16131            .is_some()
16132        {
16133            return (Some(marker.to_string()), None);
16134        }
16135    }
16136
16137    let digit_end = trimmed
16138        .find(|ch: char| !ch.is_ascii_digit())
16139        .unwrap_or(trimmed.len());
16140    let (digits, rest) = trimmed.split_at(digit_end);
16141    if !digits.is_empty() {
16142        for marker in [".", ")"] {
16143            if rest
16144                .strip_prefix(marker)
16145                .and_then(|value| value.strip_prefix(' '))
16146                .is_some()
16147            {
16148                return (
16149                    Some(format!("{digits}{marker}")),
16150                    digits.parse::<usize>().ok(),
16151                );
16152            }
16153        }
16154    }
16155    (None, None)
16156}
16157
16158fn markdown_fence_marker(source: &[u8], start_byte: usize) -> Option<String> {
16159    let line = markdown_source_line(source, start_byte);
16160    let trimmed = line.trim_start();
16161    ["```", "~~~"]
16162        .into_iter()
16163        .find(|marker| trimmed.starts_with(marker))
16164        .map(str::to_string)
16165}
16166
16167fn markdown_ast_extract_raw_nodes(file: &str, source: &[u8]) -> Result<Vec<MarkdownAstRawNode>> {
16168    let mut nodes = graph::Lang::Markdown
16169        .extract_symbols(source)
16170        .context("extracting Markdown AST nodes")?
16171        .into_iter()
16172        .map(|symbol| {
16173            let body_start_byte = symbol.body_start_byte;
16174            let body_end_byte = symbol.body_end_byte;
16175            let span_handle = ast_span_handle(
16176                file,
16177                &symbol.name,
16178                &symbol.kind,
16179                symbol.start_byte,
16180                symbol.end_byte,
16181            );
16182            MarkdownAstRawNode {
16183                handle: stable_handle(
16184                    "mdast",
16185                    &format!(
16186                        "{}:{}:{}:{}:{}",
16187                        file, symbol.kind, symbol.name, symbol.start_byte, symbol.end_byte
16188                    ),
16189                ),
16190                span_handle,
16191                name: symbol.name,
16192                kind: symbol.kind.clone(),
16193                block_kind: markdown_ast_block_kind(&symbol.kind),
16194                node_kind: symbol.node_kind,
16195                start_byte: symbol.start_byte,
16196                end_byte: symbol.end_byte,
16197                body_start_byte,
16198                body_end_byte,
16199            }
16200        })
16201        .collect::<Vec<_>>();
16202    nodes.sort_by(|left, right| {
16203        left.start_byte
16204            .cmp(&right.start_byte)
16205            .then(left.end_byte.cmp(&right.end_byte))
16206            .then(left.kind.cmp(&right.kind))
16207            .then(left.name.cmp(&right.name))
16208    });
16209    Ok(nodes)
16210}
16211
16212pub(crate) fn markdown_ast_projection(file: &str, source: &[u8]) -> Result<MarkdownAstProjection> {
16213    let source_hash = blake3::hash(source).to_hex().to_string();
16214    let cache_key = format!("{file}:{source_hash}");
16215    let cache = MARKDOWN_AST_CACHE.get_or_init(|| Mutex::new(HashMap::new()));
16216    if let Some(entry) = cache
16217        .lock()
16218        .expect("markdown ast cache poisoned")
16219        .get(&cache_key)
16220    {
16221        return Ok(MarkdownAstProjection {
16222            source_hash: entry.source_hash.clone(),
16223            nodes: entry.nodes.clone(),
16224            parse_duration_micros: entry.parse_duration_micros,
16225            cache_hit: true,
16226        });
16227    }
16228
16229    let started = Instant::now();
16230    let nodes = markdown_ast_extract_raw_nodes(file, source)?;
16231    let parse_duration_micros = started.elapsed().as_micros();
16232    cache.lock().expect("markdown ast cache poisoned").insert(
16233        cache_key,
16234        MarkdownAstCacheEntry {
16235            source_hash: source_hash.clone(),
16236            nodes: nodes.clone(),
16237            parse_duration_micros,
16238        },
16239    );
16240    Ok(MarkdownAstProjection {
16241        source_hash,
16242        nodes,
16243        parse_duration_micros,
16244        cache_hit: false,
16245    })
16246}
16247
16248fn markdown_ast_cache_report(projection: &MarkdownAstProjection) -> MarkdownAstCacheReport {
16249    MarkdownAstCacheReport {
16250        source_hash: projection.source_hash.clone(),
16251        cache_hit: projection.cache_hit,
16252        parse_duration_micros: projection.parse_duration_micros,
16253        node_count: projection.nodes.len(),
16254        section_count: projection
16255            .nodes
16256            .iter()
16257            .filter(|node| node.kind == "heading")
16258            .count(),
16259        list_item_count: projection
16260            .nodes
16261            .iter()
16262            .filter(|node| node.kind == "list_item")
16263            .count(),
16264        code_block_count: projection
16265            .nodes
16266            .iter()
16267            .filter(|node| node.kind == "code_block")
16268            .count(),
16269    }
16270}
16271
16272fn markdown_ast_node_direct_child_count(
16273    node: &MarkdownAstRawNode,
16274    nodes: &[MarkdownAstRawNode],
16275) -> usize {
16276    nodes
16277        .iter()
16278        .filter(|candidate| {
16279            markdown_ast_parent_handle(candidate, nodes).as_deref() == Some(&node.handle)
16280        })
16281        .count()
16282}
16283
16284fn markdown_ast_outline_entry(
16285    root: &Path,
16286    file: &str,
16287    source: &[u8],
16288    nodes: &[MarkdownAstRawNode],
16289    node: &MarkdownAstRawNode,
16290    max_bytes: usize,
16291) -> MarkdownAstOutlineEntry {
16292    let line = source_line_for_byte(source, node.start_byte);
16293    let end_line = source_line_for_end_byte(source, node.end_byte).max(line);
16294    MarkdownAstOutlineEntry {
16295        handle: node.handle.clone(),
16296        span_handle: node.span_handle.clone(),
16297        name: truncate_for_budget(&node.name, max_bytes),
16298        kind: node.kind.clone(),
16299        block_kind: node.block_kind.clone(),
16300        line,
16301        end_line,
16302        section_path: markdown_ast_node_metadata(file, node, source, nodes).section_path,
16303        child_count: markdown_ast_node_direct_child_count(node, nodes),
16304        expand: markdown_ast_command(root, file, Some(&node.handle)),
16305    }
16306}
16307
16308fn markdown_ast_outline_entries(
16309    root: &Path,
16310    file: &str,
16311    source: &[u8],
16312    nodes: &[MarkdownAstRawNode],
16313    limit: usize,
16314    max_bytes: usize,
16315) -> Vec<MarkdownAstOutlineEntry> {
16316    let mut headings = nodes
16317        .iter()
16318        .filter(|node| node.kind == "heading")
16319        .collect::<Vec<_>>();
16320    let mut blocks = nodes
16321        .iter()
16322        .filter(|node| node.kind != "heading")
16323        .collect::<Vec<_>>();
16324    headings.sort_by_key(|node| (node.start_byte, node.end_byte));
16325    blocks.sort_by_key(|node| (node.start_byte, node.end_byte));
16326    headings
16327        .into_iter()
16328        .chain(blocks)
16329        .take(limit)
16330        .map(|node| markdown_ast_outline_entry(root, file, source, nodes, node, max_bytes))
16331        .collect()
16332}
16333
16334fn markdown_ast_node_intersects_lines(
16335    source: &[u8],
16336    node: &MarkdownAstRawNode,
16337    start: usize,
16338    end: usize,
16339) -> bool {
16340    let line = source_line_for_byte(source, node.start_byte);
16341    let end_line = source_line_for_end_byte(source, node.end_byte).max(line);
16342    line <= end && end_line >= start
16343}
16344
16345fn source_read_markdown_projection(
16346    root: &Path,
16347    file: &str,
16348    source: &[u8],
16349    start: usize,
16350    end: usize,
16351    budget: ResponseBudget,
16352) -> Result<SourceReadMarkdownProjection> {
16353    let projection = markdown_ast_projection(file, source)?;
16354    let visible_nodes = projection
16355        .nodes
16356        .iter()
16357        .filter(|node| markdown_ast_node_intersects_lines(source, node, start, end))
16358        .collect::<Vec<_>>();
16359    let mut outline_nodes = visible_nodes.clone();
16360    outline_nodes.sort_by_key(|node| {
16361        (
16362            node.kind != "heading",
16363            node.start_byte,
16364            node.end_byte,
16365            node.name.as_str(),
16366        )
16367    });
16368    let outline = outline_nodes
16369        .into_iter()
16370        .take(budget.preview_items())
16371        .map(|node| {
16372            markdown_ast_outline_entry(
16373                root,
16374                file,
16375                source,
16376                &projection.nodes,
16377                node,
16378                budget.preview_bytes(),
16379            )
16380        })
16381        .collect::<Vec<_>>();
16382    Ok(SourceReadMarkdownProjection {
16383        handle: stable_handle(
16384            "mdproj",
16385            &format!("{file}:{start}:{end}:{}", projection.source_hash),
16386        ),
16387        mode: "window_outline".to_string(),
16388        total_nodes: projection.nodes.len(),
16389        visible_nodes: visible_nodes.len(),
16390        outline,
16391        expand: markdown_ast_command(root, file, None),
16392    })
16393}
16394
16395fn markdown_ast_contains(parent: &MarkdownAstRawNode, child: &MarkdownAstRawNode) -> bool {
16396    if parent.handle == child.handle {
16397        return false;
16398    }
16399    parent.start_byte <= child.start_byte && parent.end_byte >= child.end_byte
16400}
16401
16402fn markdown_ast_parent_handle(
16403    node: &MarkdownAstRawNode,
16404    nodes: &[MarkdownAstRawNode],
16405) -> Option<String> {
16406    nodes
16407        .iter()
16408        .filter(|candidate| markdown_ast_contains(candidate, node))
16409        .min_by_key(|candidate| {
16410            (
16411                candidate.end_byte.saturating_sub(candidate.start_byte),
16412                candidate.start_byte,
16413            )
16414        })
16415        .map(|candidate| candidate.handle.clone())
16416}
16417
16418fn markdown_ast_child_handles(
16419    node: &MarkdownAstRawNode,
16420    nodes: &[MarkdownAstRawNode],
16421    limit: usize,
16422) -> Vec<String> {
16423    nodes
16424        .iter()
16425        .filter(|candidate| {
16426            markdown_ast_parent_handle(candidate, nodes).as_deref() == Some(&node.handle)
16427        })
16428        .take(limit)
16429        .map(|candidate| candidate.handle.clone())
16430        .collect()
16431}
16432
16433fn markdown_ast_section_nodes<'a>(
16434    node: &MarkdownAstRawNode,
16435    nodes: &'a [MarkdownAstRawNode],
16436) -> Vec<&'a MarkdownAstRawNode> {
16437    let mut headings = nodes
16438        .iter()
16439        .filter(|candidate| candidate.kind == "heading")
16440        .filter(|candidate| {
16441            candidate.start_byte <= node.start_byte && candidate.end_byte >= node.end_byte
16442        })
16443        .collect::<Vec<_>>();
16444    headings.sort_by(|left, right| {
16445        left.start_byte
16446            .cmp(&right.start_byte)
16447            .then(left.end_byte.cmp(&right.end_byte))
16448            .then(left.name.cmp(&right.name))
16449    });
16450    headings
16451}
16452
16453fn markdown_ast_node_metadata(
16454    file: &str,
16455    node: &MarkdownAstRawNode,
16456    source: &[u8],
16457    nodes: &[MarkdownAstRawNode],
16458) -> MarkdownAstNodeMetadata {
16459    let section_nodes = markdown_ast_section_nodes(node, nodes);
16460    let section_path = section_nodes
16461        .iter()
16462        .map(|heading| heading.name.clone())
16463        .collect::<Vec<_>>();
16464    let section_handle = section_nodes.last().map(|heading| heading.handle.clone());
16465    let heading_level = (node.kind == "heading")
16466        .then(|| markdown_heading_level(source, node.start_byte))
16467        .flatten();
16468    let (list_marker, list_order) = if node.kind == "list_item" {
16469        markdown_list_attributes(source, node.start_byte)
16470    } else {
16471        (None, None)
16472    };
16473    let fence_language = (node.kind == "code_block").then(|| node.name.clone());
16474    let embedded_symbols = if node.kind == "code_block" {
16475        markdown_embedded_symbols(
16476            file,
16477            source,
16478            node.body_start_byte,
16479            node.body_end_byte,
16480            fence_language.as_deref(),
16481        )
16482    } else {
16483        Vec::new()
16484    };
16485    MarkdownAstNodeMetadata {
16486        heading_level,
16487        section_path,
16488        section_handle,
16489        list_depth: (node.kind == "list_item")
16490            .then(|| markdown_list_depth(source, node.start_byte)),
16491        list_marker,
16492        list_order,
16493        fence_language,
16494        fence_marker: (node.kind == "code_block")
16495            .then(|| markdown_fence_marker(source, node.start_byte))
16496            .flatten(),
16497        embedded_symbols,
16498    }
16499}
16500
16501fn markdown_ast_node_expand(
16502    root: &Path,
16503    file: &str,
16504    node: &MarkdownAstRawNode,
16505    source: &[u8],
16506) -> MarkdownAstNodeExpand {
16507    let start_line = source_line_for_byte(source, node.start_byte);
16508    let end_line = source_line_for_end_byte(source, node.end_byte).max(start_line);
16509    let line_count = end_line.saturating_sub(start_line).saturating_add(1).max(1);
16510    let body_start_line = node
16511        .body_start_byte
16512        .map(|byte| source_line_for_byte(source, byte))
16513        .unwrap_or(start_line);
16514    let body_end_line = node
16515        .body_end_byte
16516        .map(|byte| source_line_for_end_byte(source, byte))
16517        .unwrap_or(end_line)
16518        .max(body_start_line);
16519    let body_line_count = body_end_line
16520        .saturating_sub(body_start_line)
16521        .saturating_add(1)
16522        .max(1);
16523    MarkdownAstNodeExpand {
16524        source_window: source_read_command(root, file, start_line, line_count),
16525        source_body: source_read_command(root, file, body_start_line, body_line_count),
16526        symbol_read: source_symbol_read_command(root, &node.name, file),
16527        edit_intents: markdown_edit_intents_command(root),
16528    }
16529}
16530
16531fn markdown_ast_node(
16532    root: &Path,
16533    file: &str,
16534    node: &MarkdownAstRawNode,
16535    source: &[u8],
16536    nodes: &[MarkdownAstRawNode],
16537    child_limit: usize,
16538) -> MarkdownAstNode {
16539    let line = source_line_for_byte(source, node.start_byte);
16540    let end_line = source_line_for_end_byte(source, node.end_byte).max(line);
16541    let body_byte_span = node
16542        .body_start_byte
16543        .zip(node.body_end_byte)
16544        .map(|(start, end)| SourceByteRangePreview { start, end });
16545    MarkdownAstNode {
16546        handle: node.handle.clone(),
16547        span_handle: node.span_handle.clone(),
16548        name: node.name.clone(),
16549        kind: node.kind.clone(),
16550        block_kind: node.block_kind.clone(),
16551        node_kind: node.node_kind.clone(),
16552        line,
16553        end_line,
16554        byte_span: SourceByteRangePreview {
16555            start: node.start_byte,
16556            end: node.end_byte,
16557        },
16558        body_byte_span,
16559        parent_handle: markdown_ast_parent_handle(node, nodes),
16560        child_handles: markdown_ast_child_handles(node, nodes, child_limit),
16561        metadata: markdown_ast_node_metadata(file, node, source, nodes),
16562        expand: markdown_ast_node_expand(root, file, node, source),
16563    }
16564}
16565
16566pub(crate) fn stored_symbol_ast_span(
16567    symbol: &index::StoredSymbol,
16568    source: &[u8],
16569    symbols: &[index::StoredSymbol],
16570    child_limit: usize,
16571) -> Option<AstSpanPreview> {
16572    let (start_byte, end_byte) = stored_symbol_span_bounds(symbol)?;
16573    let node_kind = symbol.node_kind.clone()?;
16574    let body_start_byte = symbol_span_byte(symbol.body_start_byte);
16575    let body_end_byte = symbol_span_byte(symbol.body_end_byte);
16576    Some(AstSpanPreview {
16577        handle: ast_span_handle(
16578            &symbol.file,
16579            &symbol.name,
16580            &symbol.kind,
16581            start_byte,
16582            end_byte,
16583        ),
16584        node_kind,
16585        start_byte,
16586        end_byte,
16587        start_line: source_line_for_byte(source, start_byte),
16588        end_line: source_line_for_end_byte(source, end_byte),
16589        body_start_byte,
16590        body_end_byte,
16591        body_start_line: body_start_byte.map(|byte| source_line_for_byte(source, byte)),
16592        body_end_line: body_end_byte.map(|byte| source_line_for_end_byte(source, byte)),
16593        parent_handle: stored_symbol_parent_span_handle(symbol, symbols),
16594        child_handles: stored_symbol_child_span_handles(symbol, symbols, child_limit),
16595        markdown: markdown_stored_symbol_metadata(symbol, source, symbols),
16596    })
16597}
16598
16599pub(crate) fn symbol_hit_ast_span(symbol: &index::SymbolHit, source: &[u8]) -> Option<AstSpanPreview> {
16600    let (start_byte, end_byte) = symbol_hit_span_bounds(symbol)?;
16601    let node_kind = symbol.node_kind.clone()?;
16602    let body_start_byte = symbol_span_byte(symbol.body_start_byte);
16603    let body_end_byte = symbol_span_byte(symbol.body_end_byte);
16604    Some(AstSpanPreview {
16605        handle: ast_span_handle(
16606            &symbol.file,
16607            &symbol.name,
16608            &symbol.kind,
16609            start_byte,
16610            end_byte,
16611        ),
16612        node_kind,
16613        start_byte,
16614        end_byte,
16615        start_line: source_line_for_byte(source, start_byte),
16616        end_line: source_line_for_end_byte(source, end_byte),
16617        body_start_byte,
16618        body_end_byte,
16619        body_start_line: body_start_byte.map(|byte| source_line_for_byte(source, byte)),
16620        body_end_line: body_end_byte.map(|byte| source_line_for_end_byte(source, byte)),
16621        parent_handle: None,
16622        child_handles: Vec::new(),
16623        markdown: markdown_symbol_hit_metadata(symbol, source, start_byte),
16624    })
16625}
16626
16627pub(crate) fn symbol_hit_line(symbol: &index::SymbolHit) -> usize {
16628    usize::try_from(symbol.line)
16629        .ok()
16630        .and_then(|line| line.checked_add(1))
16631        .unwrap_or(1)
16632}
16633
16634pub(crate) fn symbol_hit_end_line(symbol: &index::SymbolHit) -> Option<usize> {
16635    symbol
16636        .end_line
16637        .and_then(|line| usize::try_from(line).ok())
16638        .and_then(|line| line.checked_add(1))
16639}
16640
16641fn source_symbol_intersects(symbol: &index::StoredSymbol, start: usize, end: usize) -> bool {
16642    if end == 0 {
16643        return false;
16644    }
16645    let symbol_start = source_symbol_line(symbol);
16646    let symbol_end = source_symbol_end_line(symbol).unwrap_or(symbol_start);
16647    symbol_start <= end && symbol_end >= start
16648}
16649
16650#[allow(clippy::too_many_arguments)]
16651fn load_source_symbols(
16652    root: &Path,
16653    file_abs: &Path,
16654    file_display: &str,
16655    source: &[u8],
16656    scope: Option<&str>,
16657    start: usize,
16658    end: usize,
16659    limit: usize,
16660    max_bytes: usize,
16661    warnings: &mut Vec<String>,
16662) -> Vec<SourceSymbolRef> {
16663    let db_path = match resolve_query_db_path(root, file_abs, scope) {
16664        Ok(path) => path,
16665        Err(err) => {
16666            warnings.push(format!("index refs unavailable: {err:#}"));
16667            return Vec::new();
16668        }
16669    };
16670    if !db_path.exists() {
16671        warnings.push(format!(
16672            "index refs unavailable: no index found at {}",
16673            db_path.display()
16674        ));
16675        return Vec::new();
16676    }
16677
16678    let db = match index::IndexDb::open_read_only_resilient(&db_path) {
16679        Ok(db) => db,
16680        Err(err) => {
16681            warnings.push(format!("index refs unavailable: {err:#}"));
16682            return Vec::new();
16683        }
16684    };
16685
16686    let file_key = file_abs.to_string_lossy().to_string();
16687    let symbols = match db.symbols_for_file(&file_key) {
16688        Ok(symbols) => symbols,
16689        Err(err) => {
16690            warnings.push(format!("symbol refs unavailable: {err:#}"));
16691            return Vec::new();
16692        }
16693    };
16694
16695    symbols
16696        .iter()
16697        .filter(|symbol| source_symbol_intersects(symbol, start, end))
16698        .take(limit)
16699        .map(|symbol| {
16700            let line = source_symbol_line(symbol);
16701            let end_line = source_symbol_end_line(symbol);
16702            let handle = stable_handle(
16703                "ssym",
16704                &format!("{}:{}:{}", file_display, symbol.name, line),
16705            );
16706            SourceSymbolRef {
16707                handle,
16708                name: truncate_for_budget(&symbol.name, max_bytes),
16709                kind: symbol.kind.clone(),
16710                language: symbol.language.clone(),
16711                file: file_display.to_string(),
16712                line,
16713                end_line,
16714                signature: symbol
16715                    .signature
16716                    .clone()
16717                    .map(|signature| truncate_for_budget(&signature, max_bytes)),
16718                span: stored_symbol_ast_span(symbol, source, &symbols, limit),
16719                expand: source_symbol_read_command(root, &symbol.name, file_display),
16720            }
16721        })
16722        .collect()
16723}
16724
16725fn load_source_summaries(
16726    root: &Path,
16727    file_display: &str,
16728    limit: usize,
16729    max_bytes: usize,
16730    warnings: &mut Vec<String>,
16731) -> Vec<SourceSummaryRef> {
16732    let db_path = root.join(".tsift/summaries.db");
16733    if !db_path.exists() {
16734        return Vec::new();
16735    }
16736    let db = match summarize::SummaryDb::open_read_only_resilient(&db_path) {
16737        Ok(db) => db,
16738        Err(err) => {
16739            warnings.push(format!("summary refs unavailable: {err:#}"));
16740            return Vec::new();
16741        }
16742    };
16743    let summaries = match db.get_by_file(file_display) {
16744        Ok(summaries) => summaries,
16745        Err(err) => {
16746            warnings.push(format!("summary refs unavailable: {err:#}"));
16747            return Vec::new();
16748        }
16749    };
16750
16751    summaries
16752        .into_iter()
16753        .take(limit)
16754        .map(|summary| SourceSummaryRef {
16755            handle: stable_handle(
16756                "sum",
16757                &format!(
16758                    "{}:{}:{}",
16759                    summary.file_path, summary.symbol_name, summary.id
16760                ),
16761            ),
16762            symbol_name: truncate_for_budget(&summary.symbol_name, max_bytes),
16763            file_path: summary.file_path,
16764            summary: truncate_for_budget(&summary.summary, max_bytes),
16765            expand: source_summary_expand_command(root, &summary.symbol_name),
16766        })
16767        .collect()
16768}
16769
16770fn cmd_markdown_ast(
16771    file: &Path,
16772    path: &Path,
16773    node: Option<&str>,
16774    format: OutputFormat,
16775    absolute: bool,
16776    budget: ResponseBudget,
16777) -> Result<()> {
16778    let root = lint::resolve_project_root_or_canonical_path(path)?;
16779    let file_abs = resolve_source_file(&root, file)?;
16780    if !is_markdown_path(&file_abs) {
16781        bail!(
16782            "markdown-ast only supports Markdown files (.md/.mdx): {}",
16783            file_abs.display()
16784        );
16785    }
16786    let file_display = if absolute {
16787        file_abs.to_string_lossy().to_string()
16788    } else {
16789        relativize_pathbuf(&file_abs, &root)
16790            .to_string_lossy()
16791            .to_string()
16792    };
16793    let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
16794    let text = String::from_utf8_lossy(&source);
16795    let total_lines = text.lines().count();
16796    let projection = markdown_ast_projection(&file_display, &source)?;
16797    let raw_nodes = &projection.nodes;
16798    let max_items = budget.preview_items();
16799    let max_bytes = budget.preview_bytes();
16800
16801    let selected_nodes = if let Some(handle) = node {
16802        let matches = raw_nodes
16803            .iter()
16804            .filter(|candidate| candidate.handle == handle || candidate.span_handle == handle)
16805            .collect::<Vec<_>>();
16806        if matches.is_empty() {
16807            bail!("Markdown AST node handle {handle:?} was not found in {file_display}");
16808        }
16809        matches
16810    } else {
16811        raw_nodes.iter().take(max_items).collect::<Vec<_>>()
16812    };
16813    let nodes = selected_nodes
16814        .into_iter()
16815        .map(|raw| {
16816            let mut node =
16817                markdown_ast_node(&root, &file_display, raw, &source, raw_nodes, max_items);
16818            node.name = truncate_for_budget(&node.name, max_bytes);
16819            node
16820        })
16821        .collect::<Vec<_>>();
16822    let outline_started = Instant::now();
16823    let outline = markdown_ast_outline_entries(
16824        &root,
16825        &file_display,
16826        &source,
16827        raw_nodes,
16828        max_items,
16829        max_bytes,
16830    );
16831    let outline_duration_micros = outline_started.elapsed().as_micros();
16832    let projection_preview = MarkdownAstProjectionPreview {
16833        mode: if node.is_some() {
16834            "selected_node".to_string()
16835        } else {
16836            "outline_first".to_string()
16837        },
16838        total_nodes: raw_nodes.len(),
16839        returned_nodes: nodes.len(),
16840        omitted_nodes: raw_nodes.len().saturating_sub(nodes.len()),
16841        selected_node: node.map(str::to_string),
16842        cache: markdown_ast_cache_report(&projection),
16843        outline,
16844        phase_timings: vec![
16845            MarkdownAstPhaseTiming {
16846                name: "parse_extract".to_string(),
16847                duration_micros: projection.parse_duration_micros,
16848                detail: if projection.cache_hit {
16849                    "reused cached tree-sitter Markdown symbol extraction".to_string()
16850                } else {
16851                    "tree-sitter Markdown symbol extraction".to_string()
16852                },
16853            },
16854            MarkdownAstPhaseTiming {
16855                name: "outline_projection".to_string(),
16856                duration_micros: outline_duration_micros,
16857                detail: "outline-first section/block preview construction".to_string(),
16858            },
16859        ],
16860    };
16861    let report = MarkdownAstReport {
16862        handle: stable_handle("mdastrep", &file_display),
16863        root: root.to_string_lossy().to_string(),
16864        file: file_display.clone(),
16865        range: SourceRangePreview {
16866            start: 1,
16867            end: total_lines,
16868            total_lines,
16869            truncated_before: false,
16870            truncated_after: false,
16871        },
16872        projection: projection_preview,
16873        nodes,
16874        expand: MarkdownAstExpandCommands {
16875            file: markdown_ast_command(&root, &file_display, None),
16876            source_read: source_read_command(&root, &file_display, 1, total_lines.max(1)),
16877            edit_intents: markdown_edit_intents_command(&root),
16878        },
16879        warnings: Vec::new(),
16880    };
16881
16882    if format.json_output {
16883        let truncated = node.is_none() && raw_nodes.len() > report.nodes.len();
16884        let mut follow_up = vec![
16885            report.expand.file.clone(),
16886            report.expand.source_read.clone(),
16887            report.expand.edit_intents.clone(),
16888        ];
16889        follow_up.extend(
16890            report
16891                .nodes
16892                .iter()
16893                .map(|node| node.expand.source_window.clone()),
16894        );
16895        print_json_or_envelope(
16896            &report,
16897            &format,
16898            "markdown-ast",
16899            "ast",
16900            ToolEnvelopeSummary {
16901                text: format!("markdown ast {} nodes:{}", report.file, report.nodes.len()),
16902                metrics: vec![
16903                    envelope_metric("nodes", report.nodes.len()),
16904                    envelope_metric("total_nodes", report.projection.total_nodes),
16905                    envelope_metric(
16906                        "parse_duration_micros",
16907                        report.projection.cache.parse_duration_micros,
16908                    ),
16909                    envelope_metric("total_lines", report.range.total_lines),
16910                ],
16911            },
16912            truncated,
16913            follow_up,
16914        )?;
16915    } else if format.compact {
16916        println!(
16917            "markdown-ast {} nodes:{} handle:{}",
16918            report.file,
16919            report.nodes.len(),
16920            report.handle
16921        );
16922        for node in &report.nodes {
16923            println!(
16924                "  {} {} {}:{}-{}",
16925                node.handle, node.kind, node.name, node.line, node.end_line
16926            );
16927        }
16928        if node.is_none() && raw_nodes.len() > report.nodes.len() {
16929            println!("expand: {}", report.expand.file);
16930        }
16931    } else {
16932        println!(
16933            "Markdown AST `{}` nodes {} of {} ({})",
16934            report.file,
16935            report.nodes.len(),
16936            raw_nodes.len(),
16937            report.handle
16938        );
16939        for node in &report.nodes {
16940            println!(
16941                "  {} `{}` {}:{}-{} — {}",
16942                node.handle,
16943                node.name,
16944                node.kind,
16945                node.line,
16946                node.end_line,
16947                node.expand.source_window
16948            );
16949        }
16950        if node.is_none() && raw_nodes.len() > report.nodes.len() {
16951            println!();
16952            println!("Expand:");
16953            println!("  file: {}", report.expand.file);
16954        }
16955    }
16956
16957    Ok(())
16958}
16959
16960#[allow(clippy::too_many_arguments)]
16961fn cmd_source_read(
16962    file: &Path,
16963    path: &Path,
16964    start: usize,
16965    lines: usize,
16966    end: Option<usize>,
16967    scope: Option<&str>,
16968    format: OutputFormat,
16969    absolute: bool,
16970    budget: ResponseBudget,
16971) -> Result<()> {
16972    if start == 0 {
16973        bail!("--start is 1-based and must be greater than zero");
16974    }
16975    if lines == 0 {
16976        bail!("--lines must be greater than zero");
16977    }
16978    if let Some(end) = end
16979        && end < start
16980    {
16981        bail!("--end must be greater than or equal to --start");
16982    }
16983
16984    let root = lint::resolve_project_root_or_canonical_path(path)?;
16985    let file_abs = resolve_source_file(&root, file)?;
16986    let file_display = if absolute {
16987        file_abs.to_string_lossy().to_string()
16988    } else {
16989        relativize_pathbuf(&file_abs, &root)
16990            .to_string_lossy()
16991            .to_string()
16992    };
16993
16994    let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
16995    let text = String::from_utf8_lossy(&source);
16996    let all_lines: Vec<&str> = text.lines().collect();
16997    let total_lines = all_lines.len();
16998    if total_lines > 0 && start > total_lines {
16999        bail!(
17000            "--start {} is beyond end of {} ({} lines)",
17001            start,
17002            file_display,
17003            total_lines
17004        );
17005    }
17006    let requested_end = end.unwrap_or_else(|| start.saturating_add(lines).saturating_sub(1));
17007    let end_line = requested_end.min(total_lines);
17008    let max_bytes = budget.preview_bytes();
17009    let token_cap = budget.body_token_cap();
17010    let (preview, preview_end, body_truncated) = if total_lines == 0 {
17011        (Vec::new(), end_line, false)
17012    } else {
17013        let capped = build_token_capped_preview(&all_lines, start, end_line, max_bytes, token_cap);
17014        (capped.preview, capped.capped_end, capped.was_capped)
17015    };
17016    let effective_end = if body_truncated { preview_end } else { end_line };
17017
17018    let mut warnings = Vec::new();
17019    if body_truncated {
17020        warnings.push(format!(
17021            "body preview capped at ~{token_cap} tokens at line {preview_end} of {end_line}"
17022        ));
17023    }
17024    let max_items = budget.preview_items();
17025    let symbols = load_source_symbols(
17026        &root,
17027        &file_abs,
17028        &file_display,
17029        &source,
17030        scope,
17031        start,
17032        effective_end,
17033        max_items,
17034        max_bytes,
17035        &mut warnings,
17036    );
17037    let summaries =
17038        load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
17039    let markdown = if is_markdown_path(&file_abs) {
17040        match source_read_markdown_projection(
17041            &root,
17042            &file_display,
17043            &source,
17044            start,
17045            effective_end,
17046            budget,
17047        ) {
17048            Ok(markdown) => Some(markdown),
17049            Err(err) => {
17050                warnings.push(format!("markdown projection unavailable: {err:#}"));
17051                None
17052            }
17053        }
17054    } else {
17055        None
17056    };
17057
17058    let effective_lines = effective_end.saturating_sub(start).saturating_add(1).max(1);
17059    let expand = SourceExpandCommands {
17060        before: (start > 1).then(|| {
17061            let before_start = start.saturating_sub(lines).max(1);
17062            source_read_command(&root, &file_display, before_start, start - before_start)
17063        }),
17064        after: (effective_end < total_lines)
17065            .then(|| source_read_command(&root, &file_display, effective_end + 1, lines)),
17066        body: body_truncated.then(|| {
17067            let remaining = end_line.saturating_sub(effective_end);
17068            source_read_command(&root, &file_display, effective_end + 1, remaining)
17069        }),
17070        file: source_read_command(&root, &file_display, 1, total_lines.max(effective_lines)),
17071        markdown_ast: is_markdown_path(&file_abs)
17072            .then(|| markdown_ast_command(&root, &file_display, None)),
17073    };
17074
17075    let report = SourceReadReport {
17076        handle: stable_handle("swin", &format!("{file_display}:{start}:{effective_end}")),
17077        root: root.to_string_lossy().to_string(),
17078        file: file_display,
17079        range: SourceRangePreview {
17080            start,
17081            end: effective_end,
17082            total_lines,
17083            truncated_before: start > 1,
17084            truncated_after: effective_end < total_lines,
17085        },
17086        preview,
17087        symbols,
17088        summaries,
17089        markdown,
17090        expand,
17091        warnings,
17092    };
17093
17094    if format.json_output {
17095        let truncated = report.range.truncated_before || report.range.truncated_after;
17096        let follow_up = [
17097            report.expand.before.clone(),
17098            report.expand.after.clone(),
17099            report.expand.body.clone(),
17100            Some(report.expand.file.clone()),
17101            report.expand.markdown_ast.clone(),
17102        ]
17103        .into_iter()
17104        .flatten()
17105        .collect::<Vec<_>>();
17106        print_json_or_envelope(
17107            &report,
17108            &format,
17109            "source-read",
17110            "window",
17111            ToolEnvelopeSummary {
17112                text: format!(
17113                    "source window {}:{}-{}",
17114                    report.file, report.range.start, report.range.end
17115                ),
17116                metrics: vec![
17117                    envelope_metric("lines", report.preview.len()),
17118                    envelope_metric("symbols", report.symbols.len()),
17119                    envelope_metric("summaries", report.summaries.len()),
17120                    envelope_metric(
17121                        "markdown_nodes",
17122                        report
17123                            .markdown
17124                            .as_ref()
17125                            .map_or(0, |markdown| markdown.visible_nodes),
17126                    ),
17127                ],
17128            },
17129            truncated,
17130            follow_up,
17131        )?;
17132    } else if format.compact {
17133        println!(
17134            "source {}:{}-{} / {} handle:{}",
17135            report.file,
17136            report.range.start,
17137            report.range.end,
17138            report.range.total_lines,
17139            report.handle
17140        );
17141        for line in &report.preview {
17142            println!("{:>5} {}", line.line, line.text);
17143        }
17144        if !report.symbols.is_empty() {
17145            println!("syms[{}]:", report.symbols.len());
17146            for symbol in &report.symbols {
17147                println!("  {} {}:{}", symbol.name, symbol.file, symbol.line);
17148            }
17149        }
17150        if report.range.truncated_before || report.range.truncated_after {
17151            println!("expand: {}", report.expand.file);
17152        }
17153    } else {
17154        println!(
17155            "Source window `{}` lines {}-{} of {} ({})",
17156            report.file,
17157            report.range.start,
17158            report.range.end,
17159            report.range.total_lines,
17160            report.handle
17161        );
17162        for line in &report.preview {
17163            println!("{:>5} | {}", line.line, line.text);
17164        }
17165        if !report.symbols.is_empty() {
17166            println!();
17167            println!("Symbol refs:");
17168            for symbol in &report.symbols {
17169                println!(
17170                    "  {} `{}` {}:{} — {}",
17171                    symbol.handle, symbol.name, symbol.file, symbol.line, symbol.expand
17172                );
17173            }
17174        }
17175        if !report.summaries.is_empty() {
17176            println!();
17177            println!("Summary refs:");
17178            for summary in &report.summaries {
17179                println!(
17180                    "  {} `{}` — {}",
17181                    summary.handle, summary.symbol_name, summary.expand
17182                );
17183            }
17184        }
17185        if report.range.truncated_before || report.range.truncated_after {
17186            println!();
17187            println!("Expand:");
17188            if let Some(before) = &report.expand.before {
17189                println!("  before: {}", before);
17190            }
17191            if let Some(after) = &report.expand.after {
17192                println!("  after: {}", after);
17193            }
17194            println!("  file:   {}", report.expand.file);
17195        }
17196        for warning in &report.warnings {
17197            eprintln!("warning: {warning}");
17198        }
17199    }
17200
17201    Ok(())
17202}
17203
17204#[allow(clippy::too_many_arguments)]
17205fn cmd_symbol_read(
17206    symbol: &str,
17207    file_hint: Option<&Path>,
17208    path: &Path,
17209    scope: Option<&str>,
17210    format: OutputFormat,
17211    absolute: bool,
17212    budget: ResponseBudget,
17213) -> Result<()> {
17214    let root = lint::resolve_project_root_or_canonical_path(path)?;
17215    let hinted_file_abs = file_hint
17216        .map(|file| resolve_source_file(&root, file))
17217        .transpose()?;
17218    let path_hint = hinted_file_abs.as_deref().unwrap_or(root.as_path());
17219    let db_path = resolve_query_db_path(&root, path_hint, scope)?;
17220    if !db_path.exists() {
17221        bail!(
17222            "index refs unavailable: no index found at {}",
17223            db_path.display()
17224        );
17225    }
17226    let db = index::IndexDb::open_read_only_resilient(&db_path)
17227        .with_context(|| format!("opening symbol index {}", db_path.display()))?;
17228    let search_limit = budget.follow_up_items().max(10);
17229    let hits = db
17230        .symbol_search(symbol, search_limit)
17231        .with_context(|| format!("searching symbols for {symbol:?}"))?;
17232    let selected = hits
17233        .into_iter()
17234        .find(|hit| {
17235            let Some(hinted_file_abs) = &hinted_file_abs else {
17236                return true;
17237            };
17238            resolve_source_file(&root, Path::new(&hit.file))
17239                .map(|hit_file| hit_file == *hinted_file_abs)
17240                .unwrap_or(false)
17241        })
17242        .with_context(|| {
17243            let hint = file_hint
17244                .map(|file| format!(" in {}", file.display()))
17245                .unwrap_or_default();
17246            format!("no indexed symbol matched {symbol:?}{hint}")
17247        })?;
17248
17249    let file_abs = resolve_source_file(&root, Path::new(&selected.file))?;
17250    let file_display = if absolute {
17251        file_abs.to_string_lossy().to_string()
17252    } else {
17253        relativize_pathbuf(&file_abs, &root)
17254            .to_string_lossy()
17255            .to_string()
17256    };
17257    let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
17258    let content_hash = blake3::hash(&source).to_hex().to_string();
17259    let text = String::from_utf8_lossy(&source);
17260    let all_lines: Vec<&str> = text.lines().collect();
17261    let total_lines = all_lines.len();
17262    let file_symbols = db
17263        .symbols_for_file(&file_abs.to_string_lossy())
17264        .with_context(|| format!("loading symbols for {}", file_abs.display()))?;
17265    let max_items = budget.preview_items();
17266    let max_bytes = budget.preview_bytes();
17267    let selected_start = symbol_hit_line(&selected);
17268    let selected_end = symbol_hit_end_line(&selected)
17269        .unwrap_or(selected_start)
17270        .max(selected_start);
17271    let stored_target = file_symbols.iter().find(|candidate| {
17272        candidate.name == selected.name
17273            && candidate.kind == selected.kind
17274            && source_symbol_line(candidate) == selected_start
17275    });
17276    let target_span = stored_target
17277        .and_then(|stored| stored_symbol_ast_span(stored, &source, &file_symbols, max_items))
17278        .or_else(|| symbol_hit_ast_span(&selected, &source));
17279    let target_start = target_span
17280        .as_ref()
17281        .map(|span| span.start_line)
17282        .unwrap_or(selected_start);
17283    let target_end = target_span
17284        .as_ref()
17285        .map(|span| span.end_line)
17286        .or_else(|| stored_target.and_then(source_symbol_end_line))
17287        .unwrap_or(selected_end)
17288        .max(target_start);
17289    let target_bounds = stored_target
17290        .and_then(stored_symbol_span_bounds)
17291        .or_else(|| symbol_hit_span_bounds(&selected));
17292    let target_end = stored_target
17293        .and_then(source_symbol_end_line)
17294        .unwrap_or(target_end)
17295        .max(target_start);
17296    let body_line_budget = budget.preview_items().max(1).saturating_mul(16);
17297    let line_capped_end = target_start
17298        .saturating_add(body_line_budget)
17299        .saturating_sub(1)
17300        .min(target_end)
17301        .min(total_lines.max(target_start));
17302    let token_cap = budget.body_token_cap();
17303    let (body, effective_preview_end, body_truncated) = if total_lines == 0 || target_start > total_lines {
17304        (Vec::new(), line_capped_end, false)
17305    } else {
17306        let capped = build_token_capped_preview(&all_lines, target_start, line_capped_end, max_bytes, token_cap);
17307        (capped.preview, capped.capped_end, capped.was_capped)
17308    };
17309    let preview_end = if body_truncated { effective_preview_end } else { line_capped_end };
17310    let child_symbols = file_symbols
17311        .iter()
17312        .filter(|candidate| {
17313            if let Some((target_start_byte, target_end_byte)) = target_bounds {
17314                let Some((candidate_start, candidate_end)) = stored_symbol_span_bounds(candidate)
17315                else {
17316                    return false;
17317                };
17318                return candidate_start >= target_start_byte
17319                    && candidate_end <= target_end_byte
17320                    && (candidate_start, candidate_end) != (target_start_byte, target_end_byte);
17321            }
17322            let line = source_symbol_line(candidate);
17323            line > target_start && line <= target_end
17324        })
17325        .take(max_items)
17326        .map(|symbol| {
17327            let line = source_symbol_line(symbol);
17328            let end_line = source_symbol_end_line(symbol);
17329            SourceSymbolRef {
17330                handle: stable_handle(
17331                    "ssym",
17332                    &format!("{}:{}:{}", file_display, symbol.name, line),
17333                ),
17334                name: truncate_for_budget(&symbol.name, max_bytes),
17335                kind: symbol.kind.clone(),
17336                language: symbol.language.clone(),
17337                file: file_display.clone(),
17338                line,
17339                end_line,
17340                signature: symbol
17341                    .signature
17342                    .clone()
17343                    .map(|signature| truncate_for_budget(&signature, max_bytes)),
17344                span: stored_symbol_ast_span(symbol, &source, &file_symbols, max_items),
17345                expand: source_symbol_read_command(&root, &symbol.name, &file_display),
17346            }
17347        })
17348        .collect::<Vec<_>>();
17349    let mut warnings = Vec::new();
17350    if body_truncated {
17351        warnings.push(format!(
17352            "body preview capped at ~{token_cap} tokens at line {preview_end} of {target_end}"
17353        ));
17354    }
17355    let summaries =
17356        load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
17357    let symbol_handle = stable_handle(
17358        "sread",
17359        &format!("{}:{}:{}", file_display, selected.name, target_start),
17360    );
17361    let source_lines = preview_end
17362        .saturating_sub(target_start)
17363        .saturating_add(1)
17364        .max(1);
17365    let expand = SymbolReadExpandCommands {
17366        source_window: source_read_command(&root, &file_display, target_start, source_lines),
17367        body: body_truncated.then(|| {
17368            let remaining = target_end.saturating_sub(preview_end);
17369            source_read_command(&root, &file_display, preview_end + 1, remaining)
17370        }),
17371        file: source_read_command(&root, &file_display, 1, total_lines.max(source_lines)),
17372        explain: source_symbol_expand_command(&root, &selected.name),
17373        callers: source_symbol_graph_command(&root, &selected.name, "callers"),
17374        callees: source_symbol_graph_command(&root, &selected.name, "callees"),
17375        markdown_ast: (selected.language == "markdown").then(|| {
17376            markdown_ast_command(
17377                &root,
17378                &file_display,
17379                target_span.as_ref().map(|span| span.handle.as_str()),
17380            )
17381        }),
17382    };
17383    let report = SymbolReadReport {
17384        handle: symbol_handle.clone(),
17385        root: root.to_string_lossy().to_string(),
17386        query: symbol.to_string(),
17387        symbol: SymbolReadTarget {
17388            handle: symbol_handle,
17389            name: selected.name.clone(),
17390            kind: selected.kind.clone(),
17391            language: selected.language.clone(),
17392            file: file_display.clone(),
17393            line: target_start,
17394            end_line: Some(target_end),
17395            signature: stored_target
17396                .and_then(|stored| stored.signature.clone())
17397                .map(|signature| truncate_for_budget(&signature, max_bytes)),
17398            parent_module: stored_target.and_then(|stored| stored.parent_module.clone()),
17399            visibility: stored_target.and_then(|stored| stored.visibility.clone()),
17400            span: target_span,
17401        },
17402        range: SourceRangePreview {
17403            start: target_start,
17404            end: preview_end,
17405            total_lines,
17406            truncated_before: false,
17407            truncated_after: preview_end < target_end,
17408        },
17409        body,
17410        child_symbols,
17411        summaries,
17412        expand,
17413        warnings,
17414    };
17415
17416    if format.json_output {
17417        let truncated = report.range.truncated_after
17418            || report.body.iter().any(|line| line.text.len() >= max_bytes)
17419            || report.child_symbols.len() >= max_items;
17420        let follow_up = [
17421            Some(report.expand.source_window.clone()),
17422            report.expand.body.clone(),
17423            Some(report.expand.file.clone()),
17424            Some(report.expand.explain.clone()),
17425            Some(report.expand.callers.clone()),
17426            Some(report.expand.callees.clone()),
17427        ]
17428        .into_iter()
17429        .flatten()
17430        .chain(report.expand.markdown_ast.clone())
17431        .collect::<Vec<_>>();
17432        print_json_or_envelope(
17433            &report,
17434            &format,
17435            "symbol-read",
17436            "symbol",
17437            ToolEnvelopeSummary {
17438                text: format!(
17439                    "symbol {} {}:{}-{}",
17440                    report.symbol.name, report.symbol.file, report.range.start, report.range.end
17441                ),
17442                metrics: vec![
17443                    envelope_metric("body_lines", report.body.len()),
17444                    envelope_metric("child_symbols", report.child_symbols.len()),
17445                    envelope_metric("summaries", report.summaries.len()),
17446                ],
17447            },
17448            truncated,
17449            follow_up,
17450        )?;
17451    } else if format.compact {
17452        println!(
17453            "symbol {} {}:{}-{} handle:{} hash:{}",
17454            report.symbol.name,
17455            report.symbol.file,
17456            report.range.start,
17457            report.range.end,
17458            report.handle,
17459            content_hash
17460        );
17461        for line in &report.body {
17462            println!("{:>5} {}", line.line, line.text);
17463        }
17464        if !report.child_symbols.is_empty() {
17465            println!("children[{}]:", report.child_symbols.len());
17466            for child in &report.child_symbols {
17467                println!("  {} {}:{}", child.name, child.file, child.line);
17468            }
17469        }
17470    } else {
17471        println!(
17472            "Symbol `{}` in `{}` lines {}-{} ({})",
17473            report.symbol.name,
17474            report.symbol.file,
17475            report.range.start,
17476            report.range.end,
17477            report.handle
17478        );
17479        for line in &report.body {
17480            println!("{:>5} | {}", line.line, line.text);
17481        }
17482        if !report.child_symbols.is_empty() {
17483            println!();
17484            println!("Child symbols:");
17485            for child in &report.child_symbols {
17486                println!(
17487                    "  {} `{}` {}:{} — {}",
17488                    child.handle, child.name, child.file, child.line, child.expand
17489                );
17490            }
17491        }
17492        println!();
17493        println!("Expand:");
17494        println!("  source:  {}", report.expand.source_window);
17495        println!("  file:    {}", report.expand.file);
17496        println!("  explain: {}", report.expand.explain);
17497        println!("  callers: {}", report.expand.callers);
17498        println!("  callees: {}", report.expand.callees);
17499        for warning in &report.warnings {
17500            eprintln!("warning: {warning}");
17501        }
17502    }
17503
17504    Ok(())
17505}
17506
17507#[allow(clippy::too_many_arguments)]
17508#[derive(Serialize)]
17509struct ExplainBudgetDefinitionPreview {
17510    handle: String,
17511    #[serde(skip_serializing_if = "Option::is_none")]
17512    tag_alias: Option<String>,
17513    kind: String,
17514    name: String,
17515    file: String,
17516    line: i64,
17517    expand: String,
17518}
17519
/// One caller or callee row of an `ExplainBudgetReport`, as filled in by
/// `build_explain_budget_report`.
#[derive(Serialize)]
struct ExplainBudgetEdgePreview {
    /// Handle taken from the compact symbol ref (`ecall` prefix for callers, `eces` for
    /// callees).
    handle: String,
    /// Tag alias from the compact symbol ref; left out of serialized output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Display name of the caller (caller rows) or callee (callee rows).
    name: String,
    /// The edge's caller file, passed through `truncate_for_budget`.
    file: String,
    /// Call-site line of the edge.
    line: i64,
    /// `tsift explain …` command for the named symbol.
    expand: String,
}
17530
/// Community summary attached to an `ExplainBudgetReport`.
#[derive(Serialize)]
struct ExplainBudgetCommunityPreview {
    /// Total number of members in the community, before any preview cap.
    size: usize,
    /// Member names, limited to the preview item cap and passed through
    /// `truncate_for_budget`.
    members: Vec<String>,
}
17536
/// Budget-limited explain report produced by `build_explain_budget_report` and rendered
/// by `print_explain_budget_human`.
#[derive(Serialize)]
struct ExplainBudgetReport {
    /// The symbol the report was requested for.
    symbol: String,
    /// Item cap applied to each preview list (`budget.preview_items()`).
    max_items: usize,
    /// Byte cap handed to the truncation helpers (`budget.preview_bytes()`).
    max_bytes: usize,
    /// Number of definitions supplied by the caller, before the item cap.
    definition_total: usize,
    /// Caller count as supplied by the caller of the builder.
    callers_total: usize,
    /// Passed through unchanged from the builder's arguments.
    callers_truncated_by_limit: bool,
    /// Callee count as supplied by the caller of the builder.
    callees_total: usize,
    /// Passed through unchanged from the builder's arguments.
    callees_truncated_by_limit: bool,
    /// True when any preview list shows fewer entries than its total, or the community has
    /// more members than the item cap.
    truncated: bool,
    /// Definition rows, at most `max_items`.
    definitions: Vec<ExplainBudgetDefinitionPreview>,
    /// Caller rows, at most `max_items`.
    callers: Vec<ExplainBudgetEdgePreview>,
    /// Callee rows, at most `max_items`.
    callees: Vec<ExplainBudgetEdgePreview>,
    /// Community summary; left out of serialized output when no community was supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    community: Option<ExplainBudgetCommunityPreview>,
}
17554
17555#[allow(clippy::too_many_arguments)]
17556pub(crate) fn build_explain_budget_report(
17557    symbol: &str,
17558    _root: &Path,
17559    symbols: &[index::StoredSymbol],
17560    callers: &[index::StoredEdge],
17561    callers_total: usize,
17562    callers_truncated_by_limit: bool,
17563    callees: &[index::StoredEdge],
17564    callees_total: usize,
17565    callees_truncated_by_limit: bool,
17566    community: Option<&graph::Community>,
17567    budget: ResponseBudget,
17568) -> ExplainBudgetReport {
17569    let max_items = budget.preview_items();
17570    let max_bytes = budget.preview_bytes();
17571    let definitions = symbols
17572        .iter()
17573        .take(max_items)
17574        .map(|entry| {
17575            let symbol_ref = build_compact_symbol_ref(
17576                "edef",
17577                &format!(
17578                    "{}:{}:{}:{}",
17579                    entry.kind, entry.name, entry.file, entry.line
17580                ),
17581                &entry.name,
17582                entry.tags.as_deref(),
17583                max_bytes,
17584            );
17585            ExplainBudgetDefinitionPreview {
17586                handle: symbol_ref.handle,
17587                tag_alias: symbol_ref.tag_alias,
17588                kind: entry.kind.clone(),
17589                name: symbol_ref.name,
17590                file: truncate_for_budget(&entry.file, max_bytes),
17591                line: entry.line,
17592                expand: format!(
17593                    "tsift search {} --exact --path {} --limit 20",
17594                    shell_quote(&entry.name),
17595                    shell_quote(&entry.file)
17596                ),
17597            }
17598        })
17599        .collect();
17600    let callers_preview: Vec<ExplainBudgetEdgePreview> = callers
17601        .iter()
17602        .take(max_items)
17603        .map(|entry| {
17604            let symbol_ref = build_compact_symbol_ref(
17605                "ecall",
17606                &format!(
17607                    "{}:{}:{}:{}",
17608                    entry.caller_name, entry.caller_file, entry.call_site_line, symbol
17609                ),
17610                &entry.caller_name,
17611                None,
17612                max_bytes,
17613            );
17614            ExplainBudgetEdgePreview {
17615                handle: symbol_ref.handle,
17616                tag_alias: symbol_ref.tag_alias,
17617                name: symbol_ref.name,
17618                file: truncate_for_budget(&entry.caller_file, max_bytes),
17619                line: entry.call_site_line,
17620                expand: format!(
17621                    "tsift explain {} --path {} --limit 0",
17622                    shell_quote(&entry.caller_name),
17623                    shell_quote(&entry.caller_file)
17624                ),
17625            }
17626        })
17627        .collect();
17628    let callees_preview: Vec<ExplainBudgetEdgePreview> = callees
17629        .iter()
17630        .take(max_items)
17631        .map(|entry| {
17632            let symbol_ref = build_compact_symbol_ref(
17633                "eces",
17634                &format!(
17635                    "{}:{}:{}:{}",
17636                    entry.callee_name, entry.caller_file, entry.call_site_line, symbol
17637                ),
17638                &entry.callee_name,
17639                None,
17640                max_bytes,
17641            );
17642            ExplainBudgetEdgePreview {
17643                handle: symbol_ref.handle,
17644                tag_alias: symbol_ref.tag_alias,
17645                name: symbol_ref.name,
17646                file: truncate_for_budget(&entry.caller_file, max_bytes),
17647                line: entry.call_site_line,
17648                expand: format!(
17649                    "tsift explain {} --path {} --limit 0",
17650                    shell_quote(&entry.callee_name),
17651                    shell_quote(&entry.caller_file)
17652                ),
17653            }
17654        })
17655        .collect();
17656    let community_preview = community.map(|entry| ExplainBudgetCommunityPreview {
17657        size: entry.members.len(),
17658        members: entry
17659            .members
17660            .iter()
17661            .take(max_items)
17662            .map(|member| truncate_for_budget(&member.name, max_bytes))
17663            .collect(),
17664    });
17665
17666    ExplainBudgetReport {
17667        symbol: symbol.to_string(),
17668        max_items,
17669        max_bytes,
17670        definition_total: symbols.len(),
17671        callers_total,
17672        callers_truncated_by_limit,
17673        callees_total,
17674        callees_truncated_by_limit,
17675        truncated: symbols.len() > max_items
17676            || callers_total > callers_preview.len()
17677            || callees_total > callees_preview.len()
17678            || community
17679                .map(|entry| entry.members.len() > max_items)
17680                .unwrap_or(false),
17681        definitions,
17682        callers: callers_preview,
17683        callees: callees_preview,
17684        community: community_preview,
17685    }
17686}
17687
17688pub(crate) fn print_explain_budget_human(report: &ExplainBudgetReport) {
17689    println!(
17690        "explain-budget sym:{} defs:{}/{} crs:{}/{} ces:{}/{}",
17691        shell_quote(&report.symbol),
17692        report.definitions.len(),
17693        report.definition_total,
17694        report.callers.len(),
17695        report.callers_total,
17696        report.callees.len(),
17697        report.callees_total
17698    );
17699    for entry in &report.definitions {
17700        println!(
17701            "def {} {} {}:{} expand:{}",
17702            format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
17703            entry.kind,
17704            entry.file,
17705            entry.line,
17706            entry.expand
17707        );
17708    }
17709    for entry in &report.callers {
17710        println!(
17711            "caller {} {}:{} expand:{}",
17712            format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
17713            entry.file,
17714            entry.line,
17715            entry.expand
17716        );
17717    }
17718    for entry in &report.callees {
17719        println!(
17720            "callee {} {}:{} expand:{}",
17721            format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
17722            entry.file,
17723            entry.line,
17724            entry.expand
17725        );
17726    }
17727    if let Some(community) = &report.community {
17728        println!(
17729            "community size:{} members:{}",
17730            community.size,
17731            community.members.join(", ")
17732        );
17733    }
17734    if report.truncated {
17735        println!(
17736            "budget truncated items:{} bytes:{}",
17737            report.max_items, report.max_bytes
17738        );
17739    }
17740}
17741
/// Directory names that `tagpath_audit_policy_hints` reports as `skip_dir:<name>` when
/// they appear among a path's parent components.
///
/// NOTE(review): presumably mirrors tagpath's hard-coded `SKIP_DIRS` — confirm against
/// tagpath before editing this list.
///
/// Background on the audit these hints support: it reconciles the tsift symbol index
/// against the tagpath `.naming/index.json` source set and reports files covered by one
/// but not the other.
///
/// Today silent recall loss happens when tagpath's `[exclude]` / `extends`
/// chain or its hard-coded `SKIP_DIRS` skip files or languages that tsift
/// still indexes — the tsift symbols in those files cannot resolve a
/// `tagpath_handle` even with a fresh tagpath index. This audit surfaces
/// the diff so operators can decide whether to broaden the tagpath walk,
/// add an `[exclude]` to tsift, or accept the gap.
const TAGPATH_AUDIT_SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    "__pycache__",
    ".venv",
    "vendor",
];
17759
/// Built-in file extensions (lowercase, no leading dot) that seed the set returned by
/// `tagpath_audit_supported_extensions`; extensions declared by grammars in
/// `.naming.toml` are added on top of this list.
///
/// NOTE(review): presumably tracks the languages tagpath handles out of the box — confirm
/// against tagpath when adding or removing entries.
const TAGPATH_AUDIT_SOURCE_EXTENSIONS: &[&str] = &[
    "rs", "py", "ts", "js", "go", "java", "rb", "c", "cpp", "h", "hpp", "cs", "swift", "kt",
    "scala", "zig", "nim", "ex", "exs", "erl", "hs", "ml", "clj", "r", "lua", "php", "pl", "d",
    "cr", "dart", "jl", "v", "odin", "gleam", "rkt", "scm", "lisp", "lsp", "f", "fs", "fsi", "fsx",
    "sh", "bash", "zsh", "sql", "css", "tsx",
];
17766
17767pub(crate) fn tagpath_audit_supported_extensions(root: &Path) -> BTreeSet<String> {
17768    let mut extensions = TAGPATH_AUDIT_SOURCE_EXTENSIONS
17769        .iter()
17770        .map(|ext| (*ext).to_string())
17771        .collect::<BTreeSet<_>>();
17772
17773    let config_path = root.join(".naming.toml");
17774    if !config_path.exists() {
17775        return extensions;
17776    }
17777
17778    match tagpath::config::resolve(&config_path) {
17779        Ok(config) => {
17780            if let Some(grammars) = config.grammars {
17781                for grammar in grammars.languages.values() {
17782                    for ext in &grammar.extensions {
17783                        if let Some(normalized) = normalize_extension(ext) {
17784                            extensions.insert(normalized);
17785                        }
17786                    }
17787                }
17788            }
17789        }
17790        Err(err) => {
17791            eprintln!("tagpath_policy_hint_config_unreadable: {err}");
17792        }
17793    }
17794    extensions
17795}
17796
17797pub(crate) fn tagpath_audit_policy_hints(
17798    rel_path: &str,
17799    supported_extensions: &BTreeSet<String>,
17800) -> Vec<String> {
17801    let path = Path::new(rel_path);
17802    let mut hints = BTreeSet::new();
17803    if let Some(parent) = path.parent() {
17804        for component in parent.components() {
17805            if let std::path::Component::Normal(name) = component {
17806                let name = name.to_string_lossy();
17807                if TAGPATH_AUDIT_SKIP_DIRS.contains(&name.as_ref()) {
17808                    hints.insert(format!("skip_dir:{name}"));
17809                }
17810            }
17811        }
17812    }
17813    if path
17814        .extension()
17815        .and_then(|ext| ext.to_str())
17816        .and_then(normalize_extension)
17817        .is_some_and(|ext| !supported_extensions.contains(&ext))
17818    {
17819        hints.insert("extension_unsupported".to_string());
17820    }
17821    hints.into_iter().collect()
17822}
17823
/// Canonicalize a file extension: surrounding whitespace is trimmed, every leading `.` is
/// removed, and ASCII letters are lowercased.
///
/// Returns `None` when nothing is left after trimming.
fn normalize_extension(ext: &str) -> Option<String> {
    let bare = ext.trim().trim_start_matches('.');
    (!bare.is_empty()).then(|| bare.to_ascii_lowercase())
}
17832
17833pub(crate) fn diff_digest_status_label(status: diff_digest::DiffDigestFileStatus) -> &'static str {
17834    match status {
17835        diff_digest::DiffDigestFileStatus::Added => "added",
17836        diff_digest::DiffDigestFileStatus::Modified => "modified",
17837        diff_digest::DiffDigestFileStatus::Deleted => "deleted",
17838    }
17839}
17840
17841pub(crate) fn diff_digest_summary_label(
17842    state: diff_digest::DiffDigestSummaryState,
17843) -> &'static str {
17844    match state {
17845        diff_digest::DiffDigestSummaryState::Current => "current",
17846        diff_digest::DiffDigestSummaryState::Stale => "stale",
17847        diff_digest::DiffDigestSummaryState::Missing => "missing",
17848        diff_digest::DiffDigestSummaryState::Unavailable => "unavailable",
17849    }
17850}
17851
17852fn test_digest_summary_label(state: test_digest::TestDigestSummaryState) -> &'static str {
17853    match state {
17854        test_digest::TestDigestSummaryState::Current => "current",
17855        test_digest::TestDigestSummaryState::Stale => "stale",
17856        test_digest::TestDigestSummaryState::Missing => "missing",
17857        test_digest::TestDigestSummaryState::Unavailable => "unavailable",
17858    }
17859}
17860
17861fn log_digest_summary_label(state: log_digest::LogDigestSummaryState) -> &'static str {
17862    match state {
17863        log_digest::LogDigestSummaryState::Current => "current",
17864        log_digest::LogDigestSummaryState::Stale => "stale",
17865        log_digest::LogDigestSummaryState::Missing => "missing",
17866        log_digest::LogDigestSummaryState::Unavailable => "unavailable",
17867    }
17868}
17869
17870pub(crate) fn diff_digest_mode_label(mode: diff_digest::DiffDigestMode) -> &'static str {
17871    match mode {
17872        diff_digest::DiffDigestMode::WorkingTree => "worktree",
17873        diff_digest::DiffDigestMode::Cached => "cached",
17874        diff_digest::DiffDigestMode::Revision => "revision",
17875    }
17876}
17877
17878pub(crate) fn diff_digest_mode_display(report: &diff_digest::DiffDigestReport) -> String {
17879    match (&report.mode, &report.revision) {
17880        (diff_digest::DiffDigestMode::WorkingTree, _) => "working tree".to_string(),
17881        (diff_digest::DiffDigestMode::Cached, _) => "staged index".to_string(),
17882        (diff_digest::DiffDigestMode::Revision, Some(revision)) => {
17883            format!("revision {revision}")
17884        }
17885        (diff_digest::DiffDigestMode::Revision, None) => "revision".to_string(),
17886    }
17887}
17888
17889pub(crate) fn diff_digest_empty_message(report: &diff_digest::DiffDigestReport) -> String {
17890    match (&report.mode, &report.revision) {
17891        (diff_digest::DiffDigestMode::WorkingTree, _) => "No git changes found.".to_string(),
17892        (diff_digest::DiffDigestMode::Cached, _) => "No staged git changes found.".to_string(),
17893        (diff_digest::DiffDigestMode::Revision, Some(revision)) => {
17894            format!("No diff found for revision {revision}.")
17895        }
17896        (diff_digest::DiffDigestMode::Revision, None) => "No revision diff found.".to_string(),
17897    }
17898}
17899
17900fn cmd_impact(
17901    path: &Path,
17902    cached: bool,
17903    revision: Option<&str>,
17904    scope: Option<&str>,
17905    limit: usize,
17906    format: OutputFormat,
17907) -> Result<()> {
17908    let report = impact::compute(
17909        path,
17910        impact::ImpactOptions {
17911            cached,
17912            revision,
17913            scope,
17914            limit,
17915        },
17916    )?;
17917    if format.json_output {
17918        println!(
17919            "{}",
17920            to_json_schema(
17921                &report,
17922                format.pretty,
17923                format.terse,
17924                format.ultra_terse,
17925                format.schema
17926            )?
17927        );
17928        return Ok(());
17929    }
17930
17931    if format.compact {
17932        println!(
17933            "impact mode:{} changed:{} symbols:{} tests:{}/{}",
17934            diff_digest_mode_label(report.mode),
17935            report.changed_files.len(),
17936            report.changed_symbols.len(),
17937            report.affected_tests.len(),
17938            report.affected_tests_total
17939        );
17940        for target in &report.affected_tests {
17941            println!(
17942                "{} reasons:{} command:{}",
17943                target.path,
17944                target.reasons.len(),
17945                target.commands.join(" && ")
17946            );
17947        }
17948        for warning in &report.warnings {
17949            println!("warning {warning}");
17950        }
17951        return Ok(());
17952    }
17953
17954    println!("Impact ({})", diff_digest_mode_label(report.mode));
17955    println!("  changed files:          {}", report.changed_files.len());
17956    println!("  changed symbols:        {}", report.changed_symbols.len());
17957    println!(
17958        "  affected tests:         {}/{}",
17959        report.affected_tests.len(),
17960        report.affected_tests_total
17961    );
17962    for target in &report.affected_tests {
17963        println!();
17964        println!("{}", target.path);
17965        for reason in &target.reasons {
17966            println!("  - {reason}");
17967        }
17968        if !target.symbols.is_empty() {
17969            println!("  symbols: {}", target.symbols.join(", "));
17970        }
17971        for command in &target.commands {
17972            println!("  run: {}", command);
17973        }
17974    }
17975    for warning in &report.warnings {
17976        println!("warning: {warning}");
17977    }
17978    Ok(())
17979}
17980
17981pub(crate) fn render_test_digest_from_input(
17982    path: &Path,
17983    input: &str,
17984    runner: Option<&str>,
17985    format: OutputFormat,
17986) -> Result<()> {
17987    let report = test_digest::compute(path, input, runner)?;
17988    if format.json_output {
17989        println!(
17990            "{}",
17991            to_json_schema(
17992                &report,
17993                format.pretty,
17994                format.terse,
17995                format.ultra_terse,
17996                format.schema
17997            )?
17998        );
17999        return Ok(());
18000    }
18001
18002    if report.failure_groups.is_empty() {
18003        println!("No failures detected (runner: {}).", report.runner);
18004        for warning in &report.warnings {
18005            println!("warning: {warning}");
18006        }
18007        return Ok(());
18008    }
18009
18010    if format.compact {
18011        println!(
18012            "test runner:{} failures:{} groups:{} passed:{} failed:{} skipped:{}",
18013            report.runner,
18014            report.failures,
18015            report.grouped_failures,
18016            report.counts.passed.unwrap_or(0),
18017            report.counts.failed.unwrap_or(report.grouped_failures),
18018            report.counts.skipped.unwrap_or(0),
18019        );
18020        for failure in &report.failure_groups {
18021            let tests = truncate_for_compact(&failure.tests.join(","), 60);
18022            let location = match (&failure.path, failure.line) {
18023                (Some(path), Some(line)) => format!("{path}:{line}"),
18024                (Some(path), None) => path.clone(),
18025                _ => "-".to_string(),
18026            };
18027            println!(
18028                "{} tests:{} count:{} summaries:{} msg:{}",
18029                location,
18030                tests,
18031                failure.occurrences,
18032                test_digest_summary_label(failure.summary_state),
18033                truncate_for_compact(&failure.message, 80)
18034            );
18035        }
18036        for warning in &report.warnings {
18037            println!("warning: {warning}");
18038        }
18039        return Ok(());
18040    }
18041
18042    println!("Test digest ({})", report.runner);
18043    println!("  failures:        {}", report.failures);
18044    println!("  failure groups:  {}", report.grouped_failures);
18045    if let Some(passed) = report.counts.passed {
18046        println!("  passed:          {}", passed);
18047    }
18048    if let Some(failed) = report.counts.failed {
18049        println!("  failed:          {}", failed);
18050    }
18051    if let Some(skipped) = report.counts.skipped {
18052        println!("  skipped:         {}", skipped);
18053    }
18054
18055    for failure in &report.failure_groups {
18056        println!();
18057        match (&failure.path, failure.line, failure.column) {
18058            (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
18059            (Some(path), Some(line), None) => println!("{path}:{line}"),
18060            (Some(path), None, _) => println!("{path}"),
18061            (None, _, _) => println!("(no file anchor)"),
18062        }
18063        println!("  tests: {}", failure.tests.join(", "));
18064        println!("  occurrences: {}", failure.occurrences);
18065        println!("  message: {}", failure.message);
18066        println!(
18067            "  cached summaries: {}",
18068            test_digest_summary_label(failure.summary_state)
18069        );
18070        for summary in &failure.current_summaries {
18071            println!(
18072                "    - {}: {}",
18073                summary.symbol,
18074                truncate_for_compact(&summary.summary, 160)
18075            );
18076        }
18077    }
18078    for warning in &report.warnings {
18079        println!("warning: {warning}");
18080    }
18081    Ok(())
18082}
18083
/// Per-kind node counts for a dispatch trace, as tallied by
/// `dispatch_trace_summary` from each node's `kind` string.
#[derive(Clone, Serialize, Deserialize)]
struct DispatchTraceSummary {
    /// Nodes whose kind is `"backlog"`.
    backlog: usize,
    /// Nodes whose kind is `"job_packet"`.
    job_packet: usize,
    /// Nodes whose kind is `"worker_result"`.
    worker_result: usize,
    /// Nodes whose kind is `"worker_context"`.
    worker_context: usize,
    /// Nodes whose kind is `"source_handle"`.
    source_handle: usize,
    /// Nodes whose kind is `"semantic_concept"` or `"semantic_entity"`.
    semantic_rows: usize,
}
18093
/// Serializable dispatch-trace report.
///
/// Assembled by `build_dispatch_trace_report_from_conflict_snapshot` from a
/// conflict-matrix report plus the graph snapshot it was prepared against,
/// then printed as JSON or embedded in the HTML view by `cmd_dispatch_trace`.
#[derive(Clone, Serialize, Deserialize)]
struct DispatchTraceReport {
    /// Set from `DISPATCH_TRACE_CONTRACT_VERSION`.
    contract_version: String,
    /// Copied from the conflict-matrix report's `root`.
    root: String,
    /// Copied from the conflict-matrix report's `scope`; left out of the
    /// serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    scope: Option<String>,
    /// Copied from the conflict-matrix report's `targets`.
    targets: Vec<String>,
    /// Copied from the conflict-matrix orchestration data.
    projection_freshness: GraphDbFreshnessReport,
    /// Copied from the conflict-matrix orchestration data.
    projection_hashes: Vec<String>,
    /// Copied from the conflict-matrix orchestration data.
    evidence_packet_ids: Vec<String>,
    /// Built by `dispatch_trace_shared_preparation_summary`.
    shared_preparation: ConflictMatrixSharedPreparationSummary,
    /// Copied from the conflict-matrix report.
    worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
    /// One entry per conflict-matrix candidate, in candidate order.
    worker_feedback: Vec<ConflictMatrixWorkerFeedback>,
    /// Per-kind counts over the retained `nodes`.
    summary: DispatchTraceSummary,
    /// Retained graph nodes, sorted by kind rank and then by id.
    nodes: Vec<SubstrateTerseGraphNode>,
    /// Edges whose two endpoints are both retained, sorted by
    /// (from id, kind, to id).
    edges: Vec<SubstrateTerseGraphEdge>,
    /// Copied from the conflict-matrix orchestration data.
    conflict_matrix_decisions: Vec<String>,
    /// Copied from the conflict-matrix report's `next_commands`.
    replay_commands: Vec<String>,
    /// Result of `graph_db_repair_commands` for the root and scope.
    repair_commands: Vec<String>,
    /// Truncation flag returned by `dispatch_trace_collect_ids`.
    truncated: bool,
    /// Conflict-matrix warnings followed by any extra warnings supplied by the
    /// caller; omitted from the serialized form when empty and defaulted to
    /// empty when absent on deserialization.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    warnings: Vec<String>,
}
18117
/// Reports whether a graph node of the given `kind` may appear in a dispatch
/// trace. The comparison is exact and case-sensitive.
fn dispatch_trace_allowed_node_kind(kind: &str) -> bool {
    const ALLOWED_KINDS: [&str; 11] = [
        "session",
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "semantic_concept",
        "semantic_entity",
        "file",
        "symbol",
        "route",
    ];

    ALLOWED_KINDS.contains(&kind)
}
18134
/// Returns the sort rank of a node kind for dispatch-trace output.
///
/// Known kinds rank 0 through 10 in the order listed below; any other kind
/// ranks 99 so it sorts after all known kinds.
fn dispatch_trace_kind_rank(kind: &str) -> usize {
    // Position in this table is the rank.
    const RANKED_KINDS: [&str; 11] = [
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "file",
        "symbol",
        "route",
        "semantic_concept",
        "semantic_entity",
        "session",
    ];
    const UNKNOWN_RANK: usize = 99;

    RANKED_KINDS
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(UNKNOWN_RANK)
}
18151
18152fn dispatch_trace_summary(nodes: &[SubstrateGraphNode]) -> DispatchTraceSummary {
18153    DispatchTraceSummary {
18154        backlog: nodes.iter().filter(|node| node.kind == "backlog").count(),
18155        job_packet: nodes
18156            .iter()
18157            .filter(|node| node.kind == "job_packet")
18158            .count(),
18159        worker_result: nodes
18160            .iter()
18161            .filter(|node| node.kind == "worker_result")
18162            .count(),
18163        worker_context: nodes
18164            .iter()
18165            .filter(|node| node.kind == "worker_context")
18166            .count(),
18167        source_handle: nodes
18168            .iter()
18169            .filter(|node| node.kind == "source_handle")
18170            .count(),
18171        semantic_rows: nodes
18172            .iter()
18173            .filter(|node| matches!(node.kind.as_str(), "semantic_concept" | "semantic_entity"))
18174            .count(),
18175    }
18176}
18177
18178fn dispatch_trace_shared_preparation_summary(
18179    graph_nodes: &[SubstrateGraphNode],
18180    graph_edges: &[SubstrateGraphEdge],
18181    conflict: &ConflictMatrixReport,
18182) -> ConflictMatrixSharedPreparationSummary {
18183    ConflictMatrixSharedPreparationSummary {
18184        evidence_cache_status: conflict
18185            .inputs
18186            .shared_preparation
18187            .evidence_cache_status
18188            .clone(),
18189        graph_nodes: graph_nodes.len(),
18190        graph_edges: graph_edges.len(),
18191        evidence_packets: conflict.orchestration.evidence_packet_ids.len(),
18192        source_handles: conflict
18193            .candidates
18194            .iter()
18195            .map(|candidate| candidate.source_handles.len())
18196            .sum(),
18197        worker_context: conflict
18198            .candidates
18199            .iter()
18200            .map(|candidate| candidate.worker_context_handles.len())
18201            .sum(),
18202        worker_results: conflict
18203            .candidates
18204            .iter()
18205            .map(|candidate| candidate.worker_feedback.total)
18206            .sum(),
18207        semantic_rows: conflict
18208            .candidates
18209            .iter()
18210            .map(|candidate| candidate.semantic_related.len())
18211            .sum(),
18212        dispatch_trace_snapshot_nodes: graph_nodes.len(),
18213        dispatch_trace_snapshot_edges: graph_edges.len(),
18214    }
18215}
18216
18217fn dispatch_trace_collect_ids(
18218    targets: &[String],
18219    candidates: &[ConflictMatrixCandidate],
18220    graph_nodes: &[SubstrateGraphNode],
18221    graph_edges: &[SubstrateGraphEdge],
18222    depth: usize,
18223    limit: usize,
18224) -> (BTreeSet<String>, bool) {
18225    let target_refs = targets
18226        .iter()
18227        .map(|target| target.trim_start_matches('#').to_string())
18228        .collect::<BTreeSet<_>>();
18229    let mut ids = BTreeSet::new();
18230    for candidate in candidates {
18231        ids.insert(candidate.target_node_id.clone());
18232        for source in &candidate.source_handles {
18233            ids.insert(source.handle.clone());
18234        }
18235        for handle in &candidate.worker_context_handles {
18236            ids.insert(handle.clone());
18237        }
18238        for semantic in &candidate.semantic_related {
18239            ids.insert(semantic.handle.clone());
18240        }
18241    }
18242    for node in graph_nodes {
18243        if !dispatch_trace_allowed_node_kind(&node.kind) {
18244            continue;
18245        }
18246        if node
18247            .properties
18248            .get("ref_id")
18249            .is_some_and(|ref_id| target_refs.contains(ref_id))
18250        {
18251            ids.insert(node.id.clone());
18252        }
18253    }
18254
18255    let node_by_id = graph_nodes
18256        .iter()
18257        .map(|node| (node.id.as_str(), node))
18258        .collect::<BTreeMap<_, _>>();
18259    let max_nodes = if limit == 0 {
18260        usize::MAX
18261    } else {
18262        limit
18263            .saturating_mul(targets.len().max(1))
18264            .saturating_mul(12)
18265            .max(64)
18266    };
18267    let mut truncated = false;
18268    for _ in 0..depth.max(1) {
18269        let before = ids.len();
18270        let current_ids = ids.clone();
18271        for edge in graph_edges {
18272            if ids.len() >= max_nodes {
18273                truncated = true;
18274                break;
18275            }
18276            let touches = current_ids.contains(&edge.from_id) || current_ids.contains(&edge.to_id);
18277            if !touches {
18278                continue;
18279            }
18280            for endpoint in [&edge.from_id, &edge.to_id] {
18281                let Some(node) = node_by_id.get(endpoint.as_str()) else {
18282                    continue;
18283                };
18284                if dispatch_trace_allowed_node_kind(&node.kind) {
18285                    ids.insert(endpoint.clone());
18286                }
18287            }
18288        }
18289        if ids.len() == before || truncated {
18290            break;
18291        }
18292    }
18293    (ids, truncated)
18294}
18295
18296#[allow(clippy::too_many_arguments)]
18297fn build_dispatch_trace_report_from_conflict_snapshot(
18298    root: &Path,
18299    scope: Option<&str>,
18300    conflict: ConflictMatrixReport,
18301    graph_nodes: Vec<SubstrateGraphNode>,
18302    graph_edges: Vec<SubstrateGraphEdge>,
18303    depth: usize,
18304    limit: usize,
18305    extra_warnings: Vec<String>,
18306) -> Result<DispatchTraceReport> {
18307    let shared_preparation =
18308        dispatch_trace_shared_preparation_summary(&graph_nodes, &graph_edges, &conflict);
18309    let (ids, truncated) = dispatch_trace_collect_ids(
18310        &conflict.targets,
18311        &conflict.candidates,
18312        &graph_nodes,
18313        &graph_edges,
18314        depth,
18315        limit,
18316    );
18317    let mut nodes = graph_nodes
18318        .into_iter()
18319        .filter(|node| ids.contains(&node.id))
18320        .collect::<Vec<_>>();
18321    nodes.sort_by(|left, right| {
18322        dispatch_trace_kind_rank(&left.kind)
18323            .cmp(&dispatch_trace_kind_rank(&right.kind))
18324            .then(left.id.cmp(&right.id))
18325    });
18326    let node_ids = nodes
18327        .iter()
18328        .map(|node| node.id.as_str())
18329        .collect::<BTreeSet<_>>();
18330    let mut edges = graph_edges
18331        .into_iter()
18332        .filter(|edge| {
18333            node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
18334        })
18335        .collect::<Vec<_>>();
18336    edges.sort_by(|left, right| {
18337        left.from_id
18338            .cmp(&right.from_id)
18339            .then(left.kind.cmp(&right.kind))
18340            .then(left.to_id.cmp(&right.to_id))
18341    });
18342    let mut warnings = conflict.warnings;
18343    warnings.extend(extra_warnings);
18344
18345    Ok(DispatchTraceReport {
18346        contract_version: DISPATCH_TRACE_CONTRACT_VERSION.to_string(),
18347        root: conflict.root,
18348        scope: conflict.scope,
18349        targets: conflict.targets,
18350        projection_freshness: conflict.orchestration.projection_freshness,
18351        projection_hashes: conflict.orchestration.projection_hashes,
18352        evidence_packet_ids: conflict.orchestration.evidence_packet_ids,
18353        shared_preparation,
18354        worker_prompt_packets: conflict.worker_prompt_packets,
18355        worker_feedback: conflict
18356            .candidates
18357            .iter()
18358            .map(|candidate| candidate.worker_feedback.clone())
18359            .collect(),
18360        summary: dispatch_trace_summary(&nodes),
18361        nodes: nodes.into_iter().map(Into::into).collect(),
18362        edges: edges.into_iter().map(Into::into).collect(),
18363        conflict_matrix_decisions: conflict.orchestration.conflict_matrix_decisions,
18364        replay_commands: conflict.next_commands,
18365        repair_commands: graph_db_repair_commands(root, scope),
18366        truncated,
18367        warnings,
18368    })
18369}
18370
18371fn build_dispatch_trace_report(
18372    path: &Path,
18373    scope: Option<&str>,
18374    raw_targets: &[String],
18375    depth: usize,
18376    limit: usize,
18377    impact_limit: usize,
18378) -> Result<DispatchTraceReport> {
18379    let root = lint::resolve_project_root_or_canonical_path(path)?;
18380    let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
18381    if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
18382        write_traversal_graph_store(&root, path, scope)
18383            .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
18384    }
18385    let graph_db = graph_substrate_db_path(&root, scope);
18386    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
18387        .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
18388    let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
18389    let extra_warnings = store
18390        .read_only_recovery()
18391        .map(graph_db_read_recovery_diagnostic)
18392        .into_iter()
18393        .collect::<Vec<_>>();
18394    let prepared = prepare_conflict_matrix_inputs(&root, path, scope, impact_limit)?;
18395    let graph_prepared = prepare_conflict_matrix_graph_orchestration(
18396        &root,
18397        scope,
18398        "sqlite",
18399        raw_targets,
18400        &prepared,
18401        depth,
18402        limit,
18403        &store,
18404        freshness.clone(),
18405    )?;
18406    let dt_cache_key = cycle_packet_cache::cycle_packet_watermark_key(
18407        &prepared.preparation_cache.source_watermark,
18408        &prepared.preparation_cache.document_watermark,
18409        &prepared.preparation_cache.staged_diff_watermark,
18410        &[
18411            &format!("targets:{}", raw_targets.join(",")),
18412            &format!("depth:{depth}"),
18413            &format!("limit:{limit}"),
18414        ],
18415    );
18416    if let Some(cached_report) = cycle_packet_cache::cycle_packet_read_cache::<DispatchTraceReport>(
18417        &root,
18418        cycle_packet_cache::CyclePacketKind::ConflictMatrix,
18419        &dt_cache_key,
18420    ) {
18421        return Ok(cached_report);
18422    }
18423    let conflict = build_conflict_matrix_report_from_prepared_graph(
18424        &root,
18425        path,
18426        scope,
18427        depth,
18428        limit,
18429        impact_limit,
18430        freshness,
18431        extra_warnings.clone(),
18432        &prepared,
18433        &graph_prepared,
18434    )?;
18435    let report = build_dispatch_trace_report_from_conflict_snapshot(
18436        &root,
18437        scope,
18438        conflict,
18439        graph_prepared.graph.nodes,
18440        graph_prepared.graph.edges,
18441        depth,
18442        limit,
18443        extra_warnings,
18444    )?;
18445    cycle_packet_cache::cycle_packet_write_cache(
18446        &root,
18447        cycle_packet_cache::CyclePacketKind::ConflictMatrix,
18448        &dt_cache_key,
18449        &report,
18450    );
18451    Ok(report)
18452}
18453
18454fn dispatch_trace_html(report: &DispatchTraceReport) -> Result<String> {
18455    let json = serde_json::to_string(report)?.replace("</", "<\\/");
18456    let mut html = String::new();
18457    html.push_str(
18458        "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift dispatch trace</title>",
18459    );
18460    html.push_str(
18461        r#"<style>
18462:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#fff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e}
18463@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf}}
18464*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 360px;gap:14px}.panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.side{padding:14px;overflow:auto;max-height:720px}.side h2{font-size:15px;margin:12px 0 8px}.side h2:first-child{margin-top:0}.list{display:grid;gap:8px}.row{border:1px solid var(--line);border-radius:6px;padding:8px}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted);overflow-wrap:anywhere}svg{width:100%;height:680px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.node{stroke:var(--panel);stroke-width:2}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text)}@media(max-width:900px){.top{display:block}.layout{grid-template-columns:1fr}.side{max-height:none}svg{height:560px}}
18465</style>"#,
18466    );
18467    html.push_str("</head><body><div class=\"page\">");
18468    html.push_str(&format!(
18469        "<header class=\"top\"><div><h1>tsift dispatch trace</h1><div class=\"meta\">targets <code>{}</code> | evidence <code>{}</code> | nodes <code>{}</code> | worker_prompt_packets <code>{}</code></div></div><div class=\"meta\"><code>{}</code></div></header>",
18470        html_escape(&report.targets.join(", ")),
18471        report.evidence_packet_ids.len(),
18472        report.nodes.len(),
18473        report.worker_prompt_packets.len(),
18474        html_escape(&report.contract_version)
18475    ));
18476    html.push_str(
18477        r#"<main class="layout"><section class="panel"><svg id="graph-canvas" role="img" aria-label="Dispatch trace graph"></svg></section><aside class="side"><h2>Worker Prompt Packets</h2><div id="packets" class="list"></div><h2>Worker Feedback</h2><div id="feedback" class="list"></div><h2>Nodes</h2><div id="nodes" class="list"></div></aside></main>"#,
18478    );
18479    html.push_str("<script id=\"trace-data\" type=\"application/json\">");
18480    html.push_str(&json);
18481    html.push_str(
18482        r##"</script><script>
18483const report = JSON.parse(document.getElementById("trace-data").textContent);
18484const svg = document.getElementById("graph-canvas");
18485const nodeList = document.getElementById("nodes");
18486const packets = document.getElementById("packets");
18487const feedback = document.getElementById("feedback");
18488const nodes = report.nodes.map((node, index) => ({...node, index}));
18489const nodeById = new Map(nodes.map(node => [node.id, node]));
18490const edges = report.edges.filter(edge => nodeById.has(edge.from_id) && nodeById.has(edge.to_id));
18491const colorByKind = new Map([["backlog","#dc2626"],["job_packet","#ea580c"],["worker_result","#15803d"],["worker_context","#475569"],["source_handle","#64748b"],["semantic_concept","#9a3412"],["semantic_entity","#b45309"],["file","#2563eb"],["symbol","#16a34a"],["route","#7c3aed"],["session","#0891b2"]]);
18492function color(kind){return colorByKind.get(kind)||"#6b7280";}
18493function text(value){return value == null ? "" : String(value);}
18494function escapeHtml(value){return text(value).replace(/[&<>"']/g, ch => ({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;","'":"&#39;"}[ch]));}
18495function layout(){
18496  const rect = svg.getBoundingClientRect();
18497  const width = rect.width || 900, height = rect.height || 680, cx = width / 2, cy = height / 2;
18498  const kinds = [...new Set(nodes.map(node => node.kind))].sort();
18499  const counts = new Map();
18500  for (const node of nodes) counts.set(node.kind, (counts.get(node.kind)||0)+1);
18501  const offsets = new Map();
18502  for (const node of nodes) {
18503    const group = kinds.indexOf(node.kind);
18504    const index = offsets.get(node.kind) || 0;
18505    offsets.set(node.kind, index + 1);
18506    const total = counts.get(node.kind) || 1;
18507    const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
18508    const angle = Math.PI * 2 * index / Math.max(total, 1) + group * 0.53;
18509    node.x = cx + Math.cos(angle) * ring;
18510    node.y = cy + Math.sin(angle) * ring;
18511  }
18512}
18513function draw(){
18514  svg.innerHTML = "";
18515  for (const edge of edges) {
18516    const from = nodeById.get(edge.from_id), to = nodeById.get(edge.to_id);
18517    const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
18518    line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
18519    line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
18520    line.setAttribute("class", "edge");
18521    line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.kind;
18522    svg.appendChild(line);
18523  }
18524  for (const node of nodes) {
18525    const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
18526    circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
18527    circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
18528    circle.setAttribute("fill", color(node.kind));
18529    circle.setAttribute("class", "node");
18530    circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
18531    svg.appendChild(circle);
18532    const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
18533    label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
18534    label.setAttribute("class", "node-label");
18535    label.textContent = node.label.length > 34 ? node.label.slice(0,31) + "..." : node.label;
18536    svg.appendChild(label);
18537  }
18538}
18539packets.innerHTML = report.worker_prompt_packets.map(packet => `<div class="row"><div class="kind">${escapeHtml(packet.contract_version)} - ${escapeHtml(packet.risk)} - parallel_safe ${packet.parallel_safe ? "true" : "false"} - closure ${packet.worker_feedback ? packet.worker_feedback.closure_rank_score : 0}</div><div class="label">${escapeHtml(packet.title)}</div><div class="handle">${escapeHtml(packet.packet_id)}</div><div class="handle">blocks ${escapeHtml((packet.blocks||[]).join(", ") || "none")} | blocked_by ${escapeHtml((packet.blocked_by||[]).join(", ") || "none")}</div></div>`).join("") || "<div class=\"meta\">No packets.</div>";
18540feedback.innerHTML = report.worker_feedback.map(item => `<div class="row"><div class="kind">completed ${item.completed} - blocked ${item.blocked} - closure ${item.closure_rank_score}</div><div>files ${escapeHtml((item.touched_files||[]).join(", ") || "none")}</div><div>tests ${escapeHtml((item.expected_tests||[]).join(" && ") || "none")}</div>${item.repeated_blockage ? "<div class=\"label\">Repeated blockage</div>" : ""}${(item.stale_expected_tests||[]).length ? `<div class="label">Stale tests: ${escapeHtml(item.stale_expected_tests.join(", "))}</div>` : ""}${(item.follow_up_debt||[]).length ? `<div class="label">Follow-up debt: ${escapeHtml(item.follow_up_debt.join(", "))}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No worker results.</div>";
18541nodeList.innerHTML = nodes.map(node => `<div class="row"><div class="kind">${escapeHtml(node.kind)}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${escapeHtml(node.id)}</div></div>`).join("");
18542window.addEventListener("resize", () => { layout(); draw(); });
18543layout(); draw();
18544</script></div></body></html>"##,
18545    );
18546    Ok(html)
18547}
18548
/// Arguments for `cmd_dispatch_trace`, bundled into one borrowed struct.
///
/// Every field except `trace_format` is forwarded unchanged to
/// `build_dispatch_trace_report`.
struct DispatchTraceOptions<'a> {
    /// Path the trace is built for; the project root is resolved from it.
    path: &'a Path,
    /// Optional scope passed through to the graph-db and conflict-matrix steps.
    scope: Option<&'a str>,
    /// Targets as supplied by the caller, before any resolution.
    raw_targets: &'a [String],
    /// Number of edge-expansion rounds (treated as at least one when node ids
    /// are collected).
    depth: usize,
    /// Per-target size limit; zero disables the node cap when ids are collected.
    limit: usize,
    /// Limit forwarded to conflict-matrix input preparation.
    impact_limit: usize,
    /// Selects JSON or HTML output.
    trace_format: DispatchTraceFormat,
}
18558
18559fn cmd_dispatch_trace(
18560    options: DispatchTraceOptions<'_>,
18561    output_format: OutputFormat,
18562) -> Result<()> {
18563    let report = build_dispatch_trace_report(
18564        options.path,
18565        options.scope,
18566        options.raw_targets,
18567        options.depth,
18568        options.limit,
18569        options.impact_limit,
18570    )?;
18571    match options.trace_format {
18572        DispatchTraceFormat::Json => {
18573            if output_format.envelope {
18574                print_json_or_envelope(
18575                    &report,
18576                    &output_format,
18577                    "dispatch-trace",
18578                    "operator-review",
18579                    ToolEnvelopeSummary {
18580                        text: format!(
18581                            "Dispatch trace for {} target(s): {} graph node(s), {} worker prompt packet(s)",
18582                            report.targets.len(),
18583                            report.nodes.len(),
18584                            report.worker_prompt_packets.len()
18585                        ),
18586                        metrics: vec![
18587                            envelope_metric("targets", report.targets.len()),
18588                            envelope_metric("nodes", report.nodes.len()),
18589                            envelope_metric("edges", report.edges.len()),
18590                            envelope_metric(
18591                                "worker_prompt_packets",
18592                                report.worker_prompt_packets.len(),
18593                            ),
18594                        ],
18595                    },
18596                    report.truncated,
18597                    report.replay_commands.clone(),
18598                )
18599            } else {
18600                println!(
18601                    "{}",
18602                    to_json_schema(
18603                        &report,
18604                        output_format.pretty,
18605                        output_format.terse,
18606                        output_format.ultra_terse,
18607                        output_format.schema
18608                    )?
18609                );
18610                Ok(())
18611            }
18612        }
18613        DispatchTraceFormat::Html => {
18614            println!("{}", dispatch_trace_html(&report)?);
18615            Ok(())
18616        }
18617    }
18618}
18619
/// Working record for one dependency-DAG target.
///
/// NOTE(review): this carries the same fields as `DependencyDagNode` but with
/// set/map collections and no `Serialize` derive, so it looks like the
/// in-memory form that is later flattened into a node — confirm against the
/// code that builds the DAG.
#[derive(Clone, Debug)]
struct DependencyDagProfile {
    id: String,
    graph_node_id: String,
    label: String,
    path: Option<String>,
    line: Option<i64>,
    detail: Option<String>,
    // Ordered sets: entries are unique and iterate in sorted order.
    source_files: BTreeSet<String>,
    source_symbols: BTreeSet<String>,
    config_files: BTreeSet<String>,
    expected_tests: BTreeSet<String>,
    // Keyed by a string; presumably the semantic ref's handle — TODO confirm.
    semantic_refs: BTreeMap<String, ConflictMatrixSemanticRef>,
    worker_feedback: ConflictMatrixWorkerFeedback,
}
18635
18636#[derive(Clone, Debug, Serialize)]
18637struct DependencyDagNode {
18638    id: String,
18639    graph_node_id: String,
18640    label: String,
18641    #[serde(skip_serializing_if = "Option::is_none")]
18642    path: Option<String>,
18643    #[serde(skip_serializing_if = "Option::is_none")]
18644    line: Option<i64>,
18645    #[serde(skip_serializing_if = "Option::is_none")]
18646    detail: Option<String>,
18647    source_files: Vec<String>,
18648    source_symbols: Vec<String>,
18649    config_files: Vec<String>,
18650    expected_tests: Vec<String>,
18651    semantic_refs: Vec<ConflictMatrixSemanticRef>,
18652    worker_feedback: ConflictMatrixWorkerFeedback,
18653}
18654
18655#[derive(Clone, Debug, Serialize)]
18656struct DependencyDagEdge {
18657    from: String,
18658    to: String,
18659    kind: String,
18660    weight: usize,
18661    reasons: Vec<String>,
18662    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18663    shared_files: Vec<String>,
18664    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18665    shared_symbols: Vec<String>,
18666    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18667    shared_tests: Vec<String>,
18668    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18669    shared_config_files: Vec<String>,
18670    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18671    shared_semantic_refs: Vec<String>,
18672}
18673
18674#[derive(Clone, Debug, Serialize)]
18675struct DependencyDagTopoBatch {
18676    batch: usize,
18677    targets: Vec<String>,
18678}
18679
18680#[derive(Clone, Debug, Serialize)]
18681struct DependencyDagCycleDiagnostics {
18682    has_cycles: bool,
18683    blocked_nodes: Vec<String>,
18684    cycle_edges: Vec<DependencyDagEdge>,
18685}
18686
18687#[derive(Serialize)]
18688struct DependencyDagSummary {
18689    nodes: usize,
18690    edges: usize,
18691    topo_batches: usize,
18692    has_cycles: bool,
18693}
18694
18695#[derive(Serialize)]
18696struct DependencyDagReport {
18697    contract_version: &'static str,
18698    root: String,
18699    #[serde(skip_serializing_if = "Option::is_none")]
18700    scope: Option<String>,
18701    path: String,
18702    targets: Vec<String>,
18703    projection_freshness: GraphDbFreshnessReport,
18704    projection_hashes: Vec<String>,
18705    nodes: Vec<DependencyDagNode>,
18706    edges: Vec<DependencyDagEdge>,
18707    topo_batches: Vec<DependencyDagTopoBatch>,
18708    cycle_diagnostics: DependencyDagCycleDiagnostics,
18709    summary: DependencyDagSummary,
18710    replay_commands: Vec<String>,
18711    repair_commands: Vec<String>,
18712    #[serde(skip_serializing_if = "Vec::is_empty", default)]
18713    warnings: Vec<String>,
18714}
18715
18716fn dependency_dag_backlog_node_for_target(
18717    store: &impl GraphStore,
18718    target: &str,
18719) -> Result<SubstrateGraphNode> {
18720    let resolved = graph_db_resolve_evidence_target(store, target)?
18721        .with_context(|| format!("dependency-dag target not found: {target}"))?;
18722    if resolved.kind == "backlog" {
18723        return Ok(resolved);
18724    }
18725    let Some(ref_id) = resolved.properties.get("ref_id").cloned() else {
18726        bail!(
18727            "dependency-dag target {} resolved to {} without a backlog ref_id",
18728            target,
18729            resolved.kind
18730        );
18731    };
18732    store
18733        .nodes_by_kind("backlog")?
18734        .into_iter()
18735        .filter(|node| node.properties.get("ref_id") == Some(&ref_id))
18736        .min_by(|left, right| {
18737            left.properties
18738                .get("line")
18739                .and_then(|value| value.parse::<i64>().ok())
18740                .cmp(
18741                    &right
18742                        .properties
18743                        .get("line")
18744                        .and_then(|value| value.parse::<i64>().ok()),
18745                )
18746                .then(left.id.cmp(&right.id))
18747        })
18748        .with_context(|| format!("dependency-dag backlog node not found for #{ref_id}"))
18749}
18750
18751fn dependency_dag_resolve_backlog_nodes(
18752    root: &Path,
18753    path: &Path,
18754    store: &impl GraphStore,
18755    raw_targets: &[String],
18756) -> Result<Vec<SubstrateGraphNode>> {
18757    let mut nodes = Vec::new();
18758    let mut seen = BTreeSet::new();
18759    if raw_targets.is_empty() {
18760        let hinted_path = if path.is_absolute() {
18761            path.to_path_buf()
18762        } else {
18763            root.join(path)
18764        };
18765        let hinted_markdown = hinted_path
18766            .extension()
18767            .and_then(|ext| ext.to_str())
18768            .is_some_and(|ext| ext.eq_ignore_ascii_case("md"));
18769        let hinted_rel = hinted_markdown.then(|| {
18770            relativize_pathbuf(&hinted_path, root)
18771                .to_string_lossy()
18772                .replace('\\', "/")
18773        });
18774        for node in store.nodes_by_kind("backlog")? {
18775            if let Some(expected_path) = &hinted_rel
18776                && node.properties.get("path") != Some(expected_path)
18777            {
18778                continue;
18779            }
18780            if seen.insert(node.id.clone()) {
18781                nodes.push(node);
18782            }
18783        }
18784        if nodes.is_empty() && hinted_rel.is_some() {
18785            for node in store.nodes_by_kind("backlog")? {
18786                if seen.insert(node.id.clone()) {
18787                    nodes.push(node);
18788                }
18789            }
18790        }
18791    } else {
18792        for target in raw_targets {
18793            let normalized = normalize_conflict_target(target).unwrap_or_else(|| target.clone());
18794            let node = dependency_dag_backlog_node_for_target(store, &normalized)?;
18795            if seen.insert(node.id.clone()) {
18796                nodes.push(node);
18797            }
18798        }
18799    }
18800    if nodes.is_empty() {
18801        bail!("dependency-dag needs at least one resolvable backlog id");
18802    }
18803    nodes.sort_by(|left, right| {
18804        left.properties
18805            .get("line")
18806            .and_then(|value| value.parse::<i64>().ok())
18807            .cmp(
18808                &right
18809                    .properties
18810                    .get("line")
18811                    .and_then(|value| value.parse::<i64>().ok()),
18812            )
18813            .then(left.id.cmp(&right.id))
18814    });
18815    Ok(nodes)
18816}
18817
18818fn dependency_dag_node_id(node: &SubstrateGraphNode) -> String {
18819    node.properties
18820        .get("ref_id")
18821        .cloned()
18822        .unwrap_or_else(|| node.label.trim_start_matches('#').to_string())
18823}
18824
18825fn dependency_dag_node_profile(
18826    root: &Path,
18827    store: &impl GraphStore,
18828    node: &SubstrateGraphNode,
18829    graph_nodes_by_id: &BTreeMap<String, SubstrateGraphNode>,
18830    graph_edges: &[SubstrateGraphEdge],
18831    depth: usize,
18832    limit: usize,
18833) -> Result<DependencyDagProfile> {
18834    let id = dependency_dag_node_id(node);
18835    let mut source_files = BTreeSet::new();
18836    let mut source_symbols = BTreeSet::new();
18837    for edge in graph_edges
18838        .iter()
18839        .filter(|edge| edge.from_id == node.id && edge.kind == "mentions")
18840    {
18841        let Some(target) = graph_nodes_by_id.get(&edge.to_id) else {
18842            continue;
18843        };
18844        match target.kind.as_str() {
18845            "file" | "route" => {
18846                if let Some(path) = target.properties.get("path") {
18847                    source_files.insert(path.clone());
18848                }
18849            }
18850            "symbol" => {
18851                source_symbols.insert(target.label.clone());
18852                if let Some(path) = target.properties.get("path") {
18853                    source_files.insert(path.clone());
18854                }
18855            }
18856            _ => {}
18857        }
18858    }
18859
18860    let max_rows = if limit == 0 { usize::MAX } else { limit };
18861    for (source, _) in
18862        graph_db_reachable_nodes_by_kind(store, &node.id, "source_handle", depth, max_rows)?
18863    {
18864        let terse: SubstrateTerseGraphNode = (&source).into();
18865        if let Some(handle) = conflict_matrix_source_handle(&terse) {
18866            source_files.insert(handle.file);
18867        }
18868    }
18869
18870    let worker_results = graph_nodes_by_id
18871        .values()
18872        .filter(|candidate| {
18873            candidate.kind == "worker_result"
18874                && candidate.properties.get("ref_id").map(String::as_str) == Some(id.as_str())
18875        })
18876        .map(SubstrateTerseGraphNode::from)
18877        .collect::<Vec<_>>();
18878    let worker_feedback = conflict_matrix_worker_feedback(&worker_results);
18879    let expected_tests = worker_feedback.expected_tests.iter().cloned().collect();
18880    let config_files = source_files
18881        .iter()
18882        .filter(|file| is_planner_config_path(file))
18883        .cloned()
18884        .collect();
18885
18886    let mut semantic_refs = BTreeMap::new();
18887    for kind in ["semantic_concept", "semantic_entity"] {
18888        for (semantic, _) in
18889            graph_db_reachable_nodes_by_kind(store, &node.id, kind, depth, max_rows)?
18890        {
18891            let terse: SubstrateTerseGraphNode = (&semantic).into();
18892            let item = conflict_matrix_semantic_ref(root, &terse);
18893            semantic_refs
18894                .entry(format!("{}:{}", item.kind, item.label))
18895                .or_insert(item);
18896        }
18897    }
18898
18899    Ok(DependencyDagProfile {
18900        id,
18901        graph_node_id: node.id.clone(),
18902        label: node.label.clone(),
18903        path: node.properties.get("path").cloned(),
18904        line: node
18905            .properties
18906            .get("line")
18907            .and_then(|value| value.parse::<i64>().ok()),
18908        detail: node.properties.get("detail").cloned(),
18909        source_files,
18910        source_symbols,
18911        config_files,
18912        expected_tests,
18913        semantic_refs,
18914        worker_feedback,
18915    })
18916}
18917
18918fn dependency_dag_marker_refs(text: &str, markers: &[&str]) -> Vec<String> {
18919    let lower = text.to_ascii_lowercase();
18920    let mut refs = Vec::new();
18921    for marker in markers {
18922        let mut offset = 0usize;
18923        while let Some(pos) = lower[offset..].find(marker) {
18924            let start = offset + pos + marker.len();
18925            let segment = text[start..]
18926                .split(['\n', '.'])
18927                .next()
18928                .unwrap_or(&text[start..]);
18929            refs.extend(extract_conflict_target_refs(segment));
18930            offset = start;
18931        }
18932    }
18933    dedupe_preserve_order(refs)
18934}
18935
18936fn dependency_dag_push_edge(
18937    edges: &mut Vec<DependencyDagEdge>,
18938    seen: &mut BTreeSet<(String, String, String)>,
18939    edge: DependencyDagEdge,
18940) {
18941    if edge.from == edge.to {
18942        return;
18943    }
18944    if seen.insert((edge.from.clone(), edge.to.clone(), edge.kind.clone())) {
18945        edges.push(edge);
18946    }
18947}
18948
18949fn dependency_dag_explicit_edges(
18950    profiles: &[DependencyDagProfile],
18951    target_ids: &BTreeSet<String>,
18952    edges: &mut Vec<DependencyDagEdge>,
18953    seen: &mut BTreeSet<(String, String, String)>,
18954) {
18955    for profile in profiles {
18956        let detail = profile.detail.as_deref().unwrap_or_default();
18957        for dep in dependency_dag_marker_refs(
18958            detail,
18959            &[
18960                "depends on",
18961                "depends-on",
18962                "deps:",
18963                "after",
18964                "blocked by",
18965                "requires",
18966            ],
18967        ) {
18968            if target_ids.contains(&dep) {
18969                dependency_dag_push_edge(
18970                    edges,
18971                    seen,
18972                    DependencyDagEdge {
18973                        from: dep.clone(),
18974                        to: profile.id.clone(),
18975                        kind: "explicit_depends_on".to_string(),
18976                        weight: 1000,
18977                        reasons: vec![format!("{} declares dependency on #{dep}", profile.id)],
18978                        shared_files: Vec::new(),
18979                        shared_symbols: Vec::new(),
18980                        shared_tests: Vec::new(),
18981                        shared_config_files: Vec::new(),
18982                        shared_semantic_refs: Vec::new(),
18983                    },
18984                );
18985            }
18986        }
18987        for downstream in dependency_dag_marker_refs(detail, &["before", "unblocks"]) {
18988            if target_ids.contains(&downstream) {
18989                dependency_dag_push_edge(
18990                    edges,
18991                    seen,
18992                    DependencyDagEdge {
18993                        from: profile.id.clone(),
18994                        to: downstream.clone(),
18995                        kind: "explicit_before".to_string(),
18996                        weight: 900,
18997                        reasons: vec![format!(
18998                            "{} declares it should run before #{downstream}",
18999                            profile.id
19000                        )],
19001                        shared_files: Vec::new(),
19002                        shared_symbols: Vec::new(),
19003                        shared_tests: Vec::new(),
19004                        shared_config_files: Vec::new(),
19005                        shared_semantic_refs: Vec::new(),
19006                    },
19007                );
19008            }
19009        }
19010    }
19011}
19012
19013fn dependency_dag_worker_follow_up_edges(
19014    profiles: &[DependencyDagProfile],
19015    target_ids: &BTreeSet<String>,
19016    edges: &mut Vec<DependencyDagEdge>,
19017    seen: &mut BTreeSet<(String, String, String)>,
19018) {
19019    for profile in profiles {
19020        for follow_up in &profile.worker_feedback.follow_up_ids {
19021            if target_ids.contains(follow_up) {
19022                dependency_dag_push_edge(
19023                    edges,
19024                    seen,
19025                    DependencyDagEdge {
19026                        from: profile.id.clone(),
19027                        to: follow_up.clone(),
19028                        kind: "worker_result_follow_up".to_string(),
19029                        weight: 700,
19030                        reasons: vec![format!(
19031                            "worker_result for #{} references follow-up #{}",
19032                            profile.id, follow_up
19033                        )],
19034                        shared_files: Vec::new(),
19035                        shared_symbols: Vec::new(),
19036                        shared_tests: Vec::new(),
19037                        shared_config_files: Vec::new(),
19038                        shared_semantic_refs: Vec::new(),
19039                    },
19040                );
19041            }
19042        }
19043    }
19044}
19045
19046fn dependency_dag_overlap_edges(
19047    profiles: &[DependencyDagProfile],
19048    edges: &mut Vec<DependencyDagEdge>,
19049    seen: &mut BTreeSet<(String, String, String)>,
19050) {
19051    for left_idx in 0..profiles.len() {
19052        for right_idx in (left_idx + 1)..profiles.len() {
19053            let left = &profiles[left_idx];
19054            let right = &profiles[right_idx];
19055            let shared_files = sorted_intersection(&left.source_files, &right.source_files);
19056            let shared_symbols = sorted_intersection(&left.source_symbols, &right.source_symbols);
19057            let shared_tests = sorted_intersection(&left.expected_tests, &right.expected_tests);
19058            let shared_config_files = sorted_intersection(&left.config_files, &right.config_files);
19059            let left_semantic = left.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
19060            let right_semantic = right.semantic_refs.keys().cloned().collect::<BTreeSet<_>>();
19061            let shared_semantic_refs = sorted_intersection(&left_semantic, &right_semantic);
19062            if shared_files.is_empty()
19063                && shared_symbols.is_empty()
19064                && shared_tests.is_empty()
19065                && shared_config_files.is_empty()
19066                && shared_semantic_refs.is_empty()
19067            {
19068                continue;
19069            }
19070            let kind = if shared_files.is_empty()
19071                && shared_symbols.is_empty()
19072                && shared_tests.is_empty()
19073                && shared_config_files.is_empty()
19074            {
19075                "semantic_relation"
19076            } else {
19077                "shared_resource"
19078            };
19079            let mut reasons = Vec::new();
19080            if !shared_files.is_empty() {
19081                reasons.push(format!("shared files: {}", shared_files.join(", ")));
19082            }
19083            if !shared_symbols.is_empty() {
19084                reasons.push(format!("shared symbols: {}", shared_symbols.join(", ")));
19085            }
19086            if !shared_tests.is_empty() {
19087                reasons.push(format!("shared tests: {}", shared_tests.join(" && ")));
19088            }
19089            if !shared_config_files.is_empty() {
19090                reasons.push(format!(
19091                    "shared config files: {}",
19092                    shared_config_files.join(", ")
19093                ));
19094            }
19095            if !shared_semantic_refs.is_empty() {
19096                reasons.push(format!(
19097                    "shared semantic refs: {}",
19098                    shared_semantic_refs.join(", ")
19099                ));
19100            }
19101            let weight = shared_files.len() * 100
19102                + shared_config_files.len() * 100
19103                + shared_symbols.len() * 40
19104                + shared_tests.len() * 10
19105                + shared_semantic_refs.len() * 5;
19106            dependency_dag_push_edge(
19107                edges,
19108                seen,
19109                DependencyDagEdge {
19110                    from: left.id.clone(),
19111                    to: right.id.clone(),
19112                    kind: kind.to_string(),
19113                    weight,
19114                    reasons,
19115                    shared_files,
19116                    shared_symbols,
19117                    shared_tests,
19118                    shared_config_files,
19119                    shared_semantic_refs,
19120                },
19121            );
19122        }
19123    }
19124}
19125
19126fn dependency_dag_topo_batches(
19127    targets: &[String],
19128    edges: &[DependencyDagEdge],
19129) -> (Vec<DependencyDagTopoBatch>, DependencyDagCycleDiagnostics) {
19130    let target_set = targets.iter().cloned().collect::<BTreeSet<_>>();
19131    let order = targets
19132        .iter()
19133        .enumerate()
19134        .map(|(idx, id)| (id.clone(), idx))
19135        .collect::<BTreeMap<_, _>>();
19136    let mut indegree = targets
19137        .iter()
19138        .map(|id| (id.clone(), 0usize))
19139        .collect::<BTreeMap<_, _>>();
19140    let mut outgoing = BTreeMap::<String, Vec<String>>::new();
19141    let mut seen_pairs = BTreeSet::<(String, String)>::new();
19142    for edge in edges {
19143        if !target_set.contains(&edge.from) || !target_set.contains(&edge.to) {
19144            continue;
19145        }
19146        if !seen_pairs.insert((edge.from.clone(), edge.to.clone())) {
19147            continue;
19148        }
19149        *indegree.entry(edge.to.clone()).or_default() += 1;
19150        outgoing
19151            .entry(edge.from.clone())
19152            .or_default()
19153            .push(edge.to.clone());
19154    }
19155    for values in outgoing.values_mut() {
19156        values.sort_by_key(|id| order.get(id).copied().unwrap_or(usize::MAX));
19157        values.dedup();
19158    }
19159
19160    let mut processed = BTreeSet::new();
19161    let mut batches = Vec::new();
19162    loop {
19163        let mut ready = targets
19164            .iter()
19165            .filter(|id| !processed.contains(*id))
19166            .filter(|id| indegree.get(*id).copied().unwrap_or(0) == 0)
19167            .cloned()
19168            .collect::<Vec<_>>();
19169        ready.sort_by_key(|id| order.get(id).copied().unwrap_or(usize::MAX));
19170        if ready.is_empty() {
19171            break;
19172        }
19173        for id in &ready {
19174            processed.insert(id.clone());
19175            for next in outgoing.get(id).into_iter().flatten() {
19176                if let Some(value) = indegree.get_mut(next) {
19177                    *value = value.saturating_sub(1);
19178                }
19179            }
19180        }
19181        batches.push(DependencyDagTopoBatch {
19182            batch: batches.len() + 1,
19183            targets: ready,
19184        });
19185    }
19186
19187    let blocked_nodes = targets
19188        .iter()
19189        .filter(|id| !processed.contains(*id))
19190        .cloned()
19191        .collect::<Vec<_>>();
19192    let blocked_set = blocked_nodes.iter().cloned().collect::<BTreeSet<_>>();
19193    let cycle_edges = edges
19194        .iter()
19195        .filter(|edge| blocked_set.contains(&edge.from) && blocked_set.contains(&edge.to))
19196        .cloned()
19197        .collect::<Vec<_>>();
19198    (
19199        batches,
19200        DependencyDagCycleDiagnostics {
19201            has_cycles: !blocked_nodes.is_empty(),
19202            blocked_nodes,
19203            cycle_edges,
19204        },
19205    )
19206}
19207
19208fn dependency_dag_replay_commands(
19209    path: &Path,
19210    scope: Option<&str>,
19211    targets: &[String],
19212    depth: usize,
19213    limit: usize,
19214) -> Vec<String> {
19215    let target_args = targets
19216        .iter()
19217        .map(|target| shell_quote(target))
19218        .collect::<Vec<_>>()
19219        .join(" ");
19220    let mut command = format!(
19221        "tsift dependency-dag --path {}{} --depth {} --limit {} --json",
19222        shell_quote(path.to_string_lossy().as_ref()),
19223        scope
19224            .map(|scope| format!(" --scope {}", shell_quote(scope)))
19225            .unwrap_or_default(),
19226        depth,
19227        limit
19228    );
19229    if !target_args.is_empty() {
19230        command.push(' ');
19231        command.push_str(&target_args);
19232    }
19233    vec![command]
19234}
19235
19236fn build_dependency_dag_report(
19237    path: &Path,
19238    scope: Option<&str>,
19239    raw_targets: &[String],
19240    depth: usize,
19241    limit: usize,
19242) -> Result<DependencyDagReport> {
19243    let root = lint::resolve_project_root_or_canonical_path(path)?;
19244    write_traversal_graph_store(&root, path, scope)
19245        .with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
19246    let graph_db = graph_substrate_db_path(&root, scope);
19247    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
19248        .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
19249    let mut warnings = Vec::new();
19250    if let Some(recovery) = store.read_only_recovery() {
19251        warnings.push(graph_db_read_recovery_diagnostic(recovery));
19252    }
19253    let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
19254    if freshness.fail_closed {
19255        bail!(
19256            "dependency-dag graph projection failed closed: {}; repair: {}",
19257            freshness.diagnostics.join("; "),
19258            graph_db_repair_commands(&root, scope).join("; ")
19259        );
19260    }
19261
19262    let target_nodes = dependency_dag_resolve_backlog_nodes(&root, path, &store, raw_targets)?;
19263    let graph_nodes = store.all_nodes()?;
19264    let graph_edges = store.all_edges()?;
19265    let graph_nodes_by_id = graph_nodes
19266        .into_iter()
19267        .map(|node| (node.id.clone(), node))
19268        .collect::<BTreeMap<_, _>>();
19269    let profiles = target_nodes
19270        .iter()
19271        .map(|node| {
19272            dependency_dag_node_profile(
19273                &root,
19274                &store,
19275                node,
19276                &graph_nodes_by_id,
19277                &graph_edges,
19278                depth,
19279                limit,
19280            )
19281        })
19282        .collect::<Result<Vec<_>>>()?;
19283    let targets = profiles
19284        .iter()
19285        .map(|profile| profile.id.clone())
19286        .collect::<Vec<_>>();
19287    let target_ids = targets.iter().cloned().collect::<BTreeSet<_>>();
19288
19289    let mut edges = Vec::new();
19290    let mut seen_edges = BTreeSet::new();
19291    dependency_dag_explicit_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
19292    dependency_dag_worker_follow_up_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
19293    dependency_dag_overlap_edges(&profiles, &mut edges, &mut seen_edges);
19294    edges.sort_by(|left, right| {
19295        left.from
19296            .cmp(&right.from)
19297            .then(left.to.cmp(&right.to))
19298            .then(left.kind.cmp(&right.kind))
19299    });
19300    let (topo_batches, cycle_diagnostics) = dependency_dag_topo_batches(&targets, &edges);
19301
19302    let nodes = profiles
19303        .into_iter()
19304        .map(|profile| DependencyDagNode {
19305            id: profile.id,
19306            graph_node_id: profile.graph_node_id,
19307            label: profile.label,
19308            path: profile.path,
19309            line: profile.line,
19310            detail: profile.detail,
19311            source_files: sorted_set(&profile.source_files),
19312            source_symbols: sorted_set(&profile.source_symbols),
19313            config_files: sorted_set(&profile.config_files),
19314            expected_tests: sorted_set(&profile.expected_tests),
19315            semantic_refs: profile.semantic_refs.into_values().collect(),
19316            worker_feedback: profile.worker_feedback,
19317        })
19318        .collect::<Vec<_>>();
19319    let projection_hashes = freshness
19320        .content_hash
19321        .clone()
19322        .into_iter()
19323        .collect::<Vec<_>>();
19324    let replay_commands = dependency_dag_replay_commands(path, scope, &targets, depth, limit);
19325    let repair_commands = graph_db_repair_commands(&root, scope);
19326    let summary = DependencyDagSummary {
19327        nodes: nodes.len(),
19328        edges: edges.len(),
19329        topo_batches: topo_batches.len(),
19330        has_cycles: cycle_diagnostics.has_cycles,
19331    };
19332
19333    Ok(DependencyDagReport {
19334        contract_version: DEPENDENCY_DAG_CONTRACT_VERSION,
19335        root: root.to_string_lossy().to_string(),
19336        scope: scope.map(str::to_string),
19337        path: path.to_string_lossy().to_string(),
19338        targets,
19339        projection_freshness: freshness,
19340        projection_hashes,
19341        nodes,
19342        edges,
19343        topo_batches,
19344        cycle_diagnostics,
19345        summary,
19346        replay_commands,
19347        repair_commands,
19348        warnings,
19349    })
19350}
19351
19352fn print_dependency_dag_human(report: &DependencyDagReport, compact: bool) {
19353    if compact {
19354        println!(
19355            "dependency-dag targets:{} edges:{} batches:{} cycles:{}",
19356            report.targets.len(),
19357            report.edges.len(),
19358            report.topo_batches.len(),
19359            report.cycle_diagnostics.has_cycles
19360        );
19361    } else {
19362        println!("Dependency DAG");
19363        println!("  targets: {}", report.targets.join(", "));
19364        println!("  edges:   {}", report.edges.len());
19365        println!("  cycles:  {}", report.cycle_diagnostics.has_cycles);
19366    }
19367    for batch in &report.topo_batches {
19368        println!("batch #{}: {}", batch.batch, batch.targets.join(", "));
19369    }
19370    for edge in &report.edges {
19371        println!(
19372            "edge {} -> {} kind:{} weight:{}",
19373            edge.from, edge.to, edge.kind, edge.weight
19374        );
19375        for reason in &edge.reasons {
19376            println!("  reason: {reason}");
19377        }
19378    }
19379    if report.cycle_diagnostics.has_cycles {
19380        println!(
19381            "cycle blocked nodes: {}",
19382            report.cycle_diagnostics.blocked_nodes.join(", ")
19383        );
19384    }
19385    for command in &report.replay_commands {
19386        println!("replay: {command}");
19387    }
19388    for command in &report.repair_commands {
19389        println!("repair: {command}");
19390    }
19391    for warning in &report.warnings {
19392        println!("warning: {warning}");
19393    }
19394}
19395
19396fn cmd_dependency_dag(
19397    path: &Path,
19398    scope: Option<&str>,
19399    raw_targets: &[String],
19400    depth: usize,
19401    limit: usize,
19402    format: OutputFormat,
19403) -> Result<()> {
19404    let report = build_dependency_dag_report(path, scope, raw_targets, depth, limit)?;
19405    if format.json_output {
19406        print_json_or_envelope(
19407            &report,
19408            &format,
19409            "dependency-dag",
19410            "topological-planning",
19411            ToolEnvelopeSummary {
19412                text: format!(
19413                    "Dependency DAG for {} target(s): edges={} batches={} cycles={}",
19414                    report.targets.len(),
19415                    report.edges.len(),
19416                    report.topo_batches.len(),
19417                    report.cycle_diagnostics.has_cycles
19418                ),
19419                metrics: vec![
19420                    envelope_metric("targets", report.targets.len()),
19421                    envelope_metric("edges", report.edges.len()),
19422                    envelope_metric("topo_batches", report.topo_batches.len()),
19423                    envelope_metric("has_cycles", report.cycle_diagnostics.has_cycles),
19424                ],
19425            },
19426            report.cycle_diagnostics.has_cycles,
19427            report.replay_commands.clone(),
19428        )
19429    } else {
19430        print_dependency_dag_human(&report, format.compact);
19431        Ok(())
19432    }
19433}
19434
19435pub(crate) fn render_log_digest_from_input(
19436    path: &Path,
19437    input: &str,
19438    format: OutputFormat,
19439) -> Result<()> {
19440    let report = log_digest::compute(path, input)?;
19441    if format.json_output {
19442        println!(
19443            "{}",
19444            to_json_schema(
19445                &report,
19446                format.pretty,
19447                format.terse,
19448                format.ultra_terse,
19449                format.schema
19450            )?
19451        );
19452        return Ok(());
19453    }
19454
19455    if format.compact {
19456        println!(
19457            "log lines:{} signals:{} repeats:{} files:{} syms:{} stacks:{}",
19458            report.non_empty_lines,
19459            report.signal_groups,
19460            report.repeated_line_groups,
19461            report.file_ref_groups,
19462            report.symbol_ref_groups,
19463            report.stack_groups
19464        );
19465        for signal in &report.signals {
19466            let location = match (&signal.path, signal.line) {
19467                (Some(path), Some(line)) => format!("{path}:{line}"),
19468                (Some(path), None) => path.clone(),
19469                _ => "-".to_string(),
19470            };
19471            println!(
19472                "{} sev:{} count:{} sums:{} msg:{}",
19473                location,
19474                signal.severity,
19475                signal.occurrences,
19476                log_digest_summary_label(signal.summary_state),
19477                truncate_for_compact(&signal.message, 80)
19478            );
19479        }
19480        for repeated in &report.repeated_lines {
19481            println!(
19482                "repeat count:{} line:{}",
19483                repeated.occurrences,
19484                truncate_for_compact(&repeated.line, 80)
19485            );
19486        }
19487        for symbol in &report.symbol_refs {
19488            println!(
19489                "sym:{} count:{} sums:{}",
19490                symbol.symbol,
19491                symbol.occurrences,
19492                log_digest_summary_label(symbol.summary_state)
19493            );
19494        }
19495        for warning in &report.warnings {
19496            println!("warning: {warning}");
19497        }
19498        return Ok(());
19499    }
19500
19501    println!("Log digest");
19502    println!("  lines:                    {}", report.total_lines);
19503    println!("  non-empty lines:          {}", report.non_empty_lines);
19504    println!("  signal groups:            {}", report.signal_groups);
19505    println!(
19506        "  repeated lines:           {}",
19507        report.repeated_line_groups
19508    );
19509    println!(
19510        "  repeated line instances:  {}",
19511        report.repeated_line_occurrences
19512    );
19513    println!("  file refs:                {}", report.file_ref_groups);
19514    println!("  symbol refs:              {}", report.symbol_ref_groups);
19515    println!("  stack groups:             {}", report.stack_groups);
19516
19517    if !report.signals.is_empty() {
19518        println!();
19519        println!("Signals:");
19520        for signal in &report.signals {
19521            match (&signal.path, signal.line, signal.column) {
19522                (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
19523                (Some(path), Some(line), None) => println!("{path}:{line}"),
19524                (Some(path), None, _) => println!("{path}"),
19525                (None, _, _) => println!("(no file anchor)"),
19526            }
19527            println!("  severity: {}", signal.severity);
19528            println!("  occurrences: {}", signal.occurrences);
19529            println!("  message: {}", signal.message);
19530            println!(
19531                "  cached summaries: {}",
19532                log_digest_summary_label(signal.summary_state)
19533            );
19534            for summary in &signal.current_summaries {
19535                println!(
19536                    "    - {}: {}",
19537                    summary.symbol,
19538                    truncate_for_compact(&summary.summary, 160)
19539                );
19540            }
19541        }
19542    }
19543
19544    if !report.repeated_lines.is_empty() {
19545        println!();
19546        println!("Repeated lines:");
19547        for repeated in &report.repeated_lines {
19548            println!(
19549                "  {}x {}",
19550                repeated.occurrences,
19551                truncate_for_compact(&repeated.line, 180)
19552            );
19553        }
19554    }
19555
19556    if !report.file_refs.is_empty() {
19557        println!();
19558        println!("Anchored files:");
19559        for file_ref in &report.file_refs {
19560            match (file_ref.line, file_ref.column) {
19561                (Some(line), Some(column)) => println!("{}:{}:{}", file_ref.path, line, column),
19562                (Some(line), None) => println!("{}:{}", file_ref.path, line),
19563                (None, _) => println!("{}", file_ref.path),
19564            }
19565            println!("  occurrences: {}", file_ref.occurrences);
19566            println!(
19567                "  cached summaries: {}",
19568                log_digest_summary_label(file_ref.summary_state)
19569            );
19570            for summary in &file_ref.current_summaries {
19571                println!(
19572                    "    - {}: {}",
19573                    summary.symbol,
19574                    truncate_for_compact(&summary.summary, 160)
19575                );
19576            }
19577        }
19578    }
19579
19580    if !report.symbol_refs.is_empty() {
19581        println!();
19582        println!("Symbol candidates:");
19583        for symbol in &report.symbol_refs {
19584            println!("{}", symbol.symbol);
19585            println!("  occurrences: {}", symbol.occurrences);
19586            println!(
19587                "  cached summaries: {}",
19588                log_digest_summary_label(symbol.summary_state)
19589            );
19590            for summary in &symbol.current_summaries {
19591                println!(
19592                    "    - {}: {}",
19593                    summary.symbol,
19594                    truncate_for_compact(&summary.summary, 160)
19595                );
19596            }
19597        }
19598    }
19599
19600    if !report.stack_traces.is_empty() {
19601        println!();
19602        println!("Stack groups:");
19603        for stack in &report.stack_traces {
19604            println!("  occurrences: {}", stack.occurrences);
19605            for frame in &stack.frames {
19606                println!("    - {}", frame);
19607            }
19608        }
19609    }
19610
19611    for warning in &report.warnings {
19612        println!("warning: {warning}");
19613    }
19614    Ok(())
19615}
19616
19617pub(crate) fn metric_digest_trend_label(trend: metric_digest::MetricDigestTrend) -> &'static str {
19618    match trend {
19619        metric_digest::MetricDigestTrend::Improved => "improved",
19620        metric_digest::MetricDigestTrend::Regressed => "regressed",
19621        metric_digest::MetricDigestTrend::Flat => "flat",
19622        metric_digest::MetricDigestTrend::Unknown => "changed",
19623    }
19624}
19625
19626pub(crate) fn metric_digest_gate_label(
19627    decision: metric_digest::CommunitySearchGateDecision,
19628) -> &'static str {
19629    match decision {
19630        metric_digest::CommunitySearchGateDecision::Pass => "pass",
19631        metric_digest::CommunitySearchGateDecision::Block => "block",
19632    }
19633}
19634
19635fn cmd_dci_benchmark(fixture_path: &Path, format: OutputFormat) -> Result<()> {
19636    let input = fs::read_to_string(fixture_path)
19637        .with_context(|| format!("reading dci-benchmark fixture: {}", fixture_path.display()))?;
19638    let report = dci_benchmark::compute(&input)?;
19639
19640    if format.json_output {
19641        println!(
19642            "{}",
19643            to_json_schema(
19644                &report,
19645                format.pretty,
19646                format.terse,
19647                format.ultra_terse,
19648                format.schema
19649            )?
19650        );
19651        return Ok(());
19652    }
19653
19654    if format.compact {
19655        println!(
19656            "dci tasks:{} strategies:{} warnings:{}",
19657            report.tasks_loaded,
19658            report.strategies_compared,
19659            report.warnings.len()
19660        );
19661        for summary in &report.strategy_summaries {
19662            println!(
19663                "{} rank:{} loc:{}/{} rate:{} useful_hits:{} zero_output:{} calls:{} latency_ms:{} tokens:{} output_tokens:{}",
19664                summary.strategy,
19665                summary.rank,
19666                summary.localized,
19667                summary.task_runs,
19668                dci_benchmark::format_number(summary.localization_rate * 100.0),
19669                dci_benchmark::format_number(summary.avg_useful_hits),
19670                dci_benchmark::format_number(summary.zero_output_rate * 100.0),
19671                dci_benchmark::format_number(summary.avg_tool_calls),
19672                dci_benchmark::format_number(summary.avg_latency_ms),
19673                dci_benchmark::format_number(summary.avg_estimated_tokens),
19674                dci_benchmark::format_number(summary.avg_output_tokens)
19675            );
19676        }
19677        if let Some(gate) = &report.memory_retrieval_gate {
19678            println!(
19679                "memory_retrieval_gate decision:{} baseline:{} min_avg_useful_hits:{} max_zero_output_failures:{} diagnostics:{}",
19680                gate.decision,
19681                gate.baseline_strategy,
19682                dci_benchmark::format_number(gate.min_avg_useful_hits),
19683                gate.max_zero_output_failures,
19684                gate.diagnostics.len()
19685            );
19686        }
19687        for warning in &report.warnings {
19688            println!("warning: {warning}");
19689        }
19690        return Ok(());
19691    }
19692
19693    println!("DCI benchmark");
19694    if let Some(description) = &report.description {
19695        println!("  description: {}", description);
19696    }
19697    println!("  tasks loaded:        {}", report.tasks_loaded);
19698    println!("  strategies compared: {}", report.strategies_compared);
19699
19700    println!();
19701    println!("Strategy summary:");
19702    for summary in &report.strategy_summaries {
19703        println!(
19704            "  #{} {}: localization {}/{} ({:.1}%), avg useful hits {}, zero output {:.1}%, avg calls {}, avg latency {}ms, avg tokens {}, avg output tokens {}",
19705            summary.rank,
19706            summary.strategy,
19707            summary.localized,
19708            summary.task_runs,
19709            summary.localization_rate * 100.0,
19710            dci_benchmark::format_number(summary.avg_useful_hits),
19711            summary.zero_output_rate * 100.0,
19712            dci_benchmark::format_number(summary.avg_tool_calls),
19713            dci_benchmark::format_number(summary.avg_latency_ms),
19714            dci_benchmark::format_number(summary.avg_estimated_tokens),
19715            dci_benchmark::format_number(summary.avg_output_tokens)
19716        );
19717    }
19718
19719    if let Some(gate) = &report.memory_retrieval_gate {
19720        println!();
19721        println!("Memory retrieval gate:");
19722        println!("  decision: {}", gate.decision);
19723        println!(
19724            "  baseline: {}, min avg useful hits {}, max zero-output failures {}",
19725            gate.baseline_strategy,
19726            dci_benchmark::format_number(gate.min_avg_useful_hits),
19727            gate.max_zero_output_failures
19728        );
19729        for row in &gate.rows {
19730            println!(
19731                "  {}: status {}, avg useful hits {}, zero-output failures {}",
19732                row.strategy,
19733                row.status,
19734                dci_benchmark::format_number(row.avg_useful_hits),
19735                row.zero_output_failures
19736            );
19737        }
19738        for diagnostic in &gate.diagnostics {
19739            println!("  diagnostic: {diagnostic}");
19740        }
19741    }
19742
19743    println!();
19744    println!("Task winners:");
19745    for row in &report.task_rows {
19746        let label = row
19747            .label
19748            .as_ref()
19749            .map(|value| format!(" ({value})"))
19750            .unwrap_or_default();
19751        println!("  {}{}", row.task_id, label);
19752        println!("    localized: {}", row.best_localization.join(", "));
19753        println!("    most useful hits: {}", row.most_useful_hits.join(", "));
19754        println!(
19755            "    lowest calls: {}, lowest latency: {}, lowest tokens: {}, lowest output tokens: {}",
19756            row.lowest_tool_calls.as_deref().unwrap_or("-"),
19757            row.lowest_latency.as_deref().unwrap_or("-"),
19758            row.lowest_token_budget.as_deref().unwrap_or("-"),
19759            row.lowest_output_tokens.as_deref().unwrap_or("-")
19760        );
19761        if !row.zero_output_failures.is_empty() {
19762            println!("    zero output: {}", row.zero_output_failures.join(", "));
19763        }
19764    }
19765
19766    for warning in &report.warnings {
19767        println!("warning: {warning}");
19768    }
19769    Ok(())
19770}
19771
/// Formats `value` as a short count.
///
/// Values below 1,000 are printed as plain digits. Larger values are printed with one
/// decimal place and a `K` (thousands) or `M` (millions) suffix.
///
/// Values just under one million whose one-decimal rendering would round up to `1000.0K`
/// are promoted to `1.0M`, so the output never shows a four-digit mantissa with a `K` suffix.
pub(crate) fn format_compact_count(value: u64) -> String {
    if value < 1_000 {
        return value.to_string();
    }
    if value >= 1_000_000 {
        return format!("{:.1}M", value as f64 / 1_000_000.0);
    }

    let thousands = format!("{:.1}", value as f64 / 1_000.0);
    if thousands == "1000.0" {
        // Rounding to one decimal carried past the unit boundary (e.g. 999_999 -> 1000.0);
        // checking the rendered text keeps this independent of the float rounding mode.
        "1.0M".to_string()
    } else {
        format!("{thousands}K")
    }
}
19781
19782fn cmd_digest_runner(
19783    kind: &str,
19784    path: &Path,
19785    runner: Option<&str>,
19786    shell_command: &str,
19787    format: OutputFormat,
19788) -> Result<()> {
19789    let digest_kind = DigestRunnerKind::parse(kind)?;
19790    let root = transcript_artifact_root(path)?;
19791    let execution = run_digest_runner_command(shell_command)?;
19792    let output = &execution.output;
19793    let captured = String::from_utf8_lossy(&output.stdout).into_owned();
19794    let exit_code = output.status.code().unwrap_or(-1);
19795    if format.json_output && format.envelope {
19796        let artifact_key = format!(
19797            "{}:{}:{}:{}",
19798            digest_kind.as_str(),
19799            shell_command,
19800            execution.executed_command,
19801            captured
19802        );
19803        let artifact = if captured.trim().is_empty() {
19804            None
19805        } else {
19806            let (suffix, expand) = match digest_kind {
19807                DigestRunnerKind::Test => (
19808                    "test.log",
19809                    format!(
19810                        "tsift test-digest --path {} --input {}{} --json",
19811                        shell_quote(root.to_string_lossy().as_ref()),
19812                        shell_quote(
19813                            root.join(".tsift/artifacts")
19814                                .join(format!("{}.test.log", stable_handle("tart", &artifact_key)))
19815                                .to_string_lossy()
19816                                .as_ref()
19817                        ),
19818                        runner
19819                            .map(|value| format!(" --runner {}", shell_quote(value)))
19820                            .unwrap_or_default()
19821                    ),
19822                ),
19823                DigestRunnerKind::Log => (
19824                    "log",
19825                    format!(
19826                        "tsift log-digest --path {} --input {} --json",
19827                        shell_quote(root.to_string_lossy().as_ref()),
19828                        shell_quote(
19829                            root.join(".tsift/artifacts")
19830                                .join(format!("{}.log", stable_handle("tart", &artifact_key)))
19831                                .to_string_lossy()
19832                                .as_ref()
19833                        )
19834                    ),
19835                ),
19836            };
19837            Some(persist_transcript_artifact(
19838                &root,
19839                "tart",
19840                suffix,
19841                &artifact_key,
19842                &captured,
19843                expand,
19844            )?)
19845        };
19846        let filter_report = execution.filter.as_ref().map(DigestRunnerFilter::to_json);
19847
19848        match digest_kind {
19849            DigestRunnerKind::Test => {
19850                let digest_report = test_digest::compute(path, &captured, runner)?;
19851                let report = serde_json::json!({
19852                    "kind": digest_kind.as_str(),
19853                    "command": shell_command,
19854                    "executed_command": execution.executed_command,
19855                    "exit_code": exit_code,
19856                    "success": output.status.success(),
19857                    "filter": filter_report,
19858                    "artifact": artifact,
19859                    "digest": digest_report,
19860                });
19861                let mut follow_up = artifact
19862                    .as_ref()
19863                    .map(|entry| vec![entry.expand.clone()])
19864                    .unwrap_or_default();
19865                follow_up.push(format!(
19866                    "tsift rewrite --run {}",
19867                    shell_quote(shell_command)
19868                ));
19869                let summary_text = if output.status.success() && digest_report.failures == 0 {
19870                    format!("test run passed for {}", runner.unwrap_or("auto"))
19871                } else {
19872                    format!("test run captured {} failure(s)", digest_report.failures)
19873                };
19874                print_json_or_envelope(
19875                    &report,
19876                    &format,
19877                    "digest-runner",
19878                    "test-run",
19879                    ToolEnvelopeSummary {
19880                        text: summary_text,
19881                        metrics: vec![
19882                            envelope_metric("runner", &digest_report.runner),
19883                            envelope_metric("exit_code", exit_code),
19884                            envelope_metric("filter", execution.filter_label()),
19885                            envelope_metric("failures", digest_report.failures),
19886                            envelope_metric("groups", digest_report.grouped_failures),
19887                            envelope_metric(
19888                                "artifact",
19889                                artifact
19890                                    .as_ref()
19891                                    .map(|entry| entry.handle.as_str())
19892                                    .unwrap_or("-"),
19893                            ),
19894                        ],
19895                    },
19896                    false,
19897                    follow_up,
19898                )?;
19899            }
19900            DigestRunnerKind::Log => {
19901                let digest_report = log_digest::compute(path, &captured)?;
19902                let report = serde_json::json!({
19903                    "kind": digest_kind.as_str(),
19904                    "command": shell_command,
19905                    "executed_command": execution.executed_command,
19906                    "exit_code": exit_code,
19907                    "success": output.status.success(),
19908                    "filter": filter_report,
19909                    "artifact": artifact,
19910                    "digest": digest_report,
19911                });
19912                let mut follow_up = artifact
19913                    .as_ref()
19914                    .map(|entry| vec![entry.expand.clone()])
19915                    .unwrap_or_default();
19916                follow_up.push(format!(
19917                    "tsift rewrite --run {}",
19918                    shell_quote(shell_command)
19919                ));
19920                let summary_text = if output.status.success() && digest_report.signal_groups == 0 {
19921                    "command finished without log signals".to_string()
19922                } else {
19923                    format!(
19924                        "command emitted {} log signal group(s)",
19925                        digest_report.signal_groups
19926                    )
19927                };
19928                print_json_or_envelope(
19929                    &report,
19930                    &format,
19931                    "digest-runner",
19932                    "command-run",
19933                    ToolEnvelopeSummary {
19934                        text: summary_text,
19935                        metrics: vec![
19936                            envelope_metric("exit_code", exit_code),
19937                            envelope_metric("filter", execution.filter_label()),
19938                            envelope_metric("signals", digest_report.signal_groups),
19939                            envelope_metric("file_refs", digest_report.file_ref_groups),
19940                            envelope_metric(
19941                                "artifact",
19942                                artifact
19943                                    .as_ref()
19944                                    .map(|entry| entry.handle.as_str())
19945                                    .unwrap_or("-"),
19946                            ),
19947                        ],
19948                    },
19949                    false,
19950                    follow_up,
19951                )?;
19952            }
19953        }
19954
19955        if output.status.success() {
19956            return Ok(());
19957        }
19958        if let Some(code) = output.status.code() {
19959            std::process::exit(code);
19960        }
19961        bail!("digest-wrapped command terminated by signal: {shell_command}");
19962    }
19963
19964    if captured.trim().is_empty() {
19965        let label = match digest_kind {
19966            DigestRunnerKind::Test => "test",
19967            DigestRunnerKind::Log => "log",
19968        };
19969        println!("No {label} output captured.");
19970    } else {
19971        match digest_kind {
19972            DigestRunnerKind::Test => {
19973                render_test_digest_from_input(path, &captured, runner, format)?
19974            }
19975            DigestRunnerKind::Log => render_log_digest_from_input(path, &captured, format)?,
19976        }
19977    }
19978
19979    if output.status.success() {
19980        return Ok(());
19981    }
19982    if let Some(code) = output.status.code() {
19983        std::process::exit(code);
19984    }
19985    bail!("digest-wrapped command terminated by signal: {shell_command}");
19986}
19987
19988struct DigestRunnerExecution {
19989    output: std::process::Output,
19990    executed_command: String,
19991    filter: Option<DigestRunnerFilter>,
19992}
19993
19994impl DigestRunnerExecution {
19995    fn filter_label(&self) -> &'static str {
19996        self.filter
19997            .as_ref()
19998            .map(|filter| filter.tool)
19999            .unwrap_or("none")
20000    }
20001}
20002
20003struct DigestRunnerFilter {
20004    tool: &'static str,
20005    command: String,
20006}
20007
20008impl DigestRunnerFilter {
20009    fn to_json(&self) -> serde_json::Value {
20010        serde_json::json!({
20011            "tool": self.tool,
20012            "command": self.command,
20013        })
20014    }
20015}
20016
20017fn run_digest_runner_command(shell_command: &str) -> Result<DigestRunnerExecution> {
20018    let filter = rtk_rewrite_for_digest_runner(shell_command);
20019    let executed_command = filter
20020        .as_ref()
20021        .map(|filter| filter.command.as_str())
20022        .unwrap_or(shell_command);
20023    let output = Command::new("sh")
20024        .arg("-lc")
20025        .arg(format!("({executed_command}) 2>&1"))
20026        .stdout(Stdio::piped())
20027        .output()
20028        .with_context(|| format!("running digest-wrapped command: {executed_command}"))?;
20029
20030    Ok(DigestRunnerExecution {
20031        output,
20032        executed_command: executed_command.to_string(),
20033        filter,
20034    })
20035}
20036
20037fn rtk_rewrite_for_digest_runner(shell_command: &str) -> Option<DigestRunnerFilter> {
20038    if shell_command.trim_start().starts_with("rtk ") || find_command_on_path("rtk").is_none() {
20039        return None;
20040    }
20041    let output = Command::new("rtk")
20042        .arg("rewrite")
20043        .arg(shell_command)
20044        .output()
20045        .ok()?;
20046    if !output.status.success() {
20047        return None;
20048    }
20049    let rewritten = String::from_utf8_lossy(&output.stdout).trim().to_string();
20050    if rewritten.is_empty() || rewritten == shell_command {
20051        return None;
20052    }
20053    Some(DigestRunnerFilter {
20054        tool: "rtk",
20055        command: rewritten,
20056    })
20057}
20058
/// Returns the first `<dir>/<command>` that is a regular file, scanning the directories in
/// the `PATH` environment variable in order.
///
/// Returns `None` when `PATH` is unset or no directory contains such a file.
fn find_command_on_path(command: &str) -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    for dir in std::env::split_paths(&search_path) {
        let candidate = dir.join(command);
        if candidate.is_file() {
            return Some(candidate);
        }
    }
    None
}
20065
20066pub(crate) fn open_existing_summary_db_read_only(db_path: &Path) -> Result<summarize::SummaryDb> {
20067    if !db_path.exists() {
20068        bail!("no summaries.db found — run `tsift summarize --extract <path>` first");
20069    }
20070    summarize::SummaryDb::open_read_only_resilient(db_path)
20071}
20072
20073fn status_index_needs_fix(report: &status::StatusReport) -> bool {
20074    !matches!(report.index, status::IndexStatus::Fresh { .. })
20075}
20076
20077fn status_instructions_need_fix(report: &status::StatusReport) -> bool {
20078    !matches!(report.instructions, init::InstructionStatus::Current { .. })
20079}
20080
20081pub(crate) fn apply_status_fixes(root: &Path, report: &status::StatusReport) -> Result<()> {
20082    if status_instructions_need_fix(report) {
20083        eprintln!("status fix: refreshing tsift instructions");
20084        init::init(root, false, false)?;
20085    }
20086
20087    let eviction = cycle_packet_cache::cycle_packet_cache_evict(
20088        root,
20089        cycle_packet_cache::CYCLE_PACKET_CACHE_DEFAULT_TTL_SECS,
20090        cycle_packet_cache::CYCLE_PACKET_CACHE_DEFAULT_MAX_BYTES,
20091    );
20092    if eviction.evicted_entries > 0 {
20093        eprintln!(
20094            "status fix: evicted {} cycle packet cache entry/entries ({} bytes, {} remaining)",
20095            eviction.evicted_entries,
20096            eviction.evicted_bytes,
20097            eviction.remaining_entries
20098        );
20099    }
20100
20101    if !status_index_needs_fix(report) {
20102        return Ok(());
20103    }
20104
20105    let scopes = config::Config::submodule_dirs(root)?;
20106    if scopes.is_empty() {
20107        eprintln!("status fix: refreshing index");
20108        run_index_update(
20109            &root.join(".tsift/index.db"),
20110            root,
20111            "status --fix refreshing index".to_string(),
20112            root,
20113            None,
20114            false,
20115            false,
20116        )?;
20117        return Ok(());
20118    }
20119
20120    let cfg = config::Config::load(root)?;
20121    for scope in scopes {
20122        if !scope.source_root.exists() {
20123            eprintln!(
20124                "status fix: skipping missing submodule `{}` ({})",
20125                scope.id,
20126                scope.source_root.display()
20127            );
20128            continue;
20129        }
20130        eprintln!("status fix: refreshing submodule `{}` index", scope.id);
20131        run_index_update(
20132            &cfg.db_path_for(root, &scope.id),
20133            &scope.source_root,
20134            format!("status --fix refreshing submodule `{}` index", scope.id),
20135            root,
20136            Some(scope.id.as_str()),
20137            false,
20138            false,
20139        )?;
20140    }
20141
20142    Ok(())
20143}
20144
20145pub(crate) fn status_missing_workspace_scopes(report: &status::StatusReport) -> bool {
20146    match &report.index {
20147        status::IndexStatus::Fresh { missing_scopes, .. }
20148        | status::IndexStatus::Stale { missing_scopes, .. }
20149        | status::IndexStatus::Missing { missing_scopes } => !missing_scopes.is_empty(),
20150    }
20151}
20152
20153pub(crate) fn autoindex_missing_workspace_scopes(
20154    root: &Path,
20155    report: &status::StatusReport,
20156) -> Result<()> {
20157    let missing_scopes = match &report.index {
20158        status::IndexStatus::Fresh { missing_scopes, .. }
20159        | status::IndexStatus::Stale { missing_scopes, .. }
20160        | status::IndexStatus::Missing { missing_scopes } => missing_scopes,
20161    };
20162    if missing_scopes.is_empty() {
20163        return Ok(());
20164    }
20165
20166    let missing_scope_ids = missing_scopes
20167        .iter()
20168        .map(|scope| scope.scope.as_str())
20169        .collect::<std::collections::HashSet<_>>();
20170    let cfg = config::Config::load(root)?;
20171    for scope in config::Config::submodule_dirs(root)? {
20172        if !missing_scope_ids.contains(scope.id.as_str()) || !scope.source_root.exists() {
20173            continue;
20174        }
20175        let db_path = cfg.db_path_for(root, &scope.id);
20176        run_index_update(
20177            &db_path,
20178            &scope.source_root,
20179            format!(
20180                "autoindexing missing submodule `{}` during status",
20181                scope.id
20182            ),
20183            root,
20184            Some(scope.id.as_str()),
20185            false,
20186            false,
20187        )?;
20188    }
20189    Ok(())
20190}
20191
20192pub(crate) fn emit_summary_stats_warnings(stats: &summarize::SummaryStats, root: &Path) {
20193    for warning in &stats.warnings {
20194        let rel_path = relativize_pathbuf(&warning.path, root);
20195        eprintln!(
20196            "warning: summarize stats {}: {}",
20197            rel_path.display(),
20198            warning.message
20199        );
20200    }
20201}
20202
20203fn contextualize_error(err: anyhow::Error, context: String) -> anyhow::Error {
20204    Result::<(), anyhow::Error>::Err(err)
20205        .context(context)
20206        .unwrap_err()
20207}
20208
20209fn should_attach_lock_diagnostics(err: &anyhow::Error) -> bool {
20210    let message = err.to_string();
20211    message.contains("another tsift index writer is already active")
20212        || substrate::error_mentions_locked_db(err)
20213}
20214
20215fn add_write_lock_context(
20216    err: anyhow::Error,
20217    action: String,
20218    root: &std::path::Path,
20219    scope: Option<&str>,
20220) -> anyhow::Error {
20221    if !should_attach_lock_diagnostics(&err) {
20222        return contextualize_error(err, action);
20223    }
20224
20225    let Ok(report) = status::check_locks(root, None, scope) else {
20226        return contextualize_error(err, action);
20227    };
20228
20229    contextualize_error(
20230        err,
20231        format!(
20232            "{}\n\nlock diagnostics:\n{}",
20233            action,
20234            status::format_locks_human(&report, false).trim_end()
20235        ),
20236    )
20237}
20238
20239pub(crate) fn run_index_update(
20240    db_path: &std::path::Path,
20241    source_root: &std::path::Path,
20242    action: String,
20243    root: &std::path::Path,
20244    scope: Option<&str>,
20245    rebuild: bool,
20246    prune: bool,
20247) -> Result<index::IndexSummary> {
20248    let result = (|| {
20249        let db = index::IndexDb::open(db_path)?;
20250        if rebuild {
20251            db.rebuild(source_root)
20252        } else if prune {
20253            db.apply_changes_pruned(source_root)
20254        } else {
20255            db.apply_changes(source_root)
20256        }
20257    })();
20258
20259    let summary = result.map_err(|err| add_write_lock_context(err, action, root, scope))?;
20260    emit_index_warnings(&summary, source_root, scope);
20261    Ok(summary)
20262}
20263
20264pub(crate) fn relativize_index_summary(summary: &mut index::IndexSummary, root: &Path) {
20265    for change in &mut summary.changes {
20266        change.path = relativize_pathbuf(&change.path, root);
20267    }
20268    for warning in &mut summary.warnings {
20269        warning.path = relativize_pathbuf(&warning.path, root);
20270    }
20271}
20272
20273fn emit_index_warnings(summary: &index::IndexSummary, root: &Path, scope: Option<&str>) {
20274    for warning in &summary.warnings {
20275        let rel_path = relativize_pathbuf(&warning.path, root);
20276        let stage = match warning.stage {
20277            index::IndexWarningStage::ReadSource => "read failed",
20278            index::IndexWarningStage::ExtractSymbols => "symbol extraction failed",
20279            index::IndexWarningStage::ExtractCallSites => "call extraction failed",
20280            index::IndexWarningStage::ExtractRoutes => "route extraction failed",
20281        };
20282        let scope_prefix = scope.map(|name| format!("[{}] ", name)).unwrap_or_default();
20283        let lang_suffix = warning
20284            .language
20285            .as_deref()
20286            .map(|lang| format!(" [{}]", lang))
20287            .unwrap_or_default();
20288        eprintln!(
20289            "warning: {}{}{}: {}: {}",
20290            scope_prefix,
20291            rel_path.display(),
20292            lang_suffix,
20293            stage,
20294            warning.message
20295        );
20296    }
20297}
20298
20299pub(crate) fn load_summarize_config(root: &std::path::Path) -> summarize::SummarizeConfig {
20300    let config_path = root.join(".tsift/config.toml");
20301    if !config_path.exists() {
20302        return summarize::SummarizeConfig::default();
20303    }
20304    #[derive(serde::Deserialize, Default)]
20305    struct RawConfig {
20306        #[serde(default)]
20307        summarize: Option<RawSummarize>,
20308    }
20309    #[derive(serde::Deserialize)]
20310    struct RawSummarize {
20311        model: Option<String>,
20312        max_file_tokens: Option<usize>,
20313        api_key_env: Option<String>,
20314    }
20315    let content = std::fs::read_to_string(&config_path).unwrap_or_default();
20316    let raw: RawConfig = toml::from_str(&content).unwrap_or_default();
20317    let defaults = summarize::SummarizeConfig::default();
20318    match raw.summarize {
20319        Some(s) => summarize::SummarizeConfig {
20320            model: s.model.unwrap_or(defaults.model),
20321            max_file_tokens: s.max_file_tokens.unwrap_or(defaults.max_file_tokens),
20322            api_key_env: s.api_key_env.unwrap_or(defaults.api_key_env),
20323        },
20324        None => defaults,
20325    }
20326}
20327
/// The index database selected for a file, together with the source root that
/// database is paired with.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ExtractSymbolContext {
    /// Location of the selected index database.
    db_path: PathBuf,
    /// Source root paired with `db_path`.
    source_root: PathBuf,
}
20333
20334pub(crate) fn find_symbols_db_for_file(
20335    root: &Path,
20336    file_path: &Path,
20337) -> Result<Option<ExtractSymbolContext>> {
20338    let cfg = config::Config::load(root)?;
20339    let mut submodules = config::Config::submodule_dirs(root)?;
20340    submodules.sort_by(|left, right| {
20341        right
20342            .source_root
20343            .components()
20344            .count()
20345            .cmp(&left.source_root.components().count())
20346    });
20347
20348    for scope in submodules {
20349        if !file_path.starts_with(&scope.source_root) {
20350            continue;
20351        }
20352        let db_path = cfg.db_path_for(root, &scope.id);
20353        if db_path.exists() {
20354            return Ok(Some(ExtractSymbolContext {
20355                db_path,
20356                source_root: scope.source_root,
20357            }));
20358        }
20359    }
20360
20361    let single = root.join(".tsift/index.db");
20362    if single.exists() && file_path.starts_with(root) {
20363        return Ok(Some(ExtractSymbolContext {
20364            db_path: single,
20365            source_root: root.to_path_buf(),
20366        }));
20367    }
20368
20369    Ok(None)
20370}
20371
20372pub(crate) fn resolve_extract_base(path: &Path) -> Result<PathBuf> {
20373    let canonical = path
20374        .canonicalize()
20375        .with_context(|| format!("canonicalizing {}", path.display()))?;
20376
20377    Ok(if canonical.is_dir() {
20378        canonical
20379    } else {
20380        canonical
20381            .parent()
20382            .map(Path::to_path_buf)
20383            .unwrap_or(canonical)
20384    })
20385}
20386
20387fn normalize_extract_scope_path(path: &Path) -> Result<PathBuf> {
20388    if path.exists() {
20389        return path
20390            .canonicalize()
20391            .with_context(|| format!("canonicalizing extract scope {}", path.display()));
20392    }
20393
20394    Ok(summarize::normalize_lexical_path(path))
20395}
20396
20397pub(crate) fn resolve_extract_scope(root: &Path, extract_path: &Path) -> Result<PathBuf> {
20398    let scope = if extract_path.is_absolute() {
20399        extract_path.to_path_buf()
20400    } else {
20401        root.join(extract_path)
20402    };
20403    normalize_extract_scope_path(&scope)
20404}
20405
20406pub(crate) fn summarize_diff_matches_scope(changed_path: &Path, extract_scope: &Path) -> bool {
20407    normalize_extract_scope_path(changed_path)
20408        .unwrap_or_else(|_| summarize::normalize_lexical_path(changed_path))
20409        .starts_with(extract_scope)
20410}
20411
20412pub(crate) fn summarize_relative_file_path(root: &Path, file_path: &Path) -> String {
20413    summarize::normalize_summary_file_key(file_path.strip_prefix(root).unwrap_or(file_path))
20414}
20415
20416pub(crate) fn summarize_full_extract_deleted_summary_paths(
20417    summary_db: &summarize::SummaryDb,
20418    root: &Path,
20419    extract_scope: &Path,
20420    files_to_extract: &[PathBuf],
20421) -> Result<BTreeSet<String>> {
20422    let live_paths = files_to_extract
20423        .iter()
20424        .map(|file_path| summarize_relative_file_path(root, file_path))
20425        .collect::<BTreeSet<_>>();
20426    let mut deleted = BTreeSet::new();
20427
20428    for cached_path in summary_db.cached_file_paths()? {
20429        if !summarize_diff_matches_scope(&root.join(&cached_path), extract_scope) {
20430            continue;
20431        }
20432        if !live_paths.contains(&cached_path) {
20433            deleted.insert(cached_path);
20434        }
20435    }
20436
20437    Ok(deleted)
20438}
20439
/// One index database that a search may consult.
#[derive(Clone, Debug)]
struct SearchIndexTarget {
    /// Human-readable name used in messages (for example "index").
    label: String,
    /// Location of the index database file.
    db_path: PathBuf,
    /// Source tree the database is compared against.
    source_root: PathBuf,
    /// Scope identifier, or `None` for the shared root index.
    scope_name: Option<String>,
    /// Command suggested to the user for rebuilding this index.
    reindex_cmd: String,
}
20448
20449fn cargo_package_index_target(
20450    root: &Path,
20451    package: multiplicity::CargoPackageInfo,
20452) -> SearchIndexTarget {
20453    SearchIndexTarget {
20454        label: format!("cargo package `{}` index", package.scope_id),
20455        db_path: multiplicity::cargo_package_db_path(root, &package.scope_id),
20456        source_root: package.package_root.clone(),
20457        scope_name: Some(package.scope_id.clone()),
20458        reindex_cmd: format!(
20459            "tsift index --submodule {} {}",
20460            package.scope_id,
20461            root.display()
20462        ),
20463    }
20464}
20465
/// Freshness of a search index as determined by `inspect_search_index`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SearchIndexState {
    /// The source root or the database file does not exist.
    Missing,
    /// The index has no new, modified or deleted files pending.
    Fresh,
    /// The index is out of date by `stale_files` files.
    Stale { stale_files: usize },
}
20472
20473fn resolve_search_index_targets(
20474    root: &Path,
20475    path_hint: &Path,
20476    scope: Option<&str>,
20477    federated: bool,
20478) -> Result<Vec<SearchIndexTarget>> {
20479    if let Some(scope_name) = scope {
20480        if let Some(scope) = config::Config::find_submodule(root, scope_name)? {
20481            let cfg = config::Config::load(root)?;
20482            return Ok(vec![SearchIndexTarget {
20483                label: format!("submodule `{}` index", scope.id),
20484                db_path: cfg.db_path_for(root, &scope.id),
20485                source_root: scope.source_root.clone(),
20486                scope_name: Some(scope.id.clone()),
20487                reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
20488            }]);
20489        }
20490        if let Some(package) = multiplicity::find_cargo_package(root, scope_name)? {
20491            return Ok(vec![cargo_package_index_target(root, package)]);
20492        }
20493        config::Config::resolve_submodule(root, scope_name)?;
20494    }
20495
20496    if federated {
20497        let cfg = config::Config::load(root)?;
20498        let mut targets = Vec::new();
20499        for scope in config::Config::submodule_dirs(root)? {
20500            if !cfg.federation_for_scope(&scope) {
20501                continue;
20502            }
20503            targets.push(SearchIndexTarget {
20504                label: format!("submodule `{}` index", scope.id),
20505                db_path: cfg.db_path_for(root, &scope.id),
20506                source_root: scope.source_root.clone(),
20507                scope_name: Some(scope.id.clone()),
20508                reindex_cmd: format!("tsift index --workspace {}", root.display()),
20509            });
20510        }
20511        return Ok(targets);
20512    }
20513
20514    if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
20515        let cfg = config::Config::load(root)?;
20516        return Ok(vec![SearchIndexTarget {
20517            label: format!("submodule `{}` index", scope.id),
20518            db_path: cfg.db_path_for(root, &scope.id),
20519            source_root: scope.source_root.clone(),
20520            scope_name: Some(scope.id.clone()),
20521            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
20522        }]);
20523    }
20524
20525    if let Some(package) = multiplicity::infer_cargo_package_from_path(root, path_hint)? {
20526        return Ok(vec![cargo_package_index_target(root, package)]);
20527    }
20528
20529    if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
20530        let cfg = config::Config::load(root)?;
20531        return Ok(vec![SearchIndexTarget {
20532            label: format!("submodule `{}` index", scope.id),
20533            db_path: cfg.db_path_for(root, &scope.id),
20534            source_root: scope.source_root.clone(),
20535            scope_name: Some(scope.id.clone()),
20536            reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
20537        }]);
20538    }
20539
20540    let scopes = config::Config::submodule_dirs(root)?;
20541    if !scopes.is_empty() {
20542        let root_db = root.join(".tsift/index.db");
20543        if !root_db.exists() {
20544            let available_scopes = scopes
20545                .iter()
20546                .map(|scope| scope.id.as_str())
20547                .collect::<Vec<_>>()
20548                .join(", ");
20549            let cfg = config::Config::load(root)?;
20550            let indexed_scopes = scopes
20551                .iter()
20552                .filter(|scope| cfg.db_path_for(root, &scope.id).exists())
20553                .map(|scope| scope.id.as_str())
20554                .collect::<Vec<_>>();
20555            let indexed_label = if indexed_scopes.is_empty() {
20556                "none".to_string()
20557            } else {
20558                indexed_scopes.join(", ")
20559            };
20560            bail!(
20561                "workspace root {} has no shared root index at {}. Default search requires `--scope <scope>` or `--federated` when the workspace uses scoped `.tsift/indexes/*/index.db` files. Available scopes: {}. Indexed scopes: {}.",
20562                root.display(),
20563                root_db.display(),
20564                available_scopes,
20565                indexed_label,
20566            );
20567        }
20568    }
20569
20570    Ok(vec![SearchIndexTarget {
20571        label: "index".to_string(),
20572        db_path: root.join(".tsift/index.db"),
20573        source_root: root.to_path_buf(),
20574        scope_name: None,
20575        reindex_cmd: format!("tsift index {}", root.display()),
20576    }])
20577}
20578
20579fn inspect_search_index(target: &SearchIndexTarget) -> Result<SearchIndexState> {
20580    if !target.source_root.exists() || !target.db_path.exists() {
20581        return Ok(SearchIndexState::Missing);
20582    }
20583
20584    let inspection =
20585        index::IndexDb::inspect_read_only(&target.db_path, &target.source_root, false)?;
20586    let stale_files =
20587        inspection.summary.new + inspection.summary.modified + inspection.summary.deleted;
20588    if stale_files == 0 {
20589        Ok(SearchIndexState::Fresh)
20590    } else {
20591        Ok(SearchIndexState::Stale { stale_files })
20592    }
20593}
20594
20595#[derive(Debug, Clone, PartialEq, Eq)]
20596struct RebuildSearchTarget {
20597    label: String,
20598    reason: RebuildSearchReason,
20599    reindex_cmd: String,
20600}
20601
/// Why an index needs to be rebuilt.
#[derive(Clone, Debug, Eq, PartialEq)]
enum RebuildSearchReason {
    /// The index does not exist.
    Missing,
    /// The index exists but is out of date by `stale_files` files.
    Stale { stale_files: usize },
}
20607
20608#[derive(Debug, Clone, PartialEq, Eq)]
20609struct DegradedSearchTarget {
20610    label: String,
20611    reason: RebuildSearchReason,
20612    reindex_cmd: String,
20613}
20614
/// How a search proceeds when autoindexing had to be skipped.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum DegradedSearchMode {
    /// Search against the existing index snapshot without updating it.
    ReadOnly,
    /// Fall back to exact search over the live files.
    Exact,
}
20620
/// Outcome of `precheck_search_indexes`.
#[derive(Debug)]
struct SearchPrecheck {
    /// Every index target resolved for the search.
    targets: Vec<SearchIndexTarget>,
    /// Targets whose autoindex attempt was skipped because another writer
    /// held the index.
    degraded_targets: Vec<DegradedSearchTarget>,
}
20626
20627fn is_active_writer_lock_error(err: &anyhow::Error) -> bool {
20628    err.chain().any(|cause| {
20629        cause
20630            .to_string()
20631            .contains("another tsift index writer is already active")
20632    })
20633}
20634
20635fn infer_agent_doc_task_submodule(
20636    root: &Path,
20637    path_hint: &Path,
20638) -> Result<Option<config::WorkspaceScope>> {
20639    let hinted_path = if path_hint.is_absolute() {
20640        path_hint.to_path_buf()
20641    } else {
20642        root.join(path_hint)
20643    };
20644    let Ok(relative) = hinted_path.strip_prefix(root) else {
20645        return Ok(None);
20646    };
20647    let mut components = relative.components();
20648    let Some(std::path::Component::Normal(first)) = components.next() else {
20649        return Ok(None);
20650    };
20651    if first != "tasks" {
20652        return Ok(None);
20653    }
20654    let Some(file_stem) = relative.file_stem().and_then(|stem| stem.to_str()) else {
20655        return Ok(None);
20656    };
20657    config::Config::find_submodule(root, file_stem)
20658}
20659
20660fn degraded_search_target(
20661    target: &SearchIndexTarget,
20662    reason: RebuildSearchReason,
20663) -> DegradedSearchTarget {
20664    DegradedSearchTarget {
20665        label: target.label.clone(),
20666        reason,
20667        reindex_cmd: target.reindex_cmd.clone(),
20668    }
20669}
20670
20671fn apply_search_index_update(
20672    root: &Path,
20673    target: &SearchIndexTarget,
20674) -> Result<index::IndexSummary> {
20675    run_index_update(
20676        &target.db_path,
20677        &target.source_root,
20678        format!("autoindexing {}", target.label),
20679        root,
20680        target.scope_name.as_deref(),
20681        false,
20682        false,
20683    )
20684}
20685
20686fn collect_rebuild_search_targets(
20687    targets: &[SearchIndexTarget],
20688) -> Result<Vec<RebuildSearchTarget>> {
20689    let mut rebuild_targets = Vec::new();
20690    for target in targets {
20691        let reason = match inspect_search_index(target)? {
20692            SearchIndexState::Missing => RebuildSearchReason::Missing,
20693            SearchIndexState::Fresh => continue,
20694            SearchIndexState::Stale { stale_files } => RebuildSearchReason::Stale { stale_files },
20695        };
20696        rebuild_targets.push(RebuildSearchTarget {
20697            label: target.label.clone(),
20698            reason,
20699            reindex_cmd: target.reindex_cmd.clone(),
20700        });
20701    }
20702    Ok(rebuild_targets)
20703}
20704
20705fn rebuild_search_target_detail(target: &RebuildSearchTarget) -> String {
20706    match target.reason {
20707        RebuildSearchReason::Missing => format!("{} is missing", target.label),
20708        RebuildSearchReason::Stale { stale_files } => {
20709            let file_suffix = if stale_files == 1 { "" } else { "s" };
20710            format!(
20711                "{} is stale ({} file{})",
20712                target.label, stale_files, file_suffix
20713            )
20714        }
20715    }
20716}
20717
20718fn rebuild_search_targets_message(rebuild_targets: &[RebuildSearchTarget]) -> String {
20719    if rebuild_targets.len() == 1 {
20720        let target = &rebuild_targets[0];
20721        return format!(
20722            "{}. Run `{}` to rebuild before retrying.",
20723            rebuild_search_target_detail(target),
20724            target.reindex_cmd
20725        );
20726    }
20727
20728    let summary: Vec<String> = rebuild_targets
20729        .iter()
20730        .take(3)
20731        .map(rebuild_search_target_detail)
20732        .collect();
20733    let overflow = rebuild_targets.len().saturating_sub(summary.len());
20734    let mut details = summary.join(", ");
20735    if overflow > 0 {
20736        details.push_str(&format!(", +{} more", overflow));
20737    }
20738    let reindex_cmd = rebuild_targets[0].reindex_cmd.clone();
20739    format!(
20740        "{} indexes need rebuild: {}. Run `{}` to rebuild before retrying.",
20741        rebuild_targets.len(),
20742        details,
20743        reindex_cmd
20744    )
20745}
20746
20747pub(crate) fn precheck_search_indexes(
20748    root: &Path,
20749    path_hint: &Path,
20750    scope: Option<&str>,
20751    federated: bool,
20752    autoindex: bool,
20753) -> Result<SearchPrecheck> {
20754    let targets = resolve_search_index_targets(root, path_hint, scope, federated)?;
20755    let mut stale_targets = Vec::new();
20756    let mut degraded_targets = Vec::new();
20757
20758    for target in &targets {
20759        match inspect_search_index(target)? {
20760            SearchIndexState::Missing => {
20761                if autoindex && let Err(err) = apply_search_index_update(root, target) {
20762                    if is_active_writer_lock_error(&err) {
20763                        degraded_targets
20764                            .push(degraded_search_target(target, RebuildSearchReason::Missing));
20765                    } else {
20766                        return Err(err);
20767                    }
20768                }
20769            }
20770            SearchIndexState::Fresh => {}
20771            SearchIndexState::Stale { stale_files } => {
20772                if autoindex {
20773                    if let Err(err) = apply_search_index_update(root, target) {
20774                        if is_active_writer_lock_error(&err) {
20775                            degraded_targets.push(degraded_search_target(
20776                                target,
20777                                RebuildSearchReason::Stale { stale_files },
20778                            ));
20779                        } else {
20780                            return Err(err);
20781                        }
20782                    }
20783                } else {
20784                    stale_targets.push(RebuildSearchTarget {
20785                        label: target.label.clone(),
20786                        reason: RebuildSearchReason::Stale { stale_files },
20787                        reindex_cmd: target.reindex_cmd.clone(),
20788                    });
20789                }
20790            }
20791        }
20792    }
20793
20794    if stale_targets.is_empty() {
20795        return Ok(SearchPrecheck {
20796            targets,
20797            degraded_targets,
20798        });
20799    }
20800
20801    bail!(
20802        "tsift search aborted: {} \
20803         or re-run without `--no-autoindex`.",
20804        rebuild_search_targets_message(&stale_targets),
20805    );
20806}
20807
20808pub(crate) fn degraded_search_mode(targets: &[DegradedSearchTarget]) -> Option<DegradedSearchMode> {
20809    if targets.is_empty() {
20810        return None;
20811    }
20812
20813    if targets
20814        .iter()
20815        .all(|target| matches!(target.reason, RebuildSearchReason::Missing))
20816    {
20817        Some(DegradedSearchMode::Exact)
20818    } else {
20819        Some(DegradedSearchMode::ReadOnly)
20820    }
20821}
20822
20823fn degraded_search_targets_summary(targets: &[DegradedSearchTarget]) -> String {
20824    if targets.len() == 1 {
20825        let target = &targets[0];
20826        return match target.reason {
20827            RebuildSearchReason::Missing => format!("{} is missing", target.label),
20828            RebuildSearchReason::Stale { stale_files } => {
20829                let file_suffix = if stale_files == 1 { "" } else { "s" };
20830                format!(
20831                    "{} is stale ({} file{})",
20832                    target.label, stale_files, file_suffix
20833                )
20834            }
20835        };
20836    }
20837
20838    let missing = targets
20839        .iter()
20840        .filter(|target| matches!(target.reason, RebuildSearchReason::Missing))
20841        .count();
20842    let stale = targets.len().saturating_sub(missing);
20843    let mut parts = Vec::new();
20844    if stale > 0 {
20845        let suffix = if stale == 1 { "" } else { "es" };
20846        parts.push(format!("{stale} stale index{suffix}"));
20847    }
20848    if missing > 0 {
20849        let suffix = if missing == 1 { "" } else { "es" };
20850        parts.push(format!("{missing} missing index{suffix}"));
20851    }
20852    parts.join(", ")
20853}
20854
20855pub(crate) fn emit_degraded_search_note(
20856    targets: &[DegradedSearchTarget],
20857    mode: DegradedSearchMode,
20858) {
20859    let summary = degraded_search_targets_summary(targets);
20860    let reindex_cmd = &targets[0].reindex_cmd;
20861    match mode {
20862        DegradedSearchMode::ReadOnly => eprintln!(
20863            "note: active tsift writer detected; skipping autoindex because {}. \
20864             Continuing with read-only search and the current index snapshot; symbol hits may lag. \
20865             Retry `{}` after the active writer finishes for fresh index results.",
20866            summary, reindex_cmd
20867        ),
20868        DegradedSearchMode::Exact => eprintln!(
20869            "note: active tsift writer detected; skipping autoindex because {}. \
20870             Continuing with exact live-file search. Retry `{}` after the active writer finishes \
20871             for indexed symbol hits.",
20872            summary, reindex_cmd
20873        ),
20874    }
20875}
20876
20877fn search_timeout_message(
20878    timeout_secs: u64,
20879    strategy: &str,
20880    targets: &[SearchIndexTarget],
20881) -> Result<String> {
20882    let rebuild_targets = collect_rebuild_search_targets(targets)?;
20883    if rebuild_targets.is_empty() {
20884        return Ok(format!(
20885            "tsift search timed out after {}s (strategy: {}). \
20886             The search root looks fresh, so reindexing is unlikely to help. \
20887             Re-run with `--timeout 0` to disable the timeout, narrow `--path` / `--scope`, \
20888             or try a different strategy.",
20889            timeout_secs, strategy,
20890        ));
20891    }
20892
20893    Ok(format!(
20894        "tsift search timed out after {}s (strategy: {}). {}",
20895        timeout_secs,
20896        strategy,
20897        rebuild_search_targets_message(&rebuild_targets),
20898    ))
20899}
20900
/// Returns `true` for the punctuation characters that make a query look like
/// an identifier or path: `-`, `_`, `/`, `\`, `.`, `:`, `#` and `@`.
fn is_exact_preferring_query_char(ch: char) -> bool {
    const EXACT_HINT_CHARS: [char; 8] = ['-', '_', '/', '\\', '.', ':', '#', '@'];
    EXACT_HINT_CHARS.contains(&ch)
}
20904
20905fn query_prefers_exact_search(query: &str) -> bool {
20906    let trimmed = query.trim();
20907    !trimmed.is_empty()
20908        && !trimmed.chars().any(char::is_whitespace)
20909        && trimmed.chars().any(|ch| ch.is_alphanumeric())
20910        && trimmed.chars().any(is_exact_preferring_query_char)
20911        && trimmed
20912            .chars()
20913            .all(|ch| ch.is_alphanumeric() || is_exact_preferring_query_char(ch))
20914}
20915
20916pub(crate) fn resolve_search_strategy(query: &str, strategy: Option<String>) -> String {
20917    strategy.unwrap_or_else(|| {
20918        if query_prefers_exact_search(query) {
20919            "exact".to_string()
20920        } else {
20921            "lexical".to_string()
20922        }
20923    })
20924}
20925
20926
20927pub(crate) fn collect_source_files(path: &std::path::Path) -> Result<Vec<PathBuf>> {
20928    let mut files = Vec::new();
20929    if path.is_file() {
20930        files.push(path.to_path_buf());
20931        return Ok(files);
20932    }
20933    let walker = ignore::WalkBuilder::new(path)
20934        .hidden(true)
20935        .git_ignore(true)
20936        .build();
20937    for entry in walker {
20938        let entry = entry?;
20939        if entry.file_type().is_some_and(|ft| ft.is_file()) {
20940            let p = entry.path();
20941            if let Some(ext) = p.extension() {
20942                let ext = ext.to_string_lossy();
20943                if matches!(
20944                    ext.as_ref(),
20945                    "rs" | "py"
20946                        | "ts"
20947                        | "tsx"
20948                        | "js"
20949                        | "jsx"
20950                        | "kt"
20951                        | "kts"
20952                        | "zig"
20953                        | "sh"
20954                        | "bash"
20955                        | "zsh"
20956                ) {
20957                    files.push(p.to_path_buf());
20958                }
20959            }
20960        }
20961    }
20962    Ok(files)
20963}
20964
20965#[cfg(test)]
20966 mod tests {
20967     use super::*;
20968     use super::semantic_edit::{
20969         EditOp,
20970         apply_edit_op, apply_edit_plan_atomically_inner, markdown_block_spans,
20971         markdown_section_spans,
20972     };
20973     use tsift_memory::{MemoryEventKind, MemoryStore};
20974
20975    use std::cell::RefCell;
20976    use substrate::{ConvexEdgeRow, ConvexGraphClient, ConvexGraphStore, ConvexNodeRow};
20977    fn parse_cli<I, T>(itr: I) -> Cli
20978    where
20979        I: IntoIterator<Item = T> + Send + 'static,
20980        T: Into<std::ffi::OsString> + Clone + Send + 'static,
20981    {
20982        std::thread::Builder::new()
20983            .name("cli-parse".to_string())
20984            .stack_size(16 * 1024 * 1024)
20985            .spawn(move || Cli::parse_from(itr))
20986            .unwrap()
20987            .join()
20988            .unwrap()
20989    }
20990
20991    fn try_parse_cli<I, T>(itr: I) -> std::result::Result<Cli, clap::Error>
20992    where
20993        I: IntoIterator<Item = T> + Send + 'static,
20994        T: Into<std::ffi::OsString> + Clone + Send + 'static,
20995    {
20996        std::thread::Builder::new()
20997            .name("cli-try-parse".to_string())
20998            .stack_size(16 * 1024 * 1024)
20999            .spawn(move || Cli::try_parse_from(itr))
21000            .unwrap()
21001            .join()
21002            .unwrap()
21003    }
21004
21005    fn build_relative_search_budget_report(
21006        query: &str,
21007        strategy: &str,
21008        root: &Path,
21009        response: &sift::SearchResponse,
21010        symbol_hits: &[index::SymbolHit],
21011        budget: ResponseBudget,
21012        filters: &SearchFacetFilters,
21013    ) -> SearchBudgetReport {
21014        build_search_budget_report(SearchBudgetReportInput {
21015            query,
21016            strategy,
21017            root,
21018            response,
21019            symbol_hits,
21020            absolute: false,
21021            budget,
21022            filters,
21023        })
21024    }
21025
    /// In-memory `ConvexGraphClient` used by the tests: rows live in
    /// `RefCell`-wrapped `BTreeMap`s instead of a remote store.
    #[derive(Default)]
    struct MemoryConvexGraphClient {
        /// Node rows keyed by their external id.
        nodes: RefCell<BTreeMap<String, ConvexNodeRow>>,
        /// Edge rows keyed by their edge key.
        edges: RefCell<BTreeMap<String, ConvexEdgeRow>>,
    }
21031
21032    impl ConvexGraphClient for MemoryConvexGraphClient {
21033        fn upsert_node_row(&self, row: &ConvexNodeRow) -> Result<()> {
21034            self.nodes
21035                .borrow_mut()
21036                .insert(row.external_id.clone(), row.clone());
21037            Ok(())
21038        }
21039
21040        fn upsert_edge_row(&self, row: &ConvexEdgeRow) -> Result<()> {
21041            self.edges
21042                .borrow_mut()
21043                .insert(row.edge_key.clone(), row.clone());
21044            Ok(())
21045        }
21046
21047        fn delete_node_row(&self, external_id: &str) -> Result<usize> {
21048            Ok(usize::from(
21049                self.nodes.borrow_mut().remove(external_id).is_some(),
21050            ))
21051        }
21052
21053        fn delete_edge_row(&self, edge_key: &str) -> Result<usize> {
21054            Ok(usize::from(
21055                self.edges.borrow_mut().remove(edge_key).is_some(),
21056            ))
21057        }
21058
21059        fn node_row(&self, external_id: &str) -> Result<Option<ConvexNodeRow>> {
21060            Ok(self.nodes.borrow().get(external_id).cloned())
21061        }
21062
21063        fn node_rows(&self) -> Result<Vec<ConvexNodeRow>> {
21064            Ok(self.nodes.borrow().values().cloned().collect())
21065        }
21066
21067        fn edge_rows(&self) -> Result<Vec<ConvexEdgeRow>> {
21068            Ok(self.edges.borrow().values().cloned().collect())
21069        }
21070
21071        fn node_rows_by_kind(&self, kind: &str) -> Result<Vec<ConvexNodeRow>> {
21072            Ok(self
21073                .nodes
21074                .borrow()
21075                .values()
21076                .filter(|row| row.kind == kind)
21077                .cloned()
21078                .collect())
21079        }
21080
21081        fn outgoing_edge_rows(
21082            &self,
21083            from_external_id: &str,
21084            kind: Option<&str>,
21085        ) -> Result<Vec<ConvexEdgeRow>> {
21086            Ok(self
21087                .edges
21088                .borrow()
21089                .values()
21090                .filter(|row| row.from_external_id == from_external_id)
21091                .filter(|row| kind.is_none_or(|kind| row.kind == kind))
21092                .cloned()
21093                .collect())
21094        }
21095    }
21096
21097    fn init_git_repo(path: &Path) {
21098        let status = std::process::Command::new("git")
21099            .args(["init"])
21100            .current_dir(path)
21101            .status()
21102            .unwrap();
21103        assert!(status.success(), "git init failed");
21104
21105        let status = std::process::Command::new("git")
21106            .args(["add", "."])
21107            .current_dir(path)
21108            .status()
21109            .unwrap();
21110        assert!(status.success(), "git add failed");
21111
21112        let status = std::process::Command::new("git")
21113            .args([
21114                "-c",
21115                "user.name=tsift-tests",
21116                "-c",
21117                "user.email=tsift-tests@example.com",
21118                "commit",
21119                "--quiet",
21120                "-m",
21121                "init",
21122            ])
21123            .current_dir(path)
21124            .status()
21125            .unwrap();
21126        assert!(status.success(), "git commit failed");
21127    }
21128
21129    fn write_empty_root_index(root: &Path) {
21130        let index_dir = root.join(".tsift");
21131        fs::create_dir_all(&index_dir).unwrap();
21132        fs::write(index_dir.join("index.db"), "").unwrap();
21133    }
21134
21135    fn write_repeated_lines(path: &Path, line: &str, lines: usize) -> PathBuf {
21136        if let Some(parent) = path.parent() {
21137            fs::create_dir_all(parent).unwrap();
21138        }
21139        let body = std::iter::repeat_n(line, lines)
21140            .collect::<Vec<_>>()
21141            .join("\n");
21142        fs::write(path, format!("{body}\n")).unwrap();
21143        path.to_path_buf()
21144    }
21145
21146    // --- build_token_capped_preview ---
21147
21148    #[test]
21149    fn token_capped_preview_returns_all_lines_when_under_cap() {
21150        let lines: Vec<&str> = vec!["fn foo() {", "    1 + 1", "}"];
21151        let result = build_token_capped_preview(&lines, 1, 3, 160, 1000);
21152        assert!(!result.was_capped);
21153        assert_eq!(result.preview.len(), 3);
21154        assert_eq!(result.capped_end, 3);
21155    }
21156
21157    #[test]
21158    fn token_capped_preview_truncates_when_over_cap() {
21159        let lines: Vec<&str> = (0..200).map(|_| "    let x = some_very_long_expression_here();").collect();
21160        let result = build_token_capped_preview(&lines, 1, 200, 160, 100);
21161        assert!(result.was_capped);
21162        assert!(result.preview.len() < 200);
21163        assert!(result.capped_end < 200);
21164    }
21165
21166    #[test]
21167    fn token_capped_preview_keeps_at_least_one_line() {
21168        let long_line: String = "x".repeat(8000);
21169        let lines: Vec<&str> = vec![&long_line];
21170        let result = build_token_capped_preview(&lines, 1, 1, 160, 10);
21171        assert!(!result.was_capped);
21172        assert_eq!(result.preview.len(), 1);
21173    }
21174
21175    #[test]
21176    fn token_capped_preview_cap_at_boundary() {
21177        let lines: Vec<&str> = vec!["aaaa", "bbbb", "cccc", "dddd"];
21178        let result = build_token_capped_preview(&lines, 1, 4, 160, 4);
21179        assert!(!result.was_capped);
21180        assert_eq!(result.preview.len(), 4);
21181    }
21182
21183    #[test]
21184    fn token_capped_preview_cap_just_over_boundary() {
21185        let lines: Vec<&str> = vec!["aaaa", "bbbb", "cccc", "dddd"];
21186        let result = build_token_capped_preview(&lines, 1, 4, 160, 3);
21187        assert!(result.was_capped);
21188        assert_eq!(result.preview.len(), 3);
21189        assert_eq!(result.capped_end, 3);
21190    }
21191
21192    #[test]
21193    fn token_capped_preview_empty_lines() {
21194        let lines: Vec<&str> = vec![];
21195        let result = build_token_capped_preview(&lines, 1, 0, 160, 100);
21196        assert!(!result.was_capped);
21197        assert!(result.preview.is_empty());
21198    }
21199
21200    #[test]
21201    fn token_capped_preview_per_line_truncation_applied() {
21202        let long_line = "x".repeat(500);
21203        let lines: Vec<&str> = vec![&long_line, "short"];
21204        let result = build_token_capped_preview(&lines, 1, 2, 20, 10000);
21205        assert!(!result.was_capped);
21206        assert_eq!(result.preview.len(), 2);
21207        assert!(result.preview[0].text.len() <= 23);
21208        assert!(result.preview[0].text.ends_with("..."));
21209    }
21210
21211    // --- classify_task ---
21212
21213    #[test]
21214    fn route_search_defaults_to_haiku() {
21215        let (tier, model) = classify_task("find all uses of authenticate");
21216        assert_eq!(tier, "haiku");
21217        assert!(
21218            model.contains("haiku"),
21219            "expected haiku model, got {}",
21220            model
21221        );
21222    }
21223
21224    #[test]
21225    fn route_edit_keywords_to_sonnet() {
21226        for kw in &[
21227            "edit the file",
21228            "fix the bug",
21229            "update the config",
21230            "remove dead code",
21231            "create a new module",
21232        ] {
21233            let (tier, _) = classify_task(kw);
21234            assert_eq!(tier, "sonnet", "expected sonnet for {:?}", kw);
21235        }
21236    }
21237
21238    #[test]
21239    fn route_architecture_keywords_to_opus() {
21240        for kw in &[
21241            "design the API",
21242            "architecture review",
21243            "plan the migration",
21244            "analyze the system",
21245            "evaluate trade-offs",
21246        ] {
21247            let (tier, _) = classify_task(kw);
21248            assert_eq!(tier, "opus", "expected opus for {:?}", kw);
21249        }
21250    }
21251
21252    #[test]
21253    fn route_architecture_beats_edit() {
21254        // "design and implement" — architecture signal wins (checked first)
21255        let (tier, _) = classify_task("design and implement the new auth service");
21256        assert_eq!(tier, "opus");
21257    }
21258
21259    #[test]
21260    fn cli_accepts_global_compact_flag() {
21261        let cli = parse_cli(["tsift", "--compact", "status"]);
21262        assert!(cli.compact);
21263        assert!(matches!(cli.command, Some(Commands::Status { .. })));
21264    }
21265
21266    #[test]
21267    fn summarize_diff_scope_matches_relative_directory() {
21268        let root = Path::new("/repo");
21269        let extract_scope = resolve_extract_scope(root, Path::new("src/feature")).unwrap();
21270
21271        assert!(summarize_diff_matches_scope(
21272            Path::new("/repo/src/feature/main.rs"),
21273            &extract_scope
21274        ));
21275        assert!(!summarize_diff_matches_scope(
21276            Path::new("/repo/src/other/main.rs"),
21277            &extract_scope
21278        ));
21279    }
21280
21281    #[test]
21282    fn summarize_diff_scope_matches_relative_file() {
21283        let root = Path::new("/repo");
21284        let extract_scope = resolve_extract_scope(root, Path::new("src/feature/main.rs")).unwrap();
21285
21286        assert!(summarize_diff_matches_scope(
21287            Path::new("/repo/src/feature/main.rs"),
21288            &extract_scope
21289        ));
21290        assert!(!summarize_diff_matches_scope(
21291            Path::new("/repo/src/feature/lib.rs"),
21292            &extract_scope
21293        ));
21294    }
21295
21296    #[test]
21297    fn summarize_extract_scope_walks_relative_paths_from_root() {
21298        let dir = tempfile::tempdir().unwrap();
21299        let source_dir = dir.path().join("src");
21300        std::fs::create_dir_all(&source_dir).unwrap();
21301        let main_rs = source_dir.join("main.rs");
21302        std::fs::write(&main_rs, "fn alpha() {}\n").unwrap();
21303
21304        let extract_scope = resolve_extract_scope(dir.path(), Path::new("src")).unwrap();
21305        let files = collect_source_files(&extract_scope).unwrap();
21306
21307        assert_eq!(files, vec![main_rs]);
21308    }
21309
21310    #[test]
21311    fn summarize_extract_base_uses_nested_path_instead_of_project_root() {
21312        let dir = tempfile::tempdir().unwrap();
21313        let nested = dir.path().join("src/nested");
21314        std::fs::create_dir_all(&nested).unwrap();
21315        std::fs::write(dir.path().join("root.rs"), "fn root_level() {}\n").unwrap();
21316        let nested_file = nested.join("main.rs");
21317        std::fs::write(&nested_file, "fn nested_only() {}\n").unwrap();
21318
21319        let extract_base = resolve_extract_base(&nested).unwrap();
21320        let extract_scope = resolve_extract_scope(&extract_base, Path::new(".")).unwrap();
21321        let files = collect_source_files(&extract_scope).unwrap();
21322
21323        assert_eq!(extract_scope, nested);
21324        assert_eq!(files, vec![nested_file]);
21325    }
21326
21327    #[test]
21328    fn summarize_extract_base_uses_parent_of_file_path() {
21329        let dir = tempfile::tempdir().unwrap();
21330        let nested = dir.path().join("src/nested");
21331        std::fs::create_dir_all(&nested).unwrap();
21332        let file_path = nested.join("main.rs");
21333        std::fs::write(&file_path, "fn nested_only() {}\n").unwrap();
21334
21335        let extract_base = resolve_extract_base(&file_path).unwrap();
21336
21337        assert_eq!(extract_base, nested);
21338    }
21339
21340    #[test]
21341    fn summarize_extract_scope_normalizes_dotdot_segments() {
21342        let dir = tempfile::tempdir().unwrap();
21343        let source_dir = dir.path().join("src");
21344        std::fs::create_dir_all(&source_dir).unwrap();
21345
21346        let extract_scope = resolve_extract_scope(dir.path(), Path::new("src/../src")).unwrap();
21347
21348        assert_eq!(extract_scope, source_dir.canonicalize().unwrap());
21349        assert!(summarize_diff_matches_scope(
21350            &source_dir.join("main.rs"),
21351            &extract_scope
21352        ));
21353    }
21354
21355    #[cfg(unix)]
21356    #[test]
21357    fn summarize_extract_scope_canonicalizes_absolute_symlink_paths() {
21358        use std::os::unix::fs::symlink;
21359
21360        let dir = tempfile::tempdir().unwrap();
21361        let real_root = dir.path().join("real");
21362        let source_dir = real_root.join("src");
21363        std::fs::create_dir_all(&source_dir).unwrap();
21364        let symlink_scope = dir.path().join("scope-link");
21365        symlink(&source_dir, &symlink_scope).unwrap();
21366
21367        let extract_scope = resolve_extract_scope(&real_root, &symlink_scope).unwrap();
21368
21369        assert_eq!(extract_scope, source_dir.canonicalize().unwrap());
21370        assert!(summarize_diff_matches_scope(
21371            &source_dir.join("lib.rs"),
21372            &extract_scope
21373        ));
21374    }
21375
21376    #[test]
21377    fn summarize_diff_extract_includes_untracked_files() {
21378        let dir = tempfile::tempdir().unwrap();
21379        std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
21380        init_git_repo(dir.path());
21381
21382        let source_dir = dir.path().join("src");
21383        std::fs::create_dir_all(&source_dir).unwrap();
21384        let new_file = source_dir.join("new.rs");
21385        std::fs::write(&new_file, "fn alpha_helper() {}\n").unwrap();
21386
21387        let files = summarize::git_changed_files(dir.path()).unwrap();
21388
21389        assert_eq!(files.existing, vec![new_file]);
21390        assert!(files.deleted.is_empty());
21391    }
21392
21393    #[test]
21394    fn summarize_diff_extract_treats_unborn_head_as_untracked_only() {
21395        let dir = tempfile::tempdir().unwrap();
21396        let status = std::process::Command::new("git")
21397            .args(["init"])
21398            .current_dir(dir.path())
21399            .status()
21400            .unwrap();
21401        assert!(status.success(), "git init failed");
21402
21403        let source_dir = dir.path().join("src");
21404        std::fs::create_dir_all(&source_dir).unwrap();
21405        let new_file = source_dir.join("new.rs");
21406        std::fs::write(&new_file, "fn alpha_helper() {}\n").unwrap();
21407
21408        let files = summarize::git_changed_files(dir.path()).unwrap();
21409
21410        assert_eq!(files.existing, vec![new_file]);
21411        assert!(files.deleted.is_empty());
21412    }
21413
21414    #[test]
21415    fn summarize_diff_extract_tracks_deleted_files() {
21416        let dir = tempfile::tempdir().unwrap();
21417        let source_dir = dir.path().join("src");
21418        std::fs::create_dir_all(&source_dir).unwrap();
21419        let deleted_file = source_dir.join("gone.rs");
21420        std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
21421        init_git_repo(dir.path());
21422
21423        std::fs::remove_file(&deleted_file).unwrap();
21424
21425        let files = summarize::git_changed_files(dir.path()).unwrap();
21426
21427        assert!(files.existing.is_empty());
21428        assert_eq!(files.deleted, vec![deleted_file]);
21429    }
21430
21431    #[test]
21432    fn summarize_diff_extract_tracks_git_renames() {
21433        let dir = tempfile::tempdir().unwrap();
21434        let source_dir = dir.path().join("src");
21435        std::fs::create_dir_all(&source_dir).unwrap();
21436        let old_file = source_dir.join("old.rs");
21437        let new_file = source_dir.join("new.rs");
21438        std::fs::write(&old_file, "fn stale() {}\n").unwrap();
21439        init_git_repo(dir.path());
21440
21441        let status = std::process::Command::new("git")
21442            .args(["mv", "src/old.rs", "src/new.rs"])
21443            .current_dir(dir.path())
21444            .status()
21445            .unwrap();
21446        assert!(status.success(), "git mv failed");
21447
21448        let files = summarize::git_changed_files(dir.path()).unwrap();
21449
21450        assert_eq!(files.existing, vec![new_file]);
21451        assert_eq!(files.deleted, vec![old_file]);
21452    }
21453
21454    #[test]
21455    fn summarize_diff_extract_deletes_removed_summary_rows() {
21456        let dir = tempfile::tempdir().unwrap();
21457        let source_dir = dir.path().join("src");
21458        std::fs::create_dir_all(&source_dir).unwrap();
21459        let deleted_file = source_dir.join("gone.rs");
21460        std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
21461        std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
21462        init_git_repo(dir.path());
21463
21464        let summary_db =
21465            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
21466        summary_db
21467            .insert(&summarize::Summary {
21468                id: 0,
21469                symbol_name: "stale".to_string(),
21470                file_path: "src/gone.rs".to_string(),
21471                content_hash: "hash1".to_string(),
21472                summary: "stale summary".to_string(),
21473                entities: None,
21474                relationships: None,
21475                concept_labels: None,
21476                extracted_at: "1700000000".to_string(),
21477                model: "test".to_string(),
21478                tokens_input: Some(100),
21479                tokens_output: Some(50),
21480            })
21481            .unwrap();
21482
21483        std::fs::remove_file(&deleted_file).unwrap();
21484
21485        cmd_summarize(
21486            None,
21487            None,
21488            Some(PathBuf::from("src")),
21489            true,
21490            false,
21491            dir.path(),
21492            false,
21493            true,
21494            false,
21495            false,
21496            false,
21497        )
21498        .unwrap();
21499
21500        assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
21501    }
21502
21503    #[test]
21504    fn summarize_diff_extract_deletes_renamed_summary_rows() {
21505        let dir = tempfile::tempdir().unwrap();
21506        let source_dir = dir.path().join("src");
21507        std::fs::create_dir_all(&source_dir).unwrap();
21508        let old_file = source_dir.join("old.rs");
21509        std::fs::write(&old_file, "fn stale() {}\n").unwrap();
21510        std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
21511        init_git_repo(dir.path());
21512
21513        let summary_db =
21514            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
21515        summary_db
21516            .insert(&summarize::Summary {
21517                id: 0,
21518                symbol_name: "stale".to_string(),
21519                file_path: "src/old.rs".to_string(),
21520                content_hash: "hash1".to_string(),
21521                summary: "stale summary".to_string(),
21522                entities: None,
21523                relationships: None,
21524                concept_labels: None,
21525                extracted_at: "1700000000".to_string(),
21526                model: "test".to_string(),
21527                tokens_input: Some(100),
21528                tokens_output: Some(50),
21529            })
21530            .unwrap();
21531
21532        let status = std::process::Command::new("git")
21533            .args(["mv", "src/old.rs", "src/new.rs"])
21534            .current_dir(dir.path())
21535            .status()
21536            .unwrap();
21537        assert!(status.success(), "git mv failed");
21538
21539        cmd_summarize(
21540            None,
21541            None,
21542            Some(PathBuf::from("src")),
21543            true,
21544            false,
21545            dir.path(),
21546            false,
21547            true,
21548            false,
21549            false,
21550            false,
21551        )
21552        .unwrap();
21553
21554        assert!(summary_db.get_by_file("src/old.rs").unwrap().is_empty());
21555    }
21556
21557    #[test]
21558    fn summarize_full_extract_deletes_removed_summary_rows_when_scope_is_empty() {
21559        let dir = tempfile::tempdir().unwrap();
21560        let source_dir = dir.path().join("src");
21561        std::fs::create_dir_all(&source_dir).unwrap();
21562        let deleted_file = source_dir.join("gone.rs");
21563        std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
21564
21565        let summary_db =
21566            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
21567        summary_db
21568            .insert(&summarize::Summary {
21569                id: 0,
21570                symbol_name: "stale".to_string(),
21571                file_path: "src/gone.rs".to_string(),
21572                content_hash: "hash1".to_string(),
21573                summary: "stale summary".to_string(),
21574                entities: None,
21575                relationships: None,
21576                concept_labels: None,
21577                extracted_at: "1700000000".to_string(),
21578                model: "test".to_string(),
21579                tokens_input: Some(100),
21580                tokens_output: Some(50),
21581            })
21582            .unwrap();
21583
21584        std::fs::remove_file(&deleted_file).unwrap();
21585
21586        cmd_summarize(
21587            None,
21588            None,
21589            Some(PathBuf::from("src")),
21590            false,
21591            false,
21592            dir.path(),
21593            false,
21594            true,
21595            false,
21596            false,
21597            false,
21598        )
21599        .unwrap();
21600
21601        assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
21602    }
21603
21604    #[test]
21605    fn summarize_extract_fails_fast_when_summary_writer_lock_is_live() {
21606        let dir = tempfile::tempdir().unwrap();
21607        let source_dir = dir.path().join("src");
21608        std::fs::create_dir_all(&source_dir).unwrap();
21609        let file = source_dir.join("lib.rs");
21610        std::fs::write(&file, "fn helper() {}\n").unwrap();
21611
21612        let content = std::fs::read(&file).unwrap();
21613        let summary_db =
21614            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
21615        summary_db
21616            .insert(&summarize::Summary {
21617                id: 0,
21618                symbol_name: "lib.rs".to_string(),
21619                file_path: "src/lib.rs".to_string(),
21620                content_hash: summarize::content_hash(&content),
21621                summary: "cached summary".to_string(),
21622                entities: None,
21623                relationships: None,
21624                concept_labels: None,
21625                extracted_at: "1700000000".to_string(),
21626                model: "test".to_string(),
21627                tokens_input: Some(100),
21628                tokens_output: Some(50),
21629            })
21630            .unwrap();
21631        drop(summary_db);
21632
21633        let lock_path = summarize::writer_lock_path(&dir.path().join(".tsift/summaries.db"));
21634        let _lock = hold_writer_lock(&lock_path);
21635
21636        let err = cmd_summarize(
21637            None,
21638            None,
21639            Some(PathBuf::from("src")),
21640            false,
21641            false,
21642            dir.path(),
21643            false,
21644            true,
21645            false,
21646            false,
21647            false,
21648        )
21649        .unwrap_err();
21650        let message = err.to_string();
21651
21652        assert!(message.contains("another tsift summarize extractor is already active"));
21653        assert!(message.contains("tsift summarize --extract"));
21654    }
21655
21656    #[test]
21657    fn summarize_stats_fails_closed_when_cache_missing() {
21658        let dir = tempfile::tempdir().unwrap();
21659        let err = cmd_summarize(
21660            None,
21661            None,
21662            None,
21663            false,
21664            true,
21665            dir.path(),
21666            false,
21667            false,
21668            false,
21669            false,
21670            false,
21671        )
21672        .unwrap_err();
21673
21674        assert!(
21675            err.to_string().contains("no summaries.db found"),
21676            "got: {err}"
21677        );
21678        assert!(!dir.path().join(".tsift/summaries.db").exists());
21679    }
21680
21681    #[test]
21682    fn summarize_stats_uses_snapshot_fallback_when_rollback_journal_is_locked() {
21683        let dir = tempfile::tempdir().unwrap();
21684        let summary_db =
21685            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
21686        summary_db
21687            .insert(&summarize::Summary {
21688                id: 0,
21689                symbol_name: "alpha_helper".to_string(),
21690                file_path: "src/lib.rs".to_string(),
21691                content_hash: "hash1".to_string(),
21692                summary: "cached summary".to_string(),
21693                entities: None,
21694                relationships: None,
21695                concept_labels: None,
21696                extracted_at: "1700000000".to_string(),
21697                model: "claude-haiku-4-5-20251001".to_string(),
21698                tokens_input: Some(100),
21699                tokens_output: Some(40),
21700            })
21701            .unwrap();
21702        drop(summary_db);
21703        let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/summaries.db"));
21704
21705        let result = cmd_summarize(
21706            None,
21707            None,
21708            None,
21709            false,
21710            true,
21711            dir.path(),
21712            false,
21713            false,
21714            false,
21715            false,
21716            false,
21717        );
21718
21719        assert!(result.is_ok());
21720    }
21721
21722    #[test]
21723    fn summarize_symbol_query_uses_snapshot_fallback_when_rollback_journal_is_locked() {
21724        let dir = tempfile::tempdir().unwrap();
21725        let summary_db =
21726            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
21727        summary_db
21728            .insert(&summarize::Summary {
21729                id: 0,
21730                symbol_name: "alpha_helper".to_string(),
21731                file_path: "src/lib.rs".to_string(),
21732                content_hash: "hash1".to_string(),
21733                summary: "cached summary".to_string(),
21734                entities: None,
21735                relationships: None,
21736                concept_labels: None,
21737                extracted_at: "1700000000".to_string(),
21738                model: "claude-haiku-4-5-20251001".to_string(),
21739                tokens_input: Some(100),
21740                tokens_output: Some(40),
21741            })
21742            .unwrap();
21743        drop(summary_db);
21744        let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/summaries.db"));
21745
21746        let result = cmd_summarize(
21747            Some("alpha_helper".to_string()),
21748            None,
21749            None,
21750            false,
21751            false,
21752            dir.path(),
21753            false,
21754            true,
21755            false,
21756            false,
21757            false,
21758        );
21759
21760        assert!(result.is_ok());
21761    }
21762
21763    #[test]
21764    fn summarize_cmd_uses_ancestor_project_root_for_nested_paths() {
21765        let dir = tempfile::tempdir().unwrap();
21766        let nested = dir.path().join("src/nested");
21767        std::fs::create_dir_all(&nested).unwrap();
21768
21769        let summary_db =
21770            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
21771        summary_db
21772            .insert(&summarize::Summary {
21773                id: 0,
21774                symbol_name: "alpha_helper".to_string(),
21775                file_path: "src/lib.rs".to_string(),
21776                content_hash: "hash1".to_string(),
21777                summary: "cached summary".to_string(),
21778                entities: None,
21779                relationships: None,
21780                concept_labels: None,
21781                extracted_at: "1700000000".to_string(),
21782                model: "claude-haiku-4-5-20251001".to_string(),
21783                tokens_input: Some(100),
21784                tokens_output: Some(40),
21785            })
21786            .unwrap();
21787
21788        let result = cmd_summarize(
21789            Some("alpha_helper".to_string()),
21790            None,
21791            None,
21792            false,
21793            false,
21794            &nested,
21795            false,
21796            true,
21797            false,
21798            false,
21799            false,
21800        );
21801
21802        assert!(result.is_ok());
21803        assert!(!nested.join(".tsift/summaries.db").exists());
21804    }
21805
21806    #[test]
21807    fn summarize_extract_uses_matching_scoped_index_for_workspace_file() {
21808        let dir = tempfile::tempdir().unwrap();
21809        std::fs::write(
21810            dir.path().join(".gitmodules"),
21811            r#"[submodule "src/alpha"]
21812	path = src/alpha
21813	url = https://example.com/alpha
21814[submodule "src/beta"]
21815	path = src/beta
21816	url = https://example.com/beta
21817"#,
21818        )
21819        .unwrap();
21820
21821        let alpha_root = dir.path().join("src/alpha");
21822        let beta_root = dir.path().join("src/beta");
21823        std::fs::create_dir_all(alpha_root.join("src")).unwrap();
21824        std::fs::create_dir_all(beta_root.join("src")).unwrap();
21825        std::fs::create_dir_all(dir.path().join(".tsift/indexes/alpha")).unwrap();
21826        std::fs::create_dir_all(dir.path().join(".tsift/indexes/beta")).unwrap();
21827        std::fs::write(alpha_root.join("src/lib.rs"), "fn alpha_helper() {}\n").unwrap();
21828        let beta_file = beta_root.join("src/lib.rs");
21829        std::fs::write(&beta_file, "fn beta_helper() {}\n").unwrap();
21830        std::fs::write(dir.path().join(".tsift/indexes/alpha/index.db"), "").unwrap();
21831        std::fs::write(dir.path().join(".tsift/indexes/beta/index.db"), "").unwrap();
21832
21833        let context = find_symbols_db_for_file(dir.path(), &beta_file)
21834            .unwrap()
21835            .expect("expected matching scoped index");
21836
21837        assert_eq!(
21838            context.db_path,
21839            dir.path().join(".tsift/indexes/beta/index.db")
21840        );
21841        assert_eq!(context.source_root, beta_root);
21842    }
21843
21844    // --- apply_edit_op ---
21845
21846    fn make_op(old: &str, new: &str, replace_all: bool) -> EditOp {
21847        EditOp {
21848            file: PathBuf::from("dummy.txt"),
21849            old: old.to_string(),
21850            new: new.to_string(),
21851            replace_all,
21852        }
21853    }
21854
21855    #[test]
21856    fn edit_replaces_single_occurrence() {
21857        let content = "hello world";
21858        let op = make_op("world", "rust", false);
21859        let (result, count) = apply_edit_op(content, &op).unwrap();
21860        assert_eq!(result, "hello rust");
21861        assert_eq!(count, 1);
21862    }
21863
21864    #[test]
21865    fn edit_replace_all_replaces_every_occurrence() {
21866        let content = "foo foo foo";
21867        let op = make_op("foo", "bar", true);
21868        let (result, count) = apply_edit_op(content, &op).unwrap();
21869        assert_eq!(result, "bar bar bar");
21870        assert_eq!(count, 3);
21871    }
21872
21873    #[test]
21874    fn edit_fails_when_old_not_found() {
21875        let content = "hello world";
21876        let op = make_op("missing", "x", false);
21877        assert!(apply_edit_op(content, &op).is_err());
21878    }
21879
21880    #[test]
21881    fn edit_fails_when_ambiguous_without_replace_all() {
21882        let content = "foo foo";
21883        let op = make_op("foo", "bar", false);
21884        let err = apply_edit_op(content, &op).unwrap_err();
21885        assert!(err.to_string().contains("2 times"), "got: {}", err);
21886    }
21887
21888    #[test]
21889    fn edit_fails_when_old_equals_new() {
21890        let content = "hello";
21891        let op = make_op("hello", "hello", false);
21892        assert!(apply_edit_op(content, &op).is_err());
21893    }
21894
21895    #[test]
21896    fn edit_batch_rolls_back_when_later_swap_fails() {
21897        let dir = tempfile::tempdir().unwrap();
21898        let alpha = dir.path().join("alpha.txt");
21899        let beta = dir.path().join("beta.txt");
21900        fs::write(&alpha, "alpha old\n").unwrap();
21901        fs::write(&beta, "beta old\n").unwrap();
21902
21903        let batch = EditBatch {
21904            edits: vec![
21905                EditOp {
21906                    file: alpha.clone(),
21907                    old: "old".to_string(),
21908                    new: "new".to_string(),
21909                    replace_all: false,
21910                },
21911                EditOp {
21912                    file: beta.clone(),
21913                    old: "old".to_string(),
21914                    new: "new".to_string(),
21915                    replace_all: false,
21916                },
21917            ],
21918        };
21919
21920        let plan = build_edit_plan(&batch).unwrap();
21921        let err = match apply_edit_plan_atomically_inner(plan, |commit_index, _| {
21922            if commit_index == 1 {
21923                bail!("simulated swap failure");
21924            }
21925            Ok(())
21926        }) {
21927            Ok(_) => panic!("expected simulated swap failure"),
21928            Err(err) => err,
21929        };
21930
21931        assert!(err.to_string().contains("simulated swap failure"));
21932        assert_eq!(fs::read_to_string(&alpha).unwrap(), "alpha old\n");
21933        assert_eq!(fs::read_to_string(&beta).unwrap(), "beta old\n");
21934    }
21935
21936    // --- SQL introspection ---
21937
21938    fn setup_test_db() -> (tempfile::NamedTempFile, Connection) {
21939        let tmp = tempfile::NamedTempFile::new().unwrap();
21940        let conn = Connection::open(tmp.path()).unwrap();
21941        conn.execute_batch(
21942            "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT);
21943             INSERT INTO users VALUES (1, 'Alice', 'alice@example.com');
21944             INSERT INTO users VALUES (2, 'Bob', NULL);
21945             CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER NOT NULL, title TEXT NOT NULL, body TEXT,
21946                 FOREIGN KEY(user_id) REFERENCES users(id));
21947             INSERT INTO posts VALUES (1, 1, 'Hello World', 'First post');
21948             INSERT INTO posts VALUES (2, 1, 'Second', NULL);
21949             INSERT INTO posts VALUES (3, 2, 'Bob post', 'Content here');"
21950        ).unwrap();
21951        (tmp, conn)
21952    }
21953
21954    // --- rewrite_command ---
21955
21956    #[test]
21957    fn rewrite_rg_simple_pattern() {
21958        let result = rewrite_command("rg authenticate");
21959        assert_eq!(
21960            result,
21961            Some("tsift --envelope search \"authenticate\" --exact --budget normal".to_string(),)
21962        );
21963    }
21964
21965    #[test]
21966    fn rewrite_rg_with_path() {
21967        let result = rewrite_command("rg authenticate src/");
21968        assert_eq!(
21969            result,
21970            Some(
21971                "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\""
21972                    .to_string()
21973            )
21974        );
21975    }
21976
21977    #[test]
21978    fn rewrite_rg_with_flags_ignored() {
21979        let result = rewrite_command("rg -i authenticate src/");
21980        assert_eq!(
21981            result,
21982            Some(
21983                "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\""
21984                    .to_string()
21985            )
21986        );
21987    }
21988
21989    #[test]
21990    fn rewrite_rg_with_type_flag() {
21991        // -t rs takes a value, should be skipped; pattern is next positional
21992        let result = rewrite_command("rg -t rs authenticate");
21993        assert_eq!(
21994            result,
21995            Some("tsift --envelope search \"authenticate\" --exact --budget normal".to_string())
21996        );
21997    }
21998
21999    #[test]
22000    fn rewrite_rg_pipe_passthrough() {
22001        // Pipe chains can't be translated — pass through
22002        let result = rewrite_command("rg authenticate | head -5");
22003        assert_eq!(result, None);
22004    }
22005
22006    #[test]
22007    fn rewrite_rg_files_passthrough() {
22008        let result = rewrite_command("rg --files src/tsift .agent-doc logs");
22009        assert_eq!(result, None);
22010    }
22011
22012    #[test]
22013    fn rewrite_find_passthrough() {
22014        let result = rewrite_command("find src/tsift .agent-doc -type f -name '*.rs'");
22015        assert_eq!(result, None);
22016    }
22017
22018    #[test]
22019    fn rewrite_grep_recursive() {
22020        let result = rewrite_command("grep -r authenticate src/");
22021        assert_eq!(
22022            result,
22023            Some(
22024                "tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\""
22025                    .to_string()
22026            )
22027        );
22028    }
22029
22030    #[test]
22031    fn rewrite_grep_non_recursive_passthrough() {
22032        let result = rewrite_command("grep authenticate file.txt");
22033        assert_eq!(result, None);
22034    }
22035
22036    #[test]
22037    fn rewrite_tsift_passthrough() {
22038        let result = rewrite_command("tsift search \"foo\"");
22039        assert_eq!(result, Some("tsift search \"foo\"".to_string()));
22040    }
22041
22042    #[test]
22043    fn rewrite_run_tsift_search_disables_timeout_by_default() {
22044        let result = effective_rewrite_run_command("tsift search hookcaps --exact --path /tmp/x");
22045        assert_eq!(
22046            result,
22047            "tsift search hookcaps --exact --path /tmp/x --timeout 0"
22048        );
22049    }
22050
22051    #[test]
22052    fn rewrite_run_preserves_explicit_search_timeout() {
22053        let result = effective_rewrite_run_command(
22054            "tsift search hookcaps --exact --path /tmp/x --timeout 5",
22055        );
22056        assert_eq!(
22057            result,
22058            "tsift search hookcaps --exact --path /tmp/x --timeout 5"
22059        );
22060    }
22061
22062    #[test]
22063    fn rewrite_unrelated_passthrough() {
22064        let result = rewrite_command("echo cargo build");
22065        assert_eq!(result, None);
22066    }
22067
22068    #[test]
22069    fn rewrite_rg_quoted_pattern() {
22070        let result = rewrite_command("rg \"fn main\"");
22071        assert_eq!(
22072            result,
22073            Some("tsift --envelope search \"fn main\" --exact --budget normal".to_string())
22074        );
22075    }
22076
22077    #[test]
22078    fn rewrite_git_diff_to_diff_digest() {
22079        let result = rewrite_command("git diff");
22080        assert_eq!(result, Some("tsift diff-digest .".to_string()));
22081    }
22082
22083    #[test]
22084    fn rewrite_git_diff_cached_to_diff_digest() {
22085        let result = rewrite_command("git diff --cached");
22086        assert_eq!(result, Some("tsift diff-digest --cached .".to_string()));
22087    }
22088
22089    #[test]
22090    fn rewrite_git_diff_with_path_to_diff_digest() {
22091        let result = rewrite_command("git diff -- src/");
22092        assert_eq!(result, Some("tsift diff-digest \"src/\"".to_string()));
22093    }
22094
22095    #[test]
22096    fn rewrite_git_diff_with_revision_passthrough() {
22097        let result = rewrite_command("git diff HEAD~1");
22098        assert_eq!(result, None);
22099    }
22100
22101    #[test]
22102    fn rewrite_git_show_to_revision_diff_digest() {
22103        let result = rewrite_command("git show HEAD~1");
22104        assert_eq!(
22105            result,
22106            Some("tsift diff-digest --revision \"HEAD~1\" .".to_string())
22107        );
22108    }
22109
22110    #[test]
22111    fn rewrite_git_log_patch_history_to_revision_diff_digest() {
22112        let result = rewrite_command("git log -p -1 HEAD~2");
22113        assert_eq!(
22114            result,
22115            Some("tsift diff-digest --revision \"HEAD~2\" .".to_string())
22116        );
22117    }
22118
22119    #[test]
22120    fn rewrite_cat_long_agent_doc_session_to_session_digest() {
22121        let dir = tempfile::tempdir().unwrap();
22122        let session = dir.path().join("tsift.md");
22123        let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
22124        for index in 0..90 {
22125            body.push_str(&format!("❯ prompt {index}?\n"));
22126        }
22127        fs::write(&session, body).unwrap();
22128
22129        let result = rewrite_command(&format!("cat {}", shell_quote(session.to_str().unwrap())));
22130        assert_eq!(
22131            result,
22132            Some(format!(
22133                "tsift session-digest --path {} --input {} --source markdown",
22134                shell_quote(&resolve_digest_context_path(&session)),
22135                shell_quote(session.to_str().unwrap())
22136            ))
22137        );
22138    }
22139
22140    #[test]
22141    fn rewrite_head_long_claude_jsonl_to_session_digest() {
22142        let dir = tempfile::tempdir().unwrap();
22143        let session = dir.path().join("session.jsonl");
22144        let line =
22145            r#"{"message":{"role":"assistant","content":[{"type":"text","text":"❯ do [#yyhd]"}]}}"#;
22146        let body = std::iter::repeat_n(line, 120)
22147            .collect::<Vec<_>>()
22148            .join("\n");
22149        fs::write(&session, format!("{body}\n")).unwrap();
22150
22151        let result = rewrite_command(&format!(
22152            "head -n 120 {}",
22153            shell_quote(session.to_str().unwrap())
22154        ));
22155        assert_eq!(
22156            result,
22157            Some(format!(
22158                "tsift session-digest --path {} --input {} --source claude-jsonl",
22159                shell_quote(&resolve_digest_context_path(&session)),
22160                shell_quote(session.to_str().unwrap())
22161            ))
22162        );
22163    }
22164
22165    #[test]
22166    fn rewrite_head_long_codex_jsonl_to_session_digest() {
22167        let dir = tempfile::tempdir().unwrap();
22168        let session = dir.path().join("codex.jsonl");
22169        let line = r#"{"type":"event_msg","payload":{"type":"user_message","message":"do [#cdxlog]. spec-test-build-install-commit-push"}}"#;
22170        let body = std::iter::repeat_n(line, 120)
22171            .collect::<Vec<_>>()
22172            .join("\n");
22173        fs::write(&session, format!("{body}\n")).unwrap();
22174
22175        let result = rewrite_command(&format!(
22176            "head -n 120 {}",
22177            shell_quote(session.to_str().unwrap())
22178        ));
22179        assert_eq!(
22180            result,
22181            Some(format!(
22182                "tsift session-digest --path {} --input {} --source codex-jsonl",
22183                shell_quote(&resolve_digest_context_path(&session)),
22184                shell_quote(session.to_str().unwrap())
22185            ))
22186        );
22187    }
22188
22189    #[test]
22190    fn rewrite_small_transcript_window_passthrough() {
22191        let dir = tempfile::tempdir().unwrap();
22192        let session = dir.path().join("session.jsonl");
22193        let line = r#"{"message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}"#;
22194        let body = std::iter::repeat_n(line, 120)
22195            .collect::<Vec<_>>()
22196            .join("\n");
22197        fs::write(&session, format!("{body}\n")).unwrap();
22198
22199        let result = rewrite_command(&format!(
22200            "tail -n 20 {}",
22201            shell_quote(session.to_str().unwrap())
22202        ));
22203        assert_eq!(result, None);
22204    }
22205
22206    #[test]
22207    fn rewrite_sed_large_agent_doc_range_to_session_digest() {
22208        let dir = tempfile::tempdir().unwrap();
22209        let session = dir.path().join("tsift.md");
22210        let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
22211        for index in 0..120 {
22212            body.push_str(&format!("### Re: topic {index}\n"));
22213        }
22214        fs::write(&session, body).unwrap();
22215
22216        let result = rewrite_command(&format!(
22217            "sed -n '1,120p' {}",
22218            shell_quote(session.to_str().unwrap())
22219        ));
22220        assert_eq!(
22221            result,
22222            Some(format!(
22223                "tsift session-digest --path {} --input {} --source markdown",
22224                shell_quote(&resolve_digest_context_path(&session)),
22225                shell_quote(session.to_str().unwrap())
22226            ))
22227        );
22228    }
22229
22230    #[test]
22231    fn rewrite_cat_large_agent_doc_log_to_session_digest() {
22232        let dir = tempfile::tempdir().unwrap();
22233        let session = dir.path().join("tsift.log");
22234        let line = "[1776528398] claude_start mode=fresh_restart restart_count=1";
22235        let body = std::iter::repeat_n(line, 120)
22236            .collect::<Vec<_>>()
22237            .join("\n");
22238        fs::write(&session, format!("{body}\n")).unwrap();
22239
22240        let result = rewrite_command(&format!("cat {}", shell_quote(session.to_str().unwrap())));
22241        assert_eq!(
22242            result,
22243            Some(format!(
22244                "tsift session-digest --path {} --input {} --source agent-doc-log",
22245                shell_quote(&resolve_digest_context_path(&session)),
22246                shell_quote(session.to_str().unwrap())
22247            ))
22248        );
22249    }
22250
22251    #[test]
22252    fn rewrite_session_reads_prefer_submodule_root_for_digest_path() {
22253        let dir = tempfile::tempdir().unwrap();
22254        fs::write(
22255            dir.path().join(".gitmodules"),
22256            r#"[submodule "src/tsift"]
22257	path = src/tsift
22258	url = https://example.com/tsift
22259"#,
22260        )
22261        .unwrap();
22262        let submodule = dir.path().join("src/tsift");
22263        fs::create_dir_all(submodule.join("tasks")).unwrap();
22264        fs::write(
22265            submodule.join(".git"),
22266            "gitdir: ../../.git/modules/src/tsift\n",
22267        )
22268        .unwrap();
22269        let session = submodule.join("tasks/plan.md");
22270        let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
22271        for index in 0..90 {
22272            body.push_str(&format!("❯ prompt {index}?\n"));
22273        }
22274        fs::write(&session, body).unwrap();
22275
22276        let result = rewrite_command(&format!("cat {}", shell_quote(session.to_str().unwrap())));
22277
22278        assert_eq!(
22279            result,
22280            Some(format!(
22281                "tsift session-digest --path {} --input {} --source markdown",
22282                shell_quote(submodule.to_str().unwrap()),
22283                shell_quote(session.to_str().unwrap())
22284            ))
22285        );
22286    }
22287
22288    #[test]
22289    fn rewrite_regular_markdown_read_passthrough() {
22290        let dir = tempfile::tempdir().unwrap();
22291        let readme = dir.path().join("README.md");
22292        let body = std::iter::repeat_n("plain markdown", 120)
22293            .collect::<Vec<_>>()
22294            .join("\n");
22295        fs::write(&readme, format!("{body}\n")).unwrap();
22296
22297        let result = rewrite_command(&format!("cat {}", shell_quote(readme.to_str().unwrap())));
22298        assert_eq!(result, None);
22299    }
22300
22301    #[test]
22302    fn rewrite_cat_large_source_to_source_read_in_indexed_repo() {
22303        let dir = tempfile::tempdir().unwrap();
22304        write_empty_root_index(dir.path());
22305        let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);
22306
22307        let result = rewrite_command(&format!("cat {}", shell_quote(source.to_str().unwrap())));
22308
22309        assert_eq!(
22310            result,
22311            Some(format!(
22312                "tsift --envelope source-read \"src/lib.rs\" --path {} --start 1 --lines 80 --budget normal",
22313                shell_quote(&dir.path().to_string_lossy())
22314            ))
22315        );
22316    }
22317
22318    #[test]
22319    fn rewrite_head_small_source_window_passthrough() {
22320        let dir = tempfile::tempdir().unwrap();
22321        write_empty_root_index(dir.path());
22322        let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);
22323
22324        let result = rewrite_command(&format!(
22325            "head -n 20 {}",
22326            shell_quote(source.to_str().unwrap())
22327        ));
22328
22329        assert_eq!(result, None);
22330    }
22331
22332    #[test]
22333    fn rewrite_sed_large_source_range_to_source_read() {
22334        let dir = tempfile::tempdir().unwrap();
22335        write_empty_root_index(dir.path());
22336        let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 200);
22337
22338        let result = rewrite_command(&format!(
22339            "sed -n '40,160p' {}",
22340            shell_quote(source.to_str().unwrap())
22341        ));
22342
22343        assert_eq!(
22344            result,
22345            Some(format!(
22346                "tsift --envelope source-read \"src/lib.rs\" --path {} --start 40 --lines 121 --budget normal",
22347                shell_quote(&dir.path().to_string_lossy())
22348            ))
22349        );
22350    }
22351
22352    #[test]
22353    fn rewrite_tail_large_source_window_preserves_tail_anchor() {
22354        let dir = tempfile::tempdir().unwrap();
22355        write_empty_root_index(dir.path());
22356        let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 200);
22357
22358        let result = rewrite_command(&format!(
22359            "tail -n 120 {}",
22360            shell_quote(source.to_str().unwrap())
22361        ));
22362
22363        assert_eq!(
22364            result,
22365            Some(format!(
22366                "tsift --envelope source-read \"src/lib.rs\" --path {} --start 81 --lines 120 --budget normal",
22367                shell_quote(&dir.path().to_string_lossy())
22368            ))
22369        );
22370    }
22371
22372    #[test]
22373    fn rewrite_large_non_source_read_passthrough_even_when_indexed() {
22374        let dir = tempfile::tempdir().unwrap();
22375        write_empty_root_index(dir.path());
22376        let text = write_repeated_lines(&dir.path().join("notes.txt"), "plain text", 120);
22377
22378        let result = rewrite_command(&format!("cat {}", shell_quote(text.to_str().unwrap())));
22379
22380        assert_eq!(result, None);
22381    }
22382
22383    #[test]
22384    fn rewrite_large_source_read_passthrough_without_index() {
22385        let dir = tempfile::tempdir().unwrap();
22386        let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);
22387
22388        let result = rewrite_command(&format!("cat {}", shell_quote(source.to_str().unwrap())));
22389
22390        assert_eq!(result, None);
22391    }
22392
22393    #[test]
22394    fn rewrite_cargo_test_to_digest_runner() {
22395        let result = rewrite_command("cargo test --lib");
22396        assert_eq!(
22397            result,
22398            Some(
22399                "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"cargo test --lib\" --runner \"cargo\"".to_string()
22400            )
22401        );
22402    }
22403
22404    #[test]
22405    fn rewrite_pytest_to_digest_runner() {
22406        let result = rewrite_command("pytest -q tests/test_cli.py");
22407        assert_eq!(
22408            result,
22409            Some(
22410                "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"pytest -q tests/test_cli.py\" --runner \"pytest\"".to_string()
22411            )
22412        );
22413    }
22414
22415    #[test]
22416    fn rewrite_python_m_pytest_to_digest_runner() {
22417        let result = rewrite_command("python -m pytest tests/test_cli.py");
22418        assert_eq!(
22419            result,
22420            Some(
22421                "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"python -m pytest tests/test_cli.py\" --runner \"pytest\"".to_string()
22422            )
22423        );
22424    }
22425
22426    #[test]
22427    fn rewrite_cargo_build_to_log_digest_runner() {
22428        let result = rewrite_command("cargo build --release");
22429        assert_eq!(
22430            result,
22431            Some(
22432                "tsift --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\"".to_string()
22433            )
22434        );
22435    }
22436
22437    #[test]
22438    fn rewrite_cargo_install_to_log_digest_runner() {
22439        let result = rewrite_command("cargo install --path . --force");
22440        assert_eq!(
22441            result,
22442            Some(
22443                "tsift --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo install --path . --force\"".to_string()
22444            )
22445        );
22446    }
22447
22448    #[test]
22449    fn rewrite_metacharacter_command_passthrough() {
22450        let result = rewrite_command("cargo test | head");
22451        assert_eq!(result, None);
22452    }
22453
22454    #[test]
22455    fn rewrite_output_cap_detects_search_even_with_global_flag() {
22456        let cap = rewrite_output_cap("tsift --compact search foo").expect("cap");
22457        assert_eq!(cap.max_lines, 50);
22458        assert_eq!(cap.strip_prefix, Some("Strategy:"));
22459    }
22460
22461    #[test]
22462    fn rewrite_output_cap_skips_structured_output() {
22463        assert!(rewrite_output_cap("tsift search foo --json").is_none());
22464        assert!(rewrite_output_cap("tsift --schema graph foo").is_none());
22465        assert!(rewrite_output_cap("tsift --envelope search foo").is_none());
22466    }
22467
22468    #[test]
22469    fn rewrite_output_format_forwards_envelope_to_digest_runner() {
22470        let command = rewrite_command("cargo test --lib").expect("rewrite");
22471        let forwarded = apply_rewrite_output_format(
22472            &command,
22473            OutputFormat {
22474                json_output: true,
22475                compact: false,
22476                pretty: false,
22477                terse: false,
22478                ultra_terse: false,
22479                schema: false,
22480                envelope: true,
22481            },
22482        );
22483        assert_eq!(
22484            forwarded,
22485            "tsift --envelope digest-runner --kind \"test\" --path \".\" --shell-command \"cargo test --lib\" --runner \"cargo\""
22486        );
22487    }
22488
22489    #[test]
22490    fn rewrite_output_format_forwards_json_when_requested() {
22491        let command = rewrite_command("cargo build --release").expect("rewrite");
22492        let forwarded = apply_rewrite_output_format(
22493            &command,
22494            OutputFormat {
22495                json_output: true,
22496                compact: false,
22497                pretty: true,
22498                terse: false,
22499                ultra_terse: false,
22500                schema: false,
22501                envelope: false,
22502            },
22503        );
22504        assert_eq!(
22505            forwarded,
22506            "tsift --pretty --envelope digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\""
22507        );
22508    }
22509
22510    #[test]
22511    fn output_cap_strips_search_header_and_truncates() {
22512        let capped = apply_output_cap(
22513            b"Strategy: exact | Indexed: 0 | Skipped: 0\n\nline1\nline2\nline3\n",
22514            OutputCap {
22515                max_lines: 2,
22516                strip_prefix: Some("Strategy:"),
22517            },
22518        );
22519        assert_eq!(
22520            capped,
22521            "line1\nline2\n... (+1 more lines; rerun the underlying tsift command directly for the full output)\n"
22522        );
22523    }
22524
22525    #[test]
22526    fn sql_schema_overview_lists_tables() {
22527        let (_tmp, conn) = setup_test_db();
22528        let tables = schema_overview(&conn).unwrap();
22529        let names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
22530        assert_eq!(names, &["posts", "users"]);
22531    }
22532
22533    #[test]
22534    fn sql_schema_overview_row_counts() {
22535        let (_tmp, conn) = setup_test_db();
22536        let tables = schema_overview(&conn).unwrap();
22537        let users = tables.iter().find(|t| t.name == "users").unwrap();
22538        let posts = tables.iter().find(|t| t.name == "posts").unwrap();
22539        assert_eq!(users.row_count, 2);
22540        assert_eq!(posts.row_count, 3);
22541    }
22542
22543    #[test]
22544    fn sql_table_columns_metadata() {
22545        let (_tmp, conn) = setup_test_db();
22546        let cols = table_columns(&conn, "users").unwrap();
22547        assert_eq!(cols.len(), 3);
22548        assert_eq!(cols[0].name, "id");
22549        assert!(cols[0].pk);
22550        assert_eq!(cols[1].name, "name");
22551        assert!(cols[1].notnull);
22552        assert_eq!(cols[2].name, "email");
22553        assert!(!cols[2].notnull);
22554    }
22555
22556    #[test]
22557    fn sql_execute_query_returns_rows() {
22558        let (_tmp, conn) = setup_test_db();
22559        let (columns, rows) =
22560            execute_query(&conn, "SELECT name, email FROM users ORDER BY id").unwrap();
22561        assert_eq!(columns, &["name", "email"]);
22562        assert_eq!(rows.len(), 2);
22563        assert_eq!(rows[0][0], serde_json::json!("Alice"));
22564        assert_eq!(rows[0][1], serde_json::json!("alice@example.com"));
22565        assert_eq!(rows[1][1], serde_json::Value::Null);
22566    }
22567
22568    #[test]
22569    fn sql_execute_query_aggregate() {
22570        let (_tmp, conn) = setup_test_db();
22571        let (columns, rows) = execute_query(&conn, "SELECT COUNT(*) as cnt FROM posts").unwrap();
22572        assert_eq!(columns, &["cnt"]);
22573        assert_eq!(rows[0][0], serde_json::json!(3));
22574    }
22575
22576    #[test]
22577    fn sql_execute_query_join() {
22578        let (_tmp, conn) = setup_test_db();
22579        let (_cols, rows) = execute_query(
22580            &conn,
22581            "SELECT u.name, p.title FROM users u JOIN posts p ON u.id = p.user_id ORDER BY p.id",
22582        )
22583        .unwrap();
22584        assert_eq!(rows.len(), 3);
22585        assert_eq!(rows[0][0], serde_json::json!("Alice"));
22586        assert_eq!(rows[2][0], serde_json::json!("Bob"));
22587    }
22588
22589    #[test]
22590    fn sql_open_db_read_only() {
22591        let (tmp, _conn) = setup_test_db();
22592        drop(_conn);
22593        let ro_conn = open_db(tmp.path()).unwrap();
22594        let result = ro_conn.execute("INSERT INTO users VALUES (99, 'Fail', NULL)", []);
22595        assert!(result.is_err(), "read-only connection should reject writes");
22596    }
22597
22598    #[test]
22599    fn sql_empty_table_schema() {
22600        let tmp = tempfile::NamedTempFile::new().unwrap();
22601        let conn = Connection::open(tmp.path()).unwrap();
22602        conn.execute_batch("CREATE TABLE empty_tbl (id INTEGER PRIMARY KEY, data BLOB)")
22603            .unwrap();
22604        let tables = schema_overview(&conn).unwrap();
22605        assert_eq!(tables[0].row_count, 0);
22606        assert_eq!(tables[0].columns.len(), 2);
22607    }
22608
22609    // --- graph command ---
22610
22611    fn setup_graph_index() -> tempfile::TempDir {
22612        let dir = tempfile::tempdir().unwrap();
22613        std::fs::write(
22614            dir.path().join("main.rs"),
22615            "fn helper() { println!(\"hi\"); }\nfn main() { helper(); Vec::new(); }",
22616        )
22617        .unwrap();
22618        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
22619        db.apply_changes(dir.path()).unwrap();
22620        dir
22621    }
22622
22623    fn setup_traversal_project() -> tempfile::TempDir {
22624        let dir = setup_graph_index();
22625        let task_dir = dir.path().join("tasks/software");
22626        std::fs::create_dir_all(&task_dir).unwrap();
22627        std::fs::write(
22628            task_dir.join("tsift.md"),
22629            r#"---
22630agent_doc_session: tsift-v0.1
22631agent_doc_format: template
22632---
22633
22634## Exchange
22635
22636<!-- agent:exchange patch=append -->
22637❯ do [#kgnv]
22638Completed `#kgnv`; touched files `main.rs`; tests `cargo test traversal_graph`; follow-up `#gfix`.
22639<!-- /agent:exchange -->
22640
22641<!-- agent:queue -->
22642dispatch #spec-test-build-install-commit-push
22643- do [#kgnv]
22644<!-- /agent:queue -->
22645
22646## Backlog
22647
22648<!-- agent:backlog -->
22649- [ ] [#kgnv] Fix helper traversal handles while preserving graph navigation.
22650<!-- /agent:backlog -->
22651"#,
22652        )
22653        .unwrap();
22654        dir
22655    }
22656
22657    fn resolve_ast_span_node<'a>(
22658        graph: &'a TraversalGraphBuild,
22659        label: &str,
22660        symbol_kind: &str,
22661    ) -> &'a TraversalNode {
22662        graph
22663            .nodes
22664            .values()
22665            .find(|node| {
22666                node.kind == "ast_span"
22667                    && node.label == label
22668                    && node.properties.get("symbol_kind") == Some(&symbol_kind.to_string())
22669            })
22670            .unwrap_or_else(|| panic!("missing ast_span {symbol_kind} {label}"))
22671    }
22672
22673    fn setup_multilingual_ast_navigation_project() -> tempfile::TempDir {
22674        let dir = tempfile::tempdir().unwrap();
22675        std::fs::write(
22676            dir.path().join("rust.rs"),
22677            r#"mod fixture_nav_rust_mod {
22678    pub fn fixture_nav_rust_helper() {}
22679    pub fn fixture_nav_rust_entry() {
22680        fixture_nav_rust_helper();
22681    }
22682}
22683"#,
22684        )
22685        .unwrap();
22686        std::fs::write(
22687            dir.path().join("python.py"),
22688            r#"def fixture_nav_python_helper():
22689    return 1
22690
22691def fixture_nav_python_entry():
22692    return fixture_nav_python_helper()
22693"#,
22694        )
22695        .unwrap();
22696        std::fs::write(
22697            dir.path().join("typescript.ts"),
22698            r#"export function fixture_nav_typescript_entry(): number {
22699    return fixtureNavTsHelper();
22700}
22701
22702function fixtureNavTsHelper(): number {
22703    return 1;
22704}
22705"#,
22706        )
22707        .unwrap();
22708        std::fs::write(
22709            dir.path().join("javascript.js"),
22710            r#"function fixture_nav_javascript_entry() {
22711    return fixtureNavJsHelper();
22712}
22713
22714function fixtureNavJsHelper() {
22715    return 1;
22716}
22717"#,
22718        )
22719        .unwrap();
22720        std::fs::write(
22721            dir.path().join("kotlin.kt"),
22722            r#"fun fixture_nav_kotlin_entry(): Int {
22723    return fixtureNavKotlinHelper()
22724}
22725
22726fun fixtureNavKotlinHelper(): Int = 1
22727"#,
22728        )
22729        .unwrap();
22730        std::fs::write(
22731            dir.path().join("zig.zig"),
22732            r#"pub fn fixture_nav_zig_entry() i32 {
22733    return fixtureNavZigHelper();
22734}
22735
22736fn fixtureNavZigHelper() i32 {
22737    return 1;
22738}
22739"#,
22740        )
22741        .unwrap();
22742        std::fs::write(
22743            dir.path().join("bash.sh"),
22744            r#"#!/usr/bin/env bash
22745fixture_nav_bash_entry() {
22746    fixture_nav_bash_helper
22747}
22748
22749fixture_nav_bash_helper() {
22750    echo ok
22751}
22752
22753alias fixture_nav_bash_alias='echo alias'
22754"#,
22755        )
22756        .unwrap();
22757        std::fs::write(
22758            dir.path().join("README.md"),
22759            r#"# Fixture Guide
22760
22761## Fixture Section
22762
22763- Fixture step
22764  - Nested fixture step
22765
22766```python
22767def fixture_nav_markdown_embedded():
22768    return 1
22769```
22770"#,
22771        )
22772        .unwrap();
22773
22774        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
22775        db.apply_changes(dir.path()).unwrap();
22776        dir
22777    }
22778
22779    fn assert_cli_expand_command_parses(command: &str) {
22780        let args = shell_split(command)
22781            .into_iter()
22782            .map(str::to_string)
22783            .collect::<Vec<_>>();
22784        assert!(
22785            try_parse_cli(args).is_ok(),
22786            "expand command should parse as a tsift CLI command: {command}"
22787        );
22788    }
22789
22790    fn setup_multiplicity_project() -> tempfile::TempDir {
22791        let dir = tempfile::tempdir().unwrap();
22792        std::fs::write(
22793            dir.path().join("Cargo.toml"),
22794            r#"[workspace]
22795members = ["crates/core-lib", "crates/cli-app"]
22796"#,
22797        )
22798        .unwrap();
22799        std::fs::create_dir_all(dir.path().join("crates/core-lib/src")).unwrap();
22800        std::fs::write(
22801            dir.path().join("crates/core-lib/Cargo.toml"),
22802            r#"[package]
22803name = "core-lib"
22804
22805[lib]
22806name = "core_lib"
22807
22808[features]
22809default = []
22810"#,
22811        )
22812        .unwrap();
22813        std::fs::write(
22814            dir.path().join("crates/core-lib/src/lib.rs"),
22815            "pub fn run() {}\n",
22816        )
22817        .unwrap();
22818        std::fs::create_dir_all(dir.path().join("crates/cli-app/src")).unwrap();
22819        std::fs::write(
22820            dir.path().join("crates/cli-app/Cargo.toml"),
22821            r#"[package]
22822name = "cli-app"
22823
22824[[bin]]
22825name = "cli-app"
22826
22827[dependencies]
22828core-lib = { path = "../core-lib" }
22829"#,
22830        )
22831        .unwrap();
22832        std::fs::write(
22833            dir.path().join("crates/cli-app/src/main.rs"),
22834            "use core_lib::run;\nfn main() { run(); }\n",
22835        )
22836        .unwrap();
22837        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
22838        db.apply_changes(dir.path()).unwrap();
22839
22840        let task_dir = dir.path().join("tasks/software");
22841        std::fs::create_dir_all(&task_dir).unwrap();
22842        std::fs::write(
22843            task_dir.join("tsift.md"),
22844            r#"---
22845agent_doc_session: tsift-multiplicity
22846agent_doc_format: template
22847---
22848
22849## Backlog
22850
22851<!-- agent:backlog -->
22852- [ ] [#corepkg] Update the core-lib Cargo package ownership model.
22853<!-- /agent:backlog -->
22854"#,
22855        )
22856        .unwrap();
22857        init_git_repo(dir.path());
22858        dir
22859    }
22860
22861    fn setup_dependency_dag_project() -> tempfile::TempDir {
22862        let dir = tempfile::tempdir().unwrap();
22863        std::fs::write(
22864            dir.path().join("main.rs"),
22865            "fn shared_helper() {}\nfn main() { shared_helper(); }\n",
22866        )
22867        .unwrap();
22868        std::fs::write(
22869            dir.path().join("Cargo.toml"),
22870            "[package]\nname = \"dag-fixture\"\n",
22871        )
22872        .unwrap();
22873        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
22874        db.apply_changes(dir.path()).unwrap();
22875
22876        let task_dir = dir.path().join("tasks/software");
22877        std::fs::create_dir_all(&task_dir).unwrap();
22878        std::fs::write(
22879            task_dir.join("tsift.md"),
22880            r#"---
22881agent_doc_session: tsift-dag
22882agent_doc_format: template
22883---
22884
22885## Exchange
22886
22887<!-- agent:exchange patch=append -->
22888Completed `#alpha`; touched files `main.rs`; tests `cargo test dependency_dag`; follow-up `#gamma`.
22889<!-- /agent:exchange -->
22890
22891## Backlog
22892
22893<!-- agent:backlog -->
22894- [ ] [#prep] Prepare Cargo.toml configuration before shared helper work.
22895- [ ] [#alpha] Update shared_helper in main.rs after #prep.
22896- [ ] [#beta] Refactor shared_helper tests in main.rs.
22897- [ ] [#gamma] Follow-up review for graph navigation.
22898<!-- /agent:backlog -->
22899"#,
22900        )
22901        .unwrap();
22902        dir
22903    }
22904
22905    fn setup_dependency_dag_cycle_project() -> tempfile::TempDir {
22906        let dir = setup_graph_index();
22907        let task_dir = dir.path().join("tasks/software");
22908        std::fs::create_dir_all(&task_dir).unwrap();
22909        std::fs::write(
22910            task_dir.join("tsift.md"),
22911            r#"---
22912agent_doc_session: tsift-dag-cycle
22913agent_doc_format: template
22914---
22915
22916## Backlog
22917
22918<!-- agent:backlog -->
22919- [ ] [#left] Left side depends on #right.
22920- [ ] [#right] Right side depends on #left.
22921<!-- /agent:backlog -->
22922"#,
22923        )
22924        .unwrap();
22925        dir
22926    }
22927
22928    fn seed_traversal_semantic_summaries(dir: &Path) {
22929        let summary_db = summarize::SummaryDb::open(&dir.join(".tsift/summaries.db")).unwrap();
22930        summary_db
22931            .insert(&summarize::Summary {
22932                id: 0,
22933                symbol_name: "helper".to_string(),
22934                file_path: "main.rs".to_string(),
22935                content_hash: "hash-main".to_string(),
22936                summary: "helper builds graph navigation handles for traversal.".to_string(),
22937                entities: Some(vec![
22938                    summarize::Entity {
22939                        name: "helper".to_string(),
22940                        kind: "function".to_string(),
22941                        description: "Builds graph navigation handles.".to_string(),
22942                    },
22943                    summarize::Entity {
22944                        name: "TraversalGraph".to_string(),
22945                        kind: "type".to_string(),
22946                        description: "Carries GraphStore-backed traversal rows.".to_string(),
22947                    },
22948                ]),
22949                relationships: Some(vec![summarize::Relationship {
22950                    from: "helper".to_string(),
22951                    to: "TraversalGraph".to_string(),
22952                    kind: "uses".to_string(),
22953                }]),
22954                concept_labels: Some(vec![
22955                    "graph navigation".to_string(),
22956                    "semantic extraction".to_string(),
22957                ]),
22958                extracted_at: "1700000000".to_string(),
22959                model: "test-model".to_string(),
22960                tokens_input: Some(10),
22961                tokens_output: Some(5),
22962            })
22963            .unwrap();
22964    }
22965
22966    fn seed_tsift_memory_graph_db(dir: &Path) {
22967        let db = dir.join(".tsift").join("memory.db");
22968        let store = MemoryStore::open_or_create(&db).unwrap();
22969        let project = dir.to_string_lossy().to_string();
22970        let observation = MemoryEvent::new(
22971            MemoryEventKind::ImportedObservation,
22972            "claude-mem:observations:1",
22973            [
22974                "Graph memory adapter",
22975                "read-only projection",
22976                "graph-db should retrieve tsift memory observations",
22977                "Project memory is queried from .tsift/memory.db",
22978                "graph memory, tsift memory, semantic query",
22979            ]
22980            .join("\n\n"),
22981        )
22982        .with_session_id("claude-session-a")
22983        .with_observed_at_unix(1_700_000_000)
22984        .with_import("claude-mem", "observations:1")
22985        .with_metadata("project", project.clone())
22986        .with_metadata("observation_type", "fact")
22987        .with_metadata("prompt_number", "7")
22988        .with_metadata("discovery_tokens", "42")
22989        .with_metadata("content_hash", "hash-observation-1");
22990        store.insert_event(&observation).unwrap();
22991
22992        let summary = MemoryEvent::new(
22993            MemoryEventKind::ImportedSessionSummary,
22994            "claude-mem:session_summaries:2",
22995            [
22996                "Query old memory from graph-db",
22997                "Read-only tsift memory SQLite projection",
22998                "Semantic graph rows can point at existing memory",
22999                "Projected source and session nodes",
23000                "Keep capture ownership inside tsift-memory",
23001                "summary note",
23002            ]
23003            .join("\n\n"),
23004        )
23005        .with_session_id("claude-session-a")
23006        .with_observed_at_unix(1_700_000_010)
23007        .with_import("claude-mem", "session_summaries:2")
23008        .with_metadata("project", project)
23009        .with_metadata("prompt_number", "8")
23010        .with_metadata("discovery_tokens", "36");
23011        store.insert_event(&summary).unwrap();
23012
23013        let prompt = MemoryEvent::new(
23014            MemoryEventKind::ImportedUserPrompt,
23015            "claude-mem:user_prompts:3",
23016            "How can graph-db query tsift memory semantic history?",
23017        )
23018        .with_session_id("claude-session-a")
23019        .with_observed_at_unix(1_700_000_020)
23020        .with_import("claude-mem", "user_prompts:3")
23021        .with_metadata("prompt_number", "9");
23022        store.insert_event(&prompt).unwrap();
23023    }
23024
23025    #[test]
23026    fn graph_callers_query() {
23027        let dir = setup_graph_index();
23028        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23029        let callers = db.callers_of("helper").unwrap();
23030        assert_eq!(callers.len(), 1);
23031        assert_eq!(callers[0].caller_name, "main");
23032    }
23033
23034    #[test]
23035    fn graph_callees_query() {
23036        let dir = setup_graph_index();
23037        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23038        let callees = db.callees_of("main").unwrap();
23039        let names: Vec<&str> = callees.iter().map(|e| e.callee_name.as_str()).collect();
23040        assert!(names.contains(&"helper"));
23041        assert!(names.contains(&"new"));
23042    }
23043
23044    #[test]
23045    fn graph_no_callers_returns_empty() {
23046        let dir = setup_graph_index();
23047        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23048        let callers = db.callers_of("nonexistent").unwrap();
23049        assert!(callers.is_empty());
23050    }
23051
23052    #[test]
23053    fn graph_cmd_autoindexes_missing_index_by_default() {
23054        let dir = tempfile::tempdir().unwrap();
23055        std::fs::write(
23056            dir.path().join("main.rs"),
23057            "fn helper() {}\nfn main() { helper(); }\n",
23058        )
23059        .unwrap();
23060        let result = cmd_graph(
23061            "helper",
23062            dir.path(),
23063            true,
23064            false,
23065            None,
23066            20,
23067            false,
23068            true,
23069            false,
23070            false,
23071            false,
23072            false,
23073            false,
23074            TagpathSearchOpts::default(),
23075        );
23076
23077        assert!(result.is_ok());
23078        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
23079        let summary = db.compute_changes(dir.path()).unwrap();
23080        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
23081    }
23082
23083    #[test]
23084    fn traversal_graph_has_stable_typed_handles() {
23085        let dir = setup_traversal_project();
23086        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23087        let graph_again = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23088
23089        let file = resolve_traversal_node(&graph, "main.rs").unwrap();
23090        let symbol = resolve_traversal_node(&graph, "helper").unwrap();
23091        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
23092        let session = resolve_traversal_node(&graph, "tsift-v0.1").unwrap();
23093
23094        assert!(file.handle.starts_with("gfil-"));
23095        assert!(symbol.handle.starts_with("gsym-"));
23096        assert!(backlog.handle.starts_with("gbak-"));
23097        assert!(session.handle.starts_with("gses-"));
23098
23099        assert_eq!(
23100            symbol.handle,
23101            resolve_traversal_node(&graph_again, "helper")
23102                .unwrap()
23103                .handle
23104        );
23105        assert_eq!(
23106            backlog.handle,
23107            resolve_traversal_node(&graph_again, "#kgnv")
23108                .unwrap()
23109                .handle
23110        );
23111    }
23112
23113    #[test]
23114    fn traversal_graph_links_backlog_items_to_code_tokens() {
23115        let dir = setup_traversal_project();
23116        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23117        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
23118        let helper = resolve_traversal_node(&graph, "helper").unwrap();
23119
23120        assert!(graph.edges.iter().any(|edge| {
23121            edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
23122        }));
23123    }
23124
23125    #[test]
23126    fn session_hinted_traversal_skips_global_call_edges() {
23127        let dir = setup_traversal_project();
23128        let session = dir.path().join("tasks/software/tsift.md");
23129        let bounded = build_traversal_graph_source(dir.path(), &session, None).unwrap();
23130        let backlog = resolve_traversal_node(&bounded, "#kgnv").unwrap();
23131        let helper = resolve_traversal_node(&bounded, "helper").unwrap();
23132
23133        assert!(bounded.edges.iter().any(|edge| {
23134            edge.from == backlog.handle && edge.to == helper.handle && edge.relation == "mentions"
23135        }));
23136        assert!(
23137            !bounded.edges.iter().any(|edge| edge.relation == "calls"),
23138            "session-hinted graph-db projections should not materialize unrelated global call edges"
23139        );
23140
23141        let full = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
23142        assert!(
23143            full.edges.iter().any(|edge| edge.relation == "calls"),
23144            "root/full projections still carry the complete indexed call graph"
23145        );
23146    }
23147
23148    #[test]
23149    fn agent_doc_task_path_infers_matching_workspace_scope() {
23150        let dir = tempfile::tempdir().unwrap();
23151        std::fs::create_dir_all(dir.path().join("src/tsift")).unwrap();
23152        std::fs::create_dir_all(dir.path().join("tasks/software")).unwrap();
23153        std::fs::write(
23154            dir.path().join(".gitmodules"),
23155            "[submodule \"src/tsift\"]\n\tpath = src/tsift\n\turl = https://example.invalid/tsift.git\n",
23156        )
23157        .unwrap();
23158        let task = dir.path().join("tasks/software/tsift.md");
23159        std::fs::write(&task, "# tsift\n").unwrap();
23160
23161        let targets = resolve_search_index_targets(dir.path(), &task, None, false).unwrap();
23162        let query_db_path = resolve_query_db_path(dir.path(), &task, None).unwrap();
23163        let cfg = config::Config::load(dir.path()).unwrap();
23164
23165        assert_eq!(targets.len(), 1);
23166        assert_eq!(targets[0].scope_name.as_deref(), Some("tsift"));
23167        assert_eq!(targets[0].source_root, dir.path().join("src/tsift"));
23168        assert!(
23169            targets[0]
23170                .db_path
23171                .ends_with(".tsift/indexes/tsift/index.db")
23172        );
23173        assert_eq!(query_db_path, cfg.db_path_for(dir.path(), "tsift"));
23174    }
23175
23176    #[test]
23177    fn cargo_package_scope_selector_indexes_package_db() {
23178        let dir = setup_multiplicity_project();
23179        let targets =
23180            resolve_search_index_targets(dir.path(), dir.path(), Some("core_lib"), false).unwrap();
23181
23182        assert_eq!(targets.len(), 1);
23183        assert_eq!(targets[0].scope_name.as_deref(), Some("core-lib"));
23184        assert_eq!(targets[0].source_root, dir.path().join("crates/core-lib"));
23185        assert!(
23186            targets[0]
23187                .db_path
23188                .ends_with(".tsift/indexes/cargo/core-lib/index.db")
23189        );
23190
23191        cmd_index(
23192            dir.path(),
23193            false,
23194            false,
23195            false,
23196            false,
23197            true,
23198            false,
23199            Some("core_lib"),
23200            false,
23201            true,
23202            false,
23203            false,
23204            false,
23205            false,
23206        )
23207        .unwrap();
23208        assert!(targets[0].db_path.exists());
23209    }
23210
23211    #[test]
23212    fn path_inference_prefers_nested_cargo_package_without_submodule() {
23213        let dir = setup_multiplicity_project();
23214        let source = dir.path().join("crates/cli-app/src/main.rs");
23215        let targets = resolve_search_index_targets(dir.path(), &source, None, false).unwrap();
23216
23217        assert_eq!(targets.len(), 1);
23218        assert_eq!(targets[0].scope_name.as_deref(), Some("cli-app"));
23219        assert_eq!(targets[0].source_root, dir.path().join("crates/cli-app"));
23220    }
23221
23222    #[test]
23223    fn traversal_graph_projects_cargo_multiplicity_nodes_and_edges() {
23224        let dir = setup_multiplicity_project();
23225        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23226        let workspace = resolve_traversal_node(&graph, "root cargo workspace").unwrap();
23227        let core = resolve_traversal_node(&graph, "core-lib").unwrap();
23228        let cli = resolve_traversal_node(&graph, "cli-app").unwrap();
23229        let core_file = resolve_traversal_node(&graph, "crates/core-lib/src/lib.rs").unwrap();
23230
23231        assert_eq!(workspace.kind, "cargo_workspace");
23232        assert_eq!(core.kind, "cargo_package");
23233        assert_eq!(
23234            core.properties.get("features"),
23235            Some(&"default".to_string())
23236        );
23237        assert!(graph.edges.iter().any(|edge| {
23238            edge.from == workspace.handle
23239                && edge.to == core.handle
23240                && edge.relation == "contains_package"
23241        }));
23242        assert!(graph.edges.iter().any(|edge| {
23243            edge.from == core.handle && edge.to == core_file.handle && edge.relation == "owns_file"
23244        }));
23245        assert!(graph.edges.iter().any(|edge| {
23246            edge.from == cli.handle
23247                && edge.to == core.handle
23248                && (edge.relation == "declares_dependency" || edge.relation == "uses_crate")
23249        }));
23250    }
23251
23252    #[test]
23253    fn conflict_matrix_uses_cargo_package_mentions_as_ownership_evidence() {
23254        let dir = setup_multiplicity_project();
23255        let session = dir.path().join("tasks/software/tsift.md");
23256        let report =
23257            build_conflict_matrix_report(&session, None, &["corepkg".to_string()], 3, 8, 20)
23258                .unwrap();
23259
23260        assert!(report.per_target_fail_closed.is_empty());
23261        let candidate = report
23262            .candidates
23263            .iter()
23264            .find(|candidate| candidate.target == "corepkg")
23265            .unwrap();
23266        assert!(
23267            candidate
23268                .owned_files
23269                .iter()
23270                .any(|file| file == "crates/core-lib/Cargo.toml"),
23271            "{:?}",
23272            candidate.owned_files
23273        );
23274    }
23275
23276    #[test]
23277    fn traversal_graph_links_agent_doc_queue_job_packets_to_backlog() {
23278        let dir = setup_traversal_project();
23279        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23280        let job = resolve_traversal_node(&graph, "do #kgnv").unwrap();
23281        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
23282
23283        assert_eq!(job.kind, "job_packet");
23284        assert!(job.handle.starts_with("gjob-"));
23285        assert!(graph.edges.iter().any(|edge| {
23286            edge.from == job.handle && edge.to == backlog.handle && edge.relation == "targets"
23287        }));
23288
23289        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23290        let jobs = store.nodes_by_kind("job_packet").unwrap();
23291        assert!(
23292            jobs.iter()
23293                .any(|node| node.properties.get("ref_id") == Some(&"kgnv".to_string())),
23294            "expected queued job packet in graph store, got {jobs:?}"
23295        );
23296    }
23297
23298    #[test]
23299    fn traversal_graph_includes_routes_and_handler_edges() {
23300        let dir = tempfile::tempdir().unwrap();
23301        std::fs::write(
23302            dir.path().join("api.py"),
23303            r#"@router.get("/items")
23304def list_items():
23305    return []
23306"#,
23307        )
23308        .unwrap();
23309        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23310        db.apply_changes(dir.path()).unwrap();
23311
23312        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23313        let route = resolve_traversal_node(&graph, "/items").unwrap();
23314        let handler = resolve_traversal_node(&graph, "list_items").unwrap();
23315
23316        assert_eq!(route.kind, "route");
23317        assert!(graph.edges.iter().any(|edge| {
23318            edge.from == route.handle && edge.to == handler.handle && edge.relation == "handled_by"
23319        }));
23320    }
23321
23322    #[test]
23323    fn traversal_graph_projects_rust_ast_navigation_edges() {
23324        let dir = tempfile::tempdir().unwrap();
23325        std::fs::write(
23326            dir.path().join("main.rs"),
23327            r#"mod api {
23328    pub fn helper() {}
23329    pub fn handler() { helper(); }
23330}
23331
23332fn main() { api::handler(); }
23333"#,
23334        )
23335        .unwrap();
23336        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23337        db.apply_changes(dir.path()).unwrap();
23338
23339        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23340        let api = resolve_ast_span_node(&graph, "api", "mod");
23341        let helper = resolve_ast_span_node(&graph, "helper", "function");
23342        let handler = resolve_ast_span_node(&graph, "handler", "function");
23343
23344        assert_eq!(helper.kind, "ast_span");
23345        assert!(helper.handle.starts_with("span-"));
23346        assert_eq!(helper.properties.get("language"), Some(&"rust".to_string()));
23347        assert!(graph.edges.iter().any(|edge| {
23348            edge.from == api.handle && edge.to == helper.handle && edge.relation == "contains"
23349        }));
23350        assert!(graph.edges.iter().any(|edge| {
23351            edge.from == api.handle && edge.to == helper.handle && edge.relation == "child"
23352        }));
23353        assert!(graph.edges.iter().any(|edge| {
23354            edge.from == helper.handle && edge.to == api.handle && edge.relation == "parent"
23355        }));
23356        assert!(graph.edges.iter().any(|edge| {
23357            edge.from == helper.handle
23358                && edge.to == handler.handle
23359                && edge.relation == "next_sibling"
23360        }));
23361        assert!(graph.edges.iter().any(|edge| {
23362            edge.from == handler.handle
23363                && edge.to == helper.handle
23364                && edge.relation == "previous_sibling"
23365        }));
23366        assert!(graph.edges.iter().any(|edge| {
23367            edge.from == helper.handle
23368                && edge.to == api.handle
23369                && edge.relation == "enclosing_module"
23370        }));
23371        assert!(graph.edges.iter().any(|edge| {
23372            edge.from == handler.handle && edge.to == helper.handle && edge.relation == "calls"
23373        }));
23374
23375        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23376        let ast_nodes = store.nodes_by_kind("ast_span").unwrap();
23377        assert!(
23378            ast_nodes.iter().any(|node| node.id == helper.handle
23379                && node.properties.get("symbol_kind") == Some(&"function".to_string())),
23380            "expected helper AST span in graph store, got {ast_nodes:?}"
23381        );
23382        assert!(
23383            store
23384                .outgoing_edges(&helper.handle, Some("parent"))
23385                .unwrap()
23386                .iter()
23387                .any(|edge| edge.to_id == api.handle),
23388            "expected persisted AST parent edge"
23389        );
23390    }
23391
23392    #[test]
23393    fn traversal_graph_projects_markdown_section_block_edges() {
23394        let dir = tempfile::tempdir().unwrap();
23395        std::fs::write(
23396            dir.path().join("README.md"),
23397            "# Guide\n\n- Setup\n- Verify\n\n```rust\nfn demo() {}\n```\n",
23398        )
23399        .unwrap();
23400        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
23401        db.apply_changes(dir.path()).unwrap();
23402
23403        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23404        let guide = resolve_ast_span_node(&graph, "Guide", "heading");
23405        let code = resolve_ast_span_node(&graph, "rust", "code_block");
23406        let embedded = resolve_ast_span_node(&graph, "demo", "function");
23407        let list_item = graph
23408            .nodes
23409            .values()
23410            .find(|node| {
23411                node.kind == "ast_span"
23412                    && node.properties.get("symbol_kind") == Some(&"list_item".to_string())
23413                    && node.properties.get("section_handle") == Some(&guide.handle)
23414            })
23415            .expect("missing Markdown list item AST span");
23416
23417        assert_eq!(
23418            code.properties.get("markdown_block_kind"),
23419            Some(&"fenced_code_block".to_string())
23420        );
23421        assert_eq!(
23422            guide.properties.get("heading_level"),
23423            Some(&"1".to_string())
23424        );
23425        assert_eq!(
23426            embedded.properties.get("embedded"),
23427            Some(&"true".to_string())
23428        );
23429        assert_eq!(
23430            embedded.properties.get("language"),
23431            Some(&"rust".to_string())
23432        );
23433        assert_eq!(
23434            embedded.properties.get("markdown_block_handle"),
23435            Some(&code.handle)
23436        );
23437        assert!(graph.edges.iter().any(|edge| {
23438            edge.from == guide.handle
23439                && edge.to == code.handle
23440                && edge.relation == "contains_markdown_block"
23441        }));
23442        assert!(graph.edges.iter().any(|edge| {
23443            edge.from == code.handle
23444                && edge.to == guide.handle
23445                && edge.relation == "enclosing_section"
23446        }));
23447        assert!(graph.edges.iter().any(|edge| {
23448            edge.from == guide.handle
23449                && edge.to == list_item.handle
23450                && edge.relation == "contains_markdown_block"
23451        }));
23452        assert!(graph.edges.iter().any(|edge| {
23453            edge.from == code.handle
23454                && edge.to == embedded.handle
23455                && edge.relation == "contains_embedded_symbol"
23456        }));
23457        assert!(graph.edges.iter().any(|edge| {
23458            edge.from == embedded.handle
23459                && edge.to == code.handle
23460                && edge.relation == "embedded_in_fence"
23461        }));
23462        assert!(graph.edges.iter().any(|edge| {
23463            edge.from == guide.handle
23464                && edge.to == embedded.handle
23465                && edge.relation == "contains_embedded_code"
23466        }));
23467
23468        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23469        assert!(
23470            store
23471                .outgoing_edges(&guide.handle, Some("contains_markdown_block"))
23472                .unwrap()
23473                .iter()
23474                .any(|edge| edge.to_id == code.handle),
23475            "expected persisted Markdown section/block edge"
23476        );
23477        assert!(
23478            store
23479                .outgoing_edges(&code.handle, Some("contains_embedded_symbol"))
23480                .unwrap()
23481                .iter()
23482                .any(|edge| edge.to_id == embedded.handle),
23483            "expected persisted Markdown fence/embedded symbol edge"
23484        );
23485    }
23486
23487    #[test]
23488    fn multilingual_ast_navigation_fixture_locks_recall_handles_expands_and_budget() {
23489        let dir = setup_multilingual_ast_navigation_project();
23490        let db =
23491            index::IndexDb::open_read_only_resilient(&dir.path().join(".tsift/index.db")).unwrap();
23492        let symbols = db.all_symbols().unwrap();
23493        let expected_symbols = [
23494            ("rust", "fixture_nav_rust_entry", "function", "rust.rs"),
23495            (
23496                "python",
23497                "fixture_nav_python_entry",
23498                "function",
23499                "python.py",
23500            ),
23501            (
23502                "typescript",
23503                "fixture_nav_typescript_entry",
23504                "function",
23505                "typescript.ts",
23506            ),
23507            (
23508                "javascript",
23509                "fixture_nav_javascript_entry",
23510                "function",
23511                "javascript.js",
23512            ),
23513            (
23514                "kotlin",
23515                "fixture_nav_kotlin_entry",
23516                "function",
23517                "kotlin.kt",
23518            ),
23519            ("zig", "fixture_nav_zig_entry", "function", "zig.zig"),
23520            ("bash", "fixture_nav_bash_entry", "function", "bash.sh"),
23521            ("markdown", "Fixture Section", "heading", "README.md"),
23522            ("markdown", "Fixture step", "list_item", "README.md"),
23523            ("markdown", "python", "code_block", "README.md"),
23524        ];
23525
23526        for (language, name, kind, file) in expected_symbols {
23527            let symbol = symbols
23528                .iter()
23529                .find(|symbol| {
23530                    symbol.language == language
23531                        && symbol.name == name
23532                        && symbol.kind == kind
23533                        && symbol.file.ends_with(file)
23534                })
23535                .unwrap_or_else(|| panic!("missing indexed {language} {kind} {name}"));
23536            assert!(
23537                symbol.start_byte.is_some() && symbol.end_byte.is_some(),
23538                "{language} {name} should carry AST byte spans"
23539            );
23540        }
23541
23542        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23543        let graph_again = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23544        let expected_ast_nodes = [
23545            ("fixture_nav_rust_entry", "function", "rust"),
23546            ("fixture_nav_python_entry", "function", "python"),
23547            ("fixture_nav_typescript_entry", "function", "typescript"),
23548            ("fixture_nav_javascript_entry", "function", "javascript"),
23549            ("fixture_nav_kotlin_entry", "function", "kotlin"),
23550            ("fixture_nav_zig_entry", "function", "zig"),
23551            ("fixture_nav_bash_entry", "function", "bash"),
23552            ("Fixture Section", "heading", "markdown"),
23553            ("Fixture step", "list_item", "markdown"),
23554            ("python", "code_block", "markdown"),
23555            ("fixture_nav_markdown_embedded", "function", "python"),
23556        ];
23557
23558        for (name, kind, language) in expected_ast_nodes {
23559            let node = resolve_ast_span_node(&graph, name, kind);
23560            let repeated = resolve_ast_span_node(&graph_again, name, kind);
23561            assert!(
23562                node.handle.starts_with("span-"),
23563                "{name} handle: {}",
23564                node.handle
23565            );
23566            assert_eq!(
23567                node.handle, repeated.handle,
23568                "{language} {name} handle drifted"
23569            );
23570            assert_eq!(
23571                node.properties.get("language"),
23572                Some(&language.to_string()),
23573                "{name} should keep its language label"
23574            );
23575        }
23576
23577        let markdown_section = resolve_ast_span_node(&graph, "Fixture Section", "heading");
23578        let markdown_code = resolve_ast_span_node(&graph, "python", "code_block");
23579        let embedded = resolve_ast_span_node(&graph, "fixture_nav_markdown_embedded", "function");
23580        assert!(graph.edges.iter().any(|edge| {
23581            edge.from == markdown_section.handle
23582                && edge.to == markdown_code.handle
23583                && edge.relation == "contains_markdown_block"
23584        }));
23585        assert!(graph.edges.iter().any(|edge| {
23586            edge.from == markdown_code.handle
23587                && edge.to == embedded.handle
23588                && edge.relation == "contains_embedded_symbol"
23589        }));
23590        assert!(
23591            graph.nodes.len() <= 80,
23592            "multilingual AST fixture should stay bounded, got {} nodes",
23593            graph.nodes.len()
23594        );
23595        assert!(
23596            graph.edges.len() <= 180,
23597            "multilingual AST fixture should stay bounded, got {} edges",
23598            graph.edges.len()
23599        );
23600
23601        let response = empty_search_response(dir.path(), "lexical");
23602        let symbol_hits = db.symbol_search("fixture_nav_python_entry", 20).unwrap();
23603        let report = build_relative_search_budget_report(
23604            "fixture_nav_python_entry",
23605            "lexical",
23606            dir.path(),
23607            &response,
23608            &symbol_hits,
23609            ResponseBudget::new(Some(8), Some(120)),
23610            &SearchFacetFilters::default(),
23611        );
23612        let report_again = build_relative_search_budget_report(
23613            "fixture_nav_python_entry",
23614            "lexical",
23615            dir.path(),
23616            &response,
23617            &symbol_hits,
23618            ResponseBudget::new(Some(8), Some(120)),
23619            &SearchFacetFilters::default(),
23620        );
23621
23622        let top = report
23623            .ranked
23624            .first()
23625            .expect("ranked preview should not be empty");
23626        assert_eq!(top.source, "symbol_span");
23627        assert_eq!(top.name.as_deref(), Some("fixture_nav_python_entry"));
23628        assert!(top.handle.starts_with("srnk-"));
23629        assert_eq!(top.handle, report_again.ranked[0].handle);
23630        assert!(
23631            top.reasons.iter().any(|reason| reason == "ast_span"),
23632            "expected AST span ranking reason, got {:?}",
23633            top.reasons
23634        );
23635        assert!(report.ranked.len() <= 8);
23636        assert!(report.symbols.len() <= 8);
23637
23638        let symbol = report
23639            .symbols
23640            .iter()
23641            .find(|symbol| symbol.name == "fixture_nav_python_entry")
23642            .expect("missing search preview symbol");
23643        assert_cli_expand_command_parses(&symbol.expand);
23644        let ast = symbol
23645            .ast
23646            .as_ref()
23647            .expect("search symbol should expose AST");
23648        assert_cli_expand_command_parses(&ast.expand.source_window);
23649        assert_cli_expand_command_parses(ast.expand.source_body.as_ref().unwrap());
23650        assert_cli_expand_command_parses(&ast.expand.symbol_read);
23651
23652        let markdown_hits = db.symbol_search("python", 20).unwrap();
23653        let markdown_report = build_relative_search_budget_report(
23654            "python",
23655            "lexical",
23656            dir.path(),
23657            &response,
23658            &markdown_hits,
23659            ResponseBudget::new(Some(8), Some(120)),
23660            &SearchFacetFilters::default(),
23661        );
23662        let markdown_symbol = markdown_report
23663            .symbols
23664            .iter()
23665            .find(|symbol| symbol.kind == "code_block" && symbol.language == "markdown")
23666            .expect("missing Markdown code-block symbol");
23667        let markdown_ast = markdown_symbol
23668            .ast
23669            .as_ref()
23670            .expect("Markdown code block should expose AST");
23671        assert_cli_expand_command_parses(markdown_ast.expand.markdown_ast.as_ref().unwrap());
23672        assert_eq!(
23673            markdown_ast
23674                .span
23675                .markdown
23676                .as_ref()
23677                .unwrap()
23678                .embedded_symbols[0]
23679                .name,
23680            "fixture_nav_markdown_embedded"
23681        );
23682    }
23683
23684    #[test]
23685    fn traversal_neighborhood_handles_prioritizes_high_signal_edges_when_limited() {
23686        let edges = vec![
23687            TraversalEdge {
23688                from: "origin".to_string(),
23689                to: "aaa_low".to_string(),
23690                relation: "unknown".to_string(),
23691                label: None,
23692                weight: 1,
23693            },
23694            TraversalEdge {
23695                from: "origin".to_string(),
23696                to: "zzz_high".to_string(),
23697                relation: "mentions".to_string(),
23698                label: None,
23699                weight: 1,
23700            },
23701        ];
23702
23703        let handles = traversal_neighborhood_handles(&edges, "origin", 1, 2);
23704
23705        assert!(handles.contains("origin"));
23706        assert!(handles.contains("zzz_high"), "{handles:?}");
23707        assert!(!handles.contains("aaa_low"), "{handles:?}");
23708    }
23709
23710    #[test]
23711    fn traversal_materializes_provider_neutral_sqlite_graph() {
23712        let dir = setup_traversal_project();
23713        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23714        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
23715
23716        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23717        let backlog_nodes = store.nodes_by_kind("backlog").unwrap();
23718        assert!(
23719            backlog_nodes.iter().any(|node| node.id == backlog.handle
23720                && node.properties.get("ref_id") == Some(&"kgnv".to_string())),
23721            "expected materialized backlog node, got {backlog_nodes:?}"
23722        );
23723        assert!(
23724            store
23725                .all_nodes()
23726                .unwrap()
23727                .iter()
23728                .any(|node| node.kind == GRAPH_PROJECTION_META_KIND
23729                    && node.properties.get("projection_version")
23730                        == Some(&GRAPH_PROJECTION_VERSION.to_string())),
23731            "expected projection metadata node"
23732        );
23733        let source_handles = store.nodes_by_kind("source_handle").unwrap();
23734        assert!(
23735            source_handles
23736                .iter()
23737                .any(|node| node.properties.get("file") == Some(&"main.rs".to_string())),
23738            "expected bounded source_handle rows, got {source_handles:?}"
23739        );
23740        let worker_context = store.nodes_by_kind("worker_context").unwrap();
23741        assert!(
23742            worker_context
23743                .iter()
23744                .any(|node| node.properties.get("target")
23745                    == Some(&"tasks/software/tsift.md".to_string())),
23746            "expected bounded worker_context rows, got {worker_context:?}"
23747        );
23748        let worker_results = store.nodes_by_kind("worker_result").unwrap();
23749        assert!(
23750            worker_results.iter().any(|node| {
23751                node.properties.get("ref_id") == Some(&"kgnv".to_string())
23752                    && node.properties.get("status") == Some(&"completed".to_string())
23753                    && node.properties.get("touched_files") == Some(&"main.rs".to_string())
23754                    && node.properties.get("follow_up_ids") == Some(&"gfix".to_string())
23755            }),
23756            "expected worker_result rows, got {worker_results:?}"
23757        );
23758    }
23759
23760    #[test]
23761    fn traversal_projection_materializes_cached_semantic_rows() {
23762        let dir = setup_traversal_project();
23763        seed_traversal_semantic_summaries(dir.path());
23764        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
23765        let helper = resolve_traversal_node(&graph, "helper").unwrap();
23766        let concept = resolve_traversal_node(&graph, "graph navigation").unwrap();
23767        let entity = resolve_traversal_node(&graph, "TraversalGraph").unwrap();
23768
23769        assert_eq!(concept.kind, "semantic_concept");
23770        assert_eq!(entity.kind, "semantic_entity");
23771        assert!(concept.handle.starts_with("gcon-"));
23772        assert!(entity.handle.starts_with("gent-"));
23773
23774        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23775        assert!(
23776            store
23777                .nodes_by_kind("semantic_concept")
23778                .unwrap()
23779                .iter()
23780                .any(|node| node.label == "semantic extraction"
23781                    && node.properties.contains_key("embedding")),
23782            "expected persisted concept embeddings"
23783        );
23784        assert!(
23785            store
23786                .outgoing_edges(&helper.handle, Some("mentions_concept"))
23787                .unwrap()
23788                .iter()
23789                .any(|edge| edge.to_id == concept.handle),
23790            "expected helper symbol to link to cached summary concept"
23791        );
23792        assert!(
23793            store
23794                .outgoing_edges(
23795                    &semantic_entity_handle("helper", "function"),
23796                    Some("semantic_relation")
23797                )
23798                .unwrap()
23799                .iter()
23800                .any(|edge| edge.to_id == entity.handle
23801                    && edge.properties.get("relationship_kind") == Some(&"uses".to_string())),
23802            "expected LLM relationship rows projected into GraphStore"
23803        );
23804    }
23805
23806    #[test]
23807    fn traversal_projection_materializes_tsift_memory_rows() {
23808        let dir = setup_traversal_project();
23809        seed_tsift_memory_graph_db(dir.path());
23810        let memory_db = dir.path().join(".tsift").join("memory.db");
23811        let store = MemoryStore::open_or_create(&memory_db).unwrap();
23812        for summary in ["first closeout", "second closeout"] {
23813            let event = MemoryEvent::new(
23814                MemoryEventKind::ResponseSummary,
23815                "tasks/software/tsift.md",
23816                summary,
23817            )
23818            .with_session_id("tasks/software/tsift.md")
23819            .with_observed_at_unix(1_700_000_100);
23820            store.insert_event(&event).unwrap();
23821        }
23822        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
23823        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23824
23825        let native_sources = store
23826            .nodes_by_kind("source_handle")
23827            .unwrap()
23828            .into_iter()
23829            .filter(|node| {
23830                node.properties.get("provider") == Some(&"tsift-memory".to_string())
23831                    && node.properties.get("source_ref")
23832                        == Some(&"tasks/software/tsift.md".to_string())
23833            })
23834            .collect::<Vec<_>>();
23835        assert_eq!(
23836            native_sources.len(),
23837            2,
23838            "same-source native memory events must get distinct source handles"
23839        );
23840
23841        let source = store
23842            .nodes_by_kind("source_handle")
23843            .unwrap()
23844            .into_iter()
23845            .find(|node| {
23846                node.properties.get("source_ref") == Some(&"claude-mem:observations:1".to_string())
23847            })
23848            .expect("expected tsift-memory source handle");
23849        let session = store
23850            .nodes_by_kind("memory_session")
23851            .unwrap()
23852            .into_iter()
23853            .find(|node| {
23854                node.properties.get("provider") == Some(&"tsift-memory".to_string())
23855                    && node.properties.get("session_id") == Some(&"claude-session-a".to_string())
23856            })
23857            .expect("expected tsift-memory session node");
23858        let event = store
23859            .nodes_by_kind("memory_event")
23860            .unwrap()
23861            .into_iter()
23862            .find(|node| {
23863                node.properties.get("source_ref") == Some(&"claude-mem:observations:1".to_string())
23864                    && node.properties.get("provider") == Some(&"tsift-memory".to_string())
23865                    && node.properties.get("imported_from") == Some(&"claude-mem".to_string())
23866            })
23867            .expect("expected tsift-memory event node");
23868        let concept = store
23869            .nodes_by_kind("semantic_concept")
23870            .unwrap()
23871            .into_iter()
23872            .find(|node| {
23873                node.properties.get("provider") == Some(&"tsift-memory".to_string())
23874                    && node.label.contains("Graph memory adapter")
23875                    && node.properties.contains_key("embedding")
23876            })
23877            .expect("expected tsift-memory semantic concept");
23878
23879        assert!(
23880            store
23881                .outgoing_edges(&session.id, Some("records_memory_source"))
23882                .unwrap()
23883                .iter()
23884                .any(|edge| edge.to_id == source.id),
23885            "expected session to link to source handle"
23886        );
23887        assert!(
23888            store
23889                .outgoing_edges(&session.id, Some("records_memory_event"))
23890                .unwrap()
23891                .iter()
23892                .any(|edge| edge.to_id == event.id),
23893            "expected session to link to memory event"
23894        );
23895        assert!(
23896            store
23897                .outgoing_edges(&event.id, Some("projects_source"))
23898                .unwrap()
23899                .iter()
23900                .any(|edge| edge.to_id == source.id),
23901            "expected memory event to project source handle"
23902        );
23903        assert!(
23904            store
23905                .outgoing_edges(&source.id, Some("mentions_concept"))
23906                .unwrap()
23907                .iter()
23908                .any(|edge| edge.to_id == concept.id),
23909            "expected source handle to seed semantic concept"
23910        );
23911
23912        let related = semantic_related_report_from_store(
23913            dir.path(),
23914            None,
23915            "tsift memory graph adapter",
23916            5,
23917            SemanticRelatedKind::Concept,
23918            &store,
23919        )
23920        .unwrap();
23921        assert!(
23922            related
23923                .items
23924                .iter()
23925                .any(|item| item.handle == concept.id && item.score > 0.0),
23926            "expected semantic query to retrieve tsift-memory concept, got {:?}",
23927            related.items
23928        );
23929
23930        let graph_related = graph_db_report_from_store(
23931            dir.path(),
23932            None,
23933            "sqlite",
23934            GraphDbQuery::Related {
23935                query: "tsift memory graph adapter".to_string(),
23936                kind: SemanticRelatedKind::Concept,
23937                depth: 1,
23938                seed_limit: 5,
23939                limit: 20,
23940            },
23941            &store,
23942            sqlite_graph_freshness(&store, "root").unwrap(),
23943            Vec::new(),
23944        )
23945        .unwrap();
23946        assert_eq!(
23947            graph_related
23948                .readiness
23949                .as_ref()
23950                .map(|readiness| readiness.status.as_str()),
23951            Some("ready"),
23952            "tsift-memory semantic rows should satisfy graph-db related readiness"
23953        );
23954        assert!(
23955            graph_related.nodes.iter().any(|node| {
23956                node.kind == "semantic_concept"
23957                    && node.properties.get("provider") == Some(&"tsift-memory".to_string())
23958            }),
23959            "expected related graph output to include tsift-memory semantic rows"
23960        );
23961    }
23962
23963    #[test]
23964    fn semantic_related_query_uses_persisted_graph_embeddings() {
23965        let dir = setup_traversal_project();
23966        seed_traversal_semantic_summaries(dir.path());
23967        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
23968        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23969
23970        let report = semantic_related_report_from_store(
23971            dir.path(),
23972            None,
23973            "graph navigation",
23974            5,
23975            SemanticRelatedKind::Concept,
23976            &store,
23977        )
23978        .unwrap();
23979
23980        assert_eq!(report.embedding_model, SEMANTIC_EMBEDDING_MODEL);
23981        assert!(
23982            report
23983                .items
23984                .iter()
23985                .any(|item| item.label == "graph navigation"
23986                    && item.kind == "semantic_concept"
23987                    && item.score > 0.9),
23988            "expected nearest concept match from graph embeddings, got {:?}",
23989            report.items
23990        );
23991    }
23992
23993    #[test]
23994    fn graph_db_related_query_uses_semantic_seeds_and_incident_neighborhoods() {
23995        let dir = setup_traversal_project();
23996        seed_traversal_semantic_summaries(dir.path());
23997        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
23998        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
23999
24000        let report = graph_db_report_from_store(
24001            dir.path(),
24002            None,
24003            "sqlite",
24004            GraphDbQuery::Related {
24005                query: "graph navigation".to_string(),
24006                kind: SemanticRelatedKind::All,
24007                depth: 1,
24008                seed_limit: 2,
24009                limit: 20,
24010            },
24011            &store,
24012            sqlite_graph_freshness(&store, "root").unwrap(),
24013            Vec::new(),
24014        )
24015        .unwrap();
24016
24017        let knowledge = report.knowledge_retrieval.as_ref().unwrap();
24018        assert_eq!(knowledge.mode, "semantic_seeded_neighborhood");
24019        assert_eq!(knowledge.seed_kind, "all");
24020        assert_eq!(knowledge.depth, 1);
24021        assert_eq!(
24022            report
24023                .readiness
24024                .as_ref()
24025                .map(|readiness| readiness.status.as_str()),
24026            Some("ready")
24027        );
24028        assert!(
24029            knowledge
24030                .diagnostics
24031                .iter()
24032                .any(|diagnostic| diagnostic.contains("incident"))
24033        );
24034        assert!(
24035            report
24036                .semantic_related
24037                .iter()
24038                .any(|item| item.label == "graph navigation"
24039                    && item.kind == "semantic_concept"
24040                    && item.score > 0.9),
24041            "expected natural-language query to seed the graph navigation concept, got {:?}",
24042            report.semantic_related
24043        );
24044        assert!(
24045            report
24046                .nodes
24047                .iter()
24048                .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation")
24049        );
24050        assert!(
24051            report
24052                .nodes
24053                .iter()
24054                .any(|node| node.kind == "symbol" && node.label == "helper"),
24055            "incident expansion from semantic seed should recover source symbols, got {:?}",
24056            report
24057                .nodes
24058                .iter()
24059                .map(|node| (&node.kind, &node.label))
24060                .collect::<Vec<_>>()
24061        );
24062        assert!(
24063            report
24064                .edges
24065                .iter()
24066                .any(|edge| edge.kind == "mentions_concept")
24067        );
24068        assert!(
24069            report.output_budget.as_ref().is_some_and(|budget| budget
24070                .diagnostics
24071                .iter()
24072                .any(|diagnostic| { diagnostic.contains("budget ranking signals") })),
24073            "expected related output budget diagnostics, got {:?}",
24074            report.output_budget
24075        );
24076    }
24077
24078    #[test]
24079    fn graph_db_related_reports_summary_extract_gate_when_summary_cache_empty() {
24080        let dir = setup_graph_index();
24081        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
24082        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
24083
24084        let report = graph_db_report_from_store(
24085            dir.path(),
24086            None,
24087            "sqlite",
24088            GraphDbQuery::Related {
24089                query: "graph navigation".to_string(),
24090                kind: SemanticRelatedKind::All,
24091                depth: 1,
24092                seed_limit: 2,
24093                limit: 20,
24094            },
24095            &store,
24096            sqlite_graph_freshness(&store, "root").unwrap(),
24097            Vec::new(),
24098        )
24099        .unwrap();
24100
24101        let readiness = report.readiness.as_ref().unwrap();
24102        assert_eq!(readiness.status, "blocked");
24103        assert_eq!(readiness.reason, "summary_cache_empty");
24104        assert!(readiness.fail_closed);
24105        assert_eq!(
24106            readiness.next_commands,
24107            vec![
24108                "tsift summarize --extract .".to_string(),
24109                graph_db_refresh_command(dir.path(), None)
24110            ]
24111        );
24112        assert!(
24113            report
24114                .knowledge_retrieval
24115                .as_ref()
24116                .unwrap()
24117                .diagnostics
24118                .iter()
24119                .any(|diagnostic| diagnostic.contains("summary cache empty")
24120                    && diagnostic.contains("graph-db materialized code/session rows")),
24121            "expected related diagnostics to carry readiness gate, got {:?}",
24122            report.knowledge_retrieval.as_ref().unwrap().diagnostics
24123        );
24124    }
24125
24126    #[test]
24127    fn graph_db_semantic_seeded_neighborhood_scores_before_caps() {
24128        let mut nodes = vec![
24129            SubstrateGraphNode::new("seed", "semantic_concept", "graph budget"),
24130            SubstrateGraphNode::new("zzz_high", "symbol", "high_signal"),
24131        ];
24132        let mut edges = vec![SubstrateGraphEdge::new(
24133            "zzz_high",
24134            "seed",
24135            "mentions_concept",
24136        )];
24137        for idx in 0..24 {
24138            let id = format!("aaa_low_{idx:02}");
24139            nodes.push(SubstrateGraphNode::new(
24140                id.clone(),
24141                "note",
24142                format!("low {idx}"),
24143            ));
24144            edges.push(SubstrateGraphEdge::new(id, "seed", "weak_link"));
24145        }
24146        let mut store = SqliteGraphStore::in_memory().unwrap();
24147        store
24148            .replace_projection(&GraphProjection { nodes, edges })
24149            .unwrap();
24150
24151        let subgraph =
24152            graph_db_semantic_seeded_neighborhood(&store, &["seed".to_string()], 1, 3).unwrap();
24153
24154        assert_eq!(subgraph.nodes.len(), 3);
24155        assert_eq!(subgraph.nodes[0].id, "seed");
24156        assert_eq!(
24157            subgraph.nodes[1].id, "zzz_high",
24158            "expected semantic mention edge to survive caps before lexicographic low-signal nodes: {:?}",
24159            subgraph.nodes
24160        );
24161        assert!(subgraph.truncated);
24162        assert!(
24163            subgraph
24164                .diagnostics
24165                .iter()
24166                .any(|diagnostic| diagnostic.contains("per-node edge scan cap")),
24167            "{:?}",
24168            subgraph.diagnostics
24169        );
24170        assert!(
24171            subgraph
24172                .diagnostics
24173                .iter()
24174                .any(|diagnostic| diagnostic.contains("skipped")),
24175            "{:?}",
24176            subgraph.diagnostics
24177        );
24178    }
24179
24180    #[test]
24181    fn conflict_matrix_uses_semantic_rows_as_dispatch_ranking_signal() {
24182        let dir = setup_traversal_project();
24183        seed_traversal_semantic_summaries(dir.path());
24184        init_git_repo(dir.path());
24185        let session = dir.path().join("tasks/software/tsift.md");
24186        refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
24187        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
24188        let freshness = sqlite_graph_freshness(&store, "root").unwrap();
24189        let evidence = graph_db_evidence_report_from_store(GraphDbEvidenceInput {
24190            root: dir.path(),
24191            scope: None,
24192            backend: "sqlite",
24193            target: "kgnv",
24194            depth: 4,
24195            limit: 8,
24196            cursor: None,
24197            store: &store,
24198            freshness,
24199            warnings: Vec::new(),
24200        })
24201        .unwrap();
24202        assert!(
24203            evidence
24204                .semantic_related
24205                .iter()
24206                .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation"),
24207            "expected semantic evidence rows, got {:?}",
24208            evidence
24209                .semantic_related
24210                .iter()
24211                .map(|node| (&node.kind, &node.label))
24212                .collect::<Vec<_>>()
24213        );
24214        assert!(
24215            evidence
24216                .output_budget
24217                .as_ref()
24218                .is_some_and(|budget| budget.diagnostics.iter().any(|diagnostic| {
24219                    diagnostic.contains("semantic_match")
24220                        && diagnostic.contains("source_handle_coverage")
24221                })),
24222            "expected evidence output budget diagnostics, got {:?}",
24223            evidence.output_budget
24224        );
24225
24226        let cached_diff = diff_digest::compute(
24227            dir.path(),
24228            diff_digest::DiffDigestOptions {
24229                cached: true,
24230                revision: None,
24231                max_parsed_files: None,
24232            },
24233        )
24234        .unwrap();
24235        let impact_report = impact::compute(
24236            dir.path(),
24237            impact::ImpactOptions {
24238                cached: true,
24239                revision: None,
24240                scope: None,
24241                limit: 10,
24242            },
24243        )
24244        .unwrap();
24245        let graph_nodes = store.all_nodes().unwrap();
24246        let graph_index = conflict_matrix_graph_index(&graph_nodes);
24247        let semantic_candidate = conflict_matrix_candidate_from_evidence(
24248            dir.path(),
24249            &evidence,
24250            &graph_index,
24251            &cached_diff,
24252            &impact_report,
24253        );
24254        assert!(semantic_candidate.semantic_dispatch_score > 0);
24255        assert!(
24256            semantic_candidate
24257                .semantic_dispatch_reasons
24258                .iter()
24259                .any(|reason| reason.contains("semantic_concept") && reason.contains("owned file")),
24260            "expected semantic ranking explanations, got {:?}",
24261            semantic_candidate.semantic_dispatch_reasons
24262        );
24263        assert!(
24264            semantic_candidate
24265                .semantic_related
24266                .iter()
24267                .any(|item| item.label == "graph navigation")
24268        );
24269
24270        let mut plain_candidate = semantic_candidate.clone();
24271        plain_candidate.target = "plain".to_string();
24272        plain_candidate.semantic_related.clear();
24273        plain_candidate.semantic_dispatch_score = 0;
24274        plain_candidate.semantic_dispatch_reasons.clear();
24275        let mut ranked = [plain_candidate, semantic_candidate];
24276        ranked.sort_by(|left, right| {
24277            left.risk
24278                .cmp(&right.risk)
24279                .then_with(|| left.risk_score.cmp(&right.risk_score))
24280                .then_with(|| {
24281                    right
24282                        .semantic_dispatch_score
24283                        .cmp(&left.semantic_dispatch_score)
24284                })
24285                .then_with(|| left.target.cmp(&right.target))
24286        });
24287        assert_eq!(ranked[0].target, "kgnv");
24288    }
24289
24290    #[test]
24291    fn dependency_dag_extracts_explicit_overlap_and_follow_up_edges() {
24292        let dir = setup_dependency_dag_project();
24293        let session = dir.path().join("tasks/software/tsift.md");
24294        let report = build_dependency_dag_report(dir.path(), None, &[], 4, 12).unwrap();
24295
24296        assert_eq!(report.contract_version, "dependency-dag-v1");
24297        assert_eq!(
24298            report.targets,
24299            vec![
24300                "prep".to_string(),
24301                "alpha".to_string(),
24302                "beta".to_string(),
24303                "gamma".to_string()
24304            ]
24305        );
24306        assert!(report.edges.iter().any(|edge| {
24307            edge.from == "prep" && edge.to == "alpha" && edge.kind == "explicit_depends_on"
24308        }));
24309        assert!(report.edges.iter().any(|edge| {
24310            edge.from == "alpha" && edge.to == "gamma" && edge.kind == "worker_result_follow_up"
24311        }));
24312        assert!(report.edges.iter().any(|edge| {
24313            edge.from == "alpha"
24314                && edge.to == "beta"
24315                && edge.kind == "shared_resource"
24316                && edge.shared_files.contains(&"main.rs".to_string())
24317                && edge.shared_symbols.contains(&"shared_helper".to_string())
24318        }));
24319        assert!(
24320            !report.cycle_diagnostics.has_cycles,
24321            "{:?}",
24322            report.cycle_diagnostics
24323        );
24324        assert_eq!(report.topo_batches[0].targets, vec!["prep".to_string()]);
24325        assert_eq!(report.topo_batches[1].targets, vec!["alpha".to_string()]);
24326        assert!(
24327            report.replay_commands[0].contains("dependency-dag"),
24328            "{:?}",
24329            report.replay_commands
24330        );
24331
24332        cmd_dependency_dag(
24333            &session,
24334            None,
24335            &["alpha".to_string(), "beta".to_string()],
24336            4,
24337            12,
24338            OutputFormat {
24339                json_output: true,
24340                compact: false,
24341                pretty: false,
24342                terse: false,
24343                ultra_terse: false,
24344                schema: false,
24345                envelope: false,
24346            },
24347        )
24348        .unwrap();
24349    }
24350
24351    #[test]
24352    fn dependency_dag_reports_cycles_from_explicit_depends_on_text() {
24353        let dir = setup_dependency_dag_cycle_project();
24354        let report = build_dependency_dag_report(dir.path(), None, &[], 4, 12).unwrap();
24355
24356        assert!(report.cycle_diagnostics.has_cycles);
24357        assert_eq!(
24358            report.cycle_diagnostics.blocked_nodes,
24359            vec!["left".to_string(), "right".to_string()]
24360        );
24361        assert!(report.cycle_diagnostics.cycle_edges.iter().any(|edge| {
24362            edge.from == "left" && edge.to == "right" && edge.kind == "explicit_depends_on"
24363        }));
24364        assert!(report.cycle_diagnostics.cycle_edges.iter().any(|edge| {
24365            edge.from == "right" && edge.to == "left" && edge.kind == "explicit_depends_on"
24366        }));
24367    }
24368
24369    #[test]
24370    fn traversal_projection_queries_match_sqlite_and_convex_stores() {
24371        let dir = setup_traversal_project();
24372        let source_graph = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
24373        let projection = traversal_projection_from_graph(dir.path(), None, &source_graph).unwrap();
24374
24375        let mut sqlite = SqliteGraphStore::in_memory().unwrap();
24376        sqlite.replace_projection(&projection).unwrap();
24377        let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
24378        projection.upsert_into(&convex).unwrap();
24379
24380        let sqlite_graph = traversal_graph_from_store(dir.path(), &sqlite).unwrap();
24381        let convex_graph = traversal_graph_from_store(dir.path(), &convex).unwrap();
24382        assert_eq!(sqlite_graph.nodes.len(), convex_graph.nodes.len());
24383        assert_eq!(sqlite_graph.edges.len(), convex_graph.edges.len());
24384
24385        let sqlite_backlog = resolve_traversal_node(&sqlite_graph, "#kgnv").unwrap();
24386        let convex_helper = resolve_traversal_node(&convex_graph, "helper").unwrap();
24387        assert!(convex_graph.edges.iter().any(|edge| {
24388            edge.from == sqlite_backlog.handle
24389                && edge.to == convex_helper.handle
24390                && edge.relation == "mentions"
24391        }));
24392    }
24393
24394    #[test]
24395    fn graph_db_api_queries_sqlite_neighborhood_and_schema() {
24396        let dir = setup_traversal_project();
24397        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
24398        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
24399        let freshness = sqlite_graph_freshness(&store, "root").unwrap();
24400        assert_eq!(freshness.status, "current");
24401
24402        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
24403        let report = graph_db_report_from_store(
24404            dir.path(),
24405            None,
24406            "sqlite",
24407            GraphDbQuery::Neighborhood {
24408                id: backlog.handle.clone(),
24409                depth: 1,
24410                edge_kind: Some("mentions".to_string()),
24411                cursor: None,
24412                limit: None,
24413                property_filters: Vec::new(),
24414            },
24415            &store,
24416            freshness,
24417            Vec::new(),
24418        )
24419        .unwrap();
24420        assert!(
24421            report
24422                .edges
24423                .iter()
24424                .any(|edge| edge.from_id == backlog.handle && edge.kind == "mentions"),
24425            "expected backlog mention edge, got {:?}",
24426            report.edges
24427        );
24428        assert!(
24429            report.ranked_neighbors.iter().any(|neighbor| {
24430                neighbor.depth == Some(1)
24431                    && neighbor.edge_kinds.iter().any(|kind| kind == "mentions")
24432                    && neighbor.node_id != backlog.handle
24433                    && neighbor.handle_coverage_pct >= 95.0
24434                    && neighbor.duplicate_name_precision >= 0.99
24435            }),
24436            "expected ranked neighborhood neighbors with quality scores, got {:?}",
24437            report.ranked_neighbors
24438        );
24439        assert!(report.ranked_neighbors.len() <= GRAPH_DB_RANKED_NEIGHBOR_CAP);
24440        let ranking_gate = report.neighborhood_ranking_gate.as_ref().unwrap();
24441        assert!(!ranking_gate.ranked_output_default);
24442        assert_eq!(ranking_gate.default_order, "stable_node_id");
24443        assert!(
24444            ranking_gate
24445                .diagnostics
24446                .iter()
24447                .any(|diagnostic| diagnostic.contains("score-capped")),
24448            "{ranking_gate:?}"
24449        );
24450        assert!(
24451            ranking_gate
24452                .required_metrics
24453                .iter()
24454                .any(|metric| metric == "handle_coverage_pct")
24455        );
24456        assert!(
24457            ranking_gate
24458                .required_metrics
24459                .iter()
24460                .any(|metric| metric == "duplicate_name_precision")
24461        );
24462        assert!(
24463            report
24464                .page
24465                .as_ref()
24466                .unwrap()
24467                .diagnostics
24468                .iter()
24469                .any(|diagnostic| diagnostic.contains("idx_graph_edges_from_kind")),
24470            "expected SQLite neighborhood query plan diagnostics, got {:?}",
24471            report.page.as_ref().unwrap().diagnostics
24472        );
24473        let edges_report = graph_db_report_from_store(
24474            dir.path(),
24475            None,
24476            "sqlite",
24477            GraphDbQuery::Edges {
24478                edge_kind: Some("mentions".to_string()),
24479                cursor: None,
24480                limit: Some(2),
24481                property_filters: Vec::new(),
24482            },
24483            &store,
24484            sqlite_graph_freshness(&store, "root").unwrap(),
24485            Vec::new(),
24486        )
24487        .unwrap();
24488        let edge_id = edges_report
24489            .edges
24490            .first()
24491            .map(|edge| edge.id.clone())
24492            .expect("expected at least one paged mentions edge");
24493        assert!(edges_report.edges.iter().any(|edge| edge.id == edge_id));
24494        assert_eq!(
24495            edges_report.page.as_ref().unwrap().returned_edges,
24496            edges_report.edges.len()
24497        );
24498
24499        let edge_report = graph_db_report_from_store(
24500            dir.path(),
24501            None,
24502            "sqlite",
24503            GraphDbQuery::Edge {
24504                id: edge_id.clone(),
24505            },
24506            &store,
24507            sqlite_graph_freshness(&store, "root").unwrap(),
24508            Vec::new(),
24509        )
24510        .unwrap();
24511        assert_eq!(
24512            edge_report.edge.as_ref().map(|e| graph_db_edge_key(&SubstrateGraphEdge::from(e))),
24513            Some(edge_id.clone())
24514        );
24515
24516        let incident_report = graph_db_report_from_store(
24517            dir.path(),
24518            None,
24519            "sqlite",
24520            GraphDbQuery::Incident {
24521                id: backlog.handle.clone(),
24522                edge_kind: Some("mentions".to_string()),
24523                cursor: None,
24524                limit: Some(1),
24525                property_filters: Vec::new(),
24526            },
24527            &store,
24528            sqlite_graph_freshness(&store, "root").unwrap(),
24529            Vec::new(),
24530        )
24531        .unwrap();
24532        assert_eq!(incident_report.page.as_ref().unwrap().returned_edges, 1);
24533        assert!(
24534            incident_report
24535                .edges
24536                .iter()
24537                .all(|edge| edge.from_id == backlog.handle || edge.to_id == backlog.handle),
24538            "{:?}",
24539            incident_report.edges
24540        );
24541
24542        let schema_report = graph_db_report_from_store(
24543            dir.path(),
24544            None,
24545            "sqlite",
24546            GraphDbQuery::Schema,
24547            &store,
24548            sqlite_graph_freshness(&store, "root").unwrap(),
24549            Vec::new(),
24550        )
24551        .unwrap();
24552        assert!(
24553            schema_report
24554                .schema
24555                .unwrap()
24556                .operations
24557                .iter()
24558                .any(|operation| operation.command.starts_with("neighborhood"))
24559        );
24560    }
24561
24562    #[test]
24563    fn graph_db_neighborhood_reports_dropped_by_budget_diagnostics() {
24564        let mut nodes = vec![SubstrateGraphNode::new(
24565            "origin",
24566            "backlog",
24567            "#budgeted-neighborhood",
24568        )];
24569        let mut edges = Vec::new();
24570        for idx in 0..32 {
24571            let id = format!("src-{idx:02}");
24572            nodes.push(
24573                SubstrateGraphNode::new(id.clone(), "source_handle", format!("source {idx}"))
24574                    .with_property("source_ref", format!("fixture:{idx}"))
24575                    .with_property("detail", "x".repeat(600)),
24576            );
24577            edges.push(SubstrateGraphEdge::new("origin", id, "mentions"));
24578        }
24579        let store = SqliteGraphStore::in_memory().unwrap();
24580        GraphProjection { nodes, edges }
24581            .upsert_into(&store)
24582            .unwrap();
24583
24584        let report = graph_db_report_from_store(
24585            Path::new("."),
24586            None,
24587            "fixture",
24588            GraphDbQuery::Neighborhood {
24589                id: "origin".to_string(),
24590                depth: 1,
24591                edge_kind: None,
24592                cursor: None,
24593                limit: None,
24594                property_filters: Vec::new(),
24595            },
24596            &store,
24597            current_graph_db_freshness(),
24598            Vec::new(),
24599        )
24600        .unwrap();
24601        let budget = report.output_budget.as_ref().unwrap();
24602        assert!(budget.selected_nodes < budget.candidate_nodes);
24603        assert!(
24604            budget.dropped_by_budget.iter().any(|drop| {
24605                drop.item == "node"
24606                    && drop.kind == "source_handle"
24607                    && drop.reason == "per_kind_quota"
24608            }),
24609            "expected source_handle budget drops, got {:?}",
24610            budget.dropped_by_budget
24611        );
24612        assert!(report.page.as_ref().unwrap().truncated);
24613        assert!(
24614            report
24615                .page
24616                .as_ref()
24617                .unwrap()
24618                .diagnostics
24619                .iter()
24620                .any(|diagnostic| diagnostic.contains("budget ranking signals")),
24621            "{:?}",
24622            report.page
24623        );
24624    }
24625
24626    #[test]
24627    fn graph_db_output_budget_uses_depth_overrides_for_evidence_rows() {
24628        let mut nodes = vec![SubstrateGraphNode::new("near", "note", "zzz shallow row")];
24629        let mut depth_by_id = BTreeMap::from([("near".to_string(), 1usize)]);
24630        for idx in 0..8 {
24631            let id = format!("far-{idx:02}");
24632            nodes.push(SubstrateGraphNode::new(
24633                id.clone(),
24634                "note",
24635                format!("aaa deeper row {idx}"),
24636            ));
24637            depth_by_id.insert(id, 6);
24638        }
24639
24640        let origin_ids = vec!["target".to_string()];
24641        let budgeted = graph_db_apply_output_budget_with_depths_and_cursor(
24642            &origin_ids,
24643            &BTreeMap::new(),
24644            nodes,
24645            Vec::new(),
24646            Some(3),
24647            Some(&depth_by_id),
24648            None,
24649        );
24650
24651        assert!(
24652            budgeted.nodes.iter().any(|node| node.id == "near"),
24653            "expected the shallow evidence row to outrank deeper rows, got {:?}",
24654            budgeted
24655                .nodes
24656                .iter()
24657                .map(|node| (&node.id, &node.label))
24658                .collect::<Vec<_>>()
24659        );
24660        assert!(
24661            budgeted.report.dropped_by_budget.iter().any(|drop| {
24662                drop.item == "node" && drop.kind == "note" && drop.reason == "per_kind_quota"
24663            }),
24664            "expected node quota drops, got {:?}",
24665            budgeted.report.dropped_by_budget
24666        );
24667        assert!(
24668            budgeted
24669                .report
24670                .diagnostics
24671                .iter()
24672                .any(|diagnostic| diagnostic.contains("depth")),
24673            "{:?}",
24674            budgeted.report.diagnostics
24675        );
24676    }
24677
24678    #[test]
24679    fn evidence_pagination_returns_next_cursor_when_truncated() {
24680        let mut nodes = vec![SubstrateGraphNode::new(
24681            "target".to_string(),
24682            "backlog_item",
24683            "target item".to_string(),
24684        )];
24685        let mut depth_by_id = BTreeMap::new();
24686        depth_by_id.insert("target".to_string(), 0);
24687        for idx in 0..20 {
24688            let id = format!("ev-{idx}");
24689            nodes.push(SubstrateGraphNode::new(
24690                id.clone(),
24691                "source_handle",
24692                format!("evidence row {idx}"),
24693            ).with_property("detail", "x".repeat(400)));
24694            depth_by_id.insert(id, 1);
24695        }
24696        let origin_ids = vec!["target".to_string()];
24697        let first_page = graph_db_apply_output_budget_with_depths_and_cursor(
24698            &origin_ids,
24699            &BTreeMap::new(),
24700            nodes.clone(),
24701            Vec::new(),
24702            Some(3),
24703            Some(&depth_by_id),
24704            None,
24705        );
24706        assert!(
24707            first_page.truncated,
24708            "expected first page to be truncated with 20 candidates and low limit, got {} selected of {} candidates",
24709            first_page.nodes.len(),
24710            first_page.report.candidate_nodes
24711        );
24712        assert!(
24713            first_page.next_cursor.is_some(),
24714            "expected next_cursor when truncated"
24715        );
24716        let cursor = first_page.next_cursor.unwrap();
24717        assert!(
24718            !cursor.is_empty(),
24719            "cursor should be a non-empty node id"
24720        );
24721        let first_ids: BTreeSet<_> = first_page.nodes.iter().map(|n| n.id.clone()).collect();
24722        let second_page = graph_db_apply_output_budget_with_depths_and_cursor(
24723            &origin_ids,
24724            &BTreeMap::new(),
24725            nodes.clone(),
24726            Vec::new(),
24727            Some(3),
24728            Some(&depth_by_id),
24729            Some(&cursor),
24730        );
24731        let second_ids: BTreeSet<_> = second_page.nodes.iter().map(|n| n.id.clone()).collect();
24732        let overlap: BTreeSet<_> = first_ids.intersection(&second_ids).cloned().collect();
24733        assert!(
24734            overlap.is_empty(),
24735            "pages should not overlap, but found shared ids: {overlap:?}"
24736        );
24737        assert!(
24738            second_page.report.diagnostics.iter().any(|d| d.contains("cursor skipped")),
24739            "expected cursor skip diagnostic, got {:?}",
24740            second_page.report.diagnostics
24741        );
24742    }
24743
24744    #[test]
24745    fn evidence_pagination_no_cursor_returns_all_when_within_budget() {
24746        let mut nodes = vec![SubstrateGraphNode::new(
24747            "target".to_string(),
24748            "backlog_item",
24749            "target item".to_string(),
24750        )];
24751        let mut depth_by_id = BTreeMap::new();
24752        depth_by_id.insert("target".to_string(), 0);
24753        for idx in 0..3 {
24754            let id = format!("ev-{idx}");
24755            nodes.push(SubstrateGraphNode::new(
24756                id.clone(),
24757                "source_handle",
24758                format!("evidence row {idx}"),
24759            ));
24760            depth_by_id.insert(id, 1);
24761        }
24762        let origin_ids = vec!["target".to_string()];
24763        let result = graph_db_apply_output_budget_with_depths_and_cursor(
24764            &origin_ids,
24765            &BTreeMap::new(),
24766            nodes,
24767            Vec::new(),
24768            None,
24769            Some(&depth_by_id),
24770            None,
24771        );
24772        assert!(
24773            !result.truncated,
24774            "expected no truncation with small candidate set and default budget"
24775        );
24776        assert!(
24777            result.next_cursor.is_none(),
24778            "expected no next_cursor when not truncated"
24779        );
24780    }
24781
24782    #[test]
24783    fn evidence_pagination_invalid_cursor_returns_first_page() {
24784        let mut nodes = vec![SubstrateGraphNode::new(
24785            "target".to_string(),
24786            "backlog_item",
24787            "target item".to_string(),
24788        )];
24789        let mut depth_by_id = BTreeMap::new();
24790        depth_by_id.insert("target".to_string(), 0);
24791        for idx in 0..5 {
24792            let id = format!("ev-{idx}");
24793            nodes.push(SubstrateGraphNode::new(
24794                id.clone(),
24795                "source_handle",
24796                format!("evidence row {idx}"),
24797            ));
24798            depth_by_id.insert(id, 1);
24799        }
24800        let origin_ids = vec!["target".to_string()];
24801        let result = graph_db_apply_output_budget_with_depths_and_cursor(
24802            &origin_ids,
24803            &BTreeMap::new(),
24804            nodes.clone(),
24805            Vec::new(),
24806            None,
24807            Some(&depth_by_id),
24808            Some("nonexistent-id"),
24809        );
24810        assert!(
24811            result.report.diagnostics.iter().any(|d| d.contains("cursor skipped 0")),
24812            "invalid cursor should skip 0 candidates, got {:?}",
24813            result.report.diagnostics
24814        );
24815    }
24816
24817    #[test]
24818    fn graph_db_status_uses_snapshot_fallback_when_rollback_journal_is_locked() {
24819        let dir = setup_traversal_project();
24820        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
24821        let graph_db = dir.path().join(".tsift/graph.db");
24822        let _lock = hold_rollback_journal_lock(&graph_db);
24823
24824        let report =
24825            graph_db_operator_report_from_disk(dir.path(), None, &graph_db, "status", None, vec![])
24826                .unwrap();
24827
24828        assert_eq!(report.status, "current");
24829        assert_eq!(
24830            report.recovery,
24831            Some(index::ReadOnlyRecovery::SnapshotFallback)
24832        );
24833        assert!(
24834            report
24835                .warnings
24836                .iter()
24837                .any(|warning| warning.contains("rollback-journal lock")),
24838            "expected rollback-journal recovery warning, got {:?}",
24839            report.warnings
24840        );
24841    }
24842
24843    #[test]
24844    fn graph_db_status_copies_wal_sidecars_when_locked() {
24845        let dir = setup_traversal_project();
24846        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
24847        let graph_db = dir.path().join(".tsift/graph.db");
24848        let _lock = hold_wal_database_lock(&graph_db);
24849
24850        let report =
24851            graph_db_operator_report_from_disk(dir.path(), None, &graph_db, "status", None, vec![])
24852                .unwrap();
24853
24854        assert_eq!(report.status, "current");
24855        assert_eq!(
24856            report.recovery,
24857            Some(index::ReadOnlyRecovery::SnapshotFallbackWal)
24858        );
24859        assert!(
24860            report
24861                .warnings
24862                .iter()
24863                .any(|warning| warning.contains("WAL-aware snapshot fallback")),
24864            "expected WAL recovery warning, got {:?}",
24865            report.warnings
24866        );
24867    }
24868
24869    #[test]
24870    fn graph_db_evidence_uses_snapshot_fallback_when_graph_db_is_locked() {
24871        let dir = setup_traversal_project();
24872        let session = dir.path().join("tasks/software/tsift.md");
24873        refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
24874        let graph_db = dir.path().join(".tsift/graph.db");
24875        let _lock = hold_rollback_journal_lock(&graph_db);
24876
24877        let result = cmd_graph_db(
24878            &session,
24879            None,
24880            GraphDbBackend::Sqlite,
24881            None,
24882            GraphDbQuery::Evidence {
24883                target: "kgnv".to_string(),
24884                depth: 3,
24885                limit: 8,
24886                cursor: None,
24887            },
24888            OutputFormat {
24889                json_output: false,
24890                compact: true,
24891                pretty: false,
24892                terse: false,
24893                ultra_terse: false,
24894                schema: false,
24895                envelope: false,
24896            },
24897        );
24898
24899        assert!(result.is_ok());
24900    }
24901
24902    fn current_graph_db_freshness() -> GraphDbFreshnessReport {
24903        GraphDbFreshnessReport {
24904            status: "current".to_string(),
24905            fail_closed: false,
24906            projection_version: Some(GRAPH_PROJECTION_VERSION.to_string()),
24907            content_hash: Some("fixture".to_string()),
24908            source_watermark: None,
24909            diagnostics: Vec::new(),
24910        }
24911    }
24912
24913    #[test]
24914    fn graph_db_evidence_fails_closed_with_repair_command_for_stale_freshness() {
24915        let dir = setup_traversal_project();
24916        refresh_traversal_graph_store(dir.path(), dir.path(), None).unwrap();
24917        let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
24918        let stale = GraphDbFreshnessReport {
24919            status: "stale".to_string(),
24920            fail_closed: true,
24921            projection_version: Some("old-v0".to_string()),
24922            content_hash: None,
24923            source_watermark: None,
24924            diagnostics: vec!["projection content hash is missing".to_string()],
24925        };
24926
24927        let err = match graph_db_evidence_report_from_store(GraphDbEvidenceInput {
24928            root: dir.path(),
24929            scope: None,
24930            backend: "sqlite",
24931            target: "kgnv",
24932            depth: 3,
24933            limit: 8,
24934            cursor: None,
24935            store: &store,
24936            freshness: stale,
24937            warnings: Vec::new(),
24938        }) {
24939            Ok(_) => panic!("stale graph freshness should fail closed"),
24940            Err(err) => err,
24941        };
24942        let message = err.to_string();
24943        assert!(message.contains("failed closed"), "{message}");
24944        assert!(message.contains("graph-db --path"), "{message}");
24945        assert!(message.contains("refresh --json"), "{message}");
24946    }
24947
24948    fn paged_graph_ids(
24949        store: &impl GraphStore,
24950        cursor: Option<&str>,
24951    ) -> (Vec<String>, GraphDbPageReport) {
24952        let report = graph_db_report_from_store(
24953            Path::new("."),
24954            None,
24955            "fixture",
24956            GraphDbQuery::Kind {
24957                kind: "backlog".to_string(),
24958                cursor: cursor.map(str::to_string),
24959                limit: Some(2),
24960                property_filters: vec!["phase=open".to_string()],
24961            },
24962            store,
24963            current_graph_db_freshness(),
24964            Vec::new(),
24965        )
24966        .unwrap();
24967        (
24968            report.nodes.iter().map(|node| node.id.clone()).collect(),
24969            report.page.unwrap(),
24970        )
24971    }
24972
24973    #[test]
24974    fn graph_db_query_pagination_and_filters_match_sqlite_and_convex() {
24975        let nodes = (0..5)
24976            .map(|idx| {
24977                let phase = if idx == 1 { "closed" } else { "open" };
24978                SubstrateGraphNode::new(format!("gbak-{idx:02}"), "backlog", format!("#{idx:02}"))
24979                    .with_property("phase", phase)
24980            })
24981            .collect::<Vec<_>>();
24982        let projection = GraphProjection {
24983            nodes,
24984            edges: Vec::new(),
24985        };
24986        let sqlite = SqliteGraphStore::in_memory().unwrap();
24987        projection.upsert_into(&sqlite).unwrap();
24988        let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
24989        projection.upsert_into(&convex).unwrap();
24990
24991        let (sqlite_first_ids, sqlite_first_page) = paged_graph_ids(&sqlite, None);
24992        let (convex_first_ids, convex_first_page) = paged_graph_ids(&convex, None);
24993        assert_eq!(sqlite_first_ids, vec!["gbak-00", "gbak-02"]);
24994        assert_eq!(sqlite_first_ids, convex_first_ids);
24995        assert_eq!(sqlite_first_page.next_cursor.as_deref(), Some("gbak-02"));
24996        assert!(sqlite_first_page.truncated);
24997        assert_eq!(
24998            sqlite_first_page.returned_nodes,
24999            convex_first_page.returned_nodes
25000        );
25001        assert_eq!(
25002            sqlite_first_page.property_filters,
25003            convex_first_page.property_filters
25004        );
25005        assert!(
25006            sqlite_first_page
25007                .diagnostics
25008                .iter()
25009                .any(|diagnostic| diagnostic.contains("idx_graph_nodes_kind")),
25010            "expected SQLite kind query plan diagnostics, got {:?}",
25011            sqlite_first_page.diagnostics
25012        );
25013
25014        let cursor = sqlite_first_page.next_cursor.as_deref();
25015        let (sqlite_next_ids, sqlite_next_page) = paged_graph_ids(&sqlite, cursor);
25016        let (convex_next_ids, convex_next_page) = paged_graph_ids(&convex, cursor);
25017        assert_eq!(sqlite_next_ids, vec!["gbak-03", "gbak-04"]);
25018        assert_eq!(sqlite_next_ids, convex_next_ids);
25019        assert_eq!(sqlite_next_page.next_cursor, None);
25020        assert!(!sqlite_next_page.truncated);
25021        assert_eq!(
25022            sqlite_next_page.returned_nodes,
25023            convex_next_page.returned_nodes
25024        );
25025        assert_eq!(
25026            sqlite_next_page.property_filters,
25027            convex_next_page.property_filters
25028        );
25029    }
25030
25031    #[test]
25032    fn traversal_shortest_path_crosses_artifacts_and_symbols() {
25033        let dir = setup_traversal_project();
25034        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25035        let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
25036        let main = resolve_traversal_node(&graph, "main").unwrap();
25037
25038        let path = traversal_shortest_handles(&graph.edges, &backlog.handle, &main.handle).unwrap();
25039        assert_eq!(path.first(), Some(&backlog.handle));
25040        assert_eq!(path.last(), Some(&main.handle));
25041        assert!(
25042            path.len() >= 3,
25043            "expected backlog -> symbol -> main, got {path:?}"
25044        );
25045    }
25046
25047    #[test]
25048    fn traversal_report_recommends_next_bugfix_nodes() {
25049        let dir = setup_traversal_project();
25050        let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
25051        let report = traversal_report(dir.path(), None, graph, Some("#kgnv"), None, 1, 50).unwrap();
25052
25053        assert_eq!(report.mode, "neighborhood");
25054        assert!(
25055            report
25056                .recommendations
25057                .iter()
25058                .any(|rec| rec.label == "helper" && rec.reason.contains("matched")),
25059            "expected helper recommendation, got {:?}",
25060            report.recommendations
25061        );
25062        assert!(
25063            !report.exploration.source_windows.is_empty(),
25064            "expected exploration source windows"
25065        );
25066        assert!(
25067            report
25068                .exploration
25069                .no_reread_guidance
25070                .contains("avoid whole-file reads")
25071        );
25072    }
25073
/// Rewrites `main.rs` after project setup, rebuilds the traversal graph, and
/// expects a refresh warning, a resolvable `fresh_helper` node, and an index
/// that reports no outstanding new/modified/deleted files afterwards.
#[test]
fn traversal_graph_refreshes_stale_index_before_loading_symbols() {
    let project = setup_traversal_project();
    let root = project.path();

    // NOTE(review): presumably the pause gives the rewritten file a
    // distinguishable modification time — confirm against the index logic.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten = "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();

    let saw_refresh = graph.warnings.iter().any(|warning| {
        warning.contains("index refreshed") && warning.contains("graph traversal packet")
    });
    assert!(
        saw_refresh,
        "expected refresh diagnostic, got {:?}",
        graph.warnings
    );
    assert!(resolve_traversal_node(&graph, "fresh_helper").is_some());

    // After the graph build the on-disk index should have nothing left to pick up.
    let index_path = root.join(".tsift/index.db");
    let db = index::IndexDb::open_read_only(&index_path).unwrap();
    let pending = db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
25101
/// Holds the index writer lock while `main.rs` is rewritten, then builds the
/// traversal graph and expects the raw-source fallback: a fallback warning, a
/// `main.rs` node whose detail and expand command mention the fallback, and no
/// `helper` symbol node.
#[test]
fn traversal_graph_falls_back_to_raw_source_when_stale_refresh_is_blocked() {
    let project = setup_traversal_project();
    let root = project.path();
    let index_db = root.join(".tsift/index.db");
    // Bound to a named variable so the returned value lives until the test ends.
    // NOTE(review): presumably a lock guard released on drop — confirm.
    let _writer_guard = hold_writer_lock(&index::writer_lock_path(&index_db));

    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten = "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let file_node = resolve_traversal_node(&graph, "main.rs").unwrap();

    let warned_about_fallback = graph
        .warnings
        .iter()
        .any(|warning| warning.contains("falling back to raw source file nodes"));
    assert!(
        warned_about_fallback,
        "expected raw-source fallback diagnostic, got {:?}",
        graph.warnings
    );

    let detail_mentions_fallback = file_node
        .detail
        .as_deref()
        .is_some_and(|detail| detail.contains("raw source fallback"));
    assert!(
        detail_mentions_fallback,
        "expected raw-source detail, got {:?}",
        file_node.detail
    );

    assert!(
        file_node.expand.contains("source-read"),
        "expected source-read fallback command, got {}",
        file_node.expand
    );

    let stale_symbol = resolve_traversal_node(&graph, "helper");
    assert!(
        stale_symbol.is_none(),
        "stale symbol evidence should be skipped when refresh is blocked"
    );
}
25142
/// Runs `cmd_traverse` twice against the same project — once with the `#kgnv`
/// and `main` arguments in JSON format, once with neither in HTML format — and
/// requires both invocations to succeed.
#[test]
fn traversal_cmd_supports_json_and_html_outputs() {
    let project = setup_traversal_project();

    // (first argument, second argument, output format) per invocation, in order.
    let invocations = [
        (Some("#kgnv"), Some("main"), TraverseFormat::Json),
        (None, None, TraverseFormat::Html),
    ];

    for (first, second, format) in invocations {
        cmd_traverse(
            first,
            second,
            project.path(),
            None,
            1,
            50,
            format,
            false,
            false,
            false,
            None,
        )
        .unwrap();
    }
}
25175
/// Renders a traversal report to HTML after seeding semantic summaries and
/// checks that the markup contains each expected marker string.
#[test]
fn traversal_html_renders_inline_graph_visualization() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);

    let graph = build_traversal_graph(root, root, None).unwrap();
    let report = traversal_report(root, None, graph, None, None, 1, 50).unwrap();
    let html = traversal_report_html(&report).unwrap();

    let expected_markers = [
        "id=\"graph-canvas\"",
        "semantic_concept",
        "graph navigation",
        "JSON.parse",
    ];
    for marker in expected_markers {
        assert!(html.contains(marker));
    }
}
25189
/// Pins `format_score` output for both values of its boolean argument and
/// checks that `compact_snippet` yields the trimmed first line of its input.
#[test]
fn compact_helpers_trim_scores_and_snippets() {
    let two_decimals = format_score(0.12345, true);
    let four_decimals = format_score(0.12345, false);
    assert_eq!(two_decimals, "0.12");
    assert_eq!(four_decimals, "0.1235");

    let compacted = compact_snippet("    first line with useful context\nsecond");
    assert_eq!(compacted.as_deref(), Some("first line with useful context"));
}
25197
/// Six members rendered with a cap of five must list the first five names and
/// summarize the remainder as "(+1 more)".
#[test]
fn compact_members_caps_list() {
    let mut members: Vec<graph::CommunityMember> = Vec::new();
    for name in ["a", "b", "c", "d", "e", "f"] {
        members.push(graph::CommunityMember::new(name));
    }

    let rendered = compact_members(&members, 5);
    assert_eq!(rendered, "a, b, c, d, e (+1 more)");
}
25206
/// Table-driven check of `abbreviate_kind`: kinds with an abbreviation map to
/// it, while the remaining kinds come back unchanged.
#[test]
fn abbreviate_kind_maps_common_kinds() {
    let abbreviated = [
        ("function", "fn"),
        ("method", "meth"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
    ];
    for (kind, expected) in abbreviated {
        assert_eq!(abbreviate_kind(kind), expected);
    }

    // Kinds without an abbreviation pass through untouched.
    let passthrough = ["struct", "trait", "enum", "const", "unknown_kind"];
    for kind in passthrough {
        assert_eq!(abbreviate_kind(kind), kind);
    }
}
25228
/// Table-driven check of `abbreviate_match_type` for the four inputs below,
/// two of which are expected to come back unchanged.
#[test]
fn abbreviate_match_type_maps_search_types() {
    let cases = [
        ("exact_name", "exact"),
        ("partial_tags", "partial"),
        ("all_tags", "all_tags"),
        ("other_type", "other_type"),
    ];
    for (match_type, expected) in cases {
        assert_eq!(abbreviate_match_type(match_type), expected);
    }
}
25236
/// Two edges from `main` in `src/main.rs` must be rendered by
/// `format_edge_groups` as a single line listing both callees.
#[test]
fn explain_compact_groups_edges_by_file() {
    // Both edges share the caller; only the callee and call-site line vary.
    let edge_from_main = |callee: &str, call_site_line| index::StoredEdge {
        caller_file: "src/main.rs".to_string(),
        caller_name: "main".to_string(),
        caller_line: 1,
        callee_name: callee.to_string(),
        call_site_line,
        tagpath_handle: None,
    };
    let edges = vec![edge_from_main("helper", 2), edge_from_main("render", 3)];

    let rendered = format_edge_groups(&edges, false);
    assert_eq!(rendered, vec!["  src/main.rs (2): helper, render"]);
}
25260
/// Three hits against the same file must collapse into one group that reports
/// the root-relative path, a hit count of three, and only the first two
/// samples; the same input must also satisfy `should_collapse_search_hits`.
#[test]
fn search_hit_groups_preserve_file_counts_and_samples() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let main_rs = root.join("src/main.rs");
    fs::create_dir_all(main_rs.parent().unwrap()).unwrap();
    fs::write(&main_rs, "claudescore-3 anchor\nclaudescore-3 follow-up\n").unwrap();
    let freshness = exact_search_file_timestamp(&main_rs);

    // Every hit targets the same file and provenance; only id, rank, score,
    // location and snippet vary.
    let hit_in_main = |id: &str, rank, score, location: &str, snippet: &str| sift::SearchHit {
        artifact_id: id.to_string(),
        artifact_kind: sift::ContextArtifactKind::File,
        path: main_rs.display().to_string(),
        rank,
        score,
        confidence: sift::ScoreConfidence::High,
        location: Some(location.to_string()),
        snippet: snippet.to_string(),
        provenance: sift::ArtifactProvenance {
            adapter: sift::AcquisitionAdapterKind::FileSystem,
            source: "ripgrep -F".to_string(),
            synthetic: false,
        },
        freshness: freshness.clone(),
        budget: sift::ArtifactBudget::from_text(snippet, 1),
    };
    let hits = vec![
        hit_in_main("a", 1, 10.0, "line 3", "claudescore-3 anchor"),
        hit_in_main("b", 2, 9.0, "line 7", "claudescore-3 follow-up"),
        hit_in_main("c", 3, 8.0, "line 9", "claudescore-3 tail"),
    ];

    let groups = group_search_hits(&hits, root, false);
    assert_eq!(groups.len(), 1);

    let only_group = &groups[0];
    assert_eq!(only_group.path, "src/main.rs");
    assert_eq!(only_group.hits, 3);
    assert_eq!(
        only_group.samples,
        vec![
            "line 3: claudescore-3 anchor".to_string(),
            "line 7: claudescore-3 follow-up".to_string()
        ]
    );
    assert!(should_collapse_search_hits(&hits, root, false));
}
25336
/// Three edges from different callers in `src/main.rs`, all calling `helper`,
/// must make `should_collapse_edge_groups` return true.
#[test]
fn dense_edge_groups_trigger_collapse() {
    // All edges share the file and callee; the caller and line numbers vary.
    let call_to_helper = |caller: &str, caller_line, call_site_line| index::StoredEdge {
        caller_file: "src/main.rs".to_string(),
        caller_name: caller.to_string(),
        caller_line,
        callee_name: "helper".to_string(),
        call_site_line,
        tagpath_handle: None,
    };
    let edges = vec![
        call_to_helper("main", 1, 2),
        call_to_helper("beta", 5, 6),
        call_to_helper("gamma", 9, 10),
    ];

    assert!(should_collapse_edge_groups(&edges));
}
25367
25368    // --- workspace indexing ---
25369
25370    fn setup_workspace() -> tempfile::TempDir {
25371        let dir = tempfile::tempdir().unwrap();
25372        let root = dir.path();
25373        std::fs::write(
25374            root.join(".gitmodules"),
25375            r#"[submodule "src/alpha"]
25376	path = src/alpha
25377	url = https://example.com/alpha
25378[submodule "src/beta"]
25379	path = src/beta
25380	url = https://example.com/beta
25381"#,
25382        )
25383        .unwrap();
25384        let alpha = root.join("src/alpha");
25385        let beta = root.join("src/beta");
25386        std::fs::create_dir_all(&alpha).unwrap();
25387        std::fs::create_dir_all(&beta).unwrap();
25388        std::fs::write(
25389            alpha.join("lib.rs"),
25390            "fn alpha_helper() {}\nfn alpha_main() { alpha_helper(); }",
25391        )
25392        .unwrap();
25393        std::fs::write(beta.join("lib.rs"), "fn beta_func() {}").unwrap();
25394        dir
25395    }
25396
25397    fn setup_workspace_with_duplicate_leaf_names() -> tempfile::TempDir {
25398        let dir = tempfile::tempdir().unwrap();
25399        let root = dir.path();
25400        std::fs::write(
25401            root.join(".gitmodules"),
25402            r#"[submodule "pkg/app/foo"]
25403	path = pkg/app/foo
25404	url = https://example.com/pkg-app-foo
25405[submodule "vendor/foo"]
25406	path = vendor/foo
25407	url = https://example.com/vendor-foo
25408"#,
25409        )
25410        .unwrap();
25411        let pkg_foo = root.join("pkg/app/foo");
25412        let vendor_foo = root.join("vendor/foo");
25413        std::fs::create_dir_all(&pkg_foo).unwrap();
25414        std::fs::create_dir_all(&vendor_foo).unwrap();
25415        std::fs::write(
25416            pkg_foo.join("lib.rs"),
25417            "fn pkg_only() {}\nfn shared_name() { pkg_only(); }\n",
25418        )
25419        .unwrap();
25420        std::fs::write(
25421            vendor_foo.join("lib.rs"),
25422            "fn vendor_only() {}\nfn shared_name() { vendor_only(); }\n",
25423        )
25424        .unwrap();
25425        dir
25426    }
25427
/// Indexing the workspace root must leave one `index.db` per submodule under
/// `.tsift/indexes/`.
#[test]
fn workspace_index_creates_per_submodule_dbs() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let outcome = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    outcome.unwrap();

    let indexes = root.join(".tsift/indexes");
    assert!(indexes.join("alpha/index.db").exists());
    assert!(indexes.join("beta/index.db").exists());
}
25451
/// Indexing with `Some("alpha")` must create the alpha index database and
/// leave beta without one.
#[test]
fn workspace_index_single_submodule() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let outcome = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        false,
        // Restricts the run to one named target.
        Some("alpha"),
        false,
        false,
        false,
        false,
        false,
        false,
    );
    outcome.unwrap();

    let indexes = root.join(".tsift/indexes");
    let alpha_indexed = indexes.join("alpha/index.db").exists();
    let beta_indexed = indexes.join("beta/index.db").exists();
    assert!(alpha_indexed);
    assert!(!beta_indexed);
}
25475
/// Indexing with the undeclared name `missing` must fail with an error that
/// names the unknown scope and lists the available ones, and must not create
/// an index database for it.
#[test]
fn workspace_index_single_submodule_errors_on_unknown_scope() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let failure = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        false,
        // Not declared in the fixture's `.gitmodules`.
        Some("missing"),
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let message = failure.to_string();
    for fragment in ["unknown scope `missing`", "Available scopes: alpha, beta"] {
        assert!(message.contains(fragment));
    }

    let stray_index = root.join(".tsift/indexes/missing/index.db");
    assert!(!stray_index.exists());
}
25503
/// With two submodules that share the leaf name `foo`, indexing must place
/// each database under its full submodule path.
#[test]
fn workspace_index_uses_unique_scope_ids_when_leaf_names_collide() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();

    let outcome = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    outcome.unwrap();

    let expected_databases = [
        ".tsift/indexes/pkg/app/foo/index.db",
        ".tsift/indexes/vendor/foo/index.db",
    ];
    for relative in expected_databases {
        assert!(root.join(relative).exists());
    }
}
25536
/// After indexing the workspace, a federated symbol search from the root for
/// `alpha_helper` must return at least one hit.
#[test]
fn federated_search_across_submodules() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diag) = federated_symbol_search(root, "alpha_helper", 10, &opts).unwrap();

    let found_any = !hits.is_empty();
    assert!(found_any, "should find alpha_helper via federated search");
}
25572
/// With `alpha` configured as `tier = "isolated"`, a federated symbol search
/// for `alpha_helper` must return no hits.
#[test]
fn federated_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // The config is written before indexing, matching the order under test.
    let config_dir = root.join(".tsift");
    std::fs::create_dir_all(&config_dir).unwrap();
    let config_toml = "\n[overrides.alpha]\ntier = \"isolated\"\n";
    std::fs::write(config_dir.join("config.toml"), config_toml).unwrap();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diag) = federated_symbol_search(root, "alpha_helper", 10, &opts).unwrap();

    assert!(
        hits.is_empty(),
        "isolated submodule should not appear in federated search"
    );
}
25618
/// With `alpha` configured as `tier = "isolated"`, a federated lexical search
/// for `fn` must still return hits, and every hit must come from
/// `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // The config is written before indexing, matching the order under test.
    let config_dir = root.join(".tsift");
    std::fs::create_dir_all(&config_dir).unwrap();
    let config_toml = "\n[overrides.alpha]\ntier = \"isolated\"\n";
    std::fs::write(config_dir.join("config.toml"), config_toml).unwrap();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();

    let has_hits = !response.hits.is_empty();
    assert!(has_hits, "shared scopes should still contribute lexical hits");

    let only_beta = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta,
        "isolated scope should not leak lexical hits: {:?}",
        response.hits
    );
}
25673
/// With `alpha` configured as `tier = "private"`, a federated lexical search
/// for `fn` must still return hits, and every hit must come from
/// `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_private_tier() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // The config is written before indexing, matching the order under test.
    let config_dir = root.join(".tsift");
    std::fs::create_dir_all(&config_dir).unwrap();
    let config_toml = "\n[overrides.alpha]\ntier = \"private\"\n";
    std::fs::write(config_dir.join("config.toml"), config_toml).unwrap();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();

    let has_hits = !response.hits.is_empty();
    assert!(has_hits, "shared scopes should still contribute lexical hits");

    let only_beta = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta,
        "private scope should not leak lexical hits: {:?}",
        response.hits
    );
}
25728
/// Opens the index database that the config resolves for `alpha` and checks
/// that a symbol search for `alpha_main` ranks that symbol first.
#[test]
fn scoped_search_finds_submodule_symbols() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let settings = config::Config::load(root).unwrap();
    let alpha_db_path = settings.db_path_for(root, "alpha");
    let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();

    let matches = alpha_db.symbol_search("alpha_main", 10).unwrap();
    assert!(!matches.is_empty());
    assert_eq!(matches[0].name, "alpha_main");
}
25756
/// `cmd_search` with the undeclared scope `missing` must fail with an error
/// that names the unknown scope and lists the available ones.
#[test]
fn scoped_search_cmd_errors_on_unknown_scope() {
    let workspace = setup_workspace();

    let query = "alpha_main".to_string();
    let search_root = Some(workspace.path().to_path_buf());
    let strategy = Some("lexical".to_string());
    let scope = Some("missing".to_string());

    let failure = cmd_search(
        query,
        search_root,
        5,
        strategy,
        scope,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let message = failure.to_string();
    for fragment in ["unknown scope `missing`", "Available scopes: alpha, beta"] {
        assert!(message.contains(fragment));
    }
}
25785
/// With two indexed submodules sharing the leaf name `foo`, `cmd_search` given
/// the bare scope `foo` must fail with an ambiguity error that names both
/// full submodule paths.
#[test]
fn scoped_search_cmd_errors_on_ambiguous_legacy_scope_name() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let query = "vendor_only".to_string();
    let search_root = Some(root.to_path_buf());
    let strategy = Some("lexical".to_string());
    let scope = Some("foo".to_string());

    let failure = cmd_search(
        query,
        search_root,
        5,
        strategy,
        scope,
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();

    let message = failure.to_string();
    for fragment in ["ambiguous scope `foo`", "pkg/app/foo", "vendor/foo"] {
        assert!(message.contains(fragment));
    }
}
25832
/// Opens the index database that the config resolves for `alpha` and checks
/// that the callees recorded for `alpha_main` include `alpha_helper`.
#[test]
fn scoped_graph_query() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let settings = config::Config::load(root).unwrap();
    let alpha_db_path = settings.db_path_for(root, "alpha");
    let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();

    let callees = alpha_db.callees_of("alpha_main").unwrap();
    let calls_helper = callees
        .iter()
        .any(|edge| edge.callee_name.as_str() == "alpha_helper");
    assert!(calls_helper);
}
25860
25861    fn assert_workspace_query_requires_scope(err: anyhow::Error) {
25862        let msg = err.to_string();
25863        assert!(msg.contains("require `--scope <scope>`"), "{msg}");
25864        assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
25865        assert!(msg.contains("Indexed scopes: alpha, beta"), "{msg}");
25866        assert!(
25867            !msg.contains("no index found at"),
25868            "workspace query should fail with scope guidance, got: {msg}"
25869        );
25870    }
25871
25872    fn assert_workspace_search_requires_explicit_target(err: anyhow::Error) {
25873        let msg = err.to_string();
25874        assert!(
25875            msg.contains("requires `--scope <scope>` or `--federated`"),
25876            "{msg}"
25877        );
25878        assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
25879        assert!(msg.contains("Indexed scopes: alpha, beta"), "{msg}");
25880        assert!(
25881            !msg.contains("autoindexing index"),
25882            "workspace search should fail before creating a shared root index: {msg}"
25883        );
25884    }
25885
/// After indexing the workspace, `cmd_graph` run against the workspace root
/// must fail with the scope-guidance error checked by
/// `assert_workspace_query_requires_scope`.
#[test]
fn graph_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let outcome = cmd_graph(
        "alpha_main",
        root,
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
25927
/// After indexing the workspace, `cmd_graph` run from a directory nested
/// inside `src/alpha` must succeed without an explicit scope.
///
/// On failure the assertion prints the returned error, so a regression shows
/// why the command was rejected rather than only that it was.
#[test]
fn graph_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    // The nested directory is created only after indexing.
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let result = cmd_graph(
        "alpha_main",
        &nested,
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    assert!(
        result.is_ok(),
        "expected cmd_graph to succeed from a nested workspace path, got {:?}",
        result.as_ref().err()
    );
}
25970
/// After indexing the workspace, `cmd_communities` run against the workspace
/// root must fail with the scope-guidance error checked by
/// `assert_workspace_query_requires_scope`.
#[test]
fn communities_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();

    let indexed = cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    );
    indexed.unwrap();

    let outcome = cmd_communities(
        root,
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
26009
/// After indexing the workspace, `cmd_communities` run from a directory
/// nested inside `src/alpha` must succeed without an explicit scope.
///
/// On failure the assertion prints the returned error, so a regression shows
/// why the command was rejected rather than only that it was.
#[test]
fn communities_cmd_infers_scope_from_nested_workspace_path() {
    let dir = setup_workspace();
    cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        // The only flag enabled in this call.
        // NOTE(review): presumably the whole-workspace switch — confirm against `cmd_index`.
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    // The nested directory is created only after indexing.
    let nested = dir.path().join("src/alpha/nested");
    std::fs::create_dir_all(&nested).unwrap();

    let result = cmd_communities(
        &nested,
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );

    assert!(
        result.is_ok(),
        "expected cmd_communities to succeed from a nested workspace path, got {:?}",
        result.as_ref().err()
    );
}
26049
26050    #[test]
26051    fn path_cmd_requires_scope_for_workspace_root_without_shared_index() {
26052        let dir = setup_workspace();
26053        cmd_index(
26054            dir.path(),
26055            false,
26056            false,
26057            false,
26058            false,
26059            false,
26060            true,
26061            None,
26062            false,
26063            false,
26064            false,
26065            false,
26066            false,
26067            false,
26068        )
26069        .unwrap();
26070
26071        let err = cmd_path(
26072            "alpha_main",
26073            "alpha_helper",
26074            dir.path(),
26075            None,
26076            false,
26077            false,
26078            false,
26079            false,
26080            false,
26081            TagpathSearchOpts::default(),
26082        )
26083        .unwrap_err();
26084
26085        assert_workspace_query_requires_scope(err);
26086    }
26087
26088    #[test]
26089    fn path_cmd_infers_scope_from_nested_workspace_path() {
26090        let dir = setup_workspace();
26091        cmd_index(
26092            dir.path(),
26093            false,
26094            false,
26095            false,
26096            false,
26097            false,
26098            true,
26099            None,
26100            false,
26101            false,
26102            false,
26103            false,
26104            false,
26105            false,
26106        )
26107        .unwrap();
26108        let nested = dir.path().join("src/alpha/nested");
26109        std::fs::create_dir_all(&nested).unwrap();
26110
26111        let result = cmd_path(
26112            "alpha_main",
26113            "alpha_helper",
26114            &nested,
26115            None,
26116            false,
26117            false,
26118            false,
26119            false,
26120            false,
26121            TagpathSearchOpts::default(),
26122        );
26123
26124        assert!(result.is_ok());
26125    }
26126
26127    #[test]
26128    fn path_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
26129        let dir = setup_graph_index();
26130        let db_path = dir.path().join(".tsift/index.db");
26131        let _lock = hold_rollback_journal_lock(&db_path);
26132
26133        let result = cmd_path(
26134            "main",
26135            "helper",
26136            dir.path(),
26137            None,
26138            false,
26139            false,
26140            false,
26141            false,
26142            false,
26143            TagpathSearchOpts::default(),
26144        );
26145
26146        assert!(result.is_ok());
26147    }
26148
26149    #[test]
26150    fn explain_cmd_requires_scope_for_workspace_root_without_shared_index() {
26151        let dir = setup_workspace();
26152        cmd_index(
26153            dir.path(),
26154            false,
26155            false,
26156            false,
26157            false,
26158            false,
26159            true,
26160            None,
26161            false,
26162            false,
26163            false,
26164            false,
26165            false,
26166            false,
26167        )
26168        .unwrap();
26169
26170        let err = cmd_explain(
26171            "alpha_main",
26172            dir.path(),
26173            None,
26174            15,
26175            false,
26176            false,
26177            false,
26178            false,
26179            false,
26180            false,
26181            false,
26182            false,
26183        )
26184        .unwrap_err();
26185
26186        assert_workspace_query_requires_scope(err);
26187    }
26188
26189    #[test]
26190    fn explain_cmd_infers_scope_from_nested_workspace_path() {
26191        let dir = setup_workspace();
26192        cmd_index(
26193            dir.path(),
26194            false,
26195            false,
26196            false,
26197            false,
26198            false,
26199            true,
26200            None,
26201            false,
26202            false,
26203            false,
26204            false,
26205            false,
26206            false,
26207        )
26208        .unwrap();
26209        let nested = dir.path().join("src/alpha/nested");
26210        std::fs::create_dir_all(&nested).unwrap();
26211
26212        let result = cmd_explain(
26213            "alpha_main",
26214            &nested,
26215            None,
26216            15,
26217            false,
26218            false,
26219            false,
26220            false,
26221            false,
26222            false,
26223            false,
26224            false,
26225        );
26226
26227        assert!(result.is_ok());
26228    }
26229
26230    #[test]
26231    fn explain_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
26232        let dir = setup_graph_index();
26233        let db_path = dir.path().join(".tsift/index.db");
26234        let _lock = hold_rollback_journal_lock(&db_path);
26235
26236        let result = cmd_explain(
26237            "main",
26238            dir.path(),
26239            None,
26240            15,
26241            false,
26242            false,
26243            false,
26244            false,
26245            false,
26246            false,
26247            false,
26248            false,
26249        );
26250
26251        assert!(result.is_ok());
26252    }
26253
26254    // --- community detection ---
26255
26256    #[test]
26257    fn community_detection_groups_related() {
26258        let dir = setup_graph_index();
26259        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
26260        let edges = db.all_edges().unwrap();
26261        let result = graph::detect_communities(&edges);
26262        assert!(result.node_count > 0);
26263        assert!(!result.communities.is_empty());
26264    }
26265
26266    #[test]
26267    fn community_cmd_autoindexes_missing_index_by_default() {
26268        let dir = tempfile::tempdir().unwrap();
26269        let result = cmd_communities(
26270            dir.path(),
26271            None,
26272            2,
26273            10,
26274            false,
26275            false,
26276            false,
26277            false,
26278            false,
26279            false,
26280            TagpathSearchOpts::default(),
26281        );
26282
26283        assert!(result.is_ok());
26284        assert!(dir.path().join(".tsift/index.db").exists());
26285    }
26286
26287    // --- path ---
26288
26289    #[test]
26290    fn path_finds_connected_symbols() {
26291        let dir = setup_graph_index();
26292        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
26293        let edges = db.all_edges().unwrap();
26294        let result = graph::shortest_path(&edges, "main", "helper");
26295        assert!(result.is_some());
26296        let path = result.unwrap();
26297        assert_eq!(path.hops, 1);
26298    }
26299
26300    #[test]
26301    fn path_returns_none_for_unknown() {
26302        let dir = setup_graph_index();
26303        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
26304        let edges = db.all_edges().unwrap();
26305        assert!(graph::shortest_path(&edges, "main", "nonexistent").is_none());
26306    }
26307
26308    #[test]
26309    fn path_cmd_autoindexes_missing_index_by_default() {
26310        let dir = tempfile::tempdir().unwrap();
26311        let result = cmd_path(
26312            "a",
26313            "b",
26314            dir.path(),
26315            None,
26316            false,
26317            false,
26318            false,
26319            false,
26320            false,
26321            TagpathSearchOpts::default(),
26322        );
26323
26324        assert!(result.is_ok());
26325        assert!(dir.path().join(".tsift/index.db").exists());
26326    }
26327
26328    // --- explain ---
26329
26330    #[test]
26331    fn explain_shows_symbol_info() {
26332        let dir = setup_graph_index();
26333        let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
26334        let symbols = db.symbol_info("main").unwrap();
26335        assert!(!symbols.is_empty());
26336        assert_eq!(symbols[0].name, "main");
26337        assert_eq!(symbols[0].kind, "function");
26338    }
26339
26340    #[test]
26341    fn explain_cmd_autoindexes_missing_index_by_default() {
26342        let dir = tempfile::tempdir().unwrap();
26343        let result = cmd_explain(
26344            "main",
26345            dir.path(),
26346            None,
26347            15,
26348            false,
26349            false,
26350            false,
26351            false,
26352            false,
26353            false,
26354            false,
26355            false,
26356        );
26357
26358        assert!(result.is_ok());
26359        assert!(dir.path().join(".tsift/index.db").exists());
26360    }
26361
26362    fn hold_write_lock(db_path: &std::path::Path) -> Connection {
26363        let conn = Connection::open(db_path).unwrap();
26364        conn.execute_batch("BEGIN IMMEDIATE").unwrap();
26365        conn
26366    }
26367
26368    fn hold_writer_lock(lock_path: &std::path::Path) -> std::fs::File {
26369        use fs4::fs_std::FileExt;
26370        use std::io::Write;
26371
26372        let mut file = std::fs::OpenOptions::new()
26373            .read(true)
26374            .write(true)
26375            .create(true)
26376            .truncate(false)
26377            .open(lock_path)
26378            .unwrap();
26379        assert!(file.try_lock_exclusive().unwrap());
26380        writeln!(file, "{}", std::process::id()).unwrap();
26381        file
26382    }
26383
26384    fn hold_rollback_journal_lock(db_path: &std::path::Path) -> Connection {
26385        let conn = Connection::open(db_path).unwrap();
26386        conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
26387            .unwrap();
26388        std::fs::write(substrate::rollback_journal_path(db_path), "locked").unwrap();
26389        conn
26390    }
26391
26392    fn hold_wal_database_lock(db_path: &std::path::Path) -> Connection {
26393        let conn = Connection::open(db_path).unwrap();
26394        conn.execute_batch(
26395            "PRAGMA journal_mode=WAL;
26396             PRAGMA wal_autocheckpoint=0;
26397             CREATE TABLE IF NOT EXISTS wal_lock_probe (id INTEGER PRIMARY KEY);
26398             INSERT INTO wal_lock_probe DEFAULT VALUES;
26399             PRAGMA locking_mode=EXCLUSIVE;
26400             BEGIN EXCLUSIVE;",
26401        )
26402        .unwrap();
26403        assert!(substrate::wal_sidecar_path(db_path).exists());
26404        conn
26405    }
26406
26407    #[test]
26408    fn index_cmd_reports_wal_sidecar_diagnostics_without_tsift_writer_lock() {
26409        let dir = setup_graph_index();
26410        let db_path = dir.path().join(".tsift/index.db");
26411        let _lock = hold_wal_database_lock(&db_path);
26412
26413        let err = cmd_index(
26414            dir.path(),
26415            false,
26416            false,
26417            false,
26418            false,
26419            false,
26420            false,
26421            None,
26422            false,
26423            false,
26424            false,
26425            false,
26426            false,
26427            false,
26428        )
26429        .unwrap_err();
26430
26431        let msg = err.to_string();
26432        assert!(msg.contains("indexing"));
26433        assert!(msg.contains("lock diagnostics:"));
26434        assert!(msg.contains("lock: absent"));
26435        assert!(msg.contains("wal: present") || msg.contains("shm: present"));
26436        assert!(msg.contains("wedged writer holding live WAL sidecars"));
26437        assert!(msg.contains("snapshot fallback"));
26438    }
26439
26440    #[test]
26441    fn search_cmd_succeeds_while_writer_lock_is_held() {
26442        let dir = setup_graph_index();
26443        let db_path = dir.path().join(".tsift/index.db");
26444        let _lock = hold_write_lock(&db_path);
26445
26446        let result = cmd_search(
26447            "main".to_string(),
26448            Some(dir.path().to_path_buf()),
26449            5,
26450            Some("lexical".to_string()),
26451            None,
26452            false,
26453            false,
26454            false,
26455            0,
26456            true,
26457            false,
26458            false,
26459            false,
26460            false,
26461            false,
26462            false,
26463        );
26464
26465        assert!(result.is_ok());
26466    }
26467
26468    #[test]
26469    fn search_cmd_uses_snapshot_fallback_when_rollback_journal_lock_appears_after_precheck() {
26470        let dir = setup_graph_index();
26471        let _hook = install_search_post_precheck_lock(dir.path().join(".tsift/index.db"));
26472
26473        let result = cmd_search(
26474            "main".to_string(),
26475            Some(dir.path().to_path_buf()),
26476            5,
26477            Some("lexical".to_string()),
26478            None,
26479            false,
26480            false,
26481            false,
26482            0,
26483            true,
26484            false,
26485            false,
26486            false,
26487            false,
26488            false,
26489            false,
26490        );
26491
26492        assert!(result.is_ok());
26493    }
26494
26495    #[test]
26496    fn search_cmd_uses_wal_snapshot_fallback_when_lock_appears_after_precheck() {
26497        let dir = setup_graph_index();
26498        let _hook = install_search_post_precheck_wal_lock(dir.path().join(".tsift/index.db"));
26499
26500        let result = cmd_search(
26501            "main".to_string(),
26502            Some(dir.path().to_path_buf()),
26503            5,
26504            Some("lexical".to_string()),
26505            None,
26506            false,
26507            false,
26508            false,
26509            0,
26510            true,
26511            false,
26512            false,
26513            false,
26514            false,
26515            false,
26516            false,
26517        );
26518
26519        assert!(result.is_ok());
26520    }
26521
26522    #[test]
26523    fn search_cmd_fails_fast_when_autoindex_disabled_and_index_is_stale() {
26524        let dir = setup_graph_index();
26525        std::thread::sleep(std::time::Duration::from_millis(50));
26526        std::fs::write(
26527            dir.path().join("main.rs"),
26528            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26529        )
26530        .unwrap();
26531
26532        let err = cmd_search(
26533            "helper".to_string(),
26534            Some(dir.path().to_path_buf()),
26535            5,
26536            Some("lexical".to_string()),
26537            None,
26538            false,
26539            false,
26540            false,
26541            0,
26542            false,
26543            false,
26544            false,
26545            false,
26546            false,
26547            false,
26548            false,
26549        )
26550        .unwrap_err();
26551
26552        assert!(err.to_string().contains("search aborted"));
26553        assert!(err.to_string().contains("index is stale"));
26554        assert!(err.to_string().contains("--no-autoindex"));
26555    }
26556
26557    #[test]
26558    fn search_cmd_reports_stale_when_root_index_is_locked_by_rollback_journal() {
26559        let dir = setup_graph_index();
26560        std::thread::sleep(std::time::Duration::from_millis(50));
26561        std::fs::write(
26562            dir.path().join("main.rs"),
26563            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26564        )
26565        .unwrap();
26566        let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/index.db"));
26567
26568        let err = cmd_search(
26569            "helper".to_string(),
26570            Some(dir.path().to_path_buf()),
26571            5,
26572            Some("lexical".to_string()),
26573            None,
26574            false,
26575            false,
26576            false,
26577            0,
26578            false,
26579            false,
26580            false,
26581            false,
26582            false,
26583            false,
26584            false,
26585        )
26586        .unwrap_err();
26587
26588        assert!(err.to_string().contains("search aborted"));
26589        assert!(err.to_string().contains("index is stale"));
26590        assert!(!err.to_string().contains("database is locked"));
26591    }
26592
26593    #[test]
26594    fn search_cmd_autoindexes_stale_index_by_default() {
26595        let dir = setup_graph_index();
26596        std::thread::sleep(std::time::Duration::from_millis(50));
26597        std::fs::write(
26598            dir.path().join("main.rs"),
26599            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26600        )
26601        .unwrap();
26602
26603        let result = cmd_search(
26604            "helper".to_string(),
26605            Some(dir.path().to_path_buf()),
26606            5,
26607            Some("lexical".to_string()),
26608            None,
26609            false,
26610            false,
26611            true,
26612            0,
26613            false,
26614            false,
26615            false,
26616            false,
26617            false,
26618            false,
26619            false,
26620        );
26621
26622        assert!(result.is_ok());
26623
26624        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
26625        let summary = db.compute_changes(dir.path()).unwrap();
26626        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
26627    }
26628
26629    #[test]
26630    fn search_cmd_keeps_read_only_results_when_active_writer_blocks_autoindex() {
26631        let dir = setup_graph_index();
26632        std::thread::sleep(std::time::Duration::from_millis(50));
26633        std::fs::write(
26634            dir.path().join("main.rs"),
26635            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26636        )
26637        .unwrap();
26638        let _lock = hold_writer_lock(&dir.path().join(".tsift/index.lock"));
26639
26640        let result = cmd_search(
26641            "helper".to_string(),
26642            Some(dir.path().to_path_buf()),
26643            5,
26644            Some("lexical".to_string()),
26645            None,
26646            false,
26647            false,
26648            true,
26649            0,
26650            false,
26651            false,
26652            false,
26653            false,
26654            false,
26655            false,
26656            false,
26657        );
26658
26659        assert!(result.is_ok());
26660
26661        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
26662        let summary = db.compute_changes(dir.path()).unwrap();
26663        assert_eq!(summary.modified, 1);
26664    }
26665
26666    #[test]
26667    fn search_cmd_autoindex_reports_lock_diagnostics_when_rollback_journal_blocks_writer() {
26668        let dir = setup_graph_index();
26669        std::thread::sleep(std::time::Duration::from_millis(50));
26670        std::fs::write(
26671            dir.path().join("main.rs"),
26672            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }",
26673        )
26674        .unwrap();
26675        let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/index.db"));
26676
26677        let err = cmd_search(
26678            "helper".to_string(),
26679            Some(dir.path().to_path_buf()),
26680            5,
26681            Some("lexical".to_string()),
26682            None,
26683            false,
26684            false,
26685            true,
26686            0,
26687            false,
26688            false,
26689            false,
26690            false,
26691            false,
26692            false,
26693            false,
26694        )
26695        .unwrap_err();
26696
26697        let msg = err.to_string();
26698        assert!(msg.contains("autoindexing index"));
26699        assert!(msg.contains("lock diagnostics:"));
26700        assert!(msg.contains("journal: present"));
26701        assert!(msg.contains("next: inspect the host for a wedged rollback-journal writer"));
26702    }
26703
26704    #[test]
26705    fn search_cmd_uses_ancestor_project_root_for_nested_paths() {
26706        let dir = setup_graph_index();
26707        let nested = dir.path().join("src/nested");
26708        std::fs::create_dir_all(&nested).unwrap();
26709
26710        let result = cmd_search(
26711            "helper".to_string(),
26712            Some(nested.clone()),
26713            5,
26714            Some("lexical".to_string()),
26715            None,
26716            false,
26717            false,
26718            true,
26719            0,
26720            false,
26721            false,
26722            false,
26723            false,
26724            false,
26725            false,
26726            false,
26727        );
26728
26729        assert!(result.is_ok());
26730        assert!(!nested.join(".tsift/index.db").exists());
26731    }
26732
26733    #[test]
26734    fn exact_search_returns_literal_matches() {
26735        let dir = tempfile::tempdir().unwrap();
26736        std::fs::write(dir.path().join("notes.txt"), "alpha\nclaudescore-3\nbeta\n").unwrap();
26737
26738        let response = run_exact_search_with_timeout(dir.path(), "claudescore-3", 5, 0).unwrap();
26739
26740        assert_eq!(response.strategy, "exact");
26741        assert_eq!(response.hits.len(), 1);
26742        assert!(response.hits[0].path.ends_with("notes.txt"));
26743        assert_eq!(response.hits[0].location.as_deref(), Some("line 2"));
26744        assert!(response.hits[0].snippet.contains("claudescore-3"));
26745    }
26746
26747    #[test]
26748    fn exact_search_skips_stale_index_precheck() {
26749        let dir = setup_graph_index();
26750        std::thread::sleep(std::time::Duration::from_millis(50));
26751        std::fs::write(
26752            dir.path().join("main.rs"),
26753            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n",
26754        )
26755        .unwrap();
26756
26757        let result = cmd_search(
26758            "println!(\"updated\")".to_string(),
26759            Some(dir.path().to_path_buf()),
26760            5,
26761            Some("exact".to_string()),
26762            None,
26763            false,
26764            false,
26765            false,
26766            0,
26767            false,
26768            false,
26769            false,
26770            false,
26771            false,
26772            false,
26773            false,
26774        );
26775
26776        assert!(result.is_ok());
26777    }
26778
26779    #[test]
26780    fn workspace_exact_search_does_not_require_shared_root_index() {
26781        let dir = setup_workspace();
26782        cmd_index(
26783            dir.path(),
26784            false,
26785            false,
26786            false,
26787            false,
26788            false,
26789            true,
26790            None,
26791            false,
26792            false,
26793            false,
26794            false,
26795            false,
26796            false,
26797        )
26798        .unwrap();
26799
26800        let result = cmd_search(
26801            "alpha_helper".to_string(),
26802            Some(dir.path().to_path_buf()),
26803            5,
26804            Some("exact".to_string()),
26805            None,
26806            false,
26807            false,
26808            false,
26809            0,
26810            false,
26811            false,
26812            false,
26813            false,
26814            false,
26815            false,
26816            false,
26817        );
26818
26819        assert!(result.is_ok());
26820        assert!(!dir.path().join(".tsift/index.db").exists());
26821    }
26822
26823    #[test]
26824    fn identifier_like_query_prefers_exact_search() {
26825        assert!(query_prefers_exact_search("claudescore-3"));
26826        assert!(query_prefers_exact_search("alpha_helper"));
26827        assert!(query_prefers_exact_search("src/main.rs"));
26828        assert!(query_prefers_exact_search("crate::module"));
26829        assert!(!query_prefers_exact_search("authenticate"));
26830        assert!(!query_prefers_exact_search("fn main"));
26831        assert!(!query_prefers_exact_search("."));
26832    }
26833
26834    #[test]
26835    fn resolve_search_strategy_auto_promotes_identifier_like_queries() {
26836        assert_eq!(resolve_search_strategy("claudescore-3", None), "exact");
26837        assert_eq!(resolve_search_strategy("authenticate", None), "lexical");
26838        assert_eq!(
26839            resolve_search_strategy("claudescore-3", Some("hybrid".to_string())),
26840            "hybrid"
26841        );
26842    }
26843
26844    #[test]
26845    fn workspace_identifier_like_search_auto_uses_exact_backend() {
26846        let dir = setup_workspace();
26847        cmd_index(
26848            dir.path(),
26849            false,
26850            false,
26851            false,
26852            false,
26853            false,
26854            true,
26855            None,
26856            false,
26857            false,
26858            false,
26859            false,
26860            false,
26861            false,
26862        )
26863        .unwrap();
26864
26865        let result = cmd_search(
26866            "alpha_helper".to_string(),
26867            Some(dir.path().to_path_buf()),
26868            5,
26869            None,
26870            None,
26871            false,
26872            false,
26873            false,
26874            0,
26875            false,
26876            false,
26877            false,
26878            false,
26879            false,
26880            false,
26881            false,
26882        );
26883
26884        assert!(result.is_ok());
26885        assert!(!dir.path().join(".tsift/index.db").exists());
26886    }
26887
26888    #[test]
26889    fn index_cmd_uses_ancestor_project_root_for_nested_paths() {
26890        let dir = setup_graph_index();
26891        let nested = dir.path().join("src/nested");
26892        std::fs::create_dir_all(&nested).unwrap();
26893        std::fs::write(nested.join("extra.rs"), "fn nested_helper() {}\n").unwrap();
26894
26895        let result = cmd_index(
26896            &nested, false, false, false, false, false, false, None, false, false, false, false,
26897            false, false,
26898        );
26899
26900        assert!(result.is_ok());
26901        assert!(dir.path().join(".tsift/index.db").exists());
26902        assert!(!nested.join(".tsift/index.db").exists());
26903    }
26904
26905    #[test]
26906    fn workspace_index_cmd_uses_ancestor_project_root_for_nested_paths() {
26907        let dir = setup_workspace();
26908        let nested = dir.path().join("docs/nested");
26909        std::fs::create_dir_all(&nested).unwrap();
26910
26911        let result = cmd_index(
26912            &nested, false, false, false, false, false, true, None, false, false, false, false,
26913            false, false,
26914        );
26915
26916        let cfg = config::Config::load(dir.path()).unwrap();
26917
26918        assert!(result.is_ok());
26919        assert!(cfg.db_path_for(dir.path(), "alpha").exists());
26920        assert!(cfg.db_path_for(dir.path(), "beta").exists());
26921    }
26922
26923    #[test]
26924    fn status_cmd_autoindexes_missing_workspace_scopes() {
26925        let dir = setup_workspace();
26926        let cfg = config::Config::load(dir.path()).unwrap();
26927        let alpha = config::Config::resolve_submodule(dir.path(), "alpha").unwrap();
26928        let alpha_db_path = cfg.db_path_for(dir.path(), &alpha.id);
26929        let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();
26930        alpha_db.apply_changes(&alpha.source_root).unwrap();
26931
26932        let beta_db_path = cfg.db_path_for(dir.path(), "beta");
26933        assert!(!beta_db_path.exists());
26934
26935        cmd_status(
26936            dir.path(),
26937            StatusCommandOptions {
26938                fix: false,
26939                no_fix: false,
26940                json_output: true,
26941                compact: false,
26942                pretty: false,
26943                terse: false,
26944                schema: false,
26945            },
26946        )
26947        .unwrap();
26948
26949        assert!(beta_db_path.exists());
26950        let report = status::check_status(dir.path()).unwrap();
26951        assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
26952    }
26953
26954    #[test]
26955    fn status_cmd_autoindexes_workspace_when_all_scopes_are_missing() {
26956        let dir = setup_workspace();
26957        let cfg = config::Config::load(dir.path()).unwrap();
26958
26959        cmd_status(
26960            dir.path(),
26961            StatusCommandOptions {
26962                fix: false,
26963                no_fix: false,
26964                json_output: true,
26965                compact: false,
26966                pretty: false,
26967                terse: false,
26968                schema: false,
26969            },
26970        )
26971        .unwrap();
26972
26973        assert!(cfg.db_path_for(dir.path(), "alpha").exists());
26974        assert!(cfg.db_path_for(dir.path(), "beta").exists());
26975        let report = status::check_status(dir.path()).unwrap();
26976        assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
26977    }
26978
26979    #[test]
26980    fn status_cmd_fix_refreshes_stale_index() {
26981        let dir = setup_graph_index();
26982        std::thread::sleep(std::time::Duration::from_millis(50));
26983        std::fs::write(
26984            dir.path().join("main.rs"),
26985            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
26986        )
26987        .unwrap();
26988
26989        let report = status::check_status(dir.path()).unwrap();
26990        assert!(matches!(report.index, status::IndexStatus::Stale { .. }));
26991
26992        cmd_status(
26993            dir.path(),
26994            StatusCommandOptions {
26995                fix: false,
26996                no_fix: false,
26997                json_output: true,
26998                compact: false,
26999                pretty: false,
27000                terse: false,
27001                schema: false,
27002            },
27003        )
27004        .unwrap();
27005
27006        let report = status::check_status(dir.path()).unwrap();
27007        assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
27008    }
27009
27010    #[test]
27011    fn status_cmd_reports_wal_snapshot_recovery_without_tsift_writer_lock() {
27012        let dir = setup_graph_index();
27013        let db_path = dir.path().join(".tsift/index.db");
27014        let _lock = hold_wal_database_lock(&db_path);
27015
27016        cmd_status(
27017            dir.path(),
27018            StatusCommandOptions {
27019                fix: false,
27020                no_fix: false,
27021                json_output: true,
27022                compact: false,
27023                pretty: false,
27024                terse: false,
27025                schema: false,
27026            },
27027        )
27028        .unwrap();
27029
27030        let report = status::check_status(dir.path()).unwrap();
27031        assert!(matches!(
27032            report.index,
27033            status::IndexStatus::Fresh {
27034                recovery: Some(index::ReadOnlyRecovery::SnapshotFallbackWal),
27035                ..
27036            }
27037        ));
27038        let locks = status::check_locks(dir.path(), None, None).unwrap();
27039        assert!(matches!(
27040            locks.writer_lock,
27041            status::WriterLockStatus::Absent { .. }
27042        ));
27043        assert!(locks.wal_sidecar.present || locks.shared_memory_sidecar.present);
27044        assert!(
27045            locks
27046                .recommended_action
27047                .contains("wedged writer holding live WAL sidecars")
27048        );
27049    }
27050
27051    #[test]
27052    fn locks_report_uses_ancestor_project_root_for_nested_paths() {
27053        let dir = setup_graph_index();
27054        let nested = dir.path().join("src/nested");
27055        std::fs::create_dir_all(&nested).unwrap();
27056
27057        let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
27058        let report = status::check_locks(&root, Some(&nested), None).unwrap();
27059
27060        assert_eq!(report.source_root, dir.path());
27061        assert_eq!(report.db_path, dir.path().join(".tsift/index.db"));
27062    }
27063
27064    #[test]
27065    fn workspace_locks_report_infers_scope_from_nested_path() {
27066        let dir = setup_workspace();
27067        cmd_index(
27068            dir.path(),
27069            false,
27070            false,
27071            false,
27072            false,
27073            false,
27074            true,
27075            None,
27076            false,
27077            false,
27078            false,
27079            false,
27080            false,
27081            false,
27082        )
27083        .unwrap();
27084        let nested = dir.path().join("src/alpha/nested");
27085        std::fs::create_dir_all(&nested).unwrap();
27086
27087        let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
27088        let report = status::check_locks(&root, Some(&nested), None).unwrap();
27089        let cfg = config::Config::load(dir.path()).unwrap();
27090
27091        assert_eq!(report.label, "submodule `alpha` index");
27092        assert_eq!(report.source_root, dir.path().join("src/alpha"));
27093        assert_eq!(report.db_path, cfg.db_path_for(dir.path(), "alpha"));
27094        assert_eq!(
27095            report.reindex_command,
27096            format!("tsift index --submodule alpha {}", dir.path().display())
27097        );
27098    }
27099
27100    #[test]
27101    fn scoped_search_cmd_autoindexes_stale_submodule_index_by_default() {
27102        let dir = setup_workspace();
27103        cmd_index(
27104            dir.path(),
27105            false,
27106            false,
27107            false,
27108            false,
27109            false,
27110            true,
27111            None,
27112            false,
27113            false,
27114            false,
27115            false,
27116            false,
27117            false,
27118        )
27119        .unwrap();
27120
27121        let alpha = dir.path().join("src/alpha/lib.rs");
27122        std::thread::sleep(std::time::Duration::from_millis(50));
27123        std::fs::write(
27124            &alpha,
27125            "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27126        )
27127        .unwrap();
27128
27129        let result = cmd_search(
27130            "alpha_helper".to_string(),
27131            Some(dir.path().to_path_buf()),
27132            5,
27133            Some("lexical".to_string()),
27134            Some("alpha".to_string()),
27135            false,
27136            false,
27137            true,
27138            0,
27139            false,
27140            false,
27141            false,
27142            false,
27143            false,
27144            false,
27145            false,
27146        );
27147
27148        assert!(result.is_ok());
27149
27150        let cfg = config::Config::load(dir.path()).unwrap();
27151        let db = index::IndexDb::open_read_only(&cfg.db_path_for(dir.path(), "alpha")).unwrap();
27152        let summary = db.compute_changes(&dir.path().join("src/alpha")).unwrap();
27153        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27154    }
27155
27156    #[test]
27157    fn scoped_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
27158        let dir = setup_workspace();
27159        cmd_index(
27160            dir.path(),
27161            false,
27162            false,
27163            false,
27164            false,
27165            false,
27166            true,
27167            None,
27168            false,
27169            false,
27170            false,
27171            false,
27172            false,
27173            false,
27174        )
27175        .unwrap();
27176
27177        let alpha = dir.path().join("src/alpha/lib.rs");
27178        std::thread::sleep(std::time::Duration::from_millis(50));
27179        std::fs::write(
27180            &alpha,
27181            "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27182        )
27183        .unwrap();
27184
27185        let cfg = config::Config::load(dir.path()).unwrap();
27186        let _lock = hold_rollback_journal_lock(&cfg.db_path_for(dir.path(), "alpha"));
27187
27188        let err = cmd_search(
27189            "alpha_helper".to_string(),
27190            Some(dir.path().to_path_buf()),
27191            5,
27192            Some("lexical".to_string()),
27193            Some("alpha".to_string()),
27194            false,
27195            false,
27196            false,
27197            0,
27198            false,
27199            false,
27200            false,
27201            false,
27202            false,
27203            false,
27204            false,
27205        )
27206        .unwrap_err();
27207
27208        assert!(err.to_string().contains("search aborted"));
27209        assert!(err.to_string().contains("submodule `alpha` index"));
27210        assert!(!err.to_string().contains("database is locked"));
27211    }
27212
27213    #[test]
27214    fn federated_search_cmd_autoindexes_stale_indexes_by_default() {
27215        let dir = setup_workspace();
27216        cmd_index(
27217            dir.path(),
27218            false,
27219            false,
27220            false,
27221            false,
27222            false,
27223            true,
27224            None,
27225            false,
27226            false,
27227            false,
27228            false,
27229            false,
27230            false,
27231        )
27232        .unwrap();
27233
27234        let alpha = dir.path().join("src/alpha/lib.rs");
27235        std::thread::sleep(std::time::Duration::from_millis(50));
27236        std::fs::write(
27237            &alpha,
27238            "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27239        )
27240        .unwrap();
27241
27242        let result = cmd_search(
27243            "alpha_helper".to_string(),
27244            Some(dir.path().to_path_buf()),
27245            5,
27246            Some("lexical".to_string()),
27247            None,
27248            true,
27249            false,
27250            true,
27251            0,
27252            false,
27253            false,
27254            false,
27255            false,
27256            false,
27257            false,
27258            false,
27259        );
27260
27261        assert!(result.is_ok());
27262
27263        let cfg = config::Config::load(dir.path()).unwrap();
27264        let db = index::IndexDb::open_read_only(&cfg.db_path_for(dir.path(), "alpha")).unwrap();
27265        let summary = db.compute_changes(&dir.path().join("src/alpha")).unwrap();
27266        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27267    }
27268
27269    #[test]
27270    fn federated_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
27271        let dir = setup_workspace();
27272        cmd_index(
27273            dir.path(),
27274            false,
27275            false,
27276            false,
27277            false,
27278            false,
27279            true,
27280            None,
27281            false,
27282            false,
27283            false,
27284            false,
27285            false,
27286            false,
27287        )
27288        .unwrap();
27289
27290        let alpha = dir.path().join("src/alpha/lib.rs");
27291        std::thread::sleep(std::time::Duration::from_millis(50));
27292        std::fs::write(
27293            &alpha,
27294            "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }",
27295        )
27296        .unwrap();
27297
27298        let cfg = config::Config::load(dir.path()).unwrap();
27299        let _lock = hold_rollback_journal_lock(&cfg.db_path_for(dir.path(), "alpha"));
27300
27301        let err = cmd_search(
27302            "alpha_helper".to_string(),
27303            Some(dir.path().to_path_buf()),
27304            5,
27305            Some("lexical".to_string()),
27306            None,
27307            true,
27308            false,
27309            false,
27310            30,
27311            false,
27312            false,
27313            false,
27314            false,
27315            false,
27316            false,
27317            false,
27318        )
27319        .unwrap_err();
27320
27321        assert!(err.to_string().contains("stale"));
27322        assert!(err.to_string().contains("submodule `alpha` index"));
27323        assert!(!err.to_string().contains("database is locked"));
27324    }
27325
27326    #[test]
27327    fn workspace_search_cmd_requires_explicit_target_without_shared_root_index() {
27328        let dir = setup_workspace();
27329        cmd_index(
27330            dir.path(),
27331            false,
27332            false,
27333            false,
27334            false,
27335            false,
27336            true,
27337            None,
27338            false,
27339            false,
27340            false,
27341            false,
27342            false,
27343            false,
27344        )
27345        .unwrap();
27346
27347        let err = cmd_search(
27348            "alpha_helper".to_string(),
27349            Some(dir.path().to_path_buf()),
27350            5,
27351            Some("lexical".to_string()),
27352            None,
27353            false,
27354            false,
27355            true,
27356            0,
27357            false,
27358            false,
27359            false,
27360            false,
27361            false,
27362            false,
27363            false,
27364        )
27365        .unwrap_err();
27366
27367        assert_workspace_search_requires_explicit_target(err);
27368        assert!(!dir.path().join(".tsift/index.db").exists());
27369    }
27370
27371    #[test]
27372    fn workspace_search_cmd_infers_scope_from_nested_path() {
27373        let dir = setup_workspace();
27374        cmd_index(
27375            dir.path(),
27376            false,
27377            false,
27378            false,
27379            false,
27380            false,
27381            true,
27382            None,
27383            false,
27384            false,
27385            false,
27386            false,
27387            false,
27388            false,
27389        )
27390        .unwrap();
27391        let nested = dir.path().join("src/alpha/nested");
27392        std::fs::create_dir_all(&nested).unwrap();
27393
27394        let result = cmd_search(
27395            "alpha_helper".to_string(),
27396            Some(nested),
27397            5,
27398            Some("lexical".to_string()),
27399            None,
27400            false,
27401            false,
27402            false,
27403            0,
27404            false,
27405            false,
27406            false,
27407            false,
27408            false,
27409            false,
27410            false,
27411        );
27412
27413        assert!(result.is_ok());
27414    }
27415
27416    #[test]
27417    fn resolve_query_db_path_infers_matching_duplicate_leaf_scope_from_nested_path() {
27418        let dir = setup_workspace_with_duplicate_leaf_names();
27419        cmd_index(
27420            dir.path(),
27421            false,
27422            false,
27423            false,
27424            false,
27425            false,
27426            true,
27427            None,
27428            false,
27429            false,
27430            false,
27431            false,
27432            false,
27433            false,
27434        )
27435        .unwrap();
27436        let nested = dir.path().join("vendor/foo/nested");
27437        std::fs::create_dir_all(&nested).unwrap();
27438
27439        let root = lint::resolve_project_root_or_canonical_path(&nested).unwrap();
27440        let db_path = resolve_query_db_path(&root, &nested, None).unwrap();
27441        let cfg = config::Config::load(dir.path()).unwrap();
27442
27443        assert_eq!(db_path, cfg.db_path_for(dir.path(), "vendor/foo"));
27444    }
27445
27446    #[test]
27447    fn graph_cmd_succeeds_while_writer_lock_is_held() {
27448        let dir = setup_graph_index();
27449        let db_path = dir.path().join(".tsift/index.db");
27450        let _lock = hold_write_lock(&db_path);
27451
27452        let result = cmd_graph(
27453            "main",
27454            dir.path(),
27455            false,
27456            false,
27457            None,
27458            20,
27459            false,
27460            true,
27461            false,
27462            false,
27463            false,
27464            false,
27465            false,
27466            TagpathSearchOpts::default(),
27467        );
27468
27469        assert!(result.is_ok());
27470    }
27471
27472    #[test]
27473    fn graph_cmd_autoindexes_stale_index_by_default() {
27474        let dir = setup_graph_index();
27475        std::thread::sleep(std::time::Duration::from_millis(50));
27476        std::fs::write(
27477            dir.path().join("main.rs"),
27478            "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
27479        )
27480        .unwrap();
27481
27482        let result = cmd_graph(
27483            "helper",
27484            dir.path(),
27485            true,
27486            false,
27487            None,
27488            20,
27489            false,
27490            true,
27491            false,
27492            false,
27493            false,
27494            false,
27495            false,
27496            TagpathSearchOpts::default(),
27497        );
27498
27499        assert!(result.is_ok());
27500        let db = index::IndexDb::open_read_only(&dir.path().join(".tsift/index.db")).unwrap();
27501        let summary = db.compute_changes(dir.path()).unwrap();
27502        assert_eq!(summary.new + summary.modified + summary.deleted, 0);
27503    }
27504
27505    #[test]
27506    fn graph_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
27507        let dir = setup_graph_index();
27508        let db_path = dir.path().join(".tsift/index.db");
27509        let _lock = hold_rollback_journal_lock(&db_path);
27510
27511        let result = cmd_graph(
27512            "main",
27513            dir.path(),
27514            false,
27515            false,
27516            None,
27517            20,
27518            false,
27519            true,
27520            false,
27521            false,
27522            false,
27523            false,
27524            false,
27525            TagpathSearchOpts::default(),
27526        );
27527
27528        assert!(result.is_ok());
27529    }
27530
27531    #[test]
27532    fn graph_cmd_uses_ancestor_project_root_for_nested_paths() {
27533        let dir = setup_graph_index();
27534        let nested = dir.path().join("src/nested");
27535        std::fs::create_dir_all(&nested).unwrap();
27536
27537        let result = cmd_graph(
27538            "helper",
27539            &nested,
27540            true,
27541            false,
27542            None,
27543            20,
27544            false,
27545            false,
27546            false,
27547            false,
27548            false,
27549            false,
27550            false,
27551            TagpathSearchOpts::default(),
27552        );
27553
27554        assert!(result.is_ok());
27555    }
27556
27557    #[test]
27558    fn communities_cmd_succeeds_while_writer_lock_is_held() {
27559        let dir = setup_graph_index();
27560        let _lock = hold_writer_lock(&dir.path().join(".tsift/index.lock"));
27561
27562        let result = cmd_communities(
27563            dir.path(),
27564            None,
27565            1,
27566            10,
27567            false,
27568            false,
27569            false,
27570            false,
27571            false,
27572            false,
27573            TagpathSearchOpts::default(),
27574        );
27575
27576        assert!(result.is_ok());
27577    }
27578
27579    #[test]
27580    fn communities_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
27581        let dir = setup_graph_index();
27582        let db_path = dir.path().join(".tsift/index.db");
27583        let _lock = hold_rollback_journal_lock(&db_path);
27584
27585        let result = cmd_communities(
27586            dir.path(),
27587            None,
27588            1,
27589            10,
27590            false,
27591            false,
27592            false,
27593            false,
27594            false,
27595            false,
27596            TagpathSearchOpts::default(),
27597        );
27598
27599        assert!(result.is_ok());
27600    }
27601
27602    #[test]
27603    fn lint_finds_entities_from_project_root_index_db() {
27604        let dir = tempfile::tempdir().unwrap();
27605        std::fs::write(dir.path().join("main.rs"), "fn alpha_helper() {}\n").unwrap();
27606        std::fs::write(
27607            dir.path().join("README.md"),
27608            "alpha_helper should be backticked.\n",
27609        )
27610        .unwrap();
27611        cmd_index(
27612            dir.path(),
27613            false,
27614            false,
27615            false,
27616            false,
27617            false,
27618            false,
27619            None,
27620            false,
27621            false,
27622            false,
27623            false,
27624            false,
27625            false,
27626        )
27627        .unwrap();
27628
27629        let root = lint::find_project_root_for_path(&dir.path().join("README.md"))
27630            .unwrap()
27631            .unwrap();
27632        let entities = lint::collect_entities_from_index_path(&root).unwrap();
27633        let result = lint::lint_markdown(&dir.path().join("README.md"), &entities).unwrap();
27634
27635        assert!(
27636            result
27637                .annotations
27638                .iter()
27639                .any(|ann| ann.text == "alpha_helper")
27640        );
27641    }
27642
27643    // --- search timeout ---
27644
27645    #[test]
27646    fn search_direct_runs_ok() {
27647        let dir = tempfile::tempdir().unwrap();
27648        let search_dir = dir.path().to_path_buf();
27649        let cache_dir = search_dir.join(".tsift/search-cache");
27650        std::fs::write(search_dir.join("test.rs"), "fn main() {}").unwrap();
27651        let result = run_sift_search(&search_dir, &cache_dir, "main", 1, "lexical");
27652        assert!(result.is_ok(), "direct search should succeed");
27653        assert!(
27654            cache_dir.exists(),
27655            "search should create the configured cache dir"
27656        );
27657    }
27658
27659    #[test]
27660    fn search_timeout_zero_disables_timeout() {
27661        let dir = tempfile::tempdir().unwrap();
27662        let search_dir = dir.path().to_path_buf();
27663        let cache_dir = search_dir.join(".tsift/search-cache");
27664        std::fs::write(search_dir.join("test.rs"), "fn main() {}").unwrap();
27665        let result = run_search_with_timeout(&search_dir, &cache_dir, "main", 1, 0, "lexical", &[]);
27666        assert!(result.is_ok(), "timeout=0 should still work (no timeout)");
27667        assert!(
27668            cache_dir.exists(),
27669            "timeout=0 should keep using the stable search cache dir"
27670        );
27671    }
27672
27673    #[test]
27674    fn search_timeout_message_reports_missing_index_as_rebuild_needed() {
27675        let dir = tempfile::tempdir().unwrap();
27676        std::fs::write(dir.path().join("main.rs"), "fn main() {}\n").unwrap();
27677        cmd_index(
27678            dir.path(),
27679            false,
27680            false,
27681            false,
27682            false,
27683            false,
27684            false,
27685            None,
27686            false,
27687            false,
27688            false,
27689            false,
27690            false,
27691            false,
27692        )
27693        .unwrap();
27694        let db_path = dir.path().join(".tsift/index.db");
27695        std::fs::remove_file(&db_path).unwrap();
27696        let search_target = SearchIndexTarget {
27697            label: "index".to_string(),
27698            db_path,
27699            source_root: dir.path().to_path_buf(),
27700            scope_name: None,
27701            reindex_cmd: format!("tsift index {}", dir.path().display()),
27702        };
27703
27704        let message = search_timeout_message(1, "lexical", &[search_target]).unwrap();
27705
27706        assert!(message.contains("timed out after 1s"));
27707        assert!(message.contains("index is missing"));
27708        assert!(message.contains("Run `tsift index"));
27709        assert!(!message.contains("search root looks fresh"));
27710    }
27711
27712    #[test]
27713    fn search_worker_output_path_uses_json_suffix() {
27714        let path = next_search_worker_output_path();
27715        assert!(path.extension().is_some_and(|ext| ext == "json"));
27716    }
27717
27718    // --- index quiet mode ---
27719
27720    #[test]
27721    fn index_quiet_suppresses_file_list() {
27722        let dir = setup_graph_index();
27723        let result = cmd_index(
27724            dir.path(),
27725            false,
27726            true,
27727            false,
27728            false,
27729            true,
27730            false,
27731            None,
27732            false,
27733            false,
27734            false,
27735            false,
27736            false,
27737            false,
27738        );
27739        assert!(result.is_ok());
27740    }
27741
27742    #[test]
27743    fn index_exit_code_implies_quiet() {
27744        let dir = setup_graph_index();
27745        let result = cmd_index(
27746            dir.path(),
27747            false,
27748            true,
27749            false,
27750            false,
27751            false,
27752            false,
27753            None,
27754            false,
27755            false,
27756            false,
27757            false,
27758            false,
27759            false,
27760        );
27761        assert!(result.is_ok());
27762    }
27763
27764    #[test]
27765    fn index_quiet_json_omits_changes() {
27766        let dir = setup_graph_index();
27767        let result = cmd_index(
27768            dir.path(),
27769            false,
27770            true,
27771            false,
27772            false,
27773            true,
27774            false,
27775            None,
27776            true,
27777            false,
27778            false,
27779            false,
27780            false,
27781            false,
27782        );
27783        assert!(result.is_ok());
27784    }
27785
27786    #[test]
27787    fn cli_workflow_defaults_to_search_topic() {
27788        let cli = parse_cli(["tsift", "workflow"]);
27789        match cli.command {
27790            Some(Commands::Workflow { topic, json }) => {
27791                assert_eq!(topic, "search");
27792                assert!(!json);
27793            }
27794            _ => panic!("expected Workflow command"),
27795        }
27796    }
27797
27798    #[test]
27799    fn search_workflow_recipe_preserves_handles_across_expansions() {
27800        let recipe = workflow::search_workflow_recipe();
27801        let step_names: Vec<&str> = recipe.steps.iter().map(|step| step.name).collect();
27802        assert_eq!(
27803            step_names,
27804            vec![
27805                "exact-anchor",
27806                "semantic-search",
27807                "explain-symbol",
27808                "summarize-selection",
27809                "digest-expansion"
27810            ]
27811        );
27812        assert!(
27813            recipe
27814                .handle_contract
27815                .iter()
27816                .any(|item| item.contains("originating command"))
27817        );
27818        assert!(
27819            recipe.steps[1]
27820                .preserves
27821                .iter()
27822                .any(|item| item.contains("sfam-*"))
27823        );
27824        assert!(
27825            recipe.steps[2]
27826                .preserves
27827                .iter()
27828                .any(|item| item.contains("ecall-*"))
27829        );
27830        assert!(
27831            recipe.steps[4]
27832                .preserves
27833                .iter()
27834                .any(|item| item.contains("artifact handles"))
27835        );
27836    }
27837
27838    // --- JSON compact vs pretty ---
27839
27840    #[test]
27841    fn to_json_compact_default() {
27842        let val = serde_json::json!({"a": 1, "b": [2, 3]});
27843        let compact = to_json(&val, false, false).unwrap();
27844        assert!(!compact.contains('\n'));
27845        assert!(
27846            compact.contains("\"a\":1")
27847                || compact.contains("\"a\": 1")
27848                || compact.contains("\"a\":")
27849        );
27850    }
27851
27852    #[test]
27853    fn to_json_pretty_indents() {
27854        let val = serde_json::json!({"a": 1, "b": [2, 3]});
27855        let pretty = to_json(&val, true, false).unwrap();
27856        assert!(pretty.contains('\n'));
27857        assert!(pretty.contains("  "));
27858    }
27859
27860    #[test]
27861    fn to_json_compact_is_shorter() {
27862        let val =
27863            serde_json::json!({"name": "test", "items": [1, 2, 3], "nested": {"key": "value"}});
27864        let compact = to_json(&val, false, false).unwrap();
27865        let pretty = to_json(&val, true, false).unwrap();
27866        assert!(compact.len() < pretty.len());
27867    }
27868
27869    #[test]
27870    fn terse_renames_keys() {
27871        let val =
27872            serde_json::json!({"caller_file": "a.rs", "caller_name": "main", "call_site_line": 10});
27873        let result = to_json(&val, false, true).unwrap();
27874        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27875        assert!(parsed["_s"].is_object());
27876        let d = &parsed["d"];
27877        assert_eq!(d["cf"], "a.rs");
27878        assert_eq!(d["cn"], "main");
27879        assert_eq!(d["csl"], 10);
27880    }
27881
27882    #[test]
27883    fn terse_schema_only_includes_used_keys() {
27884        let val = serde_json::json!({"name": "test", "score": 0.5});
27885        let result = to_json(&val, false, true).unwrap();
27886        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27887        let schema = parsed["_s"].as_object().unwrap();
27888        assert_eq!(schema["n"], "name");
27889        assert_eq!(schema["sc"], "score");
27890        assert!(!schema.contains_key("cf"));
27891    }
27892
27893    #[test]
27894    fn terse_nested_arrays() {
27895        let val = serde_json::json!({"callers": [{"caller_name": "a", "caller_file": "b.rs", "caller_line": 1, "callee_name": "c", "call_site_line": 2}]});
27896        let result = to_json(&val, false, true).unwrap();
27897        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27898        let d = &parsed["d"];
27899        assert_eq!(d["crs"][0]["cn"], "a");
27900        assert_eq!(d["crs"][0]["cf"], "b.rs");
27901    }
27902
27903    #[test]
27904    fn terse_preserves_unknown_keys() {
27905        let val = serde_json::json!({"custom_field": "value", "name": "test"});
27906        let result = to_json(&val, false, true).unwrap();
27907        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27908        let d = &parsed["d"];
27909        assert_eq!(d["custom_field"], "value");
27910        assert_eq!(d["n"], "test");
27911    }
27912
27913    // --- ultra-terse ---
27914
27915    #[test]
27916    fn ultra_terse_strips_properties_from_graph_nodes() {
27917        let val = serde_json::json!({
27918            "nodes": [{"id": "fn:main", "kind": "fn", "name": "main", "properties": {"line": "10"}}]
27919        });
27920        let result = to_json_schema(&val, false, true, true, false).unwrap();
27921        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27922        let node = &parsed["d"]["nodes"][0];
27923        assert_eq!(node["id"], "fn:main");
27924        assert_eq!(node["k"], "fn");
27925        assert_eq!(node["n"], "main");
27926        assert!(node.get("properties").is_none());
27927    }
27928
27929    #[test]
27930    fn ultra_terse_strips_properties_from_graph_edges() {
27931        let val = serde_json::json!({
27932            "edges": [{"from_id": "a", "to_id": "b", "kind": "calls", "properties": {"weight": "2"}}]
27933        });
27934        let result = to_json_schema(&val, false, true, true, false).unwrap();
27935        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
27936        let edge = &parsed["d"]["edges"][0];
27937        assert_eq!(edge["from_id"], "a");
27938        assert_eq!(edge["to_id"], "b");
27939        assert_eq!(edge["k"], "c");
27940        assert!(edge.get("properties").is_none());
27941    }
27942
// Sends one edge per kind through `to_json_schema` and inspects the value that
// comes back under `k`: nine kinds are expected as short codes, while
// `unknown_edge` is expected back unchanged.
#[test]
fn ultra_terse_abbreviates_edge_kinds() {
    // (to_id, kind sent in, value expected under `k`)
    let cases = [
        ("b", "defines", "d"),
        ("c", "contains", "ct"),
        ("d", "imports", "i"),
        ("e", "mentions", "m"),
        ("f", "semantic_relation", "sr"),
        ("g", "belongs_to", "bt"),
        ("h", "scopes_context", "sctx"),
        ("i", "uses", "u"),
        ("j", "parent", "p"),
        ("k", "unknown_edge", "unknown_edge"),
    ];
    let edge_inputs: Vec<serde_json::Value> = cases
        .iter()
        .map(|(to_id, kind, _)| serde_json::json!({"from_id": "a", "to_id": to_id, "kind": kind}))
        .collect();
    let input = serde_json::json!({ "edges": edge_inputs });

    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let emitted = decoded["d"]["edges"].as_array().unwrap();

    // Edges are checked positionally, in the order they were supplied.
    for (position, (_, _, expected_code)) in cases.iter().enumerate() {
        assert_eq!(emitted[position]["k"], *expected_code);
    }
}
27973
// An edge carrying `provenance` and `freshness` is expected to come back
// without either key, with its `calls` kind reported as `c` under `k`.
#[test]
fn ultra_terse_strips_provenance_freshness_from_edges() {
    let input = serde_json::json!({
        "edges": [{
            "from_id": "a",
            "to_id": "b",
            "kind": "calls",
            "provenance": [{"source": "tsift"}],
            "freshness": {"observed_at_unix": 1234567890}
        }]
    });

    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let first_edge = &decoded["d"]["edges"][0];

    for dropped_key in ["provenance", "freshness"] {
        assert!(first_edge.get(dropped_key).is_none());
    }
    assert_eq!(first_edge["k"], "c");
}
27986
// A 120-character `snippet` is expected back under `sn` as an 80-byte string
// that ends in "...".
#[test]
fn ultra_terse_truncates_snippets() {
    let input = serde_json::json!({"snippet": "x".repeat(120)});

    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let shortened = decoded["d"]["sn"].as_str().unwrap();
    assert_eq!(shortened.len(), 80);
    assert!(shortened.ends_with("..."));
}
27997
// A 100-character `snippet` is expected back under the `sn` key as an 80-byte
// string that ends in "...".
// NOTE(review): the input key here is `snippet`, not `sn`; confirm whether an
// already-abbreviated input key was meant to be covered as well.
#[test]
fn ultra_terse_truncates_abbreviated_snippet_key() {
    let input = serde_json::json!({"snippet": "y".repeat(100)});

    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let shortened = decoded["d"]["sn"].as_str().unwrap();
    assert_eq!(shortened.len(), 80);
    assert!(shortened.ends_with("..."));
}
28008
// Of the nine coverage fields supplied, `mode`, `total_sector_count` and
// `dirty_sector_count` are expected to survive under `d`; the other six are
// expected to be absent.
#[test]
fn ultra_terse_compacts_coverage_snapshot() {
    let input = serde_json::json!({
        "mode": "incremental",
        "total_sector_count": 10,
        "dirty_sector_count": 2,
        "active_rebuild": Some("rebuild-1"),
        "completed_dirty_sector_count": 1,
        "mounted_sector_count": 8,
        "rebuilding_sector_count": 1,
        "resumed_sector_count": 3,
        "reused_sector_count": 5
    });

    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let data = &decoded["d"];

    // Fields that must be kept, with their original values.
    assert_eq!(data["mode"], "incremental");
    for (kept_key, kept_value) in [("total_sector_count", 10), ("dirty_sector_count", 2)] {
        assert_eq!(data[kept_key], kept_value);
    }

    // Fields that must be dropped.
    for dropped_key in [
        "active_rebuild",
        "completed_dirty_sector_count",
        "mounted_sector_count",
        "rebuilding_sector_count",
        "resumed_sector_count",
        "reused_sector_count",
    ] {
        assert!(data.get(dropped_key).is_none());
    }
}
28035
// The snippet "short text" is expected back verbatim under `sn`.
#[test]
fn ultra_terse_short_snippet_unchanged() {
    let input = serde_json::json!({"snippet": "short text"});
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let snippet_out = &decoded["d"]["sn"];
    assert_eq!(*snippet_out, "short text");
}
28043
// A `properties` object nested under `config` is expected to still be an
// object in the output.
#[test]
fn ultra_terse_non_graph_object_properties_preserved() {
    let input = serde_json::json!({"config": {"properties": {"a": "1"}}});
    let rendered = to_json_schema(&input, false, true, true, false).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let nested_properties = &decoded["d"]["config"]["properties"];
    assert!(nested_properties.is_object());
}
28051
28052    // --- schema-then-values ---
28053
// Two objects with identical keys are expected to be rewritten as a column
// list under `_c` plus one value row per object under `_r`. Column positions
// are looked up by name, so the test does not depend on column order.
#[test]
fn schema_converts_homogeneous_arrays() {
    let input = serde_json::json!({"symbols": [
        {"name": "foo", "kind": "fn", "line": 10},
        {"name": "bar", "kind": "fn", "line": 20}
    ]});
    let rendered = to_json_schema(&input, false, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let table = &decoded["symbols"];

    let header: Vec<&str> = table["_c"]
        .as_array()
        .unwrap()
        .iter()
        .map(|cell| cell.as_str().unwrap())
        .collect();
    let first_row = table["_r"][0].as_array().unwrap();
    let second_row = table["_r"][1].as_array().unwrap();

    let column_of = |wanted: &str| header.iter().position(|name| *name == wanted).unwrap();
    let name_at = column_of("name");
    let kind_at = column_of("kind");
    let line_at = column_of("line");

    for (row, (name, kind, line)) in [
        (first_row, ("foo", "fn", 10)),
        (second_row, ("bar", "fn", 20)),
    ] {
        assert_eq!(row[name_at], name);
        assert_eq!(row[kind_at], kind);
        assert_eq!(row[line_at], line);
    }
}
28081
// A one-element array of objects is expected to stay a plain array with its
// element intact.
#[test]
fn schema_skips_short_arrays() {
    let input = serde_json::json!({"items": [{"name": "only"}]});
    let rendered = to_json_schema(&input, false, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let items = &decoded["items"];
    assert!(items.is_array());
    assert_eq!(items[0]["name"], "only");
}
28090
// Objects with different key sets are expected to stay a plain array, with
// the first object's `a` still readable.
#[test]
fn schema_skips_heterogeneous_arrays() {
    let input = serde_json::json!({"items": [{"a": 1}, {"b": 2}]});
    let rendered = to_json_schema(&input, false, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let items = &decoded["items"];
    assert!(items.is_array());
    assert_eq!(items[0]["a"], 1);
}
28099
// With this flag combination the output is expected to carry an `_s` object,
// and the `callers` array is expected under `d.crs` as `_c`/`_r` with the
// columns named `cn` and `cf`.
#[test]
fn schema_with_terse_combines() {
    let input = serde_json::json!({"callers": [
        {"caller_name": "a", "caller_file": "x.rs"},
        {"caller_name": "b", "caller_file": "y.rs"}
    ]});
    let rendered = to_json_schema(&input, false, true, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    assert!(decoded["_s"].is_object());
    let callers_table = &decoded["d"]["crs"];
    assert!(callers_table["_c"].is_array());
    assert!(callers_table["_r"].is_array());

    let header: Vec<&str> = callers_table["_c"]
        .as_array()
        .unwrap()
        .iter()
        .map(|cell| cell.as_str().unwrap())
        .collect();
    let first_row = callers_table["_r"][0].as_array().unwrap();

    // Resolve columns by name so the check is independent of column order.
    let column_of = |wanted: &str| header.iter().position(|name| *name == wanted).unwrap();
    let name_at = column_of("cn");
    let file_at = column_of("cf");
    assert_eq!(first_row[name_at], "a");
    assert_eq!(first_row[file_at], "x.rs");
}
28125
// An array of plain strings is expected to come back unchanged.
#[test]
fn schema_preserves_non_object_arrays() {
    let input = serde_json::json!({"tags": ["a", "b", "c"]});
    let rendered = to_json_schema(&input, false, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    let expected_tags = serde_json::json!(["a", "b", "c"]);
    assert_eq!(decoded["tags"], expected_tags);
}
28133
// `--schema` placed before the subcommand should set `schema` and still
// resolve to the Search command.
#[test]
fn cli_accepts_global_schema_flag() {
    let parsed = parse_cli(["tsift", "--schema", "search", "test"]);
    assert!(parsed.schema);

    let is_search = matches!(parsed.command, Some(Commands::Search { .. }));
    assert!(is_search);
}
28140
// `--envelope` placed before the subcommand should set `envelope` and still
// resolve to the ContextPack command.
#[test]
fn cli_accepts_global_envelope_flag() {
    let argv = ["tsift", "--envelope", "context-pack", "tasks/software/tsift.md"];
    let parsed = parse_cli(argv);
    assert!(parsed.envelope);

    let is_context_pack = matches!(parsed.command, Some(Commands::ContextPack { .. }));
    assert!(is_context_pack);
}
28152
// A bare `locks` invocation should resolve to the Locks command.
#[test]
fn cli_accepts_locks_command() {
    let parsed = parse_cli(["tsift", "locks"]);
    let is_locks = matches!(parsed.command, Some(Commands::Locks { .. }));
    assert!(is_locks);
}
28158
// `memory budget-guard` should capture the `--file` path, the
// `--budget-tokens` value and the `--json` switch.
#[test]
fn cli_parses_memory_budget_guard_command() {
    let argv = [
        "tsift",
        "memory",
        "budget-guard",
        "--file",
        "tool.log",
        "--budget-tokens",
        "1000",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::Memory {
        command:
            crate::cli::MemoryCommand::BudgetGuard {
                file,
                budget_tokens,
                json,
                ..
            },
    }) = parsed.command
    else {
        panic!("expected memory budget-guard command");
    };

    assert_eq!(file.as_deref(), Some(std::path::Path::new("tool.log")));
    assert_eq!(budget_tokens, 1000);
    assert!(json);
}
28188
// `memory capture-agent-doc-closeout` should capture the positional path and
// each of its named options. The pattern lists every field without `..`, so
// adding a field to the variant makes this test fail to compile.
#[test]
fn cli_parses_memory_capture_agent_doc_closeout_command() {
    let argv = [
        "tsift",
        "memory",
        "capture-agent-doc-closeout",
        ".",
        "--session-path",
        "tasks/software/tsift.md",
        "--prompt-target",
        "do [#tsiftmemhooks]",
        "--response-summary",
        "wired closeout capture",
        "--commit-hash",
        "abc123",
        "--session-check-status",
        "clean",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::Memory {
        command:
            crate::cli::MemoryCommand::CaptureAgentDocCloseout {
                path,
                session_path,
                prompt_target,
                response_summary,
                commit_hash,
                session_check_status,
                json,
            },
    }) = parsed.command
    else {
        panic!("expected memory capture-agent-doc-closeout command");
    };

    assert_eq!(path, std::path::PathBuf::from("."));
    assert_eq!(
        session_path,
        std::path::PathBuf::from("tasks/software/tsift.md")
    );
    assert_eq!(prompt_target, "do [#tsiftmemhooks]");
    assert_eq!(response_summary, "wired closeout capture");
    assert_eq!(commit_hash.as_deref(), Some("abc123"));
    assert_eq!(session_check_status, "clean");
    assert!(json);
}
28235
// `locks --scope alpha` should expose "alpha" through the `scope` field.
#[test]
fn cli_locks_accepts_scope_flag() {
    let parsed = parse_cli(["tsift", "locks", "--scope", "alpha"]);

    let Some(Commands::Locks { scope, .. }) = parsed.command else {
        panic!("expected Locks command");
    };
    assert_eq!(scope.as_deref(), Some("alpha"));
}
28246
// `search --autoindex` should set `autoindex` and leave `no_autoindex` unset.
#[test]
fn cli_search_accepts_autoindex_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--autoindex"]);

    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert!(autoindex);
    assert!(!no_autoindex);
}
28262
// `search --exact` should set `exact` and leave `strategy` as `None`.
#[test]
fn cli_search_accepts_exact_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--exact"]);

    let Some(Commands::Search {
        exact, strategy, ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert!(exact);
    assert!(strategy.is_none());
}
28276
// `diff-digest --json .` should set `json` and the path, leave `cached` and
// `revision` unset, and report 25 for `max_parsed_files`.
#[test]
fn cli_parses_diff_digest_command() {
    let parsed = parse_cli(["tsift", "diff-digest", "--json", "."]);

    // Every field is named (no `..`) so a new field forces this test to be revisited.
    let Some(Commands::DiffDigest {
        json,
        path,
        cached,
        revision,
        max_parsed_files,
    }) = parsed.command
    else {
        panic!("expected DiffDigest command");
    };

    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert!(!cached);
    assert!(revision.is_none());
    assert_eq!(max_parsed_files, 25);
}
28297
// Passing `--cached` together with `--revision` must be rejected, and the
// error text must mention both flags.
#[test]
fn cli_rejects_conflicting_diff_digest_modes() {
    let argv = ["tsift", "diff-digest", "--cached", "--revision", "HEAD", "."];

    let Err(err) = try_parse_cli(argv) else {
        panic!("expected conflicting diff-digest modes to fail");
    };

    let message = err.to_string();
    assert!(message.contains("--cached"));
    assert!(message.contains("--revision"));
}
28315
// `test-digest` should capture `--path`, `--input`, `--runner` and `--json`.
#[test]
fn cli_parses_test_digest_command() {
    let argv = [
        "tsift",
        "test-digest",
        "--path",
        ".",
        "--input",
        "target/test.log",
        "--runner",
        "cargo",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::TestDigest {
        json,
        path,
        input,
        runner,
    }) = parsed.command
    else {
        panic!("expected TestDigest command");
    };

    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/test.log")));
    assert_eq!(runner.as_deref(), Some("cargo"));
}
28344
// `log-digest` should capture `--path`, `--input` and `--json`.
#[test]
fn cli_parses_log_digest_command() {
    let argv = [
        "tsift",
        "log-digest",
        "--path",
        ".",
        "--input",
        "target/build.log",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::LogDigest { json, path, input }) = parsed.command else {
        panic!("expected LogDigest command");
    };

    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/build.log")));
}
28365
// `metric-digest` should capture the input and baseline paths, the metric
// name lists, the `--history` and `--top` numbers, and `--json`.
#[test]
fn cli_parses_metric_digest_command() {
    let argv = [
        "tsift",
        "metric-digest",
        "--input",
        "target/runs.json",
        "--baseline",
        "target/prior.json",
        "--metric",
        "session_mae",
        "--lower-is-better",
        "session_mae",
        "--history",
        "4",
        "--top",
        "2",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::MetricDigest {
        input,
        baseline,
        metrics,
        lower_is_better,
        history,
        top,
        json,
        ..
    }) = parsed.command
    else {
        panic!("expected MetricDigest command");
    };

    assert!(json);
    assert_eq!(input, Some(PathBuf::from("target/runs.json")));
    assert_eq!(baseline, Some(PathBuf::from("target/prior.json")));
    assert_eq!(metrics, vec!["session_mae"]);
    assert_eq!(lower_is_better, vec!["session_mae"]);
    assert_eq!(history, 4);
    assert_eq!(top, 2);
}
28407
// `dci-benchmark` should capture the `--fixture` path and `--json`.
#[test]
fn cli_parses_dci_benchmark_command() {
    let argv = [
        "tsift",
        "dci-benchmark",
        "--fixture",
        "fixtures/dci-search-benchmark.json",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::DciBenchmark { fixture, json }) = parsed.command else {
        panic!("expected DciBenchmark command");
    };

    assert!(json);
    assert_eq!(fixture, PathBuf::from("fixtures/dci-search-benchmark.json"));
}
28425
// `session-digest` should capture `--path`, `--input`, `--source` and `--json`.
#[test]
fn cli_parses_session_digest_command() {
    let argv = [
        "tsift",
        "session-digest",
        "--path",
        ".",
        "--input",
        "target/session.md",
        "--source",
        "markdown",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::SessionDigest {
        json,
        path,
        input,
        source,
    }) = parsed.command
    else {
        panic!("expected SessionDigest command");
    };

    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/session.md")));
    assert_eq!(source.as_deref(), Some("markdown"));
}
28454
// `session-cost` should capture `--input`, `--source` and `--json`.
#[test]
fn cli_parses_session_cost_command() {
    let argv = [
        "tsift",
        "session-cost",
        "--input",
        "target/session.jsonl",
        "--source",
        "codex-jsonl",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::SessionCost {
        json,
        input,
        source,
    }) = parsed.command
    else {
        panic!("expected SessionCost command");
    };

    assert!(json);
    assert_eq!(input, Some(PathBuf::from("target/session.jsonl")));
    assert_eq!(source.as_deref(), Some("codex-jsonl"));
}
28479
// `session-review` should capture the positional path plus `--next-context`
// and `--json`.
#[test]
fn cli_parses_session_review_command() {
    let argv = [
        "tsift",
        "session-review",
        "tasks/software/tsift.md",
        "--next-context",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::SessionReview {
        json,
        next_context,
        path,
        ..
    }) = parsed.command
    else {
        panic!("expected SessionReview command");
    };

    assert!(json);
    assert!(next_context);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
}
28503
// `search` should expose `--max-items 3` and `--max-bytes 96` as `Some(3)`
// and `Some(96)`.
#[test]
fn cli_search_accepts_budget_flags() {
    let argv = [
        "tsift",
        "search",
        "alpha_helper",
        "--max-items",
        "3",
        "--max-bytes",
        "96",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::Search {
        max_items,
        max_bytes,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };

    assert_eq!(max_items, Some(3));
    assert_eq!(max_bytes, Some(96));
}
28527
// `search --budget small` should parse to `ResponseBudgetPreset::Small`.
#[test]
fn cli_search_accepts_budget_preset() {
    let parsed = parse_cli(["tsift", "search", "alpha_helper", "--budget", "small"]);

    let Some(Commands::Search { budget, .. }) = parsed.command else {
        panic!("expected Search command");
    };
    assert_eq!(budget, Some(ResponseBudgetPreset::Small));
}
28538
// Each facet option given once to `search` should come back as a
// single-element list on the matching field.
#[test]
fn cli_search_accepts_ast_facet_filters() {
    let argv = [
        "tsift",
        "search",
        "setup",
        "--lang",
        "markdown",
        "--kind",
        "list_item",
        "--node-kind",
        "list_item",
        "--section",
        "Install",
        "--parent",
        "Run setup.",
        "--child",
        "Confirm setup.",
        "--fence-language",
        "rust",
        "--list-depth",
        "1",
        "--heading-level",
        "2",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::Search {
        lang,
        kind,
        node_kind,
        section,
        parent,
        child,
        fence_language,
        list_depth,
        heading_level,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };

    // Text-valued facets.
    assert_eq!(lang, vec!["markdown"]);
    assert_eq!(kind, vec!["list_item"]);
    assert_eq!(node_kind, vec!["list_item"]);
    assert_eq!(section, vec!["Install"]);
    assert_eq!(parent, vec!["Run setup."]);
    assert_eq!(child, vec!["Confirm setup."]);
    assert_eq!(fence_language, vec!["rust"]);

    // Numeric facets.
    assert_eq!(list_depth, vec![1]);
    assert_eq!(heading_level, vec![2]);
}
28590
// Exercises `ResponseBudget::from_cli` three ways: with the Small preset
// alone, with the Small preset plus `Some(7)` as the first argument, and with
// no caps or preset but the final flag set to `true`.
#[test]
fn response_budget_presets_fill_defaults_and_preserve_explicit_caps() {
    let preset_only =
        ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Small), false);
    assert_eq!(preset_only.preview_items(), 3);
    assert_eq!(preset_only.preview_bytes(), 120);
    assert_eq!(preset_only.follow_up_items(), 4);

    let with_explicit_items =
        ResponseBudget::from_cli(Some(7), None, Some(ResponseBudgetPreset::Small), false);
    assert_eq!(with_explicit_items.preview_items(), 7);
    assert_eq!(with_explicit_items.preview_bytes(), 120);
    assert_eq!(with_explicit_items.follow_up_items(), 7);

    let flag_only = ResponseBudget::from_cli(None, None, None, true);
    let active = flag_only.is_active();
    assert!(active);
}
28607
// `explain` should expose `--max-items 2` and `--max-bytes 80` as `Some(2)`
// and `Some(80)`.
#[test]
fn cli_explain_accepts_budget_flags() {
    let argv = [
        "tsift",
        "explain",
        "alpha_helper",
        "--max-items",
        "2",
        "--max-bytes",
        "80",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::Explain {
        max_items,
        max_bytes,
        ..
    }) = parsed.command
    else {
        panic!("expected Explain command");
    };

    assert_eq!(max_items, Some(2));
    assert_eq!(max_bytes, Some(80));
}
28631
// `session-review` should expose `--max-items 4` and `--max-bytes 120` as
// `Some(4)` and `Some(120)`.
#[test]
fn cli_session_review_accepts_budget_flags() {
    let argv = [
        "tsift",
        "session-review",
        "tasks/software/tsift.md",
        "--max-items",
        "4",
        "--max-bytes",
        "120",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::SessionReview {
        max_items,
        max_bytes,
        ..
    }) = parsed.command
    else {
        panic!("expected SessionReview command");
    };

    assert_eq!(max_items, Some(4));
    assert_eq!(max_bytes, Some(120));
}
28655
// `context-pack` should capture the positional path and every option given,
// and leave `budget` and `convex_snapshot` as `None` when they are not passed.
#[test]
fn cli_parses_context_pack_command() {
    let argv = [
        "tsift",
        "context-pack",
        "tasks/software/tsift.md",
        "--test-input",
        "target/test.log",
        "--runner",
        "cargo",
        "--log-input",
        "target/build.log",
        "--max-items",
        "3",
        "--max-bytes",
        "96",
        "--json",
    ];
    let parsed = parse_cli(argv);

    // All fields are named (no `..`), so a new field breaks compilation here.
    let Some(Commands::ContextPack {
        path,
        test_input,
        runner,
        log_input,
        json,
        max_items,
        max_bytes,
        budget,
        convex_snapshot,
    }) = parsed.command
    else {
        panic!("expected ContextPack command");
    };

    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(test_input, Some(PathBuf::from("target/test.log")));
    assert_eq!(runner.as_deref(), Some("cargo"));
    assert_eq!(log_input, Some(PathBuf::from("target/build.log")));
    assert!(json);
    assert_eq!(max_items, Some(3));
    assert_eq!(max_bytes, Some(96));
    assert!(budget.is_none());
    assert!(convex_snapshot.is_none());
}
28699
// `token-savings` should capture the `--fixture` path and the `--fail-under`
// and `--json` switches.
#[test]
fn cli_parses_token_savings_command() {
    let argv = [
        "tsift",
        "token-savings",
        "--fixture",
        "fixtures/tsift-token-savings.json",
        "--fail-under",
        "--json",
    ];
    let parsed = parse_cli(argv);

    let Some(Commands::TokenSavings {
        fixture,
        fail_under,
        json,
    }) = parsed.command
    else {
        panic!("expected TokenSavings command");
    };

    assert_eq!(fixture, PathBuf::from("fixtures/tsift-token-savings.json"));
    assert!(fail_under);
    assert!(json);
}
28723
// Builds a one-case fixture from twelve raw symbols (six spellings each of
// "validate user" and "raw symbol") and two families of count 6, then checks
// that the report passes, records both counts, and shows at least 40% savings.
#[test]
fn token_savings_report_records_fixture_thresholds() {
    let identifiers = [
        "validate_user",
        "validateUser",
        "ValidateUser",
        "validate-user",
        "VALIDATE_USER",
        "Validate_User",
        "raw_symbol",
        "rawSymbol",
        "RawSymbol",
        "raw-symbol",
        "RAW_SYMBOL",
        "Raw_Symbol",
    ];

    // One raw symbol per identifier: file name uses the zero-based position,
    // line number is the one-based position.
    let mut raw_symbols = Vec::new();
    for (position, name) in identifiers.iter().enumerate() {
        raw_symbols.push(TokenSavingsRawSymbol {
            identifier: (*name).to_string(),
            file: format!("src/example_{position}.rs"),
            line: (position + 1) as u64,
            context: "function".to_string(),
        });
    }

    let family_of_six = |canonical: &str| TokenSavingsFamily {
        canonical: canonical.to_string(),
        count: 6,
        aliases: BTreeMap::new(),
    };
    let tagpath_families = vec![family_of_six("validate_user"), family_of_six("raw_symbol")];

    let only_case = TokenSavingsFixtureCase {
        name: "search-preview".to_string(),
        surface: "search".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols,
        tagpath_families,
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: None,
        markdown_projection_inputs: None,
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![only_case],
    };

    let report = build_token_savings_report(&fixture).unwrap();

    assert!(report.pass);
    let outcome = &report.cases[0];
    assert_eq!(outcome.raw_symbol_count, 12);
    assert_eq!(outcome.family_count, 2);
    assert_eq!(outcome.status, "pass");
    assert!(outcome.byte_delta > 0);
    assert!(outcome.raw_estimated_tokens > outcome.envelope_estimated_tokens);
    assert!(outcome.savings_percent >= 40.0);
}
28787
// Builds a one-case "source-read" fixture whose single read has an envelope
// window equal to the raw window (start 40, 121 lines) and required anchors
// 40, 120 and 160, then expects the report to pass with at least 40% savings.
#[test]
fn token_savings_source_read_inputs_preserve_required_anchors() {
    let single_read = TokenSavingsSourceReadInput {
        command: "sed -n '40,160p' src/main.rs".to_string(),
        file: "src/main.rs".to_string(),
        raw_start: 40,
        raw_lines: 121,
        raw_excerpt: "line 40\n".repeat(121),
        envelope_start: 40,
        envelope_lines: 121,
        required_line_anchors: vec![40, 120, 160],
    };
    let only_case = TokenSavingsFixtureCase {
        name: "source-read".to_string(),
        surface: "source-read".to_string(),
        minimum_savings_percent: 40.0,
        raw_symbols: Vec::new(),
        tagpath_families: Vec::new(),
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: Some(TokenSavingsSourceReadInputs {
            reads: vec![single_read],
        }),
        markdown_projection_inputs: None,
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: "fixture".to_string(),
        token_estimate: "ceil(utf8_bytes / 4)".to_string(),
        cases: vec![only_case],
    };

    let report = build_token_savings_report(&fixture).unwrap();

    assert!(report.pass);
    let outcome = &report.cases[0];
    assert_eq!(outcome.surface, "source-read");
    assert!(outcome.savings_percent >= 40.0);
}
28824
28825    #[test]
28826    fn token_savings_source_read_inputs_fail_when_anchor_is_hidden() {
28827        let fixture = TokenSavingsFixture {
28828            schema_version: 1,
28829            description: "fixture".to_string(),
28830            token_estimate: "ceil(utf8_bytes / 4)".to_string(),
28831            cases: vec![TokenSavingsFixtureCase {
28832                name: "source-read".to_string(),
28833                surface: "source-read".to_string(),
28834                minimum_savings_percent: 40.0,
28835                raw_symbols: Vec::new(),
28836                tagpath_families: Vec::new(),
28837                context_pack_inputs: None,
28838                session_review_inputs: None,
28839                source_read_inputs: Some(TokenSavingsSourceReadInputs {
28840                    reads: vec![TokenSavingsSourceReadInput {
28841                        command: "cat src/main.rs".to_string(),
28842                        file: "src/main.rs".to_string(),
28843                        raw_start: 1,
28844                        raw_lines: 200,
28845                        raw_excerpt: "line\n".repeat(200),
28846                        envelope_start: 1,
28847                        envelope_lines: 80,
28848                        required_line_anchors: vec![120],
28849                    }],
28850                }),
28851                markdown_projection_inputs: None,
28852            }],
28853        };
28854
28855        let err = match build_token_savings_report(&fixture) {
28856            Ok(_) => panic!("hidden anchor should fail the source-read fixture"),
28857            Err(err) => err,
28858        };
28859
28860        assert!(err.to_string().contains("hides required line anchor 120"));
28861    }
28862
28863    #[test]
28864    fn token_savings_markdown_projection_inputs_require_outline_and_selected_nodes() {
28865        let fixture = TokenSavingsFixture {
28866            schema_version: 1,
28867            description: "fixture".to_string(),
28868            token_estimate: "ceil(utf8_bytes / 4)".to_string(),
28869            cases: vec![TokenSavingsFixtureCase {
28870                name: "markdown-projection".to_string(),
28871                surface: "context-pack".to_string(),
28872                minimum_savings_percent: 40.0,
28873                raw_symbols: Vec::new(),
28874                tagpath_families: Vec::new(),
28875                context_pack_inputs: None,
28876                session_review_inputs: None,
28877                source_read_inputs: None,
28878                markdown_projection_inputs: Some(TokenSavingsMarkdownProjectionInputs {
28879                    documents: vec![TokenSavingsMarkdownProjectionInput {
28880                        command: "context-pack markdown body".to_string(),
28881                        file: "tasks/software/tsift.md".to_string(),
28882                        raw_markdown: "# Heading\n\n".repeat(120),
28883                        outline_nodes: vec!["Heading".to_string(), "Details".to_string()],
28884                        selected_nodes: vec!["mdast-selected".to_string()],
28885                        expand:
28886                            "tsift --envelope markdown-ast tasks/software/tsift.md --node mdast-selected --budget normal"
28887                                .to_string(),
28888                    }],
28889                }),
28890            }],
28891        };
28892
28893        let report = build_token_savings_report(&fixture).unwrap();
28894
28895        assert!(report.pass);
28896        assert_eq!(report.cases[0].surface, "context-pack");
28897        assert!(report.cases[0].savings_percent >= 40.0);
28898    }
28899
28900    #[test]
28901    fn markdown_ast_projection_cache_reuses_large_document_section_and_block_lookups() {
28902        let mut content = String::from("# Cache Root\n\n");
28903        for idx in 0..96 {
28904            content.push_str(&format!(
28905                "## Section {idx}\n\n- Item {idx}\n\n```rust\nfn sample_{idx}() {{}}\n```\n\n"
28906            ));
28907        }
28908
28909        let first = markdown_ast_projection("semantic-edit", content.as_bytes()).unwrap();
28910        assert!(!first.cache_hit);
28911        assert!(first.nodes.len() > 200);
28912
28913        let sections = markdown_section_spans(&content).unwrap();
28914        let list_items = markdown_block_spans(&content, "list_item").unwrap();
28915        let code_blocks = markdown_block_spans(&content, "code_block").unwrap();
28916        let second = markdown_ast_projection("semantic-edit", content.as_bytes()).unwrap();
28917
28918        assert!(second.cache_hit);
28919        assert_eq!(second.nodes.len(), first.nodes.len());
28920        assert_eq!(sections.len(), 97);
28921        assert_eq!(list_items.len(), 96);
28922        assert_eq!(code_blocks.len(), 96);
28923        let first_code = first
28924            .nodes
28925            .iter()
28926            .find(|node| node.kind == "code_block")
28927            .expect("expected a Markdown code block");
28928        let first_code_node = markdown_ast_node(
28929            Path::new("/repo"),
28930            "semantic-edit",
28931            first_code,
28932            content.as_bytes(),
28933            &first.nodes,
28934            8,
28935        );
28936        assert_eq!(first_code_node.metadata.embedded_symbols.len(), 1);
28937        assert_eq!(
28938            first_code_node.metadata.embedded_symbols[0].name,
28939            "sample_0"
28940        );
28941        assert_eq!(
28942            first_code_node.metadata.embedded_symbols[0].language,
28943            "rust"
28944        );
28945    }
28946
28947    #[test]
28948    fn search_budget_report_truncates_symbol_preview_and_emits_stable_handle() {
28949        let response = empty_search_response(Path::new("/repo"), "lexical");
28950        let symbol_hits = vec![index::SymbolHit {
28951            name: "alpha_helper_with_a_long_name".to_string(),
28952            kind: "function".to_string(),
28953            language: "rust".to_string(),
28954            file: "/repo/src/lib.rs".to_string(),
28955            line: 12,
28956            end_line: None,
28957            node_kind: None,
28958            start_byte: None,
28959            end_byte: None,
28960            body_start_byte: None,
28961            body_end_byte: None,
28962            tags: None,
28963            score: 0.98,
28964            match_type: "exact_name".to_string(),
28965            tagpath_handle: None,
28966        }];
28967
28968        let report = build_relative_search_budget_report(
28969            "alpha_helper_with_a_long_name",
28970            "lexical",
28971            Path::new("/repo"),
28972            &response,
28973            &symbol_hits,
28974            ResponseBudget::new(Some(1), Some(12)),
28975            &SearchFacetFilters::default(),
28976        );
28977
28978        assert_eq!(report.symbols.len(), 1);
28979        assert!(report.symbols[0].handle.starts_with("sfam-"));
28980        assert_eq!(report.symbols[0].tag_alias.as_deref(), Some("alpha/hel..."));
28981        assert_eq!(report.symbols[0].name, "alpha_hel...");
28982        assert_eq!(report.symbols[0].file, "src/lib.rs");
28983        assert!(report.symbols[0].expand.contains("tsift search"));
28984    }
28985
28986    #[test]
28987    fn search_budget_report_promotes_ast_span_artifacts_for_symbols() {
28988        let dir = tempfile::tempdir().unwrap();
28989        let src_dir = dir.path().join("src");
28990        fs::create_dir_all(&src_dir).unwrap();
28991        let source = "fn alpha_helper() {\n    beta();\n}\n";
28992        let file = src_dir.join("lib.rs");
28993        fs::write(&file, source).unwrap();
28994        let body_start = source.find("{\n").unwrap() + 1;
28995        let body_end = source.rfind("\n}").unwrap() + 1;
28996
28997        let response = empty_search_response(dir.path(), "lexical");
28998        let symbol_hits = vec![index::SymbolHit {
28999            name: "alpha_helper".to_string(),
29000            kind: "function".to_string(),
29001            language: "rust".to_string(),
29002            file: file.to_string_lossy().to_string(),
29003            line: 0,
29004            end_line: Some(2),
29005            node_kind: Some("function_item".to_string()),
29006            start_byte: Some(0),
29007            end_byte: Some(i64::try_from(source.len()).unwrap()),
29008            body_start_byte: Some(i64::try_from(body_start).unwrap()),
29009            body_end_byte: Some(i64::try_from(body_end).unwrap()),
29010            tags: Some("alpha,helper".to_string()),
29011            score: 0.98,
29012            match_type: "exact_name".to_string(),
29013            tagpath_handle: None,
29014        }];
29015
29016        let report = build_relative_search_budget_report(
29017            "alpha helper",
29018            "lexical",
29019            dir.path(),
29020            &response,
29021            &symbol_hits,
29022            ResponseBudget::new(Some(5), Some(96)),
29023            &SearchFacetFilters::default(),
29024        );
29025
29026        let symbol = &report.symbols[0];
29027        assert_eq!(symbol.language, "rust");
29028        assert_eq!(symbol.end_line, Some(2));
29029        let ast = symbol
29030            .ast
29031            .as_ref()
29032            .expect("search symbol preview should expose an AST span artifact");
29033        assert_eq!(ast.artifact_kind, "ast_span");
29034        assert!(ast.span.handle.starts_with("span-"));
29035        assert_eq!(ast.span.node_kind, "function_item");
29036        assert_eq!(ast.span.start_byte, 0);
29037        assert_eq!(ast.span.end_byte, source.len());
29038        assert_eq!(ast.span.body_start_byte, Some(body_start));
29039        assert_eq!(ast.span.body_end_byte, Some(body_end));
29040        assert!(ast.expand.source_window.contains("source-read"));
29041        assert!(
29042            ast.expand
29043                .source_body
29044                .as_ref()
29045                .unwrap()
29046                .contains("source-read")
29047        );
29048        assert!(ast.expand.symbol_read.contains("symbol-read"));
29049        assert!(ast.expand.markdown_ast.is_none());
29050    }
29051
29052    #[test]
29053    fn search_budget_report_links_markdown_spans_to_markdown_ast_expansion() {
29054        let dir = tempfile::tempdir().unwrap();
29055        let source = "# Guide\n\n## Install\n\n- Run setup.\n";
29056        let file = dir.path().join("README.md");
29057        fs::write(&file, source).unwrap();
29058        let heading_start = source.find("## Install").unwrap();
29059        let heading_end = source.len();
29060
29061        let response = empty_search_response(dir.path(), "lexical");
29062        let symbol_hits = vec![index::SymbolHit {
29063            name: "Install".to_string(),
29064            kind: "heading".to_string(),
29065            language: "markdown".to_string(),
29066            file: file.to_string_lossy().to_string(),
29067            line: 2,
29068            end_line: Some(4),
29069            node_kind: Some("atx_heading".to_string()),
29070            start_byte: Some(i64::try_from(heading_start).unwrap()),
29071            end_byte: Some(i64::try_from(heading_end).unwrap()),
29072            body_start_byte: Some(i64::try_from(source.find("- Run setup.").unwrap()).unwrap()),
29073            body_end_byte: Some(i64::try_from(heading_end).unwrap()),
29074            tags: Some("install".to_string()),
29075            score: 1.0,
29076            match_type: "exact_name".to_string(),
29077            tagpath_handle: None,
29078        }];
29079
29080        let report = build_relative_search_budget_report(
29081            "Install",
29082            "lexical",
29083            dir.path(),
29084            &response,
29085            &symbol_hits,
29086            ResponseBudget::new(Some(5), Some(96)),
29087            &SearchFacetFilters::default(),
29088        );
29089
29090        let ast = report.symbols[0]
29091            .ast
29092            .as_ref()
29093            .expect("Markdown search symbol should expose an AST span artifact");
29094        assert_eq!(ast.span.node_kind, "atx_heading");
29095        assert_eq!(ast.span.markdown.as_ref().unwrap().heading_level, Some(2));
29096        let markdown_ast = ast
29097            .expand
29098            .markdown_ast
29099            .as_ref()
29100            .expect("Markdown symbols should include markdown-ast expansion");
29101        assert!(markdown_ast.contains("markdown-ast"), "{markdown_ast}");
29102        assert!(markdown_ast.contains("--node"), "{markdown_ast}");
29103        assert!(markdown_ast.contains(&ast.span.handle), "{markdown_ast}");
29104        assert!(ast.expand.source_window.contains("source-read"));
29105        assert!(ast.expand.symbol_read.contains("symbol-read"));
29106    }
29107
29108    #[test]
29109    fn search_budget_report_exposes_markdown_embedded_code_symbols() {
29110        let dir = tempfile::tempdir().unwrap();
29111        let source = "# Guide\n\n```rust\nfn sample() {}\n```\n";
29112        let file = dir.path().join("README.md");
29113        fs::write(&file, source).unwrap();
29114        let fence_start = source.find("```rust").unwrap();
29115        let body_start = source.find("fn sample").unwrap();
29116        let body_end = body_start + "fn sample() {}\n".len();
29117
29118        let response = empty_search_response(dir.path(), "lexical");
29119        let symbol_hits = vec![index::SymbolHit {
29120            name: "rust".to_string(),
29121            kind: "code_block".to_string(),
29122            language: "markdown".to_string(),
29123            file: file.to_string_lossy().to_string(),
29124            line: 2,
29125            end_line: Some(4),
29126            node_kind: Some("fenced_code_block".to_string()),
29127            start_byte: Some(i64::try_from(fence_start).unwrap()),
29128            end_byte: Some(i64::try_from(source.len()).unwrap()),
29129            body_start_byte: Some(i64::try_from(body_start).unwrap()),
29130            body_end_byte: Some(i64::try_from(body_end).unwrap()),
29131            tags: Some("rust".to_string()),
29132            score: 1.0,
29133            match_type: "exact_name".to_string(),
29134            tagpath_handle: None,
29135        }];
29136
29137        let report = build_relative_search_budget_report(
29138            "rust",
29139            "lexical",
29140            dir.path(),
29141            &response,
29142            &symbol_hits,
29143            ResponseBudget::new(Some(5), Some(96)),
29144            &SearchFacetFilters::default(),
29145        );
29146
29147        let embedded = &report.symbols[0]
29148            .ast
29149            .as_ref()
29150            .unwrap()
29151            .span
29152            .markdown
29153            .as_ref()
29154            .unwrap()
29155            .embedded_symbols;
29156        assert_eq!(embedded.len(), 1);
29157        assert_eq!(embedded[0].name, "sample");
29158        assert_eq!(embedded[0].kind, "function");
29159        assert_eq!(embedded[0].language, "rust");
29160        assert_eq!(embedded[0].node_kind, "function_item");
29161        assert!(embedded[0].handle.starts_with("span-"));
29162        assert_eq!(embedded[0].start_byte, body_start);
29163        assert_eq!(embedded[0].start_line, 4);
29164    }
29165
29166    fn test_lexical_search_hit(
29167        path: &Path,
29168        rank: usize,
29169        score: f64,
29170        snippet: &str,
29171    ) -> sift::SearchHit {
29172        sift::SearchHit {
29173            artifact_id: format!("hit-{rank}"),
29174            artifact_kind: sift::ContextArtifactKind::File,
29175            budget: sift::ArtifactBudget::from_text(snippet, 1),
29176            confidence: sift::ScoreConfidence::High,
29177            freshness: sift::ArtifactFreshness {
29178                modified_unix_secs: None,
29179                observed_unix_secs: 0,
29180            },
29181            location: Some("line 1".to_string()),
29182            path: path.to_string_lossy().to_string(),
29183            provenance: sift::ArtifactProvenance {
29184                adapter: sift::AcquisitionAdapterKind::FileSystem,
29185                source: "test lexical hit".to_string(),
29186                synthetic: false,
29187            },
29188            rank,
29189            score,
29190            snippet: snippet.to_string(),
29191        }
29192    }
29193
29194    fn test_summary(symbol_name: &str, file_path: &str, summary: &str) -> summarize::Summary {
29195        summarize::Summary {
29196            id: 0,
29197            symbol_name: symbol_name.to_string(),
29198            file_path: file_path.to_string(),
29199            content_hash: "hash".to_string(),
29200            summary: summary.to_string(),
29201            entities: None,
29202            relationships: None,
29203            concept_labels: None,
29204            extracted_at: "2026-06-02T00:00:00Z".to_string(),
29205            model: "test".to_string(),
29206            tokens_input: None,
29207            tokens_output: None,
29208        }
29209    }
29210
29211    #[test]
29212    fn search_budget_ranked_preview_prioritizes_precise_ast_span_over_broad_file_hit() {
29213        let dir = tempfile::tempdir().unwrap();
29214        let src_dir = dir.path().join("src");
29215        fs::create_dir_all(&src_dir).unwrap();
29216        let source = "fn alpha_helper() {}\n";
29217        let file = src_dir.join("lib.rs");
29218        let broad_file = dir.path().join("README.md");
29219        fs::write(&file, source).unwrap();
29220        fs::write(
29221            &broad_file,
29222            "alpha helper alpha helper alpha helper in prose\n",
29223        )
29224        .unwrap();
29225
29226        let mut response = empty_search_response(dir.path(), "lexical");
29227        response.hits.push(test_lexical_search_hit(
29228            &broad_file,
29229            1,
29230            240.0,
29231            "alpha helper alpha helper alpha helper in prose",
29232        ));
29233        let symbol_hits = vec![index::SymbolHit {
29234            name: "alpha_helper".to_string(),
29235            kind: "function".to_string(),
29236            language: "rust".to_string(),
29237            file: file.to_string_lossy().to_string(),
29238            line: 0,
29239            end_line: Some(0),
29240            node_kind: Some("function_item".to_string()),
29241            start_byte: Some(0),
29242            end_byte: Some(i64::try_from(source.len()).unwrap()),
29243            body_start_byte: Some(i64::try_from(source.find("{}").unwrap() + 1).unwrap()),
29244            body_end_byte: Some(i64::try_from(source.find("{}").unwrap() + 1).unwrap()),
29245            tags: Some("alpha,helper".to_string()),
29246            score: 0.8,
29247            match_type: "all_tags".to_string(),
29248            tagpath_handle: None,
29249        }];
29250
29251        let report = build_relative_search_budget_report(
29252            "alpha helper",
29253            "lexical",
29254            dir.path(),
29255            &response,
29256            &symbol_hits,
29257            ResponseBudget::new(Some(5), Some(128)),
29258            &SearchFacetFilters::default(),
29259        );
29260
29261        assert_eq!(report.ranked[0].source, "symbol_span");
29262        assert_eq!(report.ranked[0].name.as_deref(), Some("alpha_helper"));
29263        assert!(report.ranked[0].score > report.ranked[1].score);
29264        assert_eq!(report.ranked[1].source, "lexical_file");
29265    }
29266
29267    #[test]
29268    fn search_budget_ranked_preview_includes_summary_and_graph_evidence() {
29269        let dir = tempfile::tempdir().unwrap();
29270        let source = "# Guide\n\n```rust\nfn sample() {}\n```\n";
29271        let file = dir.path().join("README.md");
29272        fs::write(&file, source).unwrap();
29273        let summary_db =
29274            summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
29275        summary_db
29276            .insert(&test_summary(
29277                "rust",
29278                "README.md",
29279                "Rust fence contains a sample function.",
29280            ))
29281            .unwrap();
29282
29283        let fence_start = source.find("```rust").unwrap();
29284        let body_start = source.find("fn sample").unwrap();
29285        let body_end = body_start + "fn sample() {}\n".len();
29286        let response = empty_search_response(dir.path(), "lexical");
29287        let symbol_hits = vec![index::SymbolHit {
29288            name: "rust".to_string(),
29289            kind: "code_block".to_string(),
29290            language: "markdown".to_string(),
29291            file: file.to_string_lossy().to_string(),
29292            line: 2,
29293            end_line: Some(4),
29294            node_kind: Some("fenced_code_block".to_string()),
29295            start_byte: Some(i64::try_from(fence_start).unwrap()),
29296            end_byte: Some(i64::try_from(source.len()).unwrap()),
29297            body_start_byte: Some(i64::try_from(body_start).unwrap()),
29298            body_end_byte: Some(i64::try_from(body_end).unwrap()),
29299            tags: Some("rust".to_string()),
29300            score: 1.0,
29301            match_type: "exact_name".to_string(),
29302            tagpath_handle: None,
29303        }];
29304
29305        let report = build_relative_search_budget_report(
29306            "rust",
29307            "lexical",
29308            dir.path(),
29309            &response,
29310            &symbol_hits,
29311            ResponseBudget::new(Some(5), Some(128)),
29312            &SearchFacetFilters::default(),
29313        );
29314
29315        let symbol = &report.symbols[0];
29316        assert_eq!(symbol.summary_refs, 1);
29317        assert_eq!(symbol.graph_neighbors, 1);
29318        assert!(
29319            report.ranked[0]
29320                .reasons
29321                .iter()
29322                .any(|reason| reason == "summary_refs:1")
29323        );
29324        assert!(
29325            report.ranked[0]
29326                .reasons
29327                .iter()
29328                .any(|reason| reason == "graph_neighbors:1")
29329        );
29330    }
29331
29332    fn markdown_search_facet_fixture() -> tempfile::TempDir {
29333        let dir = tempfile::tempdir().unwrap();
29334        let source = r#"# Guide
29335
29336## Install
29337
29338- Run setup.
29339  - Confirm setup.
29340
29341```rust
29342fn sample() {}
29343```
29344"#;
29345        fs::write(dir.path().join("README.md"), source).unwrap();
29346        let index_dir = dir.path().join(".tsift");
29347        fs::create_dir_all(&index_dir).unwrap();
29348        run_index_update(
29349            &index_dir.join("index.db"),
29350            dir.path(),
29351            "indexing markdown search facet fixture".to_string(),
29352            dir.path(),
29353            None,
29354            false,
29355            false,
29356        )
29357        .unwrap();
29358        dir
29359    }
29360
29361    fn markdown_search_facet_hits(root: &Path, query: &str) -> Vec<index::SymbolHit> {
29362        let db = index::IndexDb::open_read_only_resilient(&root.join(".tsift/index.db")).unwrap();
29363        db.symbol_search(query, 20).unwrap()
29364    }
29365
29366    #[test]
29367    fn search_facet_filters_match_scalar_symbol_fields() {
29368        let dir = tempfile::tempdir().unwrap();
29369        let hits = vec![
29370            index::SymbolHit {
29371                name: "alpha_helper".to_string(),
29372                kind: "function".to_string(),
29373                language: "rust".to_string(),
29374                file: dir.path().join("src/lib.rs").to_string_lossy().to_string(),
29375                line: 0,
29376                end_line: None,
29377                node_kind: Some("function_item".to_string()),
29378                start_byte: None,
29379                end_byte: None,
29380                body_start_byte: None,
29381                body_end_byte: None,
29382                tags: None,
29383                score: 1.0,
29384                match_type: "exact_name".to_string(),
29385                tagpath_handle: None,
29386            },
29387            index::SymbolHit {
29388                name: "Install".to_string(),
29389                kind: "heading".to_string(),
29390                language: "markdown".to_string(),
29391                file: dir.path().join("README.md").to_string_lossy().to_string(),
29392                line: 0,
29393                end_line: None,
29394                node_kind: Some("atx_heading".to_string()),
29395                start_byte: None,
29396                end_byte: None,
29397                body_start_byte: None,
29398                body_end_byte: None,
29399                tags: None,
29400                score: 0.9,
29401                match_type: "exact_name".to_string(),
29402                tagpath_handle: None,
29403            },
29404        ];
29405
29406        let filtered = apply_search_facet_filters(
29407            dir.path(),
29408            hits,
29409            &SearchFacetFilters {
29410                languages: vec!["rust".to_string()],
29411                kinds: vec!["function".to_string()],
29412                node_kinds: vec!["function_item".to_string()],
29413                ..SearchFacetFilters::default()
29414            },
29415        );
29416
29417        assert_eq!(filtered.len(), 1);
29418        assert_eq!(filtered[0].name, "alpha_helper");
29419    }
29420
29421    #[test]
29422    fn search_facet_filters_match_markdown_sections_and_block_metadata() {
29423        let dir = markdown_search_facet_fixture();
29424
29425        let nested_list = apply_search_facet_filters(
29426            dir.path(),
29427            markdown_search_facet_hits(dir.path(), "setup"),
29428            &SearchFacetFilters {
29429                sections: vec!["Install".to_string()],
29430                parents: vec!["Run setup.".to_string()],
29431                list_depths: vec![1],
29432                ..SearchFacetFilters::default()
29433            },
29434        );
29435        assert_eq!(nested_list.len(), 1);
29436        assert_eq!(nested_list[0].name, "Confirm setup.");
29437
29438        let parent_list = apply_search_facet_filters(
29439            dir.path(),
29440            markdown_search_facet_hits(dir.path(), "setup"),
29441            &SearchFacetFilters {
29442                children: vec!["Confirm setup.".to_string()],
29443                ..SearchFacetFilters::default()
29444            },
29445        );
29446        assert_eq!(parent_list.len(), 1);
29447        assert_eq!(parent_list[0].name, "Run setup.");
29448
29449        let heading = apply_search_facet_filters(
29450            dir.path(),
29451            markdown_search_facet_hits(dir.path(), "Install"),
29452            &SearchFacetFilters {
29453                heading_levels: vec![2],
29454                node_kinds: vec!["atx_heading".to_string()],
29455                ..SearchFacetFilters::default()
29456            },
29457        );
29458        assert_eq!(heading.len(), 1);
29459        assert_eq!(heading[0].name, "Install");
29460
29461        let fence = apply_search_facet_filters(
29462            dir.path(),
29463            markdown_search_facet_hits(dir.path(), "rust"),
29464            &SearchFacetFilters {
29465                fence_languages: vec!["rust".to_string()],
29466                kinds: vec!["code_block".to_string()],
29467                ..SearchFacetFilters::default()
29468            },
29469        );
29470        assert_eq!(fence.len(), 1);
29471        assert_eq!(fence[0].kind, "code_block");
29472
29473        let embedded_child = apply_search_facet_filters(
29474            dir.path(),
29475            markdown_search_facet_hits(dir.path(), "rust"),
29476            &SearchFacetFilters {
29477                children: vec!["sample".to_string()],
29478                kinds: vec!["code_block".to_string()],
29479                ..SearchFacetFilters::default()
29480            },
29481        );
29482        assert_eq!(embedded_child.len(), 1);
29483        assert_eq!(embedded_child[0].name, "rust");
29484    }
29485
29486    #[test]
29487    fn search_budget_report_groups_repeated_symbols_by_canonical_tag_family() {
29488        let response = empty_search_response(Path::new("/repo"), "lexical");
29489        let symbol_hits = vec![
29490            index::SymbolHit {
29491                name: "alpha_helper".to_string(),
29492                kind: "function".to_string(),
29493                language: "rust".to_string(),
29494                file: "/repo/src/lib.rs".to_string(),
29495                line: 12,
29496                end_line: None,
29497                node_kind: None,
29498                start_byte: None,
29499                end_byte: None,
29500                body_start_byte: None,
29501                body_end_byte: None,
29502                tags: Some("alpha,helper".to_string()),
29503                score: 0.98,
29504                match_type: "exact_name".to_string(),
29505                tagpath_handle: None,
29506            },
29507            index::SymbolHit {
29508                name: "alphaHelper".to_string(),
29509                kind: "method".to_string(),
29510                language: "rust".to_string(),
29511                file: "/repo/src/main.rs".to_string(),
29512                line: 34,
29513                end_line: None,
29514                node_kind: None,
29515                start_byte: None,
29516                end_byte: None,
29517                body_start_byte: None,
29518                body_end_byte: None,
29519                tags: Some("alpha,helper".to_string()),
29520                score: 0.93,
29521                match_type: "tag_overlap".to_string(),
29522                tagpath_handle: None,
29523            },
29524            index::SymbolHit {
29525                name: "alpha_helper".to_string(),
29526                kind: "function".to_string(),
29527                language: "rust".to_string(),
29528                file: "/repo/src/worker.rs".to_string(),
29529                line: 56,
29530                end_line: None,
29531                node_kind: None,
29532                start_byte: None,
29533                end_byte: None,
29534                body_start_byte: None,
29535                body_end_byte: None,
29536                tags: Some("alpha,helper".to_string()),
29537                score: 0.91,
29538                match_type: "tag_overlap".to_string(),
29539                tagpath_handle: None,
29540            },
29541        ];
29542
29543        let report = build_relative_search_budget_report(
29544            "alpha helper",
29545            "lexical",
29546            Path::new("/repo"),
29547            &response,
29548            &symbol_hits,
29549            ResponseBudget::new(Some(5), Some(48)),
29550            &SearchFacetFilters::default(),
29551        );
29552
29553        assert_eq!(report.symbol_total, 1);
29554        assert_eq!(report.raw_symbol_total, 3);
29555        assert_eq!(report.symbols.len(), 1);
29556        assert_eq!(report.symbols[0].tag_alias.as_deref(), Some("alpha/helper"));
29557        assert_eq!(report.symbols[0].match_count, 3);
29558        assert_eq!(report.symbols[0].surface_count, 2);
29559        assert_eq!(report.symbols[0].file_count, 3);
29560        assert_eq!(
29561            report.symbols[0].surface_examples,
29562            vec!["alpha_helper".to_string(), "alphaHelper".to_string()]
29563        );
29564        assert!(report.symbols[0].name.contains("(+1 variant)"));
29565        assert!(report.symbols[0].file.contains("(+2 files)"));
29566        assert!(report.symbols[0].expand.contains("tsift search"));
29567        assert!(report.symbols[0].expand.contains("alpha helper"));
29568    }
29569
// Facet filters handed to the budget-report builder must be echoed back on the
// report unchanged and render in the compact `key=value` summary form.
#[test]
fn search_budget_report_carries_active_filters() {
    let repo_root = Path::new("/repo");
    let response = empty_search_response(repo_root, "lexical");

    // One exact-name hit whose language, kind, and node kind line up with the
    // filters configured below.
    let only_hit = index::SymbolHit {
        name: "alpha_helper".to_owned(),
        kind: "function".to_owned(),
        language: "rust".to_owned(),
        file: "/repo/src/lib.rs".to_owned(),
        line: 12,
        end_line: None,
        node_kind: Some("function_item".to_owned()),
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("alpha,helper".to_owned()),
        score: 0.98,
        match_type: "exact_name".to_owned(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![only_hit];

    let filters = SearchFacetFilters {
        node_kinds: vec!["function_item".to_owned()],
        kinds: vec!["function".to_owned()],
        languages: vec!["rust".to_owned()],
        ..SearchFacetFilters::default()
    };
    let budget = ResponseBudget::new(Some(5), Some(48));

    let report = build_relative_search_budget_report(
        "alpha helper",
        "lexical",
        repo_root,
        &response,
        &symbol_hits,
        budget,
        &filters,
    );

    // The report carries the exact filter set it was built with ...
    assert_eq!(report.filters, filters);
    // ... and the summary lists language, kind, then node kind.
    let rendered = search_facet_filters_summary(&report.filters);
    assert_eq!(rendered, "lang=rust kind=function node-kind=function_item");
}
29613
// A one-word query over a response reporting 450 indexed artifacts, previewed
// with a budget of (1, 64), is expected to produce a "high-hit" scale guard
// whose narrowing commands include an `--exact` form, a more specific
// "alpha helper" query, and end with a `workflow search` suggestion.
#[test]
fn search_budget_report_warns_on_broad_preview_and_lists_narrowing_commands() {
    let repo_root = Path::new("/repo");
    let mut response = empty_search_response(repo_root, "lexical");
    response.indexed_artifacts = 450;

    let alpha_hit = index::SymbolHit {
        name: "alpha_helper".to_owned(),
        kind: "function".to_owned(),
        language: "rust".to_owned(),
        file: "/repo/src/lib.rs".to_owned(),
        line: 12,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("alpha,helper".to_owned()),
        score: 0.98,
        match_type: "exact_name".to_owned(),
        tagpath_handle: None,
    };
    let beta_hit = index::SymbolHit {
        name: "beta_helper".to_owned(),
        kind: "function".to_owned(),
        language: "rust".to_owned(),
        file: "/repo/src/beta.rs".to_owned(),
        line: 21,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        tags: Some("beta,helper".to_owned()),
        score: 0.92,
        match_type: "tag_overlap".to_owned(),
        tagpath_handle: None,
    };
    let symbol_hits = vec![alpha_hit, beta_hit];
    let budget = ResponseBudget::new(Some(1), Some(64));
    let no_filters = SearchFacetFilters::default();

    let report = build_relative_search_budget_report(
        "helper",
        "lexical",
        repo_root,
        &response,
        &symbol_hits,
        budget,
        &no_filters,
    );

    let guard = report
        .scale_guard
        .as_ref()
        .expect("broad previews should emit a scale guard");

    // The guard's signals mirror the inputs: artifact count and raw hit count.
    assert_eq!(guard.level, "high-hit");
    assert_eq!(guard.signals.indexed_artifacts, 450);
    assert_eq!(guard.signals.raw_symbol_matches, 2);

    let narrowing = &guard.narrow_commands;
    assert!(narrowing.iter().any(|cmd| cmd.contains("--exact")));
    assert!(narrowing.iter().any(|cmd| cmd.contains("alpha helper")));
    // The final suggestion is the `workflow search` one.
    assert!(narrowing.last().unwrap().contains("workflow search"));
}
29692
// With a budget of (1, 24), the explain report is expected to keep a single
// caller edge and a single community member out of the larger inputs, flag the
// result as truncated, and attach handles / tag aliases to what remains.
#[test]
fn explain_budget_report_limits_edges_and_members() {
    let definition = index::StoredSymbol {
        name: "alpha_helper".to_owned(),
        kind: "function".to_owned(),
        language: "rust".to_owned(),
        signature: None,
        file: "src/lib.rs".to_owned(),
        line: 10,
        end_line: None,
        node_kind: None,
        start_byte: None,
        end_byte: None,
        body_start_byte: None,
        body_end_byte: None,
        parent_module: None,
        visibility: None,
        tags: None,
        tagpath_handle: None,
    };
    let symbols = vec![definition];

    // Two distinct callers of `alpha_helper`; only one should survive the budget.
    let from_main = index::StoredEdge {
        caller_file: "src/main.rs".to_owned(),
        caller_name: "main".to_owned(),
        caller_line: 1,
        callee_name: "alpha_helper".to_owned(),
        call_site_line: 3,
        tagpath_handle: None,
    };
    let from_worker = index::StoredEdge {
        caller_file: "src/worker.rs".to_owned(),
        caller_name: "worker".to_owned(),
        caller_line: 5,
        callee_name: "alpha_helper".to_owned(),
        call_site_line: 8,
        tagpath_handle: None,
    };
    let callers = vec![from_main, from_worker];

    // Three-member community; the report should list just one member.
    let members = vec![
        graph::CommunityMember::new("alpha_helper"),
        graph::CommunityMember::new("main"),
        graph::CommunityMember::new("worker"),
    ];
    let community = graph::Community {
        id: 1,
        members,
        modularity_contribution: 0.5,
    };
    let budget = ResponseBudget::new(Some(1), Some(24));

    let report = build_explain_budget_report(
        "alpha_helper",
        Path::new("/repo"),
        &symbols,
        &callers,
        2,
        false,
        &[],
        0,
        false,
        Some(&community),
        budget,
    );

    assert_eq!(report.definitions.len(), 1);
    assert_eq!(report.callers.len(), 1);
    assert!(report.truncated);
    assert_eq!(report.community.as_ref().unwrap().members.len(), 1);

    let first_definition = &report.definitions[0];
    assert_eq!(first_definition.tag_alias.as_deref(), Some("alpha/helper"));

    let first_caller = &report.callers[0];
    assert!(first_caller.handle.starts_with("ecall-"));
    assert_eq!(first_caller.tag_alias.as_deref(), Some("main"));
}
29766
// Builds a fully populated session-review report and checks the next-context
// budget view twice:
//   * with a budget of (1, 12): lists shrink to one entry, symbol and failure
//     refs get `ncsym-` / `snf-` handles, all four digest commands survive
//     unshortened, and only the `prompt_budget` token action is kept;
//   * with a budget of (4, 120): all four guardrails become token actions with
//     `prompt_budget` first (the fixture lists `cache_resend` first), and the
//     leading action carries compact / restart / context-pack commands.
#[test]
fn session_review_next_context_budget_limits_lists() {
    let target = "tasks/software/tsift.md";

    // NOTE(review): the aggregate ratio (40.0) and the latest-session ratio
    // (66.67) differ although both summaries use the same token counts; the
    // assertions below do not read either value.
    let aggregate_cost = session_review::SessionReviewCostSummary {
        scope: "bounded_matched_sessions".to_owned(),
        sessions: 1,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(40.0),
        largest_turn_total_tokens: 240,
    };
    let latest_cost = session_review::SessionReviewCostSummary {
        scope: "latest_matched_session".to_owned(),
        sessions: 1,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(66.67),
        largest_turn_total_tokens: 240,
    };

    // Four warn-level guardrails as (kind, message, guidance) triples.
    let guardrails = [
        (
            "cache_resend",
            "cached input ratio was high",
            "compact or restart the session",
        ),
        (
            "prompt_budget",
            "largest prompt turn reached 999999 tokens",
            "compact the session before another large turn",
        ),
        (
            "restart_loop",
            "restart churn detected",
            "restart cleanly",
        ),
        (
            "noop_closeout",
            "commit_already_current appeared 8 times",
            "avoid reopening without new edits",
        ),
    ]
    .iter()
    .map(|&(kind, message, guidance)| session_cost::SessionCostGuardrail {
        kind: kind.to_owned(),
        severity: "warn".to_owned(),
        message: message.to_owned(),
        guidance: guidance.to_owned(),
    })
    .collect::<Vec<_>>();

    let prompt_targets = ["do one", "do two"]
        .iter()
        .map(|&text| session_review::SessionReviewPromptTarget {
            text: text.to_owned(),
            occurrences: 1,
        })
        .collect::<Vec<_>>();

    let matched_session = session_review::SessionReviewSession {
        source: "claude_jsonl".to_owned(),
        path: "/tmp/session.jsonl".to_owned(),
        matched_by: vec!["path".to_owned()],
        modified_unix_secs: None,
        prompt_target_count: 2,
        command_groups: 0,
        file_groups: 2,
        symbol_groups: 1,
        failure_groups: 1,
        runtime_event_groups: 0,
        restart_churn_groups: 0,
        closeout_groups: 0,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        largest_turn_total_tokens: 240,
    };

    let timeout_failure = session_review::SessionReviewFailure {
        kind: "timeout".to_owned(),
        message: "search timed out".to_owned(),
        occurrences: 1,
        command: None,
        session_path: None,
    };
    let next_context = session_review::SessionReviewNextContext {
        target: target.to_owned(),
        active_prompt_targets: vec!["do one".to_owned(), "do two".to_owned()],
        last_verification: session_review::SessionReviewVerificationState {
            status: "green".to_owned(),
            detail: "cargo test".to_owned(),
        },
        touched_files: vec!["src/lib.rs".to_owned(), "src/main.rs".to_owned()],
        touched_symbols: vec!["alpha_helper".to_owned(), "main".to_owned()],
        unresolved_failures: vec![timeout_failure],
        // The last two commands are deliberately long; they must come back intact.
        next_digest_commands: vec![
            "tsift session-review --next-context tasks/software/tsift.md".to_owned(),
            "tsift diff-digest .".to_owned(),
            "tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log".to_owned(),
            "tsift log-digest --path . < target/very-long-build-output-file-name-that-must-remain-executable.log".to_owned(),
        ],
    };

    let review = session_review::SessionReviewReport {
        root: "/repo".to_owned(),
        target: target.to_owned(),
        target_kind: "file".to_owned(),
        sessions_considered: 1,
        sessions_matched: 1,
        claude_sessions: 1,
        codex_sessions: 0,
        agent_doc_logs: 0,
        prompt_target_count: 2,
        command_groups: 0,
        file_groups: 2,
        symbol_groups: 1,
        failure_groups: 1,
        runtime_event_groups: 0,
        restart_churn_groups: 0,
        closeout_groups: 0,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(40.0),
        largest_turn_total_tokens: 240,
        aggregate_cost,
        latest_session_cost: Some(latest_cost),
        guardrails,
        loop_clusters: Vec::new(),
        file_read_diagnostics: Vec::new(),
        prompt_targets,
        commands: Vec::new(),
        touched_files: Vec::new(),
        touched_symbols: Vec::new(),
        failures: Vec::new(),
        runtime_events: Vec::new(),
        restart_churn: Vec::new(),
        closeout: Vec::new(),
        largest_turns: Vec::new(),
        sessions: vec![matched_session],
        next_context,
        warnings: Vec::new(),
    };

    // --- Tight budget: everything list-like is cut down to one entry. ---
    let tight = build_session_review_next_context_budget_report(
        &review,
        ResponseBudget::new(Some(1), Some(12)),
        None,
    );

    assert!(tight.truncated);
    assert_eq!(tight.prompt_targets, vec!["do one"]);
    assert_eq!(tight.touched_files, vec!["src/lib.rs"]);

    let first_symbol = &tight.touched_symbol_refs[0];
    assert!(first_symbol.handle.starts_with("ncsym-"));
    assert_eq!(first_symbol.tag_alias.as_deref(), Some("alpha/helper"));

    assert!(tight.unresolved_failures[0].handle.starts_with("snf-"));

    // Digest commands are neither dropped nor shortened by the tight budget.
    assert_eq!(tight.next_digest_commands.len(), 4);
    assert_eq!(
        tight.next_digest_commands[2],
        "tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log"
    );
    assert_eq!(tight.next_token_actions.len(), 1);
    assert_eq!(tight.next_token_actions[0].kind, "prompt_budget");

    // --- Roomy budget: every guardrail turns into a token action. ---
    let roomy = build_session_review_next_context_budget_report(
        &review,
        ResponseBudget::new(Some(4), Some(120)),
        None,
    );

    let action_kinds = roomy
        .next_token_actions
        .iter()
        .map(|action| action.kind.as_str())
        .collect::<Vec<_>>();
    assert_eq!(
        action_kinds,
        vec![
            "prompt_budget",
            "cache_resend",
            "restart_loop",
            "noop_closeout"
        ]
    );

    let lead_action = &roomy.next_token_actions[0];
    assert_eq!(
        lead_action.compact_command.as_deref(),
        Some("agent-doc compact \"tasks/software/tsift.md\" --commit")
    );
    assert_eq!(
        lead_action.restart_command.as_deref(),
        Some("agent-doc start \"tasks/software/tsift.md\"")
    );
    assert!(
        lead_action
            .digest_commands
            .iter()
            .any(|cmd| cmd
                == "tsift --envelope context-pack \"tasks/software/tsift.md\" --budget normal")
    );
}
29984
// A two-file diff digest previewed with a budget of (1, 11) is expected to keep
// only the first file, shorten symbol names / aliases / summaries with a
// trailing "...", and still expose handles and an expand command per summary.
#[test]
fn context_pack_diff_preview_limits_files_and_symbols() {
    // Modified file with two touched symbols, one current summary, one warning.
    let lib_change = diff_digest::DiffDigestFile {
        path: "src/lib.rs".to_owned(),
        status: diff_digest::DiffDigestFileStatus::Modified,
        touched_symbols: vec!["alpha_helper".to_owned(), "beta_helper".to_owned()],
        summary_state: diff_digest::DiffDigestSummaryState::Current,
        current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
            symbol: "alpha_helper".to_owned(),
            summary: "alpha helper handles the main alpha workflow".to_owned(),
        }],
        added_call_edges: vec!["alpha->beta".to_owned()],
        removed_call_edges: Vec::new(),
        warnings: vec!["stale parse".to_owned()],
    };
    // Newly added file without any summary; it should fall outside the preview.
    let main_change = diff_digest::DiffDigestFile {
        path: "src/main.rs".to_owned(),
        status: diff_digest::DiffDigestFileStatus::Added,
        touched_symbols: vec!["main".to_owned()],
        summary_state: diff_digest::DiffDigestSummaryState::Missing,
        current_summaries: Vec::new(),
        added_call_edges: Vec::new(),
        removed_call_edges: Vec::new(),
        warnings: Vec::new(),
    };
    let digest = diff_digest::DiffDigestReport {
        root: "/repo".to_owned(),
        mode: diff_digest::DiffDigestMode::WorkingTree,
        revision: None,
        files_changed: 2,
        files_with_current_summaries: 1,
        symbols_touched: 3,
        call_edges_added: 1,
        call_edges_removed: 0,
        files: vec![lib_change, main_change],
    };
    let budget = ResponseBudget::new(Some(1), Some(11));

    let preview = build_context_pack_diff_preview(&digest, budget, None);

    assert!(preview.truncated);
    assert_eq!(preview.files.len(), 1);

    let kept = &preview.files[0];
    assert_eq!(kept.path, "src/lib.rs");
    assert_eq!(kept.touched_symbols, vec!["alpha_he..."]);

    let symbol_ref = &kept.touched_symbol_refs[0];
    assert!(symbol_ref.handle.starts_with("cdsym-"));
    assert_eq!(symbol_ref.tag_alias.as_deref(), Some("alpha/he..."));

    let summary_ref = &kept.summary_refs[0];
    assert!(summary_ref.handle.starts_with("cdsum-"));
    assert_eq!(summary_ref.tag_alias.as_deref(), Some("alpha/he..."));
    assert_eq!(summary_ref.summary, "alpha he...");
    assert_eq!(summary_ref.expand, "tsift summarize --file \"src/lib.rs\"");

    // The short warning is expected to come through unchanged.
    assert_eq!(kept.warnings, vec!["stale parse"]);
}
30055
// After `main.rs` is rewritten behind an existing index, the status reminders
// are expected to consist of exactly one entry that mentions the stale index
// and the `tsift index .` command.
#[test]
fn context_pack_status_reminders_include_stale_index_state() {
    let project = setup_graph_index();

    // NOTE(review): the pause presumably gives the rewritten file a later
    // modification time than the index; confirm how staleness is detected.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let main_rs = project.path().join("main.rs");
    std::fs::write(
        main_rs,
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
    )
    .unwrap();

    let reminders = context_pack_status_reminders(project.path());

    assert_eq!(reminders.len(), 1);
    let only_reminder = &reminders[0];
    assert!(only_reminder.contains("index stale"));
    assert!(only_reminder.contains("tsift index ."));
}
30072
// #gdbgatecold regression-lock: within one inspect scope, the trusted
// context-pack pipeline must reuse a single index inspection.
// `prepare_agent_doc_index_gate` and `context_pack_status_reminders` both call
// `IndexDb::inspect_read_only` on the same `(root, .tsift/index.db)` key, so
// with a scope guard active the later call is served from the cache. The test
// therefore requires at least one recorded miss (the initial inspection) and
// at least one recorded hit (the reuse).
#[test]
fn build_context_pack_reuses_inspect_within_scope() {
    let project = setup_graph_index();
    init_git_repo(project.path());

    // Keep the guard bound for the whole test so the scope stays open while
    // the report is built and the stats are read.
    let _scope = index::InspectScopeGuard::new();
    let budget = ResponseBudget::new(Some(2), Some(96));
    let _ = build_context_pack_report(project.path(), None, None, None, budget).unwrap();

    let (hits, misses) = index::inspect_scope_stats();
    assert!(
        hits >= 1,
        "expected at least one cached inspect within scope (hits={hits}, misses={misses})"
    );
    assert!(
        misses >= 1,
        "expected at least one initial inspect miss (hits={hits}, misses={misses})"
    );
}
30102
// #gdbgatecold scope-isolation: when no inspect scope is open, each
// `IndexDb::inspect_read_only` call has to read from disk again. This pins the
// contract that the search/status fast-paths never share a cached inspection
// between consecutive top-level calls.
#[test]
fn inspect_read_only_outside_scope_does_not_cache() {
    let project = setup_graph_index();
    let root = project.path();
    let db_path = root.join(".tsift/index.db");

    // First unscoped inspection: nothing is counted at all.
    let _first = index::IndexDb::inspect_read_only(&db_path, root, false).unwrap();
    let (hits, misses) = index::inspect_scope_stats();
    assert_eq!(
        (hits, misses),
        (0, 0),
        "no scope guard => no hits/misses recorded"
    );

    // Second unscoped inspection: still no cache hit.
    let _second = index::IndexDb::inspect_read_only(&db_path, root, false).unwrap();
    let (hits, _) = index::inspect_scope_stats();
    assert_eq!(hits, 0, "must not reuse inspection outside of any scope");
}
30122
// Building a context pack over a project whose `main.rs` changed after
// indexing is expected to refresh the index first: the reminders mention the
// refresh instead of staleness, and a follow-up change scan finds nothing new,
// modified, or deleted.
#[test]
fn context_pack_refreshes_stale_index_before_handoff() {
    let project = setup_graph_index();
    let root = project.path();
    init_git_repo(root);

    // NOTE(review): the pause presumably makes the rewrite observable as a
    // change relative to the existing index; confirm against the index code.
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::write(
        root.join("main.rs"),
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n",
    )
    .unwrap();

    let budget = ResponseBudget::new(Some(2), Some(96));
    let report = build_context_pack_report(root, None, None, None, budget).unwrap();
    let reminders = &report.status_reminders;

    assert!(
        reminders
            .iter()
            .any(|note| note.contains("index refreshed") && note.contains("context-pack handoff")),
        "expected context-pack refresh diagnostic, got {:?}",
        report.status_reminders
    );
    assert!(
        !reminders.iter().any(|note| note.contains("index stale")),
        "stale reminder should be gone after refresh: {:?}",
        report.status_reminders
    );

    // Re-open the index read-only and confirm it is in sync with the tree.
    let index_path = root.join(".tsift/index.db");
    let db = index::IndexDb::open_read_only(&index_path).unwrap();
    let pending = db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
30165
// Materializing an exploration packet into an empty temp directory is expected
// to leave the packet's window handle untouched and to persist, in
// `.tsift/graph.db`: one `source_handle` node for `main.rs`, one
// `touches_symbol` edge leaving the file ref, one `worker_context` node, and
// one `scopes_source` edge leaving the worker-context handle.
#[test]
fn context_pack_materializes_source_handles_into_graph_store() {
    let workspace = tempfile::tempdir().unwrap();

    let file_to_symbol = ExplorationRelation {
        from: "file:main.rs".to_owned(),
        relation: "touches_symbol".to_owned(),
        to: "symbol:helper".to_owned(),
        label: Some("modified diff".to_owned()),
    };
    let window = ExplorationSourceWindow {
        handle: "xwin-test".to_owned(),
        file: "main.rs".to_owned(),
        start: 1,
        end: 32,
        reason: "changed file".to_owned(),
        expand: "tsift source-read main.rs --path . --start 1 --lines 32".to_owned(),
    };
    let worker = ExplorationWorkerContext {
        handle: "xwrk-test".to_owned(),
        target: "tasks/software/tsift.md".to_owned(),
        summary: "do #kgnv".to_owned(),
        expand: "tsift --envelope context-pack tasks/software/tsift.md --budget normal"
            .to_owned(),
    };
    let draft = ExplorationPacket {
        budget: exploration_budget_for_counts(2, 1),
        relationship_map: vec![file_to_symbol],
        source_windows: vec![window],
        worker_context: vec![worker],
        no_reread_guidance: "use windows".to_owned(),
    };

    let materialized =
        materialize_context_pack_exploration_packet(workspace.path(), draft).unwrap();
    assert_eq!(materialized.source_windows[0].handle, "xwin-test");

    // Inspect what landed in the graph store.
    let graph_db = workspace.path().join(".tsift/graph.db");
    let store = SqliteGraphStore::open(&graph_db).unwrap();

    let source_handles = store.nodes_by_kind("source_handle").unwrap();
    assert_eq!(source_handles.len(), 1);
    assert_eq!(
        source_handles[0].properties.get("file"),
        Some(&"main.rs".to_string())
    );

    let file_ref = exploration_ref_id("file:main.rs");
    let symbol_edges = store
        .outgoing_edges(&file_ref, Some("touches_symbol"))
        .unwrap();
    assert_eq!(symbol_edges.len(), 1);

    let worker_nodes = store.nodes_by_kind("worker_context").unwrap();
    assert_eq!(worker_nodes.len(), 1);

    let scope_edges = store
        .outgoing_edges("xwrk-test", Some("scopes_source"))
        .unwrap();
    assert_eq!(scope_edges.len(), 1);
}
30222
// Context pack over a traversal project with a freshly refreshed graph store.
// The test expects the orchestration section to report a current projection
// with hashes, a fail-closed "blocked" readiness caused by an empty summary
// cache, `tsift summarize --extract .` as the suggested next step, no
// conflict-matrix follow-up command, and at least one worker ownership block.
#[test]
fn context_pack_records_graph_orchestration_observability() {
    let project = setup_traversal_project();
    init_git_repo(project.path());
    let session = project.path().join("tasks/software/tsift.md");
    refresh_traversal_graph_store(project.path(), &session, None).unwrap();

    let budget = ResponseBudget::new(Some(4), Some(160));
    let report = build_context_pack_report(&session, None, None, None, budget).unwrap();
    let orchestration = &report.graph_orchestration;
    let readiness = &orchestration.readiness;

    // Contract version and projection state.
    assert_eq!(
        orchestration.contract_version,
        CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION
    );
    assert_eq!(orchestration.projection_freshness.status.as_str(), "current");
    assert!(!orchestration.projection_hashes.is_empty());

    // Readiness is blocked, fail-closed, and points at summary extraction.
    assert_eq!(readiness.status, "blocked");
    assert_eq!(readiness.reason, "summary_cache_empty");
    assert!(readiness.fail_closed);
    assert!(
        readiness
            .next_commands
            .iter()
            .any(|cmd| cmd == "tsift summarize --extract ."),
        "{:?}",
        readiness.next_commands
    );

    // NOTE(review): the failure message says the ids "should be empty", while
    // the check itself only rejects ids starting with `gevd-`.
    assert!(
        orchestration
            .evidence_packet_ids
            .iter()
            .all(|id| !id.starts_with("gevd-")),
        "evidence packet ids should be empty when readiness is blocked: {:?}",
        orchestration.evidence_packet_ids
    );
    assert!(
        orchestration
            .conflict_matrix_decisions
            .iter()
            .any(|decision| decision.contains("readiness blocked")),
        "conflict-matrix decisions should reference readiness block: {:?}",
        orchestration.conflict_matrix_decisions
    );

    // Follow-up commands: no conflict-matrix entry, but the extract command.
    let follow_ups = &orchestration.follow_up_commands;
    assert!(
        !follow_ups.iter().any(|cmd| cmd.contains("conflict-matrix")),
        "conflict-matrix command should not appear when readiness is blocked: {:?}",
        orchestration.follow_up_commands
    );
    assert!(
        follow_ups
            .iter()
            .any(|cmd| cmd == "tsift summarize --extract ."),
        "{:?}",
        orchestration.follow_up_commands
    );

    assert!(!orchestration.worker_ownership_blocks.is_empty());
}
30311
30312    #[test]
30313    fn convex_sync_report_chunks_upserts_and_tombstones() {
30314        let dir = setup_traversal_project();
30315        let source_graph = build_traversal_graph_source(dir.path(), dir.path(), None).unwrap();
30316        let projection = traversal_projection_from_graph(dir.path(), None, &source_graph).unwrap();
30317        let mut snapshot = projection.to_convex_rows();
30318        snapshot.nodes.push(ConvexNodeRow {
30319            external_id: "stale-node".to_string(),
30320            kind: "backlog".to_string(),
30321            label: "stale".to_string(),
30322            properties: BTreeMap::new(),
30323            provenance: Vec::new(),
30324            freshness: None,
30325        });
30326        snapshot.edges.clear();
30327        snapshot.edges.push(ConvexEdgeRow {
30328            edge_key: "stale-edge".to_string(),
30329            from_external_id: "stale-node".to_string(),
30330            to_external_id: "stale-node".to_string(),
30331            kind: "mentions".to_string(),
30332            properties: BTreeMap::new(),
30333            provenance: Vec::new(),
30334            freshness: None,
30335        });
30336        let snapshot_path = dir.path().join("convex-snapshot.json");
30337        fs::write(&snapshot_path, serde_json::to_string(&snapshot).unwrap()).unwrap();
30338
30339        let report = build_convex_sync_report(dir.path(), None, Some(&snapshot_path), 2).unwrap();
30340
30341        assert_eq!(report.freshness.status, "stale");
30342        assert!(report.freshness.fail_closed);
30343        assert_eq!(report.node_tombstones, vec!["stale-node".to_string()]);
30344        assert!(
30345            report.edge_upserts.len() > 1,
30346            "snapshot without edges should upsert local edges"
30347        );
30348        assert_eq!(report.edge_tombstones, vec!["stale-edge".to_string()]);
30349        assert_eq!(
30350            report.chunks.first().map(|chunk| chunk.operation.as_str()),
30351            Some("delete_edges"),
30352            "edge tombstones should be planned before node tombstones"
30353        );
30354        assert!(
30355            report
30356                .chunks
30357                .iter()
30358                .any(|chunk| chunk.operation == "upsert_edges" && chunk.count <= 2),
30359            "expected chunked edge upserts, got {:?}",
30360            report.chunks
30361        );
30362    }
30363
30364    #[test]
30365    fn convex_snapshot_validation_fails_closed_when_stale() {
30366        let dir = setup_traversal_project();
30367        build_traversal_graph(dir.path(), dir.path(), None).unwrap();
30368        let snapshot = ConvexProjectionRows::default();
30369        let snapshot_path = dir.path().join("empty-convex-snapshot.json");
30370        fs::write(&snapshot_path, serde_json::to_string(&snapshot).unwrap()).unwrap();
30371
30372        let err = verify_convex_projection_snapshot(dir.path(), None, &snapshot_path).unwrap_err();
30373        assert!(
30374            err.to_string()
30375                .contains("Convex graph projection is not current"),
30376            "{err}"
30377        );
30378    }
30379
30380    #[test]
30381    fn convex_sync_report_marks_live_apply_mode_without_network() {
30382        let dir = setup_traversal_project();
30383        let report =
30384            build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
30385
30386        assert!(!report.dry_run);
30387        assert!(
30388            !report
30389                .diagnostics
30390                .iter()
30391                .any(|diagnostic| diagnostic.contains("dry-run only")),
30392            "apply-mode report should not claim dry-run diagnostics"
30393        );
30394        assert!(
30395            report
30396                .chunks
30397                .iter()
30398                .any(|chunk| chunk.operation == "upsert_nodes"),
30399            "live apply mode should still expose chunked idempotent operations"
30400        );
30401    }
30402
30403    #[test]
30404    fn convex_sync_apply_round_trips_with_http_backend() {
30405        use std::net::TcpListener;
30406        use std::sync::{Arc, Mutex};
30407
30408        let dir = setup_traversal_project();
30409        let report =
30410            build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
30411        let expected_chunks = report.chunks.len();
30412        assert!(expected_chunks > 0);
30413
30414        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
30415        let endpoint = format!("http://{}", listener.local_addr().unwrap());
30416        let operations = Arc::new(Mutex::new(Vec::<String>::new()));
30417        let server_operations = Arc::clone(&operations);
30418        let server = std::thread::spawn(move || {
30419            for _ in 0..expected_chunks {
30420                let (mut stream, _) = listener.accept().unwrap();
30421                let mut reader = BufReader::new(stream.try_clone().unwrap());
30422                let mut request_line = String::new();
30423                reader.read_line(&mut request_line).unwrap();
30424                assert!(request_line.starts_with("POST "));
30425
30426                let mut content_length = 0usize;
30427                loop {
30428                    let mut line = String::new();
30429                    reader.read_line(&mut line).unwrap();
30430                    if line == "\r\n" {
30431                        break;
30432                    }
30433                    if let Some(value) = line.to_ascii_lowercase().strip_prefix("content-length:") {
30434                        content_length = value.trim().parse().unwrap();
30435                    }
30436                }
30437
30438                let mut body = vec![0u8; content_length];
30439                reader.read_exact(&mut body).unwrap();
30440                let request: serde_json::Value = serde_json::from_slice(&body).unwrap();
30441                server_operations
30442                    .lock()
30443                    .unwrap()
30444                    .push(request["operation"].as_str().unwrap().to_string());
30445
30446                let response = br#"{"status":"ok","message":"accepted"}"#;
30447                write!(
30448                    stream,
30449                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
30450                    response.len()
30451                )
30452                .unwrap();
30453                stream.write_all(response).unwrap();
30454            }
30455        });
30456
30457        cmd_convex_sync(
30458            ConvexSyncOptions {
30459                path: dir.path(),
30460                scope: None,
30461                snapshot: None,
30462                chunk_size: 100,
30463                remote_snapshot: false,
30464                apply: true,
30465                endpoint: Some(&endpoint),
30466                auth_token_env: "TSIFT_TEST_CONVEX_AUTH_TOKEN",
30467            },
30468            OutputFormat {
30469                json_output: false,
30470                compact: true,
30471                pretty: false,
30472                terse: false,
30473                ultra_terse: false,
30474                schema: false,
30475                envelope: false,
30476            },
30477        )
30478        .unwrap();
30479        server.join().unwrap();
30480
30481        let operations = operations.lock().unwrap().clone();
30482        assert!(operations.contains(&"upsert_nodes".to_string()));
30483        assert!(operations.contains(&"upsert_edges".to_string()));
30484    }
30485
30486    #[test]
30487    fn context_pack_diff_preview_attaches_tag_ontology_refs() {
30488        let root = tempfile::tempdir().unwrap();
30489        fs::create_dir_all(root.path().join(".naming/tags")).unwrap();
30490        fs::write(
30491            root.path().join(".naming/tags/alpha.md"),
30492            "+++\ntag = \"alpha\"\ntitle = \"Alpha Domain\"\ndomain = \"fixture\"\n+++\n\nAlpha definition.\n",
30493        )
30494        .unwrap();
30495        let ontology = load_tag_ontology_preview_context(root.path()).unwrap();
30496        let report = diff_digest::DiffDigestReport {
30497            root: root.path().display().to_string(),
30498            mode: diff_digest::DiffDigestMode::WorkingTree,
30499            revision: None,
30500            files_changed: 1,
30501            files_with_current_summaries: 1,
30502            symbols_touched: 1,
30503            call_edges_added: 0,
30504            call_edges_removed: 0,
30505            files: vec![diff_digest::DiffDigestFile {
30506                path: "src/lib.rs".to_string(),
30507                status: diff_digest::DiffDigestFileStatus::Modified,
30508                touched_symbols: vec!["alpha_helper".to_string()],
30509                summary_state: diff_digest::DiffDigestSummaryState::Current,
30510                current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
30511                    symbol: "alpha_helper".to_string(),
30512                    summary: "alpha helper summary".to_string(),
30513                }],
30514                added_call_edges: vec![],
30515                removed_call_edges: vec![],
30516                warnings: vec![],
30517            }],
30518        };
30519
30520        let preview = build_context_pack_diff_preview(
30521            &report,
30522            ResponseBudget::new(Some(1), Some(80)),
30523            Some(&ontology),
30524        );
30525
30526        let symbol_ref = &preview.files[0].touched_symbol_refs[0].ontology_refs[0];
30527        assert!(symbol_ref.handle.starts_with("tont-"));
30528        assert_eq!(symbol_ref.tag, "alpha");
30529        assert_eq!(symbol_ref.path, ".naming/tags/alpha.md");
30530        assert_eq!(symbol_ref.title.as_deref(), Some("Alpha Domain"));
30531        assert_eq!(symbol_ref.domain.as_deref(), Some("fixture"));
30532        assert_eq!(
30533            preview.files[0].summary_refs[0].ontology_refs[0].path,
30534            ".naming/tags/alpha.md"
30535        );
30536    }
30537
30538    #[test]
30539    fn context_pack_test_preview_limits_failure_groups() {
30540        let report = test_digest::TestDigestReport {
30541            root: "/repo".to_string(),
30542            runner: "cargo".to_string(),
30543            failures: 2,
30544            grouped_failures: 2,
30545            counts: test_digest::TestDigestCounts {
30546                passed: Some(8),
30547                failed: Some(2),
30548                skipped: Some(1),
30549            },
30550            failure_groups: vec![
30551                test_digest::TestDigestFailure {
30552                    tests: vec!["suite::alpha_failure".to_string()],
30553                    message: "assertion failed".to_string(),
30554                    path: Some("src/lib.rs".to_string()),
30555                    line: Some(42),
30556                    column: None,
30557                    occurrences: 1,
30558                    summary_state: test_digest::TestDigestSummaryState::Current,
30559                    current_summaries: vec![test_digest::TestDigestSummarySnippet {
30560                        symbol: "alpha_failure".to_string(),
30561                        summary: "failure summary for alpha test".to_string(),
30562                    }],
30563                },
30564                test_digest::TestDigestFailure {
30565                    tests: vec!["suite::beta_failure".to_string()],
30566                    message: "panic".to_string(),
30567                    path: Some("src/main.rs".to_string()),
30568                    line: Some(7),
30569                    column: None,
30570                    occurrences: 1,
30571                    summary_state: test_digest::TestDigestSummaryState::Missing,
30572                    current_summaries: vec![],
30573                },
30574            ],
30575            warnings: vec!["warning text".to_string()],
30576        };
30577
30578        let preview =
30579            build_context_pack_test_preview(&report, ResponseBudget::new(Some(1), Some(14)), None);
30580
30581        assert!(preview.truncated);
30582        assert_eq!(preview.failure_groups.len(), 1);
30583        assert_eq!(preview.failure_groups[0].tests, vec!["suite::alph..."]);
30584        assert_eq!(preview.failure_groups[0].message, "assertion f...");
30585        assert!(
30586            preview.failure_groups[0].summary_refs[0]
30587                .handle
30588                .starts_with("ctsum-")
30589        );
30590        assert_eq!(
30591            preview.failure_groups[0].summary_refs[0].expand,
30592            "tsift summarize --file \"src/lib.rs\""
30593        );
30594        assert_eq!(preview.warnings, vec!["warning text"]);
30595    }
30596
30597    #[test]
30598    fn context_pack_log_preview_limits_signals_and_refs() {
30599        let report = log_digest::LogDigestReport {
30600            root: "/repo".to_string(),
30601            total_lines: 12,
30602            non_empty_lines: 10,
30603            signal_groups: 2,
30604            repeated_line_groups: 2,
30605            repeated_line_occurrences: 3,
30606            file_ref_groups: 2,
30607            symbol_ref_groups: 2,
30608            stack_groups: 1,
30609            signals: vec![
30610                log_digest::LogDigestSignal {
30611                    severity: "error".to_string(),
30612                    message: "src/lib.rs:42 boom".to_string(),
30613                    path: Some("src/lib.rs".to_string()),
30614                    line: Some(42),
30615                    column: None,
30616                    occurrences: 2,
30617                    summary_state: log_digest::LogDigestSummaryState::Current,
30618                    current_summaries: vec![log_digest::LogDigestSummarySnippet {
30619                        symbol: "alpha_helper".to_string(),
30620                        summary: "alpha helper cached log summary".to_string(),
30621                    }],
30622                },
30623                log_digest::LogDigestSignal {
30624                    severity: "warn".to_string(),
30625                    message: "slow path".to_string(),
30626                    path: None,
30627                    line: None,
30628                    column: None,
30629                    occurrences: 1,
30630                    summary_state: log_digest::LogDigestSummaryState::Unavailable,
30631                    current_summaries: vec![],
30632                },
30633            ],
30634            repeated_lines: vec![
30635                log_digest::LogDigestRepeatedLine {
30636                    line: "retrying work item alpha".to_string(),
30637                    occurrences: 3,
30638                },
30639                log_digest::LogDigestRepeatedLine {
30640                    line: "retrying work item beta".to_string(),
30641                    occurrences: 2,
30642                },
30643            ],
30644            file_refs: vec![
30645                log_digest::LogDigestFileRef {
30646                    path: "src/lib.rs".to_string(),
30647                    line: Some(42),
30648                    column: None,
30649                    occurrences: 2,
30650                    summary_state: log_digest::LogDigestSummaryState::Current,
30651                    current_summaries: vec![log_digest::LogDigestSummarySnippet {
30652                        symbol: "alpha_helper".to_string(),
30653                        summary: "alpha helper cached file summary".to_string(),
30654                    }],
30655                },
30656                log_digest::LogDigestFileRef {
30657                    path: "src/main.rs".to_string(),
30658                    line: Some(7),
30659                    column: None,
30660                    occurrences: 1,
30661                    summary_state: log_digest::LogDigestSummaryState::Missing,
30662                    current_summaries: vec![],
30663                },
30664            ],
30665            symbol_refs: vec![
30666                log_digest::LogDigestSymbolRef {
30667                    symbol: "alpha_helper".to_string(),
30668                    occurrences: 2,
30669                    summary_state: log_digest::LogDigestSummaryState::Current,
30670                    current_summaries: vec![log_digest::LogDigestSummarySnippet {
30671                        symbol: "alpha_helper".to_string(),
30672                        summary: "alpha helper cached symbol summary".to_string(),
30673                    }],
30674                },
30675                log_digest::LogDigestSymbolRef {
30676                    symbol: "beta_helper".to_string(),
30677                    occurrences: 1,
30678                    summary_state: log_digest::LogDigestSummaryState::Missing,
30679                    current_summaries: vec![],
30680                },
30681            ],
30682            stack_traces: vec![log_digest::LogDigestStackGroup {
30683                frames: vec!["frame one".to_string()],
30684                occurrences: 1,
30685            }],
30686            warnings: vec!["warning text".to_string()],
30687        };
30688
30689        let preview =
30690            build_context_pack_log_preview(&report, ResponseBudget::new(Some(1), Some(14)), None);
30691
30692        assert!(preview.truncated);
30693        assert_eq!(preview.signals.len(), 1);
30694        assert_eq!(preview.signals[0].message, "src/lib.rs:...");
30695        assert_eq!(preview.repeated_lines[0].line, "retrying wo...");
30696        assert_eq!(preview.file_refs.len(), 1);
30697        assert_eq!(preview.symbol_refs[0].symbol, "alpha_helper");
30698        assert!(
30699            preview.signals[0].summary_refs[0]
30700                .handle
30701                .starts_with("clsum-")
30702        );
30703        assert!(
30704            preview.file_refs[0].summary_refs[0]
30705                .handle
30706                .starts_with("clfsum-")
30707        );
30708        assert!(
30709            preview.symbol_refs[0].summary_refs[0]
30710                .handle
30711                .starts_with("clssum-")
30712        );
30713        assert_eq!(
30714            preview.symbol_refs[0].summary_refs[0].tag_alias.as_deref(),
30715            Some("alpha/helper")
30716        );
30717        assert_eq!(
30718            preview.symbol_refs[0].summary_refs[0].expand,
30719            "tsift summarize \"alpha_helper\""
30720        );
30721        assert_eq!(preview.warnings, vec!["warning text"]);
30722    }
30723
30724    #[test]
30725    fn cli_search_rejects_exact_with_strategy_flag() {
30726        let cli = try_parse_cli([
30727            "tsift",
30728            "search",
30729            "test",
30730            "--exact",
30731            "--strategy",
30732            "lexical",
30733        ]);
30734        assert!(cli.is_err());
30735    }
30736
30737    #[test]
30738    fn cli_search_autoindexes_by_default() {
30739        let cli = parse_cli(["tsift", "search", "test"]);
30740        match cli.command {
30741            Some(Commands::Search {
30742                autoindex,
30743                no_autoindex,
30744                ..
30745            }) => {
30746                assert!(!autoindex);
30747                assert!(!no_autoindex);
30748                assert!(autoindex || !no_autoindex);
30749            }
30750            _ => panic!("expected Search command"),
30751        }
30752    }
30753
30754    #[test]
30755    fn cli_search_accepts_no_autoindex_flag() {
30756        let cli = parse_cli(["tsift", "search", "test", "--no-autoindex"]);
30757        match cli.command {
30758            Some(Commands::Search {
30759                autoindex,
30760                no_autoindex,
30761                ..
30762            }) => {
30763                assert!(!autoindex);
30764                assert!(no_autoindex);
30765            }
30766            _ => panic!("expected Search command"),
30767        }
30768    }
30769
30770    #[test]
30771    fn cli_search_rejects_conflicting_autoindex_flags() {
30772        let cli = try_parse_cli(["tsift", "search", "test", "--autoindex", "--no-autoindex"]);
30773        assert!(cli.is_err());
30774    }
30775
30776    // --- relativize paths ---
30777
30778    #[test]
30779    fn cli_accepts_global_absolute_flag() {
30780        let cli = parse_cli(["tsift", "--absolute", "status"]);
30781        assert!(cli.absolute);
30782        assert!(matches!(cli.command, Some(Commands::Status { .. })));
30783    }
30784
30785    #[test]
30786    fn cli_accepts_global_tabular_flag() {
30787        let cli = parse_cli(["tsift", "--tabular", "search", "test"]);
30788        assert!(cli.tabular);
30789        assert!(matches!(cli.command, Some(Commands::Search { .. })));
30790    }
30791
30792    #[test]
30793    fn cli_tabular_with_graph() {
30794        let cli = parse_cli(["tsift", "--tabular", "graph", "main"]);
30795        assert!(cli.tabular);
30796        assert!(matches!(cli.command, Some(Commands::Graph { .. })));
30797    }
30798
30799    #[test]
30800    fn cli_tabular_with_communities() {
30801        let cli = parse_cli(["tsift", "--tabular", "communities"]);
30802        assert!(cli.tabular);
30803        assert!(matches!(cli.command, Some(Commands::Communities { .. })));
30804    }
30805
30806    #[test]
30807    fn cli_tabular_with_explain() {
30808        let cli = parse_cli(["tsift", "--tabular", "explain", "main"]);
30809        assert!(cli.tabular);
30810        assert!(matches!(cli.command, Some(Commands::Explain { .. })));
30811    }
30812
30813    #[test]
30814    fn cli_traverse_accepts_path_target_and_html_format() {
30815        let cli = parse_cli([
30816            "tsift", "traverse", "#kgnv", "--to", "main", "--path", ".", "--format", "html",
30817        ]);
30818        match cli.command {
30819            Some(Commands::Traverse {
30820                node,
30821                to,
30822                path,
30823                format,
30824                ..
30825            }) => {
30826                assert_eq!(node.as_deref(), Some("#kgnv"));
30827                assert_eq!(to.as_deref(), Some("main"));
30828                assert_eq!(path, PathBuf::from("."));
30829                assert_eq!(format, TraverseFormat::Html);
30830            }
30831            _ => panic!("expected Traverse command"),
30832        }
30833    }
30834
30835    #[test]
30836    fn cli_parses_semantic_related_command() {
30837        let cli = parse_cli([
30838            "tsift",
30839            "semantic",
30840            "graph navigation",
30841            "--path",
30842            ".",
30843            "--kind",
30844            "all",
30845            "--limit",
30846            "3",
30847            "--json",
30848        ]);
30849        match cli.command {
30850            Some(Commands::Semantic {
30851                query,
30852                path,
30853                kind,
30854                limit,
30855                json,
30856                ..
30857            }) => {
30858                assert_eq!(query, "graph navigation");
30859                assert_eq!(path, PathBuf::from("."));
30860                assert_eq!(kind, SemanticRelatedKind::All);
30861                assert_eq!(limit, 3);
30862                assert!(json);
30863            }
30864            _ => panic!("expected Semantic command"),
30865        }
30866    }
30867
30868    #[test]
30869    fn cli_parses_convex_sync_command() {
30870        let cli = parse_cli([
30871            "tsift",
30872            "convex-sync",
30873            ".",
30874            "--snapshot",
30875            "rows.json",
30876            "--chunk-size",
30877            "25",
30878            "--json",
30879        ]);
30880        match cli.command {
30881            Some(Commands::ConvexSync {
30882                path,
30883                snapshot,
30884                chunk_size,
30885                json,
30886                ..
30887            }) => {
30888                assert_eq!(path, PathBuf::from("."));
30889                assert_eq!(snapshot, Some(PathBuf::from("rows.json")));
30890                assert_eq!(chunk_size, 25);
30891                assert!(json);
30892            }
30893            _ => panic!("expected ConvexSync command"),
30894        }
30895    }
30896
30897    #[test]
30898    fn cli_parses_convex_sync_live_flags() {
30899        let cli = parse_cli([
30900            "tsift",
30901            "convex-sync",
30902            ".",
30903            "--remote-snapshot",
30904            "--apply",
30905            "--endpoint",
30906            "https://example.test/convex-graph",
30907            "--auth-token-env",
30908            "TSIFT_TEST_TOKEN",
30909        ]);
30910        match cli.command {
30911            Some(Commands::ConvexSync {
30912                remote_snapshot,
30913                apply,
30914                endpoint,
30915                auth_token_env,
30916                ..
30917            }) => {
30918                assert!(remote_snapshot);
30919                assert!(apply);
30920                assert_eq!(
30921                    endpoint.as_deref(),
30922                    Some("https://example.test/convex-graph")
30923                );
30924                assert_eq!(auth_token_env, "TSIFT_TEST_TOKEN");
30925            }
30926            _ => panic!("expected ConvexSync command"),
30927        }
30928    }
30929
30930    #[test]
30931    fn cli_parses_graph_db_query() {
30932        let cli = parse_cli([
30933            "tsift",
30934            "graph-db",
30935            "--backend",
30936            "convex-snapshot",
30937            "--convex-snapshot",
30938            "rows.json",
30939            "--json",
30940            "neighborhood",
30941            "gbak-kgnv",
30942            "--depth",
30943            "2",
30944            "--edge-kind",
30945            "mentions",
30946            "--property",
30947            "path=tasks/software/tsift.md",
30948            "--cursor",
30949            "gbak-old",
30950            "--limit",
30951            "10",
30952        ]);
30953        match cli.command {
30954            Some(Commands::GraphDb {
30955                backend,
30956                convex_snapshot,
30957                json,
30958                query,
30959                ..
30960            }) => {
30961                assert_eq!(backend, GraphDbBackend::ConvexSnapshot);
30962                assert_eq!(convex_snapshot, Some(PathBuf::from("rows.json")));
30963                assert!(json);
30964                match query {
30965                    GraphDbQuery::Neighborhood {
30966                        id,
30967                        depth,
30968                        edge_kind,
30969                        cursor,
30970                        limit,
30971                        property_filters,
30972                    } => {
30973                        assert_eq!(id, "gbak-kgnv");
30974                        assert_eq!(depth, 2);
30975                        assert_eq!(edge_kind.as_deref(), Some("mentions"));
30976                        assert_eq!(cursor.as_deref(), Some("gbak-old"));
30977                        assert_eq!(limit, Some(10));
30978                        assert_eq!(
30979                            property_filters,
30980                            vec!["path=tasks/software/tsift.md".to_string()]
30981                        );
30982                    }
30983                    _ => panic!("expected graph-db neighborhood query"),
30984                }
30985            }
30986            _ => panic!("expected GraphDb command"),
30987        }
30988    }
30989
30990    #[test]
30991    fn cli_parses_graph_db_backend_eval_surrealdb_candidate() {
30992        let cli = parse_cli([
30993            "tsift",
30994            "graph-db",
30995            "--json",
30996            "backend-eval",
30997            "--candidate",
30998            "surrealdb",
30999            "--target",
31000            "gval",
31001            "--full-projection",
31002        ]);
31003        match cli.command {
31004            Some(Commands::GraphDb { json, query, .. }) => {
31005                assert!(json);
31006                match query {
31007                    GraphDbQuery::BackendEval {
31008                        candidates,
31009                        targets,
31010                        full_projection,
31011                    } => {
31012                        assert_eq!(candidates, vec!["surrealdb".to_string()]);
31013                        assert_eq!(targets, vec!["gval".to_string()]);
31014                        assert!(full_projection);
31015                    }
31016                    _ => panic!("expected graph-db backend-eval query"),
31017                }
31018            }
31019            _ => panic!("expected GraphDb command"),
31020        }
31021    }
31022
31023    #[test]
31024    fn cli_parses_graph_db_tokensave_backend() {
31025        let cli = parse_cli([
31026            "tsift",
31027            "graph-db",
31028            "--backend",
31029            "tokensave",
31030            "--json",
31031            "node",
31032            "fn:main",
31033        ]);
31034        match cli.command {
31035            Some(Commands::GraphDb {
31036                backend,
31037                json,
31038                query,
31039                ..
31040            }) => {
31041                assert_eq!(backend, GraphDbBackend::Tokensave);
31042                assert!(json);
31043                match query {
31044                    GraphDbQuery::Node { id } => assert_eq!(id, "fn:main"),
31045                    _ => panic!("expected graph-db node query"),
31046                }
31047            }
31048            _ => panic!("expected GraphDb command"),
31049        }
31050    }
31051
31052    #[test]
31053    fn cli_parses_analyze_command() {
31054        let cli = parse_cli([
31055            "tsift", "analyze", ".", "--scope", "core", "--entry", "main", "--entry", "run",
31056            "--limit", "7", "--json",
31057        ]);
31058        match cli.command {
31059            Some(Commands::Analyze {
31060                path,
31061                scope,
31062                entry_points,
31063                limit,
31064                json,
31065            }) => {
31066                assert_eq!(path, PathBuf::from("."));
31067                assert_eq!(scope.as_deref(), Some("core"));
31068                assert_eq!(entry_points, vec!["main".to_string(), "run".to_string()]);
31069                assert_eq!(limit, 7);
31070                assert!(json);
31071            }
31072            _ => panic!("expected Analyze command"),
31073        }
31074    }
31075
31076    #[test]
31077    fn cli_parses_graph_db_related_query() {
31078        let cli = parse_cli([
31079            "tsift",
31080            "graph-db",
31081            "--json",
31082            "related",
31083            "voice avatar memory retrieval",
31084            "--kind",
31085            "all",
31086            "--depth",
31087            "3",
31088            "--seed-limit",
31089            "4",
31090            "--limit",
31091            "12",
31092        ]);
31093        match cli.command {
31094            Some(Commands::GraphDb { json, query, .. }) => {
31095                assert!(json);
31096                match query {
31097                    GraphDbQuery::Related {
31098                        query,
31099                        kind,
31100                        depth,
31101                        seed_limit,
31102                        limit,
31103                    } => {
31104                        assert_eq!(query, "voice avatar memory retrieval");
31105                        assert_eq!(kind, SemanticRelatedKind::All);
31106                        assert_eq!(depth, 3);
31107                        assert_eq!(seed_limit, 4);
31108                        assert_eq!(limit, 12);
31109                    }
31110                    _ => panic!("expected graph-db related query"),
31111                }
31112            }
31113            _ => panic!("expected GraphDb command"),
31114        }
31115    }
31116
31117    #[test]
31118    fn cli_parses_graph_db_compact_query() {
31119        let cli = parse_cli([
31120            "tsift",
31121            "graph-db",
31122            "--path",
31123            ".",
31124            "compact",
31125            "--apply",
31126            "--prune-tombstones",
31127            "--confirmed-convex-reconciled",
31128        ]);
31129        match cli.command {
31130            Some(Commands::GraphDb { query, .. }) => match query {
31131                GraphDbQuery::Compact {
31132                    apply,
31133                    prune_tombstones,
31134                    confirmed_convex_reconciled,
31135                } => {
31136                    assert!(apply);
31137                    assert!(prune_tombstones);
31138                    assert!(confirmed_convex_reconciled);
31139                }
31140                _ => panic!("expected graph-db compact query"),
31141            },
31142            _ => panic!("expected GraphDb command"),
31143        }
31144    }
31145
31146    #[test]
31147    fn cli_parses_impact_command() {
31148        let cli = parse_cli(["tsift", "impact", ".", "--cached", "--limit", "5"]);
31149        match cli.command {
31150            Some(Commands::Impact {
31151                path,
31152                cached,
31153                limit,
31154                ..
31155            }) => {
31156                assert_eq!(path, PathBuf::from("."));
31157                assert!(cached);
31158                assert_eq!(limit, 5);
31159            }
31160            _ => panic!("expected Impact command"),
31161        }
31162    }
31163
31164    #[test]
31165    fn cli_parses_conflict_matrix_command() {
31166        let cli = parse_cli([
31167            "tsift",
31168            "conflict-matrix",
31169            "--path",
31170            "tasks/software/tsift.md",
31171            "--depth",
31172            "4",
31173            "--limit",
31174            "12",
31175            "--impact-limit",
31176            "6",
31177            "--json",
31178            "pwcm",
31179            "#g6kf",
31180        ]);
31181        match cli.command {
31182            Some(Commands::ConflictMatrix {
31183                targets,
31184                path,
31185                depth,
31186                limit,
31187                impact_limit,
31188                json,
31189                ..
31190            }) => {
31191                assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
31192                assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
31193                assert_eq!(depth, 4);
31194                assert_eq!(limit, 12);
31195                assert_eq!(impact_limit, 6);
31196                assert!(json);
31197            }
31198            _ => panic!("expected ConflictMatrix command"),
31199        }
31200    }
31201
31202    #[test]
31203    fn cli_parses_dispatch_trace_command() {
31204        let cli = parse_cli([
31205            "tsift",
31206            "dispatch-trace",
31207            "--path",
31208            "tasks/software/tsift.md",
31209            "--format",
31210            "html",
31211            "--depth",
31212            "4",
31213            "pwcm",
31214            "#g6kf",
31215        ]);
31216        match cli.command {
31217            Some(Commands::DispatchTrace {
31218                targets,
31219                path,
31220                format,
31221                depth,
31222                ..
31223            }) => {
31224                assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
31225                assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
31226                assert_eq!(format, DispatchTraceFormat::Html);
31227                assert_eq!(depth, 4);
31228            }
31229            _ => panic!("expected DispatchTrace command"),
31230        }
31231    }
31232
31233    #[test]
31234    fn cli_parses_dependency_dag_command() {
31235        let cli = parse_cli([
31236            "tsift",
31237            "dependency-dag",
31238            "--path",
31239            "tasks/software/tsift.md",
31240            "--depth",
31241            "5",
31242            "--limit",
31243            "20",
31244            "--json",
31245            "alpha",
31246            "#beta",
31247        ]);
31248        match cli.command {
31249            Some(Commands::DependencyDag {
31250                targets,
31251                path,
31252                depth,
31253                limit,
31254                json,
31255                ..
31256            }) => {
31257                assert_eq!(targets, vec!["alpha".to_string(), "#beta".to_string()]);
31258                assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
31259                assert_eq!(depth, 5);
31260                assert_eq!(limit, 20);
31261                assert!(json);
31262            }
31263            _ => panic!("expected DependencyDag command"),
31264        }
31265    }
31266
31267    #[test]
31268    fn relativize_strips_root_prefix() {
31269        let root = std::path::Path::new("/home/user/project");
31270        assert_eq!(
31271            relativize("/home/user/project/src/main.rs", root),
31272            "src/main.rs"
31273        );
31274    }
31275
31276    #[test]
31277    fn relativize_leaves_non_matching_path() {
31278        let root = std::path::Path::new("/home/user/project");
31279        assert_eq!(
31280            relativize("/other/path/file.rs", root),
31281            "/other/path/file.rs"
31282        );
31283    }
31284
31285    #[test]
31286    fn relativize_leaves_already_relative() {
31287        let root = std::path::Path::new("/home/user/project");
31288        assert_eq!(relativize("src/main.rs", root), "src/main.rs");
31289    }
31290
31291    #[test]
31292    fn relativize_pathbuf_strips_prefix() {
31293        let root = std::path::Path::new("/home/user/project");
31294        let path = std::path::Path::new("/home/user/project/src/lib.rs");
31295        assert_eq!(relativize_pathbuf(path, root), PathBuf::from("src/lib.rs"));
31296    }
31297
31298    #[test]
31299    fn relativize_edges_strips_caller_file() {
31300        let root = std::path::Path::new("/tmp/proj");
31301        let mut edges = vec![index::StoredEdge {
31302            caller_file: "/tmp/proj/src/main.rs".to_string(),
31303            caller_name: "main".to_string(),
31304            caller_line: 1,
31305            callee_name: "helper".to_string(),
31306            call_site_line: 5,
31307            tagpath_handle: None,
31308        }];
31309        relativize_edges(&mut edges, root);
31310        assert_eq!(edges[0].caller_file, "src/main.rs");
31311    }
31312
31313    #[test]
31314    fn relativize_json_paths_strips_known_keys() {
31315        let root = std::path::Path::new("/tmp/proj");
31316        let mut val = serde_json::json!({
31317            "file": "/tmp/proj/src/main.rs",
31318            "path": "/tmp/proj/test.rs",
31319            "name": "/tmp/proj/not-a-path",
31320            "hits": [{"path": "/tmp/proj/nested.rs", "score": 1.0}]
31321        });
31322        relativize_json_paths(&mut val, root);
31323        assert_eq!(val["file"], "src/main.rs");
31324        assert_eq!(val["path"], "test.rs");
31325        assert_eq!(val["name"], "/tmp/proj/not-a-path");
31326        assert_eq!(val["hits"][0]["path"], "nested.rs");
31327    }
31328
31329    // --- limit caps ---
31330
31331    #[test]
31332    fn cli_graph_accepts_limit_flag() {
31333        let cli = parse_cli(["tsift", "graph", "main", "--limit", "5"]);
31334        match cli.command {
31335            Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 5),
31336            _ => panic!("expected Graph command"),
31337        }
31338    }
31339
31340    #[test]
31341    fn cli_graph_default_limit_is_20() {
31342        let cli = parse_cli(["tsift", "graph", "main"]);
31343        match cli.command {
31344            Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 20),
31345            _ => panic!("expected Graph command"),
31346        }
31347    }
31348
31349    #[test]
31350    fn cli_communities_accepts_limit_flag() {
31351        let cli = parse_cli(["tsift", "communities", "--limit", "3"]);
31352        match cli.command {
31353            Some(Commands::Communities { limit, .. }) => assert_eq!(limit, 3),
31354            _ => panic!("expected Communities command"),
31355        }
31356    }
31357
31358    #[test]
31359    fn cli_communities_default_limit_is_10() {
31360        let cli = parse_cli(["tsift", "communities"]);
31361        match cli.command {
31362            Some(Commands::Communities { limit, .. }) => assert_eq!(limit, 10),
31363            _ => panic!("expected Communities command"),
31364        }
31365    }
31366
31367    #[test]
31368    fn cli_explain_accepts_limit_flag() {
31369        let cli = parse_cli(["tsift", "explain", "main", "--limit", "7"]);
31370        match cli.command {
31371            Some(Commands::Explain { limit, .. }) => assert_eq!(limit, 7),
31372            _ => panic!("expected Explain command"),
31373        }
31374    }
31375
31376    #[test]
31377    fn cli_explain_default_limit_is_15() {
31378        let cli = parse_cli(["tsift", "explain", "main"]);
31379        match cli.command {
31380            Some(Commands::Explain { limit, .. }) => assert_eq!(limit, 15),
31381            _ => panic!("expected Explain command"),
31382        }
31383    }
31384
31385    #[test]
31386    fn cli_limit_zero_means_unlimited() {
31387        let cli = parse_cli(["tsift", "graph", "main", "--limit", "0"]);
31388        match cli.command {
31389            Some(Commands::Graph { limit, .. }) => assert_eq!(limit, 0),
31390            _ => panic!("expected Graph command"),
31391        }
31392    }
31393
31394    #[test]
31395    fn graph_cmd_limit_runs_ok() {
31396        let dir = setup_graph_index();
31397        let result = cmd_graph(
31398            "main",
31399            dir.path(),
31400            false,
31401            false,
31402            None,
31403            1,
31404            false,
31405            false,
31406            false,
31407            false,
31408            false,
31409            false,
31410            false,
31411            TagpathSearchOpts::default(),
31412        );
31413        assert!(result.is_ok());
31414    }
31415
31416    #[test]
31417    fn graph_cmd_unlimited_runs_ok() {
31418        let dir = setup_graph_index();
31419        let result = cmd_graph(
31420            "main",
31421            dir.path(),
31422            false,
31423            false,
31424            None,
31425            0,
31426            false,
31427            false,
31428            false,
31429            false,
31430            false,
31431            false,
31432            false,
31433            TagpathSearchOpts::default(),
31434        );
31435        assert!(result.is_ok());
31436    }
31437
31438    #[test]
31439    fn graph_cmd_tabular_runs_ok() {
31440        let dir = setup_graph_index();
31441        let result = cmd_graph(
31442            "main",
31443            dir.path(),
31444            false,
31445            false,
31446            None,
31447            20,
31448            false,
31449            false,
31450            false,
31451            false,
31452            false,
31453            true,
31454            false,
31455            TagpathSearchOpts::default(),
31456        );
31457        assert!(result.is_ok());
31458    }
31459
31460    #[test]
31461    fn communities_cmd_tabular_runs_ok() {
31462        let dir = setup_graph_index();
31463        let result = cmd_communities(
31464            dir.path(),
31465            None,
31466            1,
31467            10,
31468            false,
31469            false,
31470            false,
31471            false,
31472            true,
31473            false,
31474            TagpathSearchOpts::default(),
31475        );
31476        assert!(result.is_ok());
31477    }
31478
31479    #[test]
31480    fn explain_cmd_tabular_runs_ok() {
31481        let dir = setup_graph_index();
31482        let result = cmd_explain(
31483            "main",
31484            dir.path(),
31485            None,
31486            15,
31487            false,
31488            false,
31489            false,
31490            false,
31491            false,
31492            true,
31493            false,
31494            false,
31495        );
31496        assert!(result.is_ok());
31497    }
31498
31499    #[test]
31500    fn traversal_excludes_agent_doc_runtime_paths_from_source_watermark() {
31501        // #gdbcacheprove: .agent-doc runtime markdown (snapshots, baselines, archives,
31502        // session docs, runtime logs) must not contribute to the source watermark, or
31503        // every agent-doc cycle would invalidate the graph-db backend-eval cache and
31504        // force a full rebuild on the next run.
31505        let cases = [
31506            ".agent-doc",
31507            ".agent-doc/snapshots/abc.md",
31508            ".agent-doc/baselines/abc.md",
31509            ".agent-doc/archives/2026.md",
31510            ".agent-doc/runtime/run.jsonl",
31511            "src/foo/.agent-doc",
31512            "src/foo/.agent-doc/snapshots/x.md",
31513            "./.agent-doc/snapshots/x.md",
31514        ];
31515        for path in cases {
31516            assert!(
31517                traversal_relative_path_is_generated_artifact(path),
31518                "expected `{path}` to be excluded from source watermark"
31519            );
31520        }
31521        // Real source paths must NOT be excluded.
31522        for path in [
31523            "src/main.rs",
31524            "tests/perf_gate.rs",
31525            "fixtures/x.json",
31526            "agent-doc/src/lib.rs", // sibling dir without the leading dot
31527            "src/.agent-doc-helper.rs",
31528        ] {
31529            assert!(
31530                !traversal_relative_path_is_generated_artifact(path),
31531                "expected `{path}` to be included in source watermark"
31532            );
31533        }
31534    }
31535
31536    #[test]
31537    fn traversal_excludes_tsift_and_target_runtime_paths_from_source_watermark() {
31538        // #cachelookupshift: the conflict-matrix preparation cache key hashes
31539        // file_state snapshot rows + every markdown file under the root. Any
31540        // .tsift/, target/, or .agent-doc/ path slipping past the filter would
31541        // shift the watermark every run because those directories mutate as a
31542        // side effect of running tsift itself. This test locks the artifact
31543        // filter against regressions for each prefix variant
31544        // (bare, root-anchored, nested, and './' leading).
31545        let cases = [
31546            ".tsift",
31547            ".tsift/index.db",
31548            ".tsift/indexes/foo/index.db",
31549            ".tsift/conflict-matrix-cache/inputs/abc.json",
31550            ".tsift/summaries.db",
31551            "src/foo/.tsift",
31552            "src/foo/.tsift/graph.db",
31553            "./.tsift/index.db",
31554            "target",
31555            "target/debug/build/x",
31556            "target/release/tsift",
31557            "src/foo/target/debug/x",
31558            "./target/release/x",
31559        ];
31560        for path in cases {
31561            assert!(
31562                traversal_relative_path_is_generated_artifact(path),
31563                "expected `{path}` to be excluded from source watermark"
31564            );
31565        }
31566        // Look-alike paths must NOT be excluded — only true artifact dirs.
31567        for path in [
31568            "src/ctx-core-dev/lib/a__target/CHANGELOG.md",
31569            "src/ctx-core-dev/lib/a__target/A__Target/index.d.ts",
31570            "src/tsift-extras/lib.rs",
31571            "tsift/README.md",
31572            "src/targeting.rs",
31573            "src/.tsiftrc",
31574            "src/agent-doc-helper.rs",
31575        ] {
31576            assert!(
31577                !traversal_relative_path_is_generated_artifact(path),
31578                "expected `{path}` to be included in source watermark"
31579            );
31580        }
31581    }
31582
31583    #[test]
31584    fn traversal_source_watermark_is_stable_across_invocations_on_quiescent_root() {
31585        // #cachelookupshift: the conflict-matrix preparation cache only hits
31586        // when traversal_source_watermark returns the same hash for two
31587        // consecutive calls on identical source state. Lock that invariant so
31588        // a future change that folds wall-clock time, a directory mtime, or
31589        // any other non-content input into the hash trips this test before
31590        // regressing the preparation_cache_lookup hit rate. We exercise the
31591        // session_only=true path with a hinted markdown file so the test does
31592        // not need a full index DB to drive the index-snapshot branch.
31593        let dir = tempfile::tempdir().unwrap();
31594        let root = dir.path();
31595        std::fs::create_dir_all(root.join("src")).unwrap();
31596        std::fs::write(root.join("src/main.rs"), "fn main() {}\n").unwrap();
31597        let hint = root.join("README.md");
31598        std::fs::write(&hint, "# stable\n").unwrap();
31599        // Add a generated-artifact directory that must NOT affect the watermark.
31600        std::fs::create_dir_all(root.join(".tsift")).unwrap();
31601        std::fs::write(root.join(".tsift/index.db"), b"placeholder").unwrap();
31602        std::fs::create_dir_all(root.join("target/debug")).unwrap();
31603        std::fs::write(root.join("target/debug/marker"), b"placeholder").unwrap();
31604
31605        let first = traversal_source_watermark(root, &hint, None, true)
31606            .expect("first watermark call must succeed")
31607            .expect("first watermark must produce a hash for hinted markdown");
31608        let second = traversal_source_watermark(root, &hint, None, true)
31609            .expect("second watermark call must succeed")
31610            .expect("second watermark must produce a hash for hinted markdown");
31611        assert_eq!(
31612            first, second,
31613            "watermark must be identical across back-to-back invocations on a quiescent root"
31614        );
31615
31616        // Mutating a generated-artifact file must NOT shift the hash.
31617        std::fs::write(root.join(".tsift/index.db"), b"changed").unwrap();
31618        std::fs::write(root.join("target/debug/marker"), b"changed").unwrap();
31619        let third = traversal_source_watermark(root, &hint, None, true)
31620            .expect("third watermark call must succeed")
31621            .expect("third watermark must produce a hash for hinted markdown");
31622        assert_eq!(
31623            first, third,
31624            "watermark must ignore mutations under .tsift/ and target/"
31625        );
31626
31627        // Mutating the hinted markdown file MUST shift the hash so the
31628        // preparation cache invalidates correctly when user state changes.
31629        // Sleep briefly to push the file mtime past the original even on
31630        // coarse-resolution filesystems.
31631        std::thread::sleep(std::time::Duration::from_millis(20));
31632        std::fs::write(&hint, "# stable edited with longer content\n").unwrap();
31633        let fourth = traversal_source_watermark(root, &hint, None, true)
31634            .expect("fourth watermark call must succeed")
31635            .expect("fourth watermark must produce a hash for hinted markdown");
31636        assert_ne!(
31637            first, fourth,
31638            "watermark must invalidate when the hinted markdown file changes"
31639        );
31640    }
31641
31642    #[test]
31643    fn traversal_source_watermark_uses_summary_rows_not_summaries_db_metadata() {
31644        // #gcachemiss: full-projection cache keys must not miss just because the
31645        // SQLite summary cache file header or mtime churned. Only the semantic rows
31646        // that feed traversal projection should participate in the source watermark.
31647        let dir = tempfile::tempdir().unwrap();
31648        let root = dir.path();
31649        std::fs::write(root.join("README.md"), "# stable\n").unwrap();
31650        let summaries_db_path = root.join(".tsift/summaries.db");
31651        let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
31652        let mut summary = summarize::Summary {
31653            id: 0,
31654            symbol_name: "main".to_string(),
31655            file_path: "src/main.rs".to_string(),
31656            content_hash: "hash-main".to_string(),
31657            summary: "main wires the CLI".to_string(),
31658            entities: Some(vec![summarize::Entity {
31659                name: "Cli".to_string(),
31660                kind: "type".to_string(),
31661                description: "Command-line interface".to_string(),
31662            }]),
31663            relationships: None,
31664            concept_labels: Some(vec!["cli".to_string()]),
31665            extracted_at: "1700000000".to_string(),
31666            model: "test-model".to_string(),
31667            tokens_input: Some(10),
31668            tokens_output: Some(5),
31669        };
31670        summary_db.insert(&summary).unwrap();
31671        drop(summary_db);
31672
31673        let hint = root.join("README.md");
31674        let first = traversal_source_watermark(root, &hint, None, true)
31675            .expect("first watermark call must succeed")
31676            .expect("first watermark must produce a hash");
31677
31678        std::thread::sleep(std::time::Duration::from_millis(20));
31679        let conn = Connection::open(&summaries_db_path).unwrap();
31680        conn.pragma_update(None, "user_version", 1).unwrap();
31681        conn.pragma_update(None, "user_version", 0).unwrap();
31682        drop(conn);
31683
31684        let second = traversal_source_watermark(root, &hint, None, true)
31685            .expect("second watermark call must succeed")
31686            .expect("second watermark must produce a hash");
31687        assert_eq!(
31688            first, second,
31689            "metadata-only summaries.db churn must not invalidate the source watermark"
31690        );
31691
31692        summary.entities = Some(vec![summarize::Entity {
31693            name: "GraphCache".to_string(),
31694            kind: "type".to_string(),
31695            description: "Stable full-projection cache input".to_string(),
31696        }]);
31697        let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
31698        summary_db.delete_by_file("src/main.rs").unwrap();
31699        summary_db.insert(&summary).unwrap();
31700        drop(summary_db);
31701
31702        let third = traversal_source_watermark(root, &hint, None, true)
31703            .expect("third watermark call must succeed")
31704            .expect("third watermark must produce a hash");
31705        assert_ne!(
31706            first, third,
31707            "semantic summary row changes must invalidate the source watermark"
31708        );
31709    }
31710
31711    #[test]
31712    fn full_projection_source_watermark_ignores_source_mtime_when_index_rows_unchanged() {
31713        // #gfullhot: backend-eval full-projection cache keys should be based on
31714        // the indexed graph inputs, not file_state mtimes. Touching a source file
31715        // without changing extracted symbols/call edges must still hit the cache.
31716        let dir = tempfile::tempdir().unwrap();
31717        let root = dir.path();
31718        std::fs::create_dir_all(root.join("src")).unwrap();
31719        std::fs::create_dir_all(root.join(".tsift")).unwrap();
31720        let source = root.join("src/lib.rs");
31721        let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
31722        std::fs::write(&source, source_body).unwrap();
31723        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31724        db.rebuild(root).unwrap();
31725        drop(db);
31726
31727        let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
31728            .unwrap()
31729            .value;
31730        std::thread::sleep(std::time::Duration::from_millis(20));
31731        std::fs::write(&source, source_body).unwrap();
31732        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31733        db.apply_changes(root).unwrap();
31734        drop(db);
31735
31736        let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
31737            .unwrap()
31738            .value;
31739        assert_eq!(
31740            first, second,
31741            "mtime-only source index churn must not invalidate the full-projection cache"
31742        );
31743    }
31744
31745    #[test]
31746    fn full_projection_source_watermark_ignores_session_markdown_churn() {
31747        // #gfullhot: the full-projection performance cache isolates code graph
31748        // and semantic-summary inputs. Current session evidence is measured by
31749        // the bounded real dataset, so unrelated task-doc edits must not force a
31750        // million-row full-projection rebuild.
31751        let dir = tempfile::tempdir().unwrap();
31752        let root = dir.path();
31753        std::fs::create_dir_all(root.join("src")).unwrap();
31754        std::fs::create_dir_all(root.join("tasks/software")).unwrap();
31755        std::fs::create_dir_all(root.join(".tsift")).unwrap();
31756        std::fs::write(root.join("src/lib.rs"), "pub fn alpha() {}\n").unwrap();
31757        let task_doc = root.join("tasks/software/tsift.md");
31758        std::fs::write(
31759            &task_doc,
31760            "---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Initial item\n",
31761        )
31762        .unwrap();
31763        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31764        db.rebuild(root).unwrap();
31765        drop(db);
31766
31767        let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
31768            .unwrap()
31769            .value;
31770        std::fs::write(
31771            &task_doc,
31772            "---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Edited item\n",
31773        )
31774        .unwrap();
31775        let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
31776            .unwrap()
31777            .value;
31778        assert_eq!(
31779            first, second,
31780            "session markdown churn must not invalidate the full-projection code/summary cache"
31781        );
31782    }
31783
31784    #[test]
31785    fn full_projection_cache_hit_skips_provider_neutral_rebuild_after_mtime_churn() {
31786        // #gfullhot: once a full-project projection is cached, repeated samples
31787        // with unchanged graph inputs must report zero source_graph_build and
31788        // projection_rows work even if indexed file mtimes changed.
31789        let dir = tempfile::tempdir().unwrap();
31790        let root = dir.path();
31791        std::fs::create_dir_all(root.join("src")).unwrap();
31792        std::fs::create_dir_all(root.join(".tsift")).unwrap();
31793        let source = root.join("src/lib.rs");
31794        let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
31795        std::fs::write(&source, source_body).unwrap();
31796        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31797        db.rebuild(root).unwrap();
31798        drop(db);
31799
31800        let (_projection, _warnings, _phases, first_stats) =
31801            graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
31802        assert!(
31803            !first_stats.hit,
31804            "the first full-projection run should populate the cache"
31805        );
31806
31807        std::thread::sleep(std::time::Duration::from_millis(20));
31808        std::fs::write(&source, source_body).unwrap();
31809        let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
31810        db.apply_changes(root).unwrap();
31811        drop(db);
31812
31813        let (_projection, _warnings, phases, second_stats) =
31814            graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
31815        assert!(second_stats.hit, "mtime-only churn should still cache-hit");
31816        let source_graph_build = phases
31817            .iter()
31818            .find(|phase| phase.name == "full_projection.source_graph_build")
31819            .expect("cache hit must report source_graph_build");
31820        let projection_rows = phases
31821            .iter()
31822            .find(|phase| phase.name == "full_projection.projection_rows")
31823            .expect("cache hit must report projection_rows");
31824        assert_eq!(source_graph_build.duration_micros, 0);
31825        assert_eq!(projection_rows.duration_micros, 0);
31826    }
31827
31828    #[test]
31829    fn build_token_capped_preview_within_cap() {
31830        let lines: Vec<&str> = vec!["fn foo() {", "    1 + 2", "}"];
31831        let capped = build_token_capped_preview(&lines, 1, 3, 160, 1000);
31832        assert!(!capped.was_capped);
31833        assert_eq!(capped.preview.len(), 3);
31834        assert_eq!(capped.capped_end, 3);
31835    }
31836
31837    #[test]
31838    fn build_token_capped_preview_truncates_long_body() {
31839        let owned: Vec<String> = (0..200).map(|i| format!("    let line_{i} = {i};")).collect();
31840        let lines: Vec<&str> = owned.iter().map(|s| s.as_str()).collect();
31841        let capped = build_token_capped_preview(&lines, 1, 200, 160, 100);
31842        assert!(capped.was_capped);
31843        assert!(capped.preview.len() < 200);
31844        assert!(capped.capped_end < 200);
31845        assert!(!capped.preview.is_empty());
31846    }
31847
31848    #[test]
31849    fn build_token_capped_preview_respects_start_offset() {
31850        let owned: Vec<String> = (0..100).map(|i| format!("line {i}")).collect();
31851        let lines: Vec<&str> = owned.iter().map(|s| s.as_str()).collect();
31852        let capped = build_token_capped_preview(&lines, 50, 100, 160, 50);
31853        assert!(capped.was_capped);
31854        assert!(capped.capped_end >= 50);
31855        assert!(capped.capped_end < 100);
31856        assert_eq!(capped.preview[0].line, 50);
31857    }
31858
31859    #[test]
31860    fn response_budget_body_token_cap_defaults() {
31861        let budget = ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Normal), true);
31862        assert_eq!(budget.body_token_cap(), 1500);
31863
31864        let budget = ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Small), true);
31865        assert_eq!(budget.body_token_cap(), 500);
31866
31867        let budget = ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Deep), true);
31868        assert_eq!(budget.body_token_cap(), 3000);
31869    }
31870
31871    #[test]
31872    fn build_token_capped_preview_empty_input() {
31873        let lines: Vec<&str> = vec![];
31874        let capped = build_token_capped_preview(&lines, 1, 0, 160, 1000);
31875        assert!(!capped.was_capped);
31876        assert!(capped.preview.is_empty());
31877    }
31878
31879    #[test]
31880    fn build_token_capped_preview_single_long_line_fits() {
31881        let lines: Vec<&str> = vec!["short"];
31882        let capped = build_token_capped_preview(&lines, 1, 1, 160, 100);
31883        assert!(!capped.was_capped);
31884        assert_eq!(capped.preview.len(), 1);
31885        assert_eq!(capped.capped_end, 1);
31886    }
31887
31888    #[test]
31889    fn edge_index_replaces_from_id_to_id_with_positions() {
31890        let input = serde_json::json!({
31891            "nodes": [
31892                {"id": "symbol:src/lib.rs:foo"},
31893                {"id": "symbol:src/lib.rs:bar"},
31894                {"id": "symbol:src/lib.rs:baz"}
31895            ],
31896            "edges": [
31897                {"from_id": "symbol:src/lib.rs:foo", "to_id": "symbol:src/lib.rs:bar", "k": "calls"},
31898                {"from_id": "symbol:src/lib.rs:bar", "to_id": "symbol:src/lib.rs:baz", "k": "calls"}
31899            ]
31900        });
31901        let result = edge_index_transform(input);
31902        let edges = result.get("edges").unwrap().as_array().unwrap();
31903        assert_eq!(edges.len(), 2);
31904        assert_eq!(edges[0]["from"], 0);
31905        assert_eq!(edges[0]["to"], 1);
31906        assert_eq!(edges[1]["from"], 1);
31907        assert_eq!(edges[1]["to"], 2);
31908        assert!(edges[0].get("from_id").is_none());
31909        assert!(edges[0].get("to_id").is_none());
31910    }
31911
31912    #[test]
31913    fn edge_index_preserves_unresolved_ids_as_strings() {
31914        let input = serde_json::json!({
31915            "nodes": [{"id": "symbol:src/lib.rs:foo"}],
31916            "edges": [
31917                {"from_id": "symbol:src/lib.rs:foo", "to_id": "symbol:other.rs:missing", "k": "ref"}
31918            ]
31919        });
31920        let result = edge_index_transform(input);
31921        let edge = &result["edges"][0];
31922        assert_eq!(edge["from"], 0);
31923        assert_eq!(edge["to_id"], "symbol:other.rs:missing");
31924    }
31925
31926    #[test]
31927    fn edge_index_noop_without_nodes_and_edges() {
31928        let input = serde_json::json!({"report": {"entries": [{"from_id": "a", "to_id": "b"}]}});
31929        let result = edge_index_transform(input);
31930        assert_eq!(result["report"]["entries"][0]["from_id"], "a");
31931    }
31932}
31933
31934// --- SQL introspection ---
31935
/// Serializable description of one table, as assembled by `schema_overview`.
#[derive(Serialize)]
struct TableInfo {
    /// Table name read from `sqlite_master`.
    name: String,
    /// Column metadata produced by `table_columns` for this table.
    columns: Vec<ColumnInfo>,
    /// Result of `SELECT COUNT(*)` on the table at the time of inspection.
    row_count: i64,
}
31942
/// Serializable description of one column, filled from a `PRAGMA table_info` row.
#[derive(Serialize)]
struct ColumnInfo {
    /// Column name (PRAGMA result column 1).
    name: String,
    /// Declared type (PRAGMA result column 2); serialized under the key `type`.
    #[serde(rename = "type")]
    col_type: String,
    /// NOT NULL flag (PRAGMA result column 3).
    notnull: bool,
    /// True when PRAGMA result column 5 is greater than zero.
    pk: bool,
    /// Default value (PRAGMA result column 4); omitted from output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    default_value: Option<String>,
}
31953
31954/// Open a SQLite connection (read-only).
31955pub(crate) fn open_db(path: &std::path::Path) -> Result<Connection> {
31956    let conn = Connection::open_with_flags(
31957        path,
31958        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
31959    )
31960    .with_context(|| format!("opening database: {}", path.display()))?;
31961    Ok(conn)
31962}
31963
31964/// List all user tables with column metadata and row counts.
31965pub(crate) fn schema_overview(conn: &Connection) -> Result<Vec<TableInfo>> {
31966    let mut stmt = conn.prepare(
31967        "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name",
31968    )?;
31969    let table_names: Vec<String> = stmt
31970        .query_map([], |row| row.get(0))?
31971        .collect::<std::result::Result<Vec<_>, _>>()?;
31972
31973    let mut tables = Vec::new();
31974    for tbl in table_names {
31975        let columns = table_columns(conn, &tbl)?;
31976        let row_count: i64 =
31977            conn.query_row(&format!("SELECT COUNT(*) FROM \"{}\"", tbl), [], |row| {
31978                row.get(0)
31979            })?;
31980        tables.push(TableInfo {
31981            name: tbl,
31982            columns,
31983            row_count,
31984        });
31985    }
31986    Ok(tables)
31987}
31988
31989/// Get column metadata for a single table.
31990pub(crate) fn table_columns(conn: &Connection, table: &str) -> Result<Vec<ColumnInfo>> {
31991    let mut stmt = conn.prepare(&format!("PRAGMA table_info(\"{}\")", table))?;
31992    let cols = stmt
31993        .query_map([], |row| {
31994            Ok(ColumnInfo {
31995                name: row.get(1)?,
31996                col_type: row.get::<_, String>(2).unwrap_or_default(),
31997                notnull: row.get::<_, bool>(3).unwrap_or(false),
31998                pk: row.get::<_, i32>(5).unwrap_or(0) > 0,
31999                default_value: row.get(4)?,
32000            })
32001        })?
32002        .collect::<std::result::Result<Vec<_>, _>>()?;
32003    Ok(cols)
32004}
32005
32006/// Execute an arbitrary SQL query and return rows as JSON values.
32007pub(crate) fn execute_query(
32008    conn: &Connection,
32009    sql: &str,
32010) -> Result<(Vec<String>, Vec<Vec<serde_json::Value>>)> {
32011    let mut stmt = conn.prepare(sql).context("preparing SQL query")?;
32012    let col_names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
32013    let col_count = col_names.len();
32014
32015    let mut rows = Vec::new();
32016    let mut query_rows = stmt.query([])?;
32017    while let Some(row) = query_rows.next()? {
32018        let mut vals = Vec::with_capacity(col_count);
32019        for i in 0..col_count {
32020            let val = match row.get_ref(i)? {
32021                rusqlite::types::ValueRef::Null => serde_json::Value::Null,
32022                rusqlite::types::ValueRef::Integer(n) => serde_json::json!(n),
32023                rusqlite::types::ValueRef::Real(f) => serde_json::json!(f),
32024                rusqlite::types::ValueRef::Text(s) => {
32025                    serde_json::Value::String(String::from_utf8_lossy(s).into_owned())
32026                }
32027                rusqlite::types::ValueRef::Blob(b) => {
32028                    serde_json::Value::String(format!("<blob {} bytes>", b.len()))
32029                }
32030            };
32031            vals.push(val);
32032        }
32033        rows.push(vals);
32034    }
32035    Ok((col_names, rows))
32036}
32037
32038
32039#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32040enum DigestRunnerKind {
32041    Test,
32042    Log,
32043}
32044
32045impl DigestRunnerKind {
32046    fn parse(raw: &str) -> Result<Self> {
32047        match raw.trim().to_ascii_lowercase().as_str() {
32048            "test" => Ok(Self::Test),
32049            "log" => Ok(Self::Log),
32050            other => bail!("unsupported digest runner kind `{other}`; expected test or log"),
32051        }
32052    }
32053
32054    fn as_str(self) -> &'static str {
32055        match self {
32056            Self::Test => "test",
32057            Self::Log => "log",
32058        }
32059    }
32060}
32061
/// Splits `s` into whitespace-separated words, keeping quoted runs together.
///
/// A word that *starts* with `"` or `'` extends to the matching closing quote
/// (or to the end of input when unterminated) and is returned with its quotes
/// still attached. Quotes in the middle of a word are not special, and no
/// escape sequences are interpreted.
pub(crate) fn shell_split(s: &str) -> Vec<&str> {
    let raw = s.as_bytes();
    let len = raw.len();
    let mut words = Vec::new();
    let mut pos = 0;

    loop {
        // Advance past any run of ASCII whitespace.
        pos += raw[pos..]
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count();
        if pos >= len {
            break;
        }

        let end = match raw[pos] {
            quote @ (b'"' | b'\'') => match raw[pos + 1..].iter().position(|&byte| byte == quote) {
                // Include the closing quote in the word.
                Some(offset) => pos + 1 + offset + 1,
                None => len,
            },
            _ => {
                pos + raw[pos..]
                    .iter()
                    .take_while(|byte| !byte.is_ascii_whitespace())
                    .count()
            }
        };

        // Boundaries always fall on ASCII bytes, so slicing the `str` is safe.
        words.push(&s[pos..end]);
        pos = end;
    }
    words
}
32094
/// Wraps `s` in double quotes, escaping backslashes and double quotes.
///
/// If `s` is already enclosed in a matching pair of `"` or `'`, that outer
/// pair is removed first. The result is always double-quoted, including when
/// it contains only "safe" characters.
///
/// A lone quote character (`"` or `'`) is treated as content rather than as an
/// enclosing pair; slicing it as a pair previously panicked.
///
/// NOTE(review): `$` and backticks are not escaped, so a POSIX shell would
/// still expand them inside the produced double quotes — confirm whether
/// callers rely on that.
pub(crate) fn shell_quote(s: &str) -> String {
    // `strip_prefix` + `strip_suffix` only succeeds when both ends carry the
    // same quote character and they are two distinct characters.
    let unquoted = ['"', '\'']
        .into_iter()
        .find_map(|quote| s.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(s);

    format!(
        "\"{}\"",
        unquoted.replace('\\', "\\\\").replace('"', "\\\"")
    )
}
32117
32118fn empty_search_coverage() -> sift::SearchCoverageSnapshot {
32119    sift::SearchCoverageSnapshot {
32120        mode: sift::SearchCoverageMode::Sealed,
32121        total_sector_count: 0,
32122        mounted_sector_count: 0,
32123        reused_sector_count: 0,
32124        dirty_sector_count: 0,
32125        completed_dirty_sector_count: 0,
32126        rebuilding_sector_count: 0,
32127        resumed_sector_count: 0,
32128        active_rebuild: None,
32129    }
32130}
32131
32132fn aggregate_search_coverage(responses: &[sift::SearchResponse]) -> sift::SearchCoverageSnapshot {
32133    let total_sector_count = responses
32134        .iter()
32135        .map(|response| response.coverage.total_sector_count)
32136        .sum();
32137    let mounted_sector_count = responses
32138        .iter()
32139        .map(|response| response.coverage.mounted_sector_count)
32140        .sum();
32141    let reused_sector_count = responses
32142        .iter()
32143        .map(|response| response.coverage.reused_sector_count)
32144        .sum();
32145    let dirty_sector_count = responses
32146        .iter()
32147        .map(|response| response.coverage.dirty_sector_count)
32148        .sum();
32149    let completed_dirty_sector_count = responses
32150        .iter()
32151        .map(|response| response.coverage.completed_dirty_sector_count)
32152        .sum();
32153    let rebuilding_sector_count = responses
32154        .iter()
32155        .map(|response| response.coverage.rebuilding_sector_count)
32156        .sum();
32157    let resumed_sector_count = responses
32158        .iter()
32159        .map(|response| response.coverage.resumed_sector_count)
32160        .sum();
32161
32162    let mode = if dirty_sector_count == 0 && rebuilding_sector_count == 0 {
32163        sift::SearchCoverageMode::Sealed
32164    } else if completed_dirty_sector_count > 0
32165        || rebuilding_sector_count > 0
32166        || resumed_sector_count > 0
32167    {
32168        sift::SearchCoverageMode::Converging
32169    } else {
32170        sift::SearchCoverageMode::Frontier
32171    };
32172
32173    sift::SearchCoverageSnapshot {
32174        mode,
32175        total_sector_count,
32176        mounted_sector_count,
32177        reused_sector_count,
32178        dirty_sector_count,
32179        completed_dirty_sector_count,
32180        rebuilding_sector_count,
32181        resumed_sector_count,
32182        active_rebuild: responses
32183            .iter()
32184            .find_map(|response| response.coverage.active_rebuild.clone()),
32185    }
32186}
32187
32188fn empty_search_response(root: &Path, strategy: &str) -> sift::SearchResponse {
32189    sift::SearchResponse {
32190        strategy: strategy.to_string(),
32191        root: root.display().to_string(),
32192        indexed_artifacts: 0,
32193        skipped_artifacts: 0,
32194        coverage: empty_search_coverage(),
32195        hits: Vec::new(),
32196    }
32197}
32198
32199fn absolutize_search_hit_paths(response: &mut sift::SearchResponse, search_root: &Path) {
32200    for hit in &mut response.hits {
32201        let path = Path::new(&hit.path);
32202        if path.is_relative() {
32203            hit.path = search_root.join(path).display().to_string();
32204        }
32205    }
32206}
32207
32208fn merge_search_responses(
32209    root: &Path,
32210    strategy: &str,
32211    limit: usize,
32212    responses: Vec<sift::SearchResponse>,
32213) -> sift::SearchResponse {
32214    let indexed_artifacts = responses
32215        .iter()
32216        .map(|response| response.indexed_artifacts)
32217        .sum();
32218    let skipped_artifacts = responses
32219        .iter()
32220        .map(|response| response.skipped_artifacts)
32221        .sum();
32222    let coverage = if responses.is_empty() {
32223        empty_search_coverage()
32224    } else {
32225        aggregate_search_coverage(&responses)
32226    };
32227    let mut hits: Vec<sift::SearchHit> = responses
32228        .into_iter()
32229        .flat_map(|response| response.hits)
32230        .collect();
32231    hits.sort_by(|left, right| {
32232        right
32233            .score
32234            .partial_cmp(&left.score)
32235            .unwrap_or(Ordering::Equal)
32236            .then_with(|| left.path.cmp(&right.path))
32237            .then_with(|| left.location.cmp(&right.location))
32238    });
32239    hits.truncate(limit);
32240    for (rank, hit) in hits.iter_mut().enumerate() {
32241        hit.rank = rank + 1;
32242    }
32243
32244    sift::SearchResponse {
32245        strategy: strategy.to_string(),
32246        root: root.display().to_string(),
32247        indexed_artifacts,
32248        skipped_artifacts,
32249        coverage,
32250        hits,
32251    }
32252}
32253
32254pub(crate) fn federated_sift_search(
32255    root: &Path,
32256    cache_dir: &Path,
32257    query: &str,
32258    limit: usize,
32259    timeout_secs: u64,
32260    strategy: &str,
32261) -> Result<sift::SearchResponse> {
32262    let targets = resolve_search_index_targets(root, root, None, true)?;
32263    if targets.is_empty() {
32264        if config::Config::submodule_dirs(root)?.is_empty() {
32265            return run_search_with_timeout(
32266                root,
32267                cache_dir,
32268                query,
32269                limit,
32270                timeout_secs,
32271                strategy,
32272                &[],
32273            );
32274        }
32275        return Ok(empty_search_response(root, strategy));
32276    }
32277
32278    let mut responses = Vec::with_capacity(targets.len());
32279    for target in &targets {
32280        let mut response = run_search_with_timeout(
32281            &target.source_root,
32282            cache_dir,
32283            query,
32284            limit,
32285            timeout_secs,
32286            strategy,
32287            std::slice::from_ref(target),
32288        )?;
32289        absolutize_search_hit_paths(&mut response, &target.source_root);
32290        response.root = root.display().to_string();
32291        responses.push(response);
32292    }
32293
32294    Ok(merge_search_responses(root, strategy, limit, responses))
32295}
32296
32297/// Federated symbol search across every scoped `.tsift/indexes/<scope>/index.db`
32298/// in the workspace. Per-scope tagpath annotation runs inside the per-scope
32299/// loop so each scope's adapter resolves against its own `.naming.toml` /
32300/// `.naming/index.json` (the workspace root usually has no tagpath of its
32301/// own). The merged `TagpathAnnotationDiagnostic` reports `loaded=true` when
32302/// at least one scope loaded, and `stale=true` with the first stale reason
32303/// when any scope was stale.
32304pub(crate) fn federated_symbol_search(
32305    root: &std::path::Path,
32306    query: &str,
32307    limit: usize,
32308    tagpath_opts: &TagpathSearchOpts,
32309) -> Result<(Vec<index::SymbolHit>, TagpathAnnotationDiagnostic)> {
32310    let cfg = config::Config::load(root)?;
32311    let submodules = config::Config::submodule_dirs(root)?;
32312    let mut all_hits: Vec<index::SymbolHit> = Vec::new();
32313    let mut combined = TagpathAnnotationDiagnostic::default();
32314    for scope in &submodules {
32315        if !cfg.federation_for_scope(scope) {
32316            continue;
32317        }
32318        let db_path = cfg.db_path_for(root, &scope.id);
32319        if !db_path.exists() {
32320            continue;
32321        }
32322        let db = index::IndexDb::open_read_only(&db_path)?;
32323        let mut hits = db.symbol_search(query, limit)?;
32324        let diag = annotate_hits_with_tagpath(&mut hits, &scope.source_root, tagpath_opts)?;
32325        combined.loaded |= diag.loaded;
32326        if diag.stale && !combined.stale {
32327            combined.stale = true;
32328            combined.reason = diag.reason;
32329        }
32330        all_hits.append(&mut hits);
32331    }
32332    all_hits.sort_by(|a, b| {
32333        b.score
32334            .partial_cmp(&a.score)
32335            .unwrap_or(std::cmp::Ordering::Equal)
32336    });
32337    all_hits.truncate(limit);
32338    Ok((all_hits, combined))
32339}
32340
/// One JSON line of ripgrep output, discriminated by its lower-cased `type`
/// field.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum RipgrepJsonEvent {
    /// A `"match"` event; `parse_exact_search_output` turns these into hits.
    Match {
        data: RipgrepMatchData,
    },
    /// Any other `type` value; its payload is not deserialized.
    #[serde(other)]
    Other,
}
32350
/// Payload of a ripgrep `match` event; only the fields read by this file are
/// declared.
#[derive(Debug, Deserialize)]
struct RipgrepMatchData {
    /// File the match was found in.
    path: RipgrepTextField,
    /// Matched line text; trailing `\r`/`\n` is trimmed when building a hit.
    lines: RipgrepTextField,
    /// Line number of the match, if ripgrep reported one.
    line_number: Option<usize>,
}
32357
/// A ripgrep JSON field that may carry a `text` value.
#[derive(Debug, Deserialize)]
struct RipgrepTextField {
    /// The text value; events where it is absent are skipped by
    /// `parse_exact_search_output`.
    text: Option<String>,
}
32362
32363pub(crate) fn federated_exact_search(
32364    root: &Path,
32365    query: &str,
32366    limit: usize,
32367    timeout_secs: u64,
32368) -> Result<sift::SearchResponse> {
32369    let cfg = config::Config::load(root)?;
32370    let mut responses = Vec::new();
32371    for scope in config::Config::submodule_dirs(root)? {
32372        if !cfg.federation_for_scope(&scope) {
32373            continue;
32374        }
32375        let mut response =
32376            run_exact_search_with_timeout(&scope.source_root, query, limit, timeout_secs)?;
32377        absolutize_search_hit_paths(&mut response, &scope.source_root);
32378        response.root = root.display().to_string();
32379        responses.push(response);
32380    }
32381
32382    Ok(merge_search_responses(root, "exact", limit, responses))
32383}
32384
32385pub(crate) fn run_sift_search(
32386    search_path: &Path,
32387    cache_dir: &Path,
32388    query: &str,
32389    limit: usize,
32390    strategy: &str,
32391) -> Result<sift::SearchResponse> {
32392    let engine = Sift::builder().with_cache_dir(cache_dir).build();
32393    let options = SearchOptions::default()
32394        .with_limit(limit)
32395        .with_strategy(strategy.to_string());
32396    let input = SearchInput::new(search_path, query).with_options(options);
32397    engine.search(input).context("sift search failed")
32398}
32399
/// Formats the user-facing error shown when an exact search exceeds
/// `timeout_secs` seconds.
fn exact_search_timeout_message(timeout_secs: u64) -> String {
    format!(
        "tsift search timed out after {timeout_secs}s (strategy: exact). \
         Re-run with `--timeout 0` to disable the timeout or narrow `--path` / `--scope`."
    )
}
32407
/// Builds the `rg` invocation used for exact search.
///
/// The query is passed after `--` so that a query starting with `-` is not
/// parsed as a flag, and `--fixed-strings` makes it a literal match.
fn exact_search_command(search_path: &Path, query: &str) -> Command {
    const FIXED_FLAGS: [&str; 5] = [
        "--json",
        "--fixed-strings",
        "--line-number",
        "--hidden",
        "--",
    ];
    let mut rg = Command::new("rg");
    rg.args(FIXED_FLAGS).arg(query).arg(search_path);
    rg
}
32420
32421fn exact_search_file_timestamp(path: &Path) -> sift::ArtifactFreshness {
32422    let observed_unix_secs = SystemTime::now()
32423        .duration_since(UNIX_EPOCH)
32424        .unwrap_or_default()
32425        .as_secs() as i64;
32426    let modified_unix_secs = fs::metadata(path)
32427        .ok()
32428        .and_then(|metadata| metadata.modified().ok())
32429        .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
32430        .map(|duration| duration.as_secs() as i64);
32431    sift::ArtifactFreshness {
32432        observed_unix_secs,
32433        modified_unix_secs,
32434    }
32435}
32436
32437fn parse_exact_search_output(
32438    search_path: &Path,
32439    limit: usize,
32440    raw: &str,
32441) -> Result<sift::SearchResponse> {
32442    if limit == 0 {
32443        return Ok(sift::SearchResponse {
32444            strategy: "exact".to_string(),
32445            root: search_path.display().to_string(),
32446            indexed_artifacts: 0,
32447            skipped_artifacts: 0,
32448            coverage: empty_search_coverage(),
32449            hits: Vec::new(),
32450        });
32451    }
32452
32453    let mut hits = Vec::new();
32454    for line in raw.lines() {
32455        let event: RipgrepJsonEvent =
32456            serde_json::from_str(line).context("parsing ripgrep exact-search output")?;
32457        let RipgrepJsonEvent::Match { data } = event else {
32458            continue;
32459        };
32460        let Some(path_text) = data.path.text else {
32461            continue;
32462        };
32463        let Some(lines_text) = data.lines.text else {
32464            continue;
32465        };
32466        let path = PathBuf::from(path_text);
32467        let snippet = lines_text.trim_end_matches(['\r', '\n']).to_string();
32468        let rank = hits.len() + 1;
32469        hits.push(sift::SearchHit {
32470            artifact_id: format!(
32471                "exact:{}:{}:{}",
32472                path.display(),
32473                data.line_number.unwrap_or(0),
32474                rank
32475            ),
32476            artifact_kind: sift::ContextArtifactKind::File,
32477            path: path.display().to_string(),
32478            rank,
32479            score: (limit.saturating_sub(rank).saturating_add(1)) as f64,
32480            confidence: sift::ScoreConfidence::High,
32481            location: data.line_number.map(|line| format!("line {}", line)),
32482            snippet: snippet.clone(),
32483            provenance: sift::ArtifactProvenance {
32484                adapter: sift::AcquisitionAdapterKind::FileSystem,
32485                source: "ripgrep -F".to_string(),
32486                synthetic: false,
32487            },
32488            freshness: exact_search_file_timestamp(&path),
32489            budget: sift::ArtifactBudget::from_text(&snippet, 1),
32490        });
32491        if hits.len() >= limit {
32492            break;
32493        }
32494    }
32495
32496    Ok(sift::SearchResponse {
32497        strategy: "exact".to_string(),
32498        root: search_path.display().to_string(),
32499        indexed_artifacts: hits.len(),
32500        skipped_artifacts: 0,
32501        coverage: empty_search_coverage(),
32502        hits,
32503    })
32504}
32505
32506fn exact_search_response_from_process(
32507    search_path: &Path,
32508    limit: usize,
32509    status: std::process::ExitStatus,
32510    stdout: &[u8],
32511    stderr: &[u8],
32512) -> Result<sift::SearchResponse> {
32513    if !status.success() && status.code() != Some(1) {
32514        let message = String::from_utf8_lossy(stderr);
32515        let trimmed = message.trim();
32516        if trimmed.is_empty() {
32517            bail!("ripgrep exact search exited with status {}", status);
32518        }
32519        bail!("{}", trimmed);
32520    }
32521
32522    let raw = String::from_utf8(stdout.to_vec()).context("decoding ripgrep exact-search output")?;
32523    parse_exact_search_output(search_path, limit, &raw)
32524}
32525
32526fn run_exact_search(search_path: &Path, query: &str, limit: usize) -> Result<sift::SearchResponse> {
32527    let output = exact_search_command(search_path, query)
32528        .output()
32529        .context("running exact search with ripgrep")?;
32530    exact_search_response_from_process(
32531        search_path,
32532        limit,
32533        output.status,
32534        &output.stdout,
32535        &output.stderr,
32536    )
32537}
32538
32539pub(crate) fn run_exact_search_with_timeout(
32540    search_path: &Path,
32541    query: &str,
32542    limit: usize,
32543    timeout_secs: u64,
32544) -> Result<sift::SearchResponse> {
32545    if timeout_secs == 0 {
32546        return run_exact_search(search_path, query, limit);
32547    }
32548
32549    let mut child = exact_search_command(search_path, query)
32550        .stdin(Stdio::null())
32551        .stdout(Stdio::piped())
32552        .stderr(Stdio::piped())
32553        .spawn()
32554        .context("spawning timed exact search worker")?;
32555
32556    let timeout = Duration::from_secs(timeout_secs);
32557    let status = wait_for_child_exit(&mut child, timeout)
32558        .context("waiting for timed exact search worker")?;
32559    if status.is_none() {
32560        let _ = child.kill();
32561        let _ = child.wait();
32562        bail!("{}", exact_search_timeout_message(timeout_secs));
32563    }
32564
32565    let status = status.unwrap();
32566    let stdout = read_child_stdout(&mut child)?;
32567    let stderr = read_child_stderr(&mut child)?;
32568    exact_search_response_from_process(
32569        search_path,
32570        limit,
32571        status,
32572        stdout.as_bytes(),
32573        stderr.as_bytes(),
32574    )
32575}
32576
32577pub(crate) fn run_search_with_timeout(
32578    search_path: &Path,
32579    cache_dir: &Path,
32580    query: &str,
32581    limit: usize,
32582    timeout_secs: u64,
32583    strategy: &str,
32584    search_targets: &[SearchIndexTarget],
32585) -> Result<sift::SearchResponse> {
32586    if timeout_secs == 0 {
32587        return run_sift_search(search_path, cache_dir, query, limit, strategy);
32588    }
32589
32590    let output_path = next_search_worker_output_path();
32591    let mut child = Command::new(
32592        std::env::current_exe().context("resolving tsift executable for timed search")?,
32593    )
32594    .arg("__search-worker")
32595    .arg("--path")
32596    .arg(search_path)
32597    .arg("--cache-dir")
32598    .arg(cache_dir)
32599    .arg("--query")
32600    .arg(query)
32601    .arg("--limit")
32602    .arg(limit.to_string())
32603    .arg("--strategy")
32604    .arg(strategy)
32605    .arg("--output")
32606    .arg(&output_path)
32607    .stdin(Stdio::null())
32608    .stdout(Stdio::null())
32609    .stderr(Stdio::piped())
32610    .spawn()
32611    .context("spawning timed sift search worker")?;
32612
32613    let timeout = Duration::from_secs(timeout_secs);
32614    let status =
32615        wait_for_child_exit(&mut child, timeout).context("waiting for timed sift search worker")?;
32616    if status.is_none() {
32617        let _ = child.kill();
32618        let _ = child.wait();
32619        let _ = fs::remove_file(&output_path);
32620        bail!(
32621            "{}",
32622            search_timeout_message(timeout_secs, strategy, search_targets)?
32623        );
32624    }
32625
32626    let status = status.unwrap();
32627    let stderr = read_child_stderr(&mut child)?;
32628    if !status.success() {
32629        let _ = fs::remove_file(&output_path);
32630        let message = stderr.trim();
32631        if message.is_empty() {
32632            bail!("sift search worker exited with status {}", status);
32633        }
32634        bail!("{}", message);
32635    }
32636
32637    let raw = fs::read_to_string(&output_path)
32638        .with_context(|| format!("reading search worker output: {}", output_path.display()))?;
32639    let _ = fs::remove_file(&output_path);
32640    serde_json::from_str(&raw).context("parsing search worker output")
32641}
32642
/// Returns a fresh path in the system temp directory for a search worker's
/// JSON output.
///
/// The file name combines the process id, the current time in nanoseconds
/// since the Unix epoch, and a per-process sequence number. The sequence
/// number keeps names distinct even when two calls observe the same clock
/// reading (coarse clocks, or concurrent callers in one process).
fn next_search_worker_output_path() -> PathBuf {
    static NEXT_SEQUENCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let stamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_nanos();
    // Only uniqueness matters here, so relaxed ordering is sufficient.
    let sequence = NEXT_SEQUENCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    std::env::temp_dir().join(format!(
        "tsift-search-{}-{}-{}.json",
        std::process::id(),
        stamp,
        sequence
    ))
}
32654
32655fn wait_for_child_exit(
32656    child: &mut std::process::Child,
32657    timeout: Duration,
32658) -> Result<Option<std::process::ExitStatus>> {
32659    let started = Instant::now();
32660    loop {
32661        if let Some(status) = child.try_wait()? {
32662            return Ok(Some(status));
32663        }
32664        if started.elapsed() >= timeout {
32665            return Ok(None);
32666        }
32667        let remaining = timeout.saturating_sub(started.elapsed());
32668        std::thread::sleep(remaining.min(Duration::from_millis(10)));
32669    }
32670}
32671
32672fn read_child_stderr(child: &mut std::process::Child) -> Result<String> {
32673    let mut stderr = String::new();
32674    if let Some(mut pipe) = child.stderr.take() {
32675        pipe.read_to_string(&mut stderr)
32676            .context("reading search worker stderr")?;
32677    }
32678    Ok(stderr)
32679}
32680
32681fn read_child_stdout(child: &mut std::process::Child) -> Result<String> {
32682    let mut stdout = String::new();
32683    if let Some(mut pipe) = child.stdout.take() {
32684        pipe.read_to_string(&mut stdout)
32685            .context("reading search worker stdout")?;
32686    }
32687    Ok(stdout)
32688}
32689
32690pub(crate) fn maybe_apply_search_worker_test_hooks() -> Result<()> {
32691    if let Ok(path) = std::env::var("TSIFT_TEST_SEARCH_WORKER_PID_FILE") {
32692        fs::write(&path, std::process::id().to_string())
32693            .with_context(|| format!("writing search worker pid file: {path}"))?;
32694    }
32695    if let Ok(ms) = std::env::var("TSIFT_TEST_SEARCH_WORKER_SLEEP_MS") {
32696        let delay_ms = ms
32697            .parse::<u64>()
32698            .with_context(|| format!("parsing TSIFT_TEST_SEARCH_WORKER_SLEEP_MS={ms}"))?;
32699        std::thread::sleep(Duration::from_millis(delay_ms));
32700    }
32701    Ok(())
32702}
32703
// Test-only, per-thread slot holding at most one pending post-precheck lock
// hook. It is filled by `install_search_post_precheck_lock_hook` and emptied
// either when the hook is consumed or when the returned guard is dropped.
#[cfg(test)]
thread_local! {
    static SEARCH_POST_PRECHECK_LOCK_HOOK: RefCell<Option<SearchPostPrecheckLockHook>> = const { RefCell::new(None) };
}
32708
/// Test-only selector for the journal mode the lock hook uses when it takes an
/// exclusive lock on the database.
#[cfg(test)]
enum SearchPostPrecheckLockMode {
    /// Lock with `journal_mode=DELETE` and write a rollback-journal marker file.
    RollbackJournal,
    /// Lock with `journal_mode=WAL`.
    Wal,
}
32714
/// Test-only description of a pending lock hook: which database to lock and
/// in which journal mode.
#[cfg(test)]
struct SearchPostPrecheckLockHook {
    /// Database file the hook opens and locks.
    db_path: PathBuf,
    /// Journal mode used while locking.
    mode: SearchPostPrecheckLockMode,
}
32720
/// Test-only guard returned by the hook installers; dropping it clears the
/// current thread's hook slot.
#[cfg(test)]
struct SearchPostPrecheckLockGuard;

#[cfg(test)]
impl Drop for SearchPostPrecheckLockGuard {
    /// Empties the thread-local hook slot, discarding any hook still pending.
    fn drop(&mut self) {
        SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
            *slot.borrow_mut() = None;
        });
    }
}
32732
/// Installs a rollback-journal lock hook for `db_path` on the current thread.
#[cfg(test)]
fn install_search_post_precheck_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::RollbackJournal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
32737
/// Installs a WAL-mode lock hook for `db_path` on the current thread.
#[cfg(test)]
fn install_search_post_precheck_wal_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
    let mode = SearchPostPrecheckLockMode::Wal;
    install_search_post_precheck_lock_hook(db_path, mode)
}
32742
/// Stores a lock hook for `db_path` / `mode` in the current thread's hook slot.
///
/// Returns the guard that clears the slot when dropped.
///
/// # Panics
///
/// Panics if a hook is already installed on this thread; the existing hook is
/// left in place in that case.
#[cfg(test)]
fn install_search_post_precheck_lock_hook(
    db_path: PathBuf,
    mode: SearchPostPrecheckLockMode,
) -> SearchPostPrecheckLockGuard {
    SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
        let mut installed = slot.borrow_mut();
        assert!(
            installed.is_none(),
            "search post-precheck lock hook already installed"
        );
        *installed = Some(SearchPostPrecheckLockHook { db_path, mode });
    });
    SearchPostPrecheckLockGuard
}
32757
/// Consumes the hook installed on this thread (if any) and has a background
/// thread take an exclusive lock on the hook's database.
///
/// With no hook installed this is a no-op. Otherwise a detached thread:
/// 1. opens the database and runs the mode-specific SQL ending in
///    `BEGIN EXCLUSIVE` (rollback-journal mode also writes a marker file at the
///    rollback-journal path; WAL mode asserts the WAL sidecar file exists),
/// 2. signals readiness,
/// 3. sleeps 200 ms, drops the connection, and removes the rollback-journal
///    path on a best-effort basis.
///
/// The caller blocks until the readiness signal arrives.
///
/// # Errors
///
/// Fails if no readiness signal is received within one second (including when
/// the background thread panics before signaling).
#[cfg(test)]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
    let pending = SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| slot.borrow_mut().take());
    let Some(SearchPostPrecheckLockHook { db_path, mode }) = pending else {
        return Ok(());
    };

    // Capacity-1 channel: the locker thread can signal without waiting for us.
    let (locked_tx, locked_rx) = std::sync::mpsc::sync_channel(1);
    std::thread::spawn(move || {
        let conn = Connection::open(&db_path).expect("opening db for search lock hook");
        match mode {
            SearchPostPrecheckLockMode::Wal => {
                conn.execute_batch(
                    "PRAGMA journal_mode=WAL;
                     PRAGMA wal_autocheckpoint=0;
                     CREATE TABLE IF NOT EXISTS search_wal_lock_probe (id INTEGER PRIMARY KEY);
                     INSERT INTO search_wal_lock_probe DEFAULT VALUES;
                     PRAGMA locking_mode=EXCLUSIVE;
                     BEGIN EXCLUSIVE;",
                )
                .expect("acquiring WAL hook lock");
                assert!(substrate::wal_sidecar_path(&db_path).exists());
            }
            SearchPostPrecheckLockMode::RollbackJournal => {
                conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
                    .expect("acquiring rollback-journal hook lock");
                fs::write(substrate::rollback_journal_path(&db_path), "locked")
                    .expect("writing rollback journal marker");
            }
        }
        locked_tx.send(()).expect("signaling search lock hook");
        // Hold the lock briefly so the caller can observe the locked state.
        std::thread::sleep(Duration::from_millis(200));
        drop(conn);
        // Best-effort cleanup; the file may not exist (e.g. in WAL mode).
        fs::remove_file(substrate::rollback_journal_path(&db_path)).ok();
    });

    locked_rx
        .recv_timeout(Duration::from_secs(1))
        .context("waiting for search post-precheck lock hook")?;
    Ok(())
}
32796
32797#[cfg(not(test))]
32798pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
32799    Ok(())
32800}