Skip to main content

zagens_core/
working_set.rs

1//! Repo-aware working set tracking and prompt context packing.
2//!
3//! The goal of this module is to keep a small, high-signal list of
4//! "active" paths that the assistant should prioritize. It observes
5//! user messages and tool calls, extracts likely paths, and produces:
6//! - a compact working-set summary block for the system prompt
7//! - pinned message indices that compaction should preserve
8
9use crate::chat::{ContentBlock, Message};
10use ignore::WalkBuilder;
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14use std::collections::{HashMap, HashSet};
15use std::ffi::OsStr;
16use std::fs;
17use std::path::{Path, PathBuf};
18use std::sync::OnceLock;
19
/// Resolves `@`-mentions and file-picker queries against a repository.
///
/// Two anchors are remembered: the workspace `root` and an optional `cwd`
/// snapshot handed in at construction time. The snapshot is never refreshed,
/// so a host whose working directory moves mid-session should build a new
/// `Workspace`.
///
/// Fuzzy lookups consult a lowercase-basename → paths map that is built
/// lazily on the first lookup that needs it and then reused for the lifetime
/// of the value, so repeated mistyped mentions do not each pay for a full
/// `WalkBuilder` traversal.
#[derive(Debug)]
pub struct Workspace {
    /// Workspace root; relative mentions are resolved against it first.
    pub root: PathBuf,
    /// Working directory captured at construction; secondary lookup anchor.
    cwd: Option<PathBuf>,
    /// Lazily built map from lowercased entry name to every path carrying it.
    file_index: OnceLock<HashMap<String, Vec<PathBuf>>>,
}
33
34impl Workspace {
35    /// Construct a workspace anchored at `root`, capturing the process CWD as
36    /// the secondary resolution pass. Convenience entry point intended for
37    /// callers that don't already have a CWD on hand; the App routes through
38    /// [`Workspace::with_cwd`] with its own captured launch directory.
39    #[allow(dead_code)] // Keeps the surface stable for #97 (Ctrl+P picker).
40    pub fn new(root: PathBuf) -> Self {
41        Self::with_cwd(root, std::env::current_dir().ok())
42    }
43
44    /// Construct with an explicit cwd. Used by tests that need deterministic
45    /// resolution against a known directory without depending on (and
46    /// mutating) the process's real working directory.
47    pub fn with_cwd(root: PathBuf, cwd: Option<PathBuf>) -> Self {
48        Self {
49            root,
50            cwd,
51            file_index: OnceLock::new(),
52        }
53    }
54
55    /// Two-pass resolution: workspace, then cwd, then fuzzy fallback.
56    pub fn resolve(&self, raw_path: &str) -> Result<PathBuf, PathBuf> {
57        let path = expand_mention_home(raw_path);
58        if path.is_absolute() {
59            if path.exists() {
60                return Ok(path);
61            }
62            return Err(path);
63        }
64
65        let ws_path = self.root.join(&path);
66        if ws_path.exists() {
67            return Ok(ws_path);
68        }
69
70        if let Some(cwd) = self.cwd.as_ref() {
71            let cwd_path = cwd.join(&path);
72            if cwd_path.exists() {
73                return Ok(cwd_path);
74            }
75        }
76
77        if let Some(fuzzy) = self.fuzzy_resolve(&path) {
78            return Ok(fuzzy);
79        }
80
81        Err(ws_path)
82    }
83
84    fn fuzzy_resolve(&self, path: &Path) -> Option<PathBuf> {
85        let needle = path.file_name()?.to_string_lossy().to_lowercase();
86        if needle.is_empty() {
87            return None;
88        }
89
90        let index = self.file_index.get_or_init(|| self.build_file_index());
91        index.get(&needle).and_then(|paths| paths.first()).cloned()
92    }
93
94    fn build_file_index(&self) -> HashMap<String, Vec<PathBuf>> {
95        let mut index: HashMap<String, Vec<PathBuf>> = HashMap::new();
96        let mut builder = WalkBuilder::new(&self.root);
97        builder.hidden(true).follow_links(false).max_depth(Some(6));
98        // Honor `.deepseekignore` in addition to the defaults the `ignore` crate
99        // already respects (`.gitignore`, `.git/info/exclude`, `.ignore`).
100        let _ = builder.add_custom_ignore_filename(".deepseekignore");
101
102        for entry in builder.build().flatten() {
103            if entry
104                .file_type()
105                .is_some_and(|ft| ft.is_file() || ft.is_dir())
106            {
107                let name = entry.file_name().to_string_lossy().to_lowercase();
108                index
109                    .entry(name)
110                    .or_default()
111                    .push(entry.path().to_path_buf());
112            }
113        }
114        index
115    }
116
117    /// Walk the workspace (and the recorded `cwd` when it diverges) and
118    /// return relative paths whose representation matches `partial`.
119    ///
120    /// Ranking: a candidate matches when its case-insensitive display string
121    /// starts with `partial` (prefix hit) or contains it as a substring; prefix
122    /// hits sort first so `docs/de` lands `docs/deepseek_v4.pdf` ahead of any
123    /// path that merely shares those bytes.
124    ///
125    /// Display strings are workspace-relative for files under `root`, and
126    /// cwd-relative for files only under the recorded `cwd` — so what the user
127    /// Tab-completes matches what their shell would have shown them.
128    ///
129    /// Honors `.gitignore`, `.git/info/exclude`, `.ignore`, and
130    /// `.deepseekignore`. Capped at `limit` results.
131    #[must_use]
132    pub fn completions(&self, partial: &str, limit: usize) -> Vec<String> {
133        if limit == 0 {
134            return Vec::new();
135        }
136        let needle = partial.to_lowercase();
137        let mut prefix_hits: Vec<String> = Vec::new();
138        let mut substring_hits: Vec<String> = Vec::new();
139        let mut seen: HashSet<PathBuf> = HashSet::new();
140
141        // Walk the recorded cwd first when it diverges from the workspace
142        // root, so cwd-relative entries appear ahead of duplicates surfaced by
143        // the workspace walk.
144        let cwd_diverges = self
145            .cwd
146            .as_deref()
147            .map(|c| c != self.root.as_path())
148            .unwrap_or(false);
149        if cwd_diverges && let Some(cwd) = self.cwd.as_deref() {
150            walk_for_completions(
151                cwd,
152                cwd,
153                &needle,
154                limit,
155                &mut prefix_hits,
156                &mut substring_hits,
157                &mut seen,
158            );
159        }
160        walk_for_completions(
161            &self.root,
162            &self.root,
163            &needle,
164            limit,
165            &mut prefix_hits,
166            &mut substring_hits,
167            &mut seen,
168        );
169
170        prefix_hits.sort();
171        substring_hits.sort();
172        prefix_hits.extend(substring_hits);
173        prefix_hits.truncate(limit);
174        prefix_hits
175    }
176}
177
178/// Maximum directory depth walked when surfacing file-mention completions.
179/// Mirrors the existing `project_tree` cutoff and keeps Tab snappy in deep
180/// monorepos.
181const COMPLETIONS_WALK_DEPTH: usize = 6;
182
183#[allow(clippy::too_many_arguments)]
184fn walk_for_completions(
185    walk_root: &Path,
186    display_root: &Path,
187    needle: &str,
188    limit: usize,
189    prefix_hits: &mut Vec<String>,
190    substring_hits: &mut Vec<String>,
191    seen: &mut HashSet<PathBuf>,
192) {
193    let mut builder = WalkBuilder::new(walk_root);
194    builder
195        .hidden(true)
196        .follow_links(false)
197        .max_depth(Some(COMPLETIONS_WALK_DEPTH));
198    let _ = builder.add_custom_ignore_filename(".deepseekignore");
199
200    for entry in builder.build().flatten() {
201        if prefix_hits.len() + substring_hits.len() >= limit {
202            break;
203        }
204        let path = entry.path();
205        let Ok(rel) = path.strip_prefix(display_root) else {
206            continue;
207        };
208        let rel_str = rel.to_string_lossy().replace('\\', "/");
209        if rel_str.is_empty() {
210            continue;
211        }
212        // Dedup across the (cwd, workspace) double-walk by absolute path; we
213        // want the cwd-relative display when both walks see the same file.
214        let abs = path.to_path_buf();
215        if !seen.insert(abs) {
216            continue;
217        }
218        let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir());
219        let candidate = if is_dir {
220            format!("{rel_str}/")
221        } else {
222            rel_str.clone()
223        };
224        let lower = candidate.to_lowercase();
225        if needle.is_empty() || lower.starts_with(needle) {
226            prefix_hits.push(candidate);
227        } else if lower.contains(needle) {
228            substring_hits.push(candidate);
229        }
230    }
231}
232
233impl Clone for Workspace {
234    fn clone(&self) -> Self {
235        // Don't carry the cached file_index — clones get a fresh OnceLock so
236        // they don't pin a stale snapshot of the previous owner's tree.
237        Self {
238            root: self.root.clone(),
239            cwd: self.cwd.clone(),
240            file_index: OnceLock::new(),
241        }
242    }
243}
244
/// Expand a leading `~` or `~/` using the `HOME` environment variable.
///
/// Any other input (including `~user/...`) is returned unchanged, as is a
/// tilde form when `HOME` is not set.
fn expand_mention_home(path: &str) -> PathBuf {
    // `None`: not a tilde mention. `Some(None)`: bare `~`.
    // `Some(Some(rest))`: `~/rest`.
    let tilde_tail: Option<Option<&str>> = if path == "~" {
        Some(None)
    } else {
        path.strip_prefix("~/").map(Some)
    };

    if let Some(tail) = tilde_tail {
        if let Some(home) = std::env::var_os("HOME") {
            let home = PathBuf::from(home);
            return match tail {
                Some(rest) => home.join(rest),
                None => home,
            };
        }
    }
    PathBuf::from(path)
}
258
259/// Configuration for working-set tracking.
260#[derive(Debug, Clone, Serialize, Deserialize)]
261pub struct WorkingSetConfig {
262    /// Maximum number of entries to keep.
263    pub max_entries: usize,
264    /// Maximum number of paths to pin during compaction.
265    pub max_pinned_paths: usize,
266    /// Maximum characters to scan per text block when pinning messages.
267    pub max_scan_chars: usize,
268    /// Maximum entries to show in the system prompt block.
269    pub max_prompt_entries: usize,
270}
271
272impl Default for WorkingSetConfig {
273    fn default() -> Self {
274        Self {
275            max_entries: 16,
276            max_pinned_paths: 8,
277            max_scan_chars: 2_000,
278            max_prompt_entries: 8,
279        }
280    }
281}
282
283/// The source that most recently updated an entry.
284#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
285pub enum WorkingSetSource {
286    UserMessage,
287    ToolInput,
288    ToolOutput,
289    Rebuild,
290}
291
292/// A single working-set entry.
293#[derive(Debug, Clone, Serialize, Deserialize)]
294pub struct WorkingSetEntry {
295    /// Workspace-relative path string.
296    pub path: String,
297    /// Whether the path is a directory (best-effort).
298    pub is_dir: bool,
299    /// Whether the path exists on disk (best-effort).
300    pub exists: bool,
301    /// Number of times this path was observed.
302    pub touches: u32,
303    /// The last observed turn index.
304    pub last_turn: u64,
305    /// The last update source.
306    pub last_source: WorkingSetSource,
307}
308
309impl WorkingSetEntry {
310    fn new(path: String, exists: bool, is_dir: bool, turn: u64, source: WorkingSetSource) -> Self {
311        Self {
312            path,
313            is_dir,
314            exists,
315            touches: 1,
316            last_turn: turn,
317            last_source: source,
318        }
319    }
320}
321
322/// Repo-aware working-set state.
323#[derive(Debug, Clone, Serialize, Deserialize, Default)]
324pub struct WorkingSet {
325    /// Tracking configuration.
326    pub config: WorkingSetConfig,
327    /// Monotonic turn counter (increments on user messages).
328    pub turn: u64,
329    /// Path entries keyed by workspace-relative path.
330    pub entries: HashMap<String, WorkingSetEntry>,
331}
332
333impl WorkingSet {
334    /// Advance to the next turn.
335    pub fn next_turn(&mut self) {
336        self.turn = self.turn.saturating_add(1);
337    }
338
339    /// Observe a user message and update the working set.
340    pub fn observe_user_message(&mut self, text: &str, workspace: &Path) {
341        self.next_turn();
342        let paths = extract_paths_from_text(text);
343        self.record_candidates(paths, workspace, WorkingSetSource::UserMessage);
344    }
345
346    /// Observe a tool call (input and optional output).
347    pub fn observe_tool_call(
348        &mut self,
349        tool_name: &str,
350        input: &Value,
351        output: Option<&str>,
352        workspace: &Path,
353    ) {
354        let input_candidates = extract_paths_from_value(input, Some(tool_name));
355        self.record_candidates(input_candidates, workspace, WorkingSetSource::ToolInput);
356
357        if let Some(text) = output {
358            let output_candidates = extract_paths_from_text(text);
359            self.record_candidates(output_candidates, workspace, WorkingSetSource::ToolOutput);
360        }
361    }
362
363    /// Rebuild the working set from existing messages (best effort).
364    ///
365    /// This is used when syncing a resumed session.
366    pub fn rebuild_from_messages(&mut self, messages: &[Message], workspace: &Path) {
367        self.entries.clear();
368        self.turn = 0;
369
370        for message in messages {
371            if message.role == "user" {
372                self.next_turn();
373            }
374            let candidates = extract_paths_from_message(message);
375            if candidates.is_empty() {
376                continue;
377            }
378            self.record_candidates(candidates, workspace, WorkingSetSource::Rebuild);
379        }
380    }
381
382    /// Render a compact working-set block for the system prompt.
383    ///
384    /// Byte-stable across `next_turn()` calls when no new paths are observed
385    /// (#280): the rendered lines drop the turn-relative `touches` and
386    /// `last seen N turn(s) ago` fields, and the order is taken from
387    /// `sorted_for_prompt` (turn-agnostic) instead of `sorted_entries`.
388    /// The block lands in the system prompt before the historical
389    /// conversation; any byte that drifts here cache-misses everything that
390    /// follows in DeepSeek's KV prefix cache.
391    pub fn summary_block(&self, workspace: &Path) -> Option<String> {
392        let prompt_entries: Vec<&WorkingSetEntry> = self
393            .sorted_for_prompt()
394            .into_iter()
395            .take(self.config.max_prompt_entries)
396            .collect();
397
398        let repo_summary = summarize_repo_root(workspace);
399
400        if repo_summary.is_none() && prompt_entries.is_empty() {
401            return None;
402        }
403
404        let mut lines: Vec<String> = Vec::new();
405        lines.push("## Repo Working Set".to_string());
406        lines.push(format!("Workspace: {}", workspace.display()));
407
408        if let Some(summary) = repo_summary {
409            lines.push(summary);
410        }
411
412        if !prompt_entries.is_empty() {
413            lines.push("Active paths (prioritize these):".to_string());
414            for entry in prompt_entries {
415                let kind = if entry.is_dir { "dir" } else { "file" };
416                lines.push(format!("- {} ({kind})", entry.path));
417            }
418        }
419
420        lines.push(
421            "When in doubt, use tools to verify and keep changes focused on the working set."
422                .to_string(),
423        );
424
425        Some(lines.join("\n"))
426    }
427
428    /// Return the most relevant paths in score order.
429    pub fn top_paths(&self, limit: usize) -> Vec<String> {
430        self.sorted_entries()
431            .into_iter()
432            .take(limit)
433            .map(|entry| entry.path.clone())
434            .collect()
435    }
436
437    /// Identify message indices that should be pinned during compaction.
438    pub fn pinned_message_indices(&self, messages: &[Message], workspace: &Path) -> Vec<usize> {
439        if messages.is_empty() || self.entries.is_empty() {
440            return Vec::new();
441        }
442
443        let pinned_paths: Vec<&WorkingSetEntry> = self
444            .sorted_entries()
445            .into_iter()
446            .take(self.config.max_pinned_paths)
447            .collect();
448        if pinned_paths.is_empty() {
449            return Vec::new();
450        }
451
452        let needles = build_search_needles(&pinned_paths, workspace);
453        if needles.is_empty() {
454            return Vec::new();
455        }
456
457        let mut pinned: Vec<usize> = Vec::new();
458        for (idx, message) in messages.iter().enumerate() {
459            if message_mentions_any_path(message, &needles, self.config.max_scan_chars) {
460                pinned.push(idx);
461            }
462        }
463        pinned
464    }
465
466    fn record_candidates(
467        &mut self,
468        candidates: Vec<String>,
469        workspace: &Path,
470        source: WorkingSetSource,
471    ) {
472        if candidates.is_empty() {
473            return;
474        }
475
476        let workspace_canon = workspace.canonicalize().ok();
477
478        for raw in candidates {
479            let Some(normalized) = normalize_candidate(&raw) else {
480                continue;
481            };
482            let Some((rel, exists, is_dir)) =
483                relativize_candidate(&normalized, workspace, workspace_canon.as_deref())
484            else {
485                continue;
486            };
487            self.record_path(rel, exists, is_dir, source);
488        }
489
490        self.prune();
491    }
492
493    fn record_path(&mut self, rel: String, exists: bool, is_dir: bool, source: WorkingSetSource) {
494        match self.entries.get_mut(&rel) {
495            Some(entry) => {
496                entry.exists |= exists;
497                entry.is_dir |= is_dir;
498                entry.touches = entry.touches.saturating_add(1);
499                entry.last_turn = self.turn;
500                entry.last_source = source;
501            }
502            None => {
503                let entry = WorkingSetEntry::new(rel.clone(), exists, is_dir, self.turn, source);
504                let _ = self.entries.insert(rel, entry);
505            }
506        }
507    }
508
509    fn prune(&mut self) {
510        let max_entries = self.config.max_entries;
511        if self.entries.len() <= max_entries {
512            return;
513        }
514
515        // Rank by score ascending and drop the lowest until within bounds.
516        let mut ranked: Vec<(String, i64)> = self
517            .entries
518            .values()
519            .map(|entry| (entry.path.clone(), score_entry(entry, self.turn)))
520            .collect();
521        ranked.sort_by_key(|a| a.1);
522
523        let to_remove = self.entries.len().saturating_sub(max_entries);
524        for (path, _) in ranked.into_iter().take(to_remove) {
525            let _ = self.entries.remove(&path);
526        }
527    }
528
529    fn sorted_entries(&self) -> Vec<&WorkingSetEntry> {
530        let mut entries: Vec<&WorkingSetEntry> = self.entries.values().collect();
531        entries.sort_by(|a, b| {
532            let sb = score_entry(b, self.turn);
533            let sa = score_entry(a, self.turn);
534            sb.cmp(&sa).then_with(|| a.path.cmp(&b.path))
535        });
536        entries
537    }
538
539    /// Turn-agnostic ordering used when rendering the prompt summary block.
540    /// `sorted_entries` mixes in a recency bonus from `self.turn`, so its
541    /// output reorders as turns advance even when no new paths are touched —
542    /// that movement would cross `max_prompt_entries` boundaries and bust the
543    /// KV prefix cache (#280). Compaction pinning still uses the recency-aware
544    /// `sorted_entries`; only the prompt-facing surface is stabilised here.
545    fn sorted_for_prompt(&self) -> Vec<&WorkingSetEntry> {
546        let mut entries: Vec<&WorkingSetEntry> = self.entries.values().collect();
547        entries.sort_by(|a, b| b.touches.cmp(&a.touches).then_with(|| a.path.cmp(&b.path)));
548        entries
549    }
550}
551
552fn score_entry(entry: &WorkingSetEntry, current_turn: u64) -> i64 {
553    let age = current_turn.saturating_sub(entry.last_turn);
554    let recency_bonus = match age {
555        0 => 6,
556        1 => 4,
557        2 => 3,
558        3..=5 => 2,
559        6..=10 => 1,
560        _ => 0,
561    };
562    i64::from(entry.touches) * 4 + recency_bonus
563}
564
/// Strip surrounding whitespace, then any run of quote / punctuation /
/// bracket characters from both ends of `raw`.
///
/// Returns `None` when nothing is left. Whitespace exposed by the second
/// strip is kept as-is.
fn normalize_candidate(raw: &str) -> Option<String> {
    const WRAPPERS: &[char] = &['"', '\'', '`', ',', ';', ':', '(', ')', '[', ']'];

    let core = raw.trim().trim_matches(WRAPPERS);
    if core.is_empty() {
        None
    } else {
        Some(core.to_owned())
    }
}
577
578fn relativize_candidate(
579    candidate: &str,
580    workspace: &Path,
581    workspace_canon: Option<&Path>,
582) -> Option<(String, bool, bool)> {
583    let candidate_path = Path::new(candidate);
584
585    // Reject obvious URLs and non-paths early.
586    if candidate.contains("://") {
587        return None;
588    }
589
590    let (rel_path, abs_path) = if candidate_path.is_absolute() {
591        let within_workspace = workspace_canon
592            .map(|ws| candidate_path.starts_with(ws))
593            .unwrap_or_else(|| candidate_path.starts_with(workspace));
594        if !within_workspace {
595            return None;
596        }
597        let rel = candidate_path.strip_prefix(workspace).ok()?.to_path_buf();
598        (rel, candidate_path.to_path_buf())
599    } else {
600        if starts_with_parent_dir(candidate_path) {
601            return None;
602        }
603        let rel = clean_relative(candidate_path);
604        let abs = workspace.join(&rel);
605        (rel, abs)
606    };
607
608    let metadata = fs::metadata(&abs_path).ok();
609    let exists = metadata.is_some();
610    let is_dir = metadata
611        .as_ref()
612        .map(fs::Metadata::is_dir)
613        .unwrap_or_else(|| candidate.ends_with('/'));
614
615    let rel_string = path_to_string(&rel_path)?;
616    Some((rel_string, exists, is_dir))
617}
618
/// True when the first component of `path` is `..`.
fn starts_with_parent_dir(path: &Path) -> bool {
    let first = path.components().next();
    first == Some(std::path::Component::ParentDir)
}
625
/// Lexically normalize `path` into a purely relative path.
///
/// `.` components, root markers and drive prefixes are dropped; each `..`
/// removes the previously kept component (and is ignored when there is none
/// left to remove). The filesystem is never consulted.
fn clean_relative(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut cleaned = PathBuf::new();
    for component in path.components() {
        match component {
            Component::Normal(segment) => cleaned.push(segment),
            Component::ParentDir => {
                // `pop` is a no-op on an empty path, matching "ignore".
                let _ = cleaned.pop();
            }
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
    }
    cleaned
}
646
/// Render `path` as a string with forward slashes only; `None` when the
/// path is not valid UTF-8.
fn path_to_string(path: &Path) -> Option<String> {
    let text = path.to_str()?;
    Some(text.replace('\\', "/"))
}
650
651fn extract_paths_from_message(message: &Message) -> Vec<String> {
652    let mut paths = Vec::new();
653    for block in &message.content {
654        match block {
655            ContentBlock::Text { text, .. } => {
656                paths.extend(extract_paths_from_text(text));
657            }
658            ContentBlock::ToolUse { input, .. } => {
659                paths.extend(extract_paths_from_value(input, None));
660            }
661            ContentBlock::ToolResult { content, .. } => {
662                paths.extend(extract_paths_from_text(content));
663            }
664            ContentBlock::Thinking { .. }
665            | ContentBlock::ServerToolUse { .. }
666            | ContentBlock::ToolSearchToolResult { .. }
667            | ContentBlock::CodeExecutionToolResult { .. } => {}
668        }
669    }
670    paths
671}
672
673/// Count path-like candidates in a tool input value (kernel replay substrate).
674#[must_use]
675pub fn path_candidates_from_tool_input(tool_name: &str, input: &Value) -> usize {
676    extract_paths_from_value(input, Some(tool_name)).len()
677}
678
679fn extract_paths_from_value(value: &Value, tool_hint: Option<&str>) -> Vec<String> {
680    let mut out = Vec::new();
681    extract_paths_from_value_inner(value, tool_hint, None, &mut out);
682    out
683}
684
685fn extract_paths_from_value_inner(
686    value: &Value,
687    tool_hint: Option<&str>,
688    key_hint: Option<&str>,
689    out: &mut Vec<String>,
690) {
691    match value {
692        Value::String(s) => {
693            let key_suggests_path = key_hint.map(key_is_path_like).unwrap_or(false);
694            if key_suggests_path || looks_like_path(s) {
695                let before = out.len();
696                out.extend(extract_paths_from_text(s));
697                if key_suggests_path && !s.contains('/') && !s.contains('\\') {
698                    out.push(s.to_string());
699                } else if out.len() == before && key_suggests_path && looks_like_path(s) {
700                    out.push(s.trim().to_string());
701                }
702            } else if tool_hint == Some("exec_shell") && s.len() < 400 {
703                out.extend(extract_paths_from_text(s));
704            }
705        }
706        Value::Array(arr) => {
707            for item in arr {
708                extract_paths_from_value_inner(item, tool_hint, key_hint, out);
709            }
710        }
711        Value::Object(map) => {
712            for (k, v) in map {
713                extract_paths_from_value_inner(v, tool_hint, Some(k.as_str()), out);
714            }
715        }
716        Value::Null | Value::Bool(_) | Value::Number(_) => {}
717    }
718}
719
/// Heuristic: does a JSON key name suggest that its value is a path?
///
/// Case-insensitive (ASCII). True when the key contains one of a handful of
/// path-related fragments, or is exactly `target`.
fn key_is_path_like(key: &str) -> bool {
    const FRAGMENTS: [&str; 6] = ["path", "file", "dir", "cwd", "workspace", "root"];

    let lower = key.to_ascii_lowercase();
    lower == "target" || FRAGMENTS.iter().any(|&fragment| lower.contains(fragment))
}
730
/// Heuristic: does `text` look like a filesystem path?
///
/// After trimming, any string containing `/` or `\` qualifies; otherwise
/// the string must end in one of `COMMON_EXTENSIONS` (compared
/// case-sensitively). Blank input never qualifies.
fn looks_like_path(text: &str) -> bool {
    let trimmed = text.trim();
    if trimmed.is_empty() {
        return false;
    }

    let has_separator = trimmed.contains(|c: char| matches!(c, '/' | '\\'));
    has_separator
        || Path::new(trimmed)
            .extension()
            .and_then(OsStr::to_str)
            .is_some_and(|ext| COMMON_EXTENSIONS.contains(&ext))
}

/// File extensions that mark a separator-free token as a probable path.
const COMMON_EXTENSIONS: &[&str] = &[
    "rs", "toml", "md", "txt", "json", "yaml", "yml", "ts", "tsx", "js", "jsx", "py", "go", "java",
    "c", "cc", "cpp", "h", "hpp", "sh", "bash", "zsh", "sql", "html", "css", "scss",
];
749
750fn extract_paths_from_text(text: &str) -> Vec<String> {
751    if text.trim().is_empty() {
752        return Vec::new();
753    }
754
755    let re = path_regex();
756    re.find_iter(text)
757        .map(|m| m.as_str().to_string())
758        .filter(|s| looks_like_path(s))
759        .collect()
760}
761
762fn path_regex() -> &'static Regex {
763    static RE: OnceLock<Regex> = OnceLock::new();
764    RE.get_or_init(|| {
765        // Path-ish tokens with separators or file extensions.
766        Regex::new(
767            r#"(?x)
768            (?:
769                (?:[A-Za-z]:\\)?                # optional Windows drive
770                (?:\./|\../|/)?                 # optional leading
771                [A-Za-z0-9._-]+
772                (?:[/\\][A-Za-z0-9._-]+)+
773                (?:\.[A-Za-z0-9]{1,8})?         # optional extension
774            )
775            |
776            (?:
777                [A-Za-z0-9._-]+\.[A-Za-z0-9]{1,8}
778            )
779            "#,
780        )
781        .expect("path regex should compile")
782    })
783}
784
/// Return the longest prefix of `text` holding at most `max_chars`
/// characters, always cutting on a `char` boundary.
fn truncate_chars(text: &str, max_chars: usize) -> &str {
    if max_chars == 0 {
        return "";
    }
    // The byte offset of the character at index `max_chars` is where to cut;
    // its absence means the text is already short enough.
    text.char_indices()
        .nth(max_chars)
        .map_or(text, |(cut, _)| &text[..cut])
}
794
795fn build_search_needles(entries: &[&WorkingSetEntry], workspace: &Path) -> Vec<String> {
796    let mut needles: HashSet<String> = HashSet::new();
797    for entry in entries {
798        let rel = entry.path.clone();
799        if rel.is_empty() {
800            continue;
801        }
802        let abs = workspace.join(&rel);
803        let abs_str = abs.as_os_str().to_str().map(ToOwned::to_owned);
804
805        let _ = needles.insert(rel.clone());
806        if let Some(abs_str) = abs_str {
807            let _ = needles.insert(abs_str);
808        }
809    }
810    needles.into_iter().collect()
811}
812
813fn message_mentions_any_path(message: &Message, needles: &[String], max_scan_chars: usize) -> bool {
814    if needles.is_empty() {
815        return false;
816    }
817    for block in &message.content {
818        match block {
819            ContentBlock::Text { text, .. } => {
820                let snippet = truncate_chars(text, max_scan_chars);
821                if contains_any(snippet, needles) {
822                    return true;
823                }
824            }
825            ContentBlock::ToolUse { input, .. } => {
826                if let Ok(json) = serde_json::to_string(input)
827                    && contains_any(&json, needles)
828                {
829                    return true;
830                }
831            }
832            ContentBlock::ToolResult { content, .. } => {
833                let snippet = truncate_chars(content, max_scan_chars);
834                if contains_any(snippet, needles) {
835                    return true;
836                }
837            }
838            ContentBlock::Thinking { .. }
839            | ContentBlock::ServerToolUse { .. }
840            | ContentBlock::ToolSearchToolResult { .. }
841            | ContentBlock::CodeExecutionToolResult { .. } => {}
842        }
843    }
844    false
845}
846
/// True when `text` contains at least one non-empty needle.
fn contains_any(text: &str, needles: &[String]) -> bool {
    for needle in needles {
        // An empty needle would match everything; skip it.
        if needle.is_empty() {
            continue;
        }
        if text.contains(needle.as_str()) {
            return true;
        }
    }
    false
}
852
853fn summarize_repo_root(workspace: &Path) -> Option<String> {
854    let key_files = detect_key_files(workspace);
855    let top_dirs = list_top_level_dirs(workspace, 8);
856
857    if key_files.is_empty() && top_dirs.is_empty() {
858        return None;
859    }
860
861    let mut parts: Vec<String> = Vec::new();
862    if !key_files.is_empty() {
863        parts.push(format!("Key files: {}", key_files.join(", ")));
864    }
865    if !top_dirs.is_empty() {
866        parts.push(format!("Top-level dirs: {}", top_dirs.join(", ")));
867    }
868    Some(parts.join("\n"))
869}
870
/// Return the well-known project files present directly under `workspace`,
/// in the fixed order of the candidate list.
fn detect_key_files(workspace: &Path) -> Vec<String> {
    const CANDIDATES: &[&str] = &[
        "Cargo.toml",
        "README.md",
        "AGENTS.md",
        "CLAUDE.md",
        "package.json",
        "pyproject.toml",
        "go.mod",
        "Makefile",
    ];

    let mut present = Vec::new();
    for name in CANDIDATES.iter().copied() {
        if workspace.join(name).exists() {
            present.push(name.to_string());
        }
    }
    present
}
895
/// List up to `limit` directories directly under `workspace`, sorted by
/// name.
///
/// Skipped: names starting with `.`, names in `IGNORED_ROOT_DIRS`, names
/// that are not valid UTF-8, and anything whose metadata cannot be read or
/// is not a directory. An unreadable `workspace` yields an empty list.
///
/// All eligible directories are gathered *before* sorting and truncating.
/// `read_dir` yields entries in a platform- and filesystem-dependent order,
/// so cutting off during iteration would make the selected subset (not just
/// its order) vary between machines and runs, and would let `limit == 0`
/// return an entry.
fn list_top_level_dirs(workspace: &Path, limit: usize) -> Vec<String> {
    let entries = match fs::read_dir(workspace) {
        Ok(entries) => entries,
        Err(_) => return Vec::new(),
    };

    let mut dirs: Vec<String> = entries
        .flatten()
        .filter_map(|entry| {
            let name = entry.file_name().into_string().ok()?;
            if name.starts_with('.') || IGNORED_ROOT_DIRS.contains(&name.as_str()) {
                return None;
            }
            let is_dir = entry.metadata().is_ok_and(|meta| meta.is_dir());
            is_dir.then_some(name)
        })
        .collect();

    dirs.sort_unstable();
    dirs.truncate(limit);
    dirs
}

/// Build-output and dependency directories left out of the top-level
/// listing.
const IGNORED_ROOT_DIRS: &[&str] = &["target", "node_modules", "dist", "build", ".git"];
929
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a [`Message`] holding a single text block for the given role.
    fn make_message(role: &str, text: &str) -> Message {
        let block = ContentBlock::Text {
            text: text.to_owned(),
            cache_control: None,
        };
        Message {
            role: role.to_owned(),
            content: vec![block],
        }
    }

    /// Writes `contents` to `root/relative`, creating parent directories
    /// first so fixtures can be declared one file at a time.
    fn write_file(root: &Path, relative: &str, contents: &str) {
        let target = root.join(relative);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).expect("mkdir");
        }
        fs::write(&target, contents).expect("write");
    }

    /// A path mentioned in a user message is tracked as an existing file.
    #[test]
    fn observe_user_message_tracks_paths() {
        let root = TempDir::new().expect("tempdir");
        write_file(root.path(), "src/lib.rs", "pub fn x() {}");

        let mut working_set = WorkingSet::default();
        working_set.observe_user_message("Please check src/lib.rs", root.path());

        assert!(working_set.entries.contains_key("src/lib.rs"));
        let tracked = working_set.entries.get("src/lib.rs").expect("entry");
        assert!(tracked.exists);
        assert!(!tracked.is_dir);
    }

    /// The `path` field of a tool call's JSON input is tracked.
    #[test]
    fn observe_tool_call_extracts_paths_from_input() {
        let root = TempDir::new().expect("tempdir");
        write_file(root.path(), "Cargo.toml", "[package]\nname = \"x\"");

        let tool_input = serde_json::json!({ "path": "Cargo.toml" });
        let mut working_set = WorkingSet::default();
        working_set.observe_tool_call("read_file", &tool_input, None, root.path());

        assert!(working_set.entries.contains_key("Cargo.toml"));
    }

    /// Only the message that mentions a tracked path gets pinned.
    #[test]
    fn pinned_message_indices_respects_working_set() {
        let root = TempDir::new().expect("tempdir");
        write_file(root.path(), "src/main.rs", "fn main() {}");

        let mut working_set = WorkingSet::default();
        working_set.observe_user_message("Edit src/main.rs", root.path());

        let history = vec![
            make_message("user", "Unrelated text"),
            make_message("assistant", "I will read src/main.rs next."),
            make_message("user", "More unrelated text"),
        ];

        let pinned = working_set.pinned_message_indices(&history, root.path());
        assert_eq!(pinned, vec![1]);
    }

    /// The rendered block carries its heading, the repo overview and the
    /// tracked path.
    #[test]
    fn summary_block_includes_repo_and_working_set() {
        let root = TempDir::new().expect("tempdir");
        write_file(root.path(), "Cargo.toml", "[package]\nname = \"x\"");
        write_file(root.path(), "src/lib.rs", "pub fn x() {}");

        let mut working_set = WorkingSet::default();
        working_set.observe_user_message("src/lib.rs", root.path());
        let rendered = working_set.summary_block(root.path()).expect("block");

        assert!(rendered.contains("Repo Working Set"));
        assert!(rendered.contains("Cargo.toml"));
        assert!(rendered.contains("src"));
        assert!(rendered.contains("src/lib.rs"));
    }

    /// Regression test for #280: when no new paths are touched,
    /// `summary_block` has to render byte-identical output before and after
    /// a `next_turn()` advance. Before the fix, the rendered lines
    /// interpolated `entry.touches` and `self.turn - entry.last_turn`, which
    /// both drift from turn to turn even though the path set is unchanged.
    /// Because the working-set block sits in the system prompt ahead of the
    /// historical conversation, that drift busted DeepSeek's KV prefix cache
    /// on every user message.
    #[test]
    fn summary_block_is_byte_stable_across_next_turn_when_no_new_paths_observed() {
        use crate::test_support::assert_byte_identical;

        let root = TempDir::new().expect("tempdir");
        write_file(root.path(), "Cargo.toml", "[package]\nname = \"x\"");
        write_file(root.path(), "src/a.rs", "a");
        write_file(root.path(), "src/b.rs", "b");

        let mut working_set = WorkingSet::default();
        working_set.observe_user_message("Edit src/a.rs and src/b.rs", root.path());

        let first_render = working_set
            .summary_block(root.path())
            .expect("block before");
        working_set.next_turn();
        let second_render = working_set
            .summary_block(root.path())
            .expect("block after");

        assert_byte_identical(
            "summary_block must be stable across next_turn when no new paths touched",
            &first_render,
            &second_render,
        );
    }

    /// Counterpart of the byte-stability test: observing a genuinely new
    /// path *must* change the block (a KV cache miss is acceptable when
    /// there is new signal), so the model still learns about newly touched
    /// paths once the block has settled across no-op turns.
    #[test]
    fn summary_block_changes_when_a_new_path_is_observed() {
        let root = TempDir::new().expect("tempdir");
        write_file(root.path(), "Cargo.toml", "[package]\nname = \"x\"");
        write_file(root.path(), "src/a.rs", "a");
        write_file(root.path(), "src/c.rs", "c");

        let mut working_set = WorkingSet::default();
        working_set.observe_user_message("src/a.rs", root.path());
        let before = working_set
            .summary_block(root.path())
            .expect("block before");

        working_set.observe_user_message("src/c.rs", root.path());
        let after = working_set
            .summary_block(root.path())
            .expect("block after");

        assert_ne!(before, after, "new path must update the rendered summary");
        assert!(after.contains("src/c.rs"));
    }

    /// Paths embedded in a tool result's text are extracted.
    #[test]
    fn extract_paths_from_message_picks_up_tool_results() {
        let tool_result = ContentBlock::ToolResult {
            tool_use_id: "tool_1".to_string(),
            content: "Changed src/compaction.rs".to_string(),
            is_error: None,
            content_blocks: None,
        };
        let message = Message {
            role: "user".to_string(),
            content: vec![tool_result],
        };

        let extracted = extract_paths_from_message(&message);
        assert!(
            extracted
                .iter()
                .any(|path| path.contains("src/compaction.rs"))
        );
    }

    /// A path seen in both a user message and a tool call must not score
    /// below one seen only once, more recently.
    #[test]
    fn pinning_prefers_high_signal_paths() {
        let root = TempDir::new().expect("tempdir");
        write_file(root.path(), "src/a.rs", "a");
        write_file(root.path(), "src/b.rs", "b");

        let mut working_set = WorkingSet::default();
        working_set.observe_user_message("src/a.rs", root.path());
        let read_a = serde_json::json!({ "path": "src/a.rs" });
        working_set.observe_tool_call("read_file", &read_a, Some("src/a.rs"), root.path());
        working_set.observe_user_message("src/b.rs", root.path());

        let a_score = score_entry(
            working_set.entries.get("src/a.rs").expect("a"),
            working_set.turn,
        );
        let b_score = score_entry(
            working_set.entries.get("src/b.rs").expect("b"),
            working_set.turn,
        );
        assert!(a_score >= b_score);
    }

    // NOTE(review): despite its name this test never calls a token
    // estimator; it only checks that the fixture array is non-empty.
    #[test]
    fn estimate_tokens_is_available_for_future_budgeting() {
        let fixture = [make_message("user", "src/main.rs")];
        assert!(!fixture.is_empty());
    }

    /// Resolution tries the captured cwd first and then falls back to the
    /// workspace root (#101).
    #[test]
    fn workspace_resolve_respects_cwd_and_workspace() {
        let root = TempDir::new().unwrap();

        let cwd_dir = root.path().join("sub");
        std::fs::create_dir_all(&cwd_dir).unwrap();
        let bar_path = cwd_dir.join("bar.txt");
        std::fs::write(&bar_path, "bar").unwrap();

        let deep_dir = root.path().join("nested/deep");
        std::fs::create_dir_all(&deep_dir).unwrap();
        let md_path = deep_dir.join("file.md");
        std::fs::write(&md_path, "md").unwrap();

        // An explicit cwd keeps this test independent of other tests that
        // mutate the real process cwd.
        let workspace = Workspace::with_cwd(root.path().to_path_buf(), Some(cwd_dir));

        // #101 repro #1: with cwd=sub, `bar.txt` MUST resolve through the
        // cwd pass and never to the non-existent workspace path tmp/bar.txt.
        let via_cwd = workspace.resolve("bar.txt").unwrap();
        assert_eq!(
            via_cwd.canonicalize().unwrap_or_else(|_| via_cwd.clone()),
            bar_path.canonicalize().unwrap_or_else(|_| bar_path.clone())
        );
        let bogus = root.path().join("bar.txt");
        assert_ne!(via_cwd, bogus, "must not have routed to workspace fallback");

        // #101 repro #2: `nested/deep/file.md` is absent under the cwd and
        // falls through to the workspace root.
        let via_root = workspace.resolve("nested/deep/file.md").unwrap();
        assert_eq!(
            via_root.canonicalize().unwrap_or(via_root),
            md_path.canonicalize().unwrap_or(md_path)
        );
    }

    /// Negative case for #101: a path that exists nowhere yields `Err`,
    /// giving callers a failure signal they can surface to the user.
    #[test]
    fn workspace_resolve_returns_err_for_truly_missing_path() {
        let root = TempDir::new().unwrap();
        let root_path = root.path().to_path_buf();
        let workspace = Workspace::with_cwd(root_path.clone(), Some(root_path));

        let res = workspace.resolve("does/not/exist.txt");
        assert!(res.is_err(), "expected Err for missing path, got: {res:?}");
    }

    /// `Workspace::completions` must surface entries from the workspace
    /// root as well as from a cwd that lives outside the workspace tree.
    #[test]
    fn workspace_completions_walk_surfaces_workspace_and_cwd() {
        let root = TempDir::new().unwrap();
        // `ws/` and `cwd/` are siblings, so the two walks are disjoint and
        // each assertion below proves that one specific walk contributed.
        let ws_root = root.path().join("ws");
        let cwd_root = root.path().join("cwd");
        std::fs::create_dir_all(&ws_root).unwrap();
        std::fs::create_dir_all(&cwd_root).unwrap();
        std::fs::write(ws_root.join("alpha.txt"), "a").unwrap();
        std::fs::write(cwd_root.join("alphabeta.txt"), "b").unwrap();

        let workspace = Workspace::with_cwd(ws_root, Some(cwd_root));
        let entries = workspace.completions("alpha", 16);
        assert!(
            entries.iter().any(|e| e == "alpha.txt"),
            "expected workspace entry alpha.txt; got: {entries:?}",
        );
        assert!(
            entries.iter().any(|e| e == "alphabeta.txt"),
            "expected cwd entry alphabeta.txt; got: {entries:?}",
        );
    }

    /// Basename-only mentions resolve through the fuzzy index for both
    /// files and directories.
    #[test]
    fn fuzzy_index_finds_files_and_directories() {
        let root = TempDir::new().unwrap();
        std::fs::create_dir_all(root.path().join("a/b/target_dir")).unwrap();
        std::fs::write(root.path().join("a/b/needle.rs"), "fn main(){}").unwrap();

        let workspace = Workspace::with_cwd(root.path().to_path_buf(), None);

        // Neither name exists directly under the root, so both lookups have
        // to go through the fuzzy fallback.
        let found_file = workspace.resolve("needle.rs").unwrap();
        assert!(found_file.ends_with("a/b/needle.rs"));
        let found_dir = workspace.resolve("target_dir").unwrap();
        assert!(found_dir.ends_with("a/b/target_dir"));

        // The lazy index must have been initialised by the lookups above.
        assert!(workspace.file_index.get().is_some());
    }
}