Skip to main content

srcwalk/
map.rs

1use std::collections::BTreeMap;
2use std::fmt::Write;
3use std::path::{Path, PathBuf};
4
5use ignore::WalkBuilder;
6
7use crate::cache::OutlineCache;
8use crate::lang::detect_file_type;
9use crate::read::outline;
10use crate::types::{estimate_tokens, FileType};
11
/// Switches applied to the directory walker in `generate`.
///
/// Each flag is forwarded to the `WalkBuilder` method of the same name, and
/// the same values drive the "# Note:" header built by `format_walk_note`,
/// so the header always describes the walk that actually ran.
struct WalkConfig {
    /// Forwarded to `WalkBuilder::hidden`; when `true` the note reports
    /// "dotfiles excluded", otherwise "dotfiles included".
    hidden: bool,
    /// Forwarded to `WalkBuilder::git_ignore`; reported as `.gitignore`.
    git_ignore: bool,
    /// Forwarded to `WalkBuilder::git_global`; reported as `core.excludesFile`.
    git_global: bool,
    /// Forwarded to `WalkBuilder::git_exclude`; reported as `.git/info/exclude`.
    git_exclude: bool,
    /// Forwarded to `WalkBuilder::ignore`; reported as `.ignore`.
    ignore: bool,
    /// Forwarded to `WalkBuilder::parents`; reported as "+ parents" when set
    /// and "scope only" otherwise.
    parents: bool,
}
20
21/// Build the "# Note:" header line listing which ignore sources the walker
22/// honours, derived from the actual `WalkConfig` (no hardcoded copy).
23fn format_walk_note(cfg: &WalkConfig) -> String {
24    let mut respects: Vec<&'static str> = Vec::new();
25    if cfg.git_ignore {
26        respects.push(".gitignore");
27    }
28    if cfg.git_exclude {
29        respects.push(".git/info/exclude");
30    }
31    if cfg.git_global {
32        respects.push("core.excludesFile");
33    }
34    if cfg.ignore {
35        respects.push(".ignore");
36    }
37    let scope_word = if cfg.parents {
38        "+ parents"
39    } else {
40        "scope only"
41    };
42
43    let respects_part = if respects.is_empty() {
44        "no ignore files".to_string()
45    } else {
46        format!("{} ({scope_word})", respects.join(", "))
47    };
48
49    let hidden_part = if cfg.hidden {
50        "dotfiles excluded"
51    } else {
52        "dotfiles included"
53    };
54
55    format!(
56        "# Note: respects {respects_part}; {hidden_part}; built-in SKIP_DIRS still apply \
57         (target, node_modules, …). Use `srcwalk <path>` to inspect an ignored file directly.\n",
58    )
59}
60
61/// Generate a structural codebase map.
62/// By default files show compact token estimates; symbol names are opt-in.
63#[must_use]
64pub fn generate(
65    scope: &Path,
66    depth: usize,
67    budget: Option<u64>,
68    cache: &OutlineCache,
69    include_symbols: bool,
70) -> String {
71    let mut tree: BTreeMap<PathBuf, Vec<FileEntry>> = BTreeMap::new();
72
73    let cfg = WalkConfig {
74        hidden: false,
75        git_ignore: true,
76        git_global: true,
77        git_exclude: true,
78        ignore: true,
79        parents: true,
80    };
81
82    let walker = WalkBuilder::new(scope)
83        .follow_links(true)
84        .hidden(cfg.hidden)
85        .git_ignore(cfg.git_ignore)
86        .git_global(cfg.git_global)
87        .git_exclude(cfg.git_exclude)
88        .ignore(cfg.ignore)
89        .parents(cfg.parents)
90        .filter_entry(|entry| {
91            if entry.file_type().is_some_and(|ft| ft.is_dir()) {
92                if let Some(name) = entry.file_name().to_str() {
93                    return !crate::search::io::SKIP_DIRS.contains(&name);
94                }
95            }
96            true
97        })
98        .max_depth(Some(depth + 1))
99        .build();
100
101    for entry in walker.flatten() {
102        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
103            continue;
104        }
105
106        let path = entry.path();
107        let rel = path.strip_prefix(scope).unwrap_or(path);
108
109        // Skip if deeper than requested
110        let file_depth = rel.components().count().saturating_sub(1);
111        if file_depth > depth {
112            continue;
113        }
114
115        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
116        let name = rel
117            .file_name()
118            .and_then(|n| n.to_str())
119            .unwrap_or("")
120            .to_string();
121
122        let meta = std::fs::metadata(path).ok();
123        let byte_len = meta.as_ref().map_or(0, std::fs::Metadata::len);
124        let tokens = estimate_tokens(byte_len);
125
126        let symbols = if include_symbols {
127            let file_type = detect_file_type(path);
128            match file_type {
129                FileType::Code(_) => {
130                    let mtime = meta
131                        .and_then(|m| m.modified().ok())
132                        .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
133
134                    let outline_str = cache.get_or_compute(path, mtime, || {
135                        let content = std::fs::read_to_string(path).unwrap_or_default();
136                        let buf = content.as_bytes();
137                        outline::generate(path, file_type, &content, buf, true)
138                    });
139
140                    Some(extract_symbol_names(&outline_str))
141                }
142                _ => None,
143            }
144        } else {
145            None
146        };
147
148        tree.entry(parent.clone()).or_default().push(FileEntry {
149            name,
150            symbols,
151            tokens,
152        });
153
154        // Ensure all ancestor directories exist in the tree so format_tree can find them.
155        let mut ancestor = parent.parent();
156        while let Some(a) = ancestor {
157            tree.entry(a.to_path_buf()).or_default();
158            if a == Path::new("") {
159                break;
160            }
161            ancestor = a.parent();
162        }
163    }
164
165    let mut out = format!(
166        "# Map: {} (depth {}, sizes ~= tokens)\n",
167        scope.display(),
168        depth
169    );
170    out.push_str(&format_walk_note(&cfg));
171    let totals = compute_dir_totals(&tree);
172    format_tree(&tree, &totals, Path::new(""), 0, &mut out);
173
174    let mut out = match budget {
175        Some(b) => crate::budget::apply(&out, b),
176        None => out,
177    };
178    if include_symbols {
179        out.push_str("\n\n> Tip: narrow with --scope <dir>.\n");
180    } else {
181        out.push_str("\n\n> Tip: add --symbols, or narrow with --scope <dir>.\n");
182    }
183    out
184}
185
186/// Compute total tokens for each directory (sum of all descendant files).
187fn compute_dir_totals(tree: &BTreeMap<PathBuf, Vec<FileEntry>>) -> BTreeMap<PathBuf, u64> {
188    let mut totals: BTreeMap<PathBuf, u64> = BTreeMap::new();
189    for (dir, files) in tree {
190        let sum: u64 = files.iter().map(|f| f.tokens).sum();
191        // Add this dir's direct file tokens to itself and every ancestor.
192        let mut cur: Option<&Path> = Some(dir.as_path());
193        while let Some(p) = cur {
194            *totals.entry(p.to_path_buf()).or_insert(0) += sum;
195            if p == Path::new("") {
196                break;
197            }
198            cur = p.parent();
199        }
200    }
201    totals
202}
203
/// One file row in the rendered map.
struct FileEntry {
    /// File name as printed in the map.
    name: String,
    /// Symbol names taken from the file's outline; `None` when no symbol
    /// list was produced for this file.
    symbols: Option<Vec<String>>,
    /// Estimated token count for the file.
    tokens: u64,
}
209
210/// Extract symbol names from an outline string.
211/// Outline lines look like: `[7-57]       fn classify`
212/// We extract the last word(s) after the kind keyword.
213fn extract_symbol_names(outline: &str) -> Vec<String> {
214    let mut names = Vec::new();
215    for line in outline.lines() {
216        let trimmed = line.trim();
217        // Skip import lines and empty lines
218        if trimmed.starts_with('[') {
219            // Find the symbol name after kind keywords
220            if let Some(sig_start) = find_symbol_start(trimmed) {
221                let sig = &trimmed[sig_start..];
222                // Take just the name (up to first paren or space after name)
223                let name = extract_name_from_sig(sig);
224                if !name.is_empty() && name != "imports" {
225                    names.push(name);
226                }
227            }
228        }
229    }
230    names
231}
232
/// Locate where the symbol name begins in an outline line.
///
/// Kind keywords are tried in priority order (so `const fn foo` resolves via
/// `fn`, not `const`). Returns the byte offset just past the first keyword
/// that occurs on a word boundary, or `None` when no keyword is present.
///
/// A keyword only counts when it is not glued to a preceding identifier
/// character; otherwise `const subtype = 1` would match `type ` inside
/// `subtype ` and the real name would be lost.
fn find_symbol_start(line: &str) -> Option<usize> {
    const KINDS: [&str; 14] = [
        "fn ",
        "struct ",
        "enum ",
        "trait ",
        "impl ",
        "mod ",
        "class ",
        "interface ",
        "type ",
        "const ",
        "static ",
        "function ",
        "method ",
        "def ",
    ];

    /// Same character class `extract_name_from_sig` treats as part of a name.
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    KINDS.iter().find_map(|&kind| {
        line.match_indices(kind)
            // Reject hits that are merely the tail of a longer identifier.
            .find(|(pos, _)| !line[..*pos].chars().next_back().is_some_and(is_ident_char))
            .map(|(pos, _)| pos + kind.len())
    })
}
257
/// Return the leading identifier of `sig`.
///
/// The name runs up to the first character that is not alphanumeric, `_`
/// or `$`; an empty string is returned when `sig` does not start with one.
fn extract_name_from_sig(sig: &str) -> String {
    let is_ident = |c: char| c.is_alphanumeric() || c == '_' || c == '$';
    // Cut at the first non-identifier character, or keep everything if none.
    let end = sig.find(|c: char| !is_ident(c)).unwrap_or(sig.len());
    sig[..end].to_string()
}
264
265fn format_tree(
266    tree: &BTreeMap<PathBuf, Vec<FileEntry>>,
267    totals: &BTreeMap<PathBuf, u64>,
268    dir: &Path,
269    indent: usize,
270    out: &mut String,
271) {
272    // Collect subdirectories that have entries
273    let mut subdirs: Vec<&PathBuf> = tree
274        .keys()
275        .filter(|k| k.parent() == Some(dir) && *k != dir)
276        .collect();
277    subdirs.sort();
278
279    let prefix = "  ".repeat(indent);
280
281    // Show files in this directory
282    if let Some(files) = tree.get(dir) {
283        for f in files {
284            if let Some(ref symbols) = f.symbols {
285                if symbols.is_empty() {
286                    let _ = writeln!(out, "{prefix}{}  ~{}", f.name, fmt_tokens(f.tokens));
287                } else {
288                    let syms = symbols.join(", ");
289                    let truncated = if syms.len() > 80 {
290                        format!("{}...", crate::types::truncate_str(&syms, 77))
291                    } else {
292                        syms
293                    };
294                    let _ = writeln!(out, "{prefix}{}: {truncated}", f.name);
295                }
296            } else {
297                let _ = writeln!(out, "{prefix}{}  ~{}", f.name, fmt_tokens(f.tokens));
298            }
299        }
300    }
301
302    // Recurse into subdirectories — show rollup token total next to dir name.
303    for subdir in subdirs {
304        let dir_name = subdir.file_name().and_then(|n| n.to_str()).unwrap_or("?");
305        let total = totals.get(subdir).copied().unwrap_or(0);
306        let _ = writeln!(out, "{prefix}{dir_name}/  ~{}", fmt_tokens(total));
307        format_tree(tree, totals, subdir, indent + 1, out);
308    }
309}
310
/// Compact token count for file sizes and directory rollups
/// (e.g. "12.3k", "1.2M").
///
/// Counts below 1000 are printed verbatim; larger counts are shown with one
/// decimal and a `k` or `M` suffix.
fn fmt_tokens(n: u64) -> String {
    // From here up, one-decimal rounding of the `k` form would print
    // "1000.0k"; switch to the `M` form early so it reads "1.0M" instead.
    const MEGA_DISPLAY_FLOOR: u64 = 999_950;

    #[allow(clippy::cast_precision_loss)] // display-only; mantissa loss is fine for summaries
    let f = n as f64;
    if n >= MEGA_DISPLAY_FLOOR {
        format!("{:.1}M", f / 1_000_000.0)
    } else if n >= 1_000 {
        format!("{:.1}k", f / 1_000.0)
    } else {
        n.to_string()
    }
}