Skip to main content

srcwalk/
map.rs

1use std::collections::BTreeMap;
2use std::fmt::Write;
3use std::path::{Path, PathBuf};
4
5use ignore::WalkBuilder;
6
7use crate::cache::OutlineCache;
8use crate::error::SrcwalkError;
9use crate::lang::detect_file_type;
10use crate::read::outline;
11use crate::types::{estimate_tokens, FileType};
12
/// Walker switches used both to configure the directory walk and to render
/// the "# Note:" header, so the two cannot drift apart.
struct WalkConfig {
    /// Forwarded to `WalkBuilder::hidden`; when set, the note reports
    /// dotfiles as excluded.
    hidden: bool,
    /// Forwarded to `WalkBuilder::git_ignore`; listed in the note as `.gitignore`.
    git_ignore: bool,
    /// Forwarded to `WalkBuilder::git_exclude`; listed as `.git/info/exclude`.
    git_exclude: bool,
    /// Forwarded to `WalkBuilder::git_global`; listed as `core.excludesFile`.
    git_global: bool,
    /// Forwarded to `WalkBuilder::ignore`; listed as `.ignore`.
    ignore: bool,
    /// Forwarded to `WalkBuilder::parents`; selects "+ parents" vs
    /// "scope only" in the note.
    parents: bool,
}
21
22/// Build the "# Note:" header line listing which ignore sources the walker
23/// honours, derived from the actual `WalkConfig` (no hardcoded copy).
24fn format_walk_note(cfg: &WalkConfig) -> String {
25    let mut respects: Vec<&'static str> = Vec::new();
26    if cfg.git_ignore {
27        respects.push(".gitignore");
28    }
29    if cfg.git_exclude {
30        respects.push(".git/info/exclude");
31    }
32    if cfg.git_global {
33        respects.push("core.excludesFile");
34    }
35    if cfg.ignore {
36        respects.push(".ignore");
37    }
38    let scope_word = if cfg.parents {
39        "+ parents"
40    } else {
41        "scope only"
42    };
43
44    let respects_part = if respects.is_empty() {
45        "no ignore files".to_string()
46    } else {
47        format!("{} ({scope_word})", respects.join(", "))
48    };
49
50    let hidden_part = if cfg.hidden {
51        "dotfiles excluded"
52    } else {
53        "dotfiles included"
54    };
55
56    format!(
57        "# Note: respects {respects_part}; {hidden_part}; built-in SKIP_DIRS still apply \
58         (target, node_modules, …). Use `srcwalk <path>` to inspect an ignored file directly.\n",
59    )
60}
61
62/// Generate a structural codebase map.
63/// By default files show compact token estimates; symbol names are opt-in.
64pub fn generate(
65    scope: &Path,
66    depth: usize,
67    budget: Option<u64>,
68    cache: &OutlineCache,
69    include_symbols: bool,
70    glob: Option<&str>,
71) -> Result<String, SrcwalkError> {
72    let mut tree: BTreeMap<PathBuf, Vec<FileEntry>> = BTreeMap::new();
73
74    let cfg = WalkConfig {
75        hidden: false,
76        git_ignore: true,
77        git_global: true,
78        git_exclude: true,
79        ignore: true,
80        parents: true,
81    };
82
83    let mut builder = WalkBuilder::new(scope);
84    builder
85        .follow_links(true)
86        .hidden(cfg.hidden)
87        .git_ignore(cfg.git_ignore)
88        .git_global(cfg.git_global)
89        .git_exclude(cfg.git_exclude)
90        .ignore(cfg.ignore)
91        .parents(cfg.parents)
92        .filter_entry(|entry| {
93            if entry.file_type().is_some_and(|ft| ft.is_dir()) {
94                if let Some(name) = entry.file_name().to_str() {
95                    return !crate::search::io::SKIP_DIRS.contains(&name);
96                }
97            }
98            true
99        })
100        .max_depth(Some(depth + 1));
101
102    if let Some(pattern) = glob.filter(|p| !p.is_empty()) {
103        let mut overrides = ignore::overrides::OverrideBuilder::new(scope);
104        overrides
105            .add(pattern)
106            .map_err(|e| SrcwalkError::InvalidQuery {
107                query: pattern.to_string(),
108                reason: format!("invalid glob: {e}"),
109            })?;
110        builder.overrides(overrides.build().map_err(|e| SrcwalkError::InvalidQuery {
111            query: pattern.to_string(),
112            reason: format!("invalid glob: {e}"),
113        })?);
114    }
115
116    let walker = builder.build();
117
118    for entry in walker.flatten() {
119        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
120            continue;
121        }
122
123        let path = entry.path();
124        let rel = path.strip_prefix(scope).unwrap_or(path);
125
126        // Skip if deeper than requested
127        let file_depth = rel.components().count().saturating_sub(1);
128        if file_depth > depth {
129            continue;
130        }
131
132        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
133        let name = rel
134            .file_name()
135            .and_then(|n| n.to_str())
136            .unwrap_or("")
137            .to_string();
138
139        let meta = std::fs::metadata(path).ok();
140        let byte_len = meta.as_ref().map_or(0, std::fs::Metadata::len);
141        let tokens = estimate_tokens(byte_len);
142
143        let symbols = if include_symbols {
144            let file_type = detect_file_type(path);
145            match file_type {
146                FileType::Code(_) => {
147                    let mtime = meta
148                        .and_then(|m| m.modified().ok())
149                        .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
150
151                    let outline_str = cache.get_or_compute(path, mtime, || {
152                        let content = std::fs::read_to_string(path).unwrap_or_default();
153                        let buf = content.as_bytes();
154                        outline::generate(path, file_type, &content, buf, true)
155                    });
156
157                    Some(extract_symbol_names(&outline_str))
158                }
159                _ => None,
160            }
161        } else {
162            None
163        };
164
165        tree.entry(parent.clone()).or_default().push(FileEntry {
166            name,
167            symbols,
168            tokens,
169        });
170
171        // Ensure all ancestor directories exist in the tree so format_tree can find them.
172        let mut ancestor = parent.parent();
173        while let Some(a) = ancestor {
174            tree.entry(a.to_path_buf()).or_default();
175            if a == Path::new("") {
176                break;
177            }
178            ancestor = a.parent();
179        }
180    }
181
182    let mut out = format!(
183        "# Map: {} (depth {}, sizes ~= tokens)\n",
184        scope.display(),
185        depth
186    );
187    out.push_str(&format_walk_note(&cfg));
188    let totals = compute_dir_totals(&tree);
189    format_tree(&tree, &totals, Path::new(""), 0, &mut out);
190
191    let mut out = match budget {
192        Some(b) => crate::budget::apply(&out, b),
193        None => out,
194    };
195    if include_symbols {
196        out.push_str("\n\n> Tip: narrow with --scope <dir>.\n");
197    } else {
198        out.push_str("\n\n> Tip: add --symbols, or narrow with --scope <dir>.\n");
199    }
200    Ok(out)
201}
202
203/// Compute total tokens for each directory (sum of all descendant files).
204fn compute_dir_totals(tree: &BTreeMap<PathBuf, Vec<FileEntry>>) -> BTreeMap<PathBuf, u64> {
205    let mut totals: BTreeMap<PathBuf, u64> = BTreeMap::new();
206    for (dir, files) in tree {
207        let sum: u64 = files.iter().map(|f| f.tokens).sum();
208        // Add this dir's direct file tokens to itself and every ancestor.
209        let mut cur: Option<&Path> = Some(dir.as_path());
210        while let Some(p) = cur {
211            *totals.entry(p.to_path_buf()).or_insert(0) += sum;
212            if p == Path::new("") {
213                break;
214            }
215            cur = p.parent();
216        }
217    }
218    totals
219}
220
/// One file row in the rendered map.
struct FileEntry {
    /// File name (final path component) shown in the tree.
    name: String,
    /// Symbol names to list for the file; `None` when symbols were not
    /// requested or the file was not detected as code.
    symbols: Option<Vec<String>>,
    /// Estimated token count derived from the file's byte length.
    tokens: u64,
}
226
227/// Extract symbol names from an outline string.
228/// Outline lines look like: `[7-57]       fn classify`
229/// We extract the last word(s) after the kind keyword.
230fn extract_symbol_names(outline: &str) -> Vec<String> {
231    let mut names = Vec::new();
232    for line in outline.lines() {
233        let trimmed = line.trim();
234        // Skip import lines and empty lines
235        if trimmed.starts_with('[') {
236            // Find the symbol name after kind keywords
237            if let Some(sig_start) = find_symbol_start(trimmed) {
238                let sig = &trimmed[sig_start..];
239                // Take just the name (up to first paren or space after name)
240                let name = extract_name_from_sig(sig);
241                if !name.is_empty() && name != "imports" {
242                    names.push(name);
243                }
244            }
245        }
246    }
247    names
248}
249
/// Locate where a symbol's name begins on an outline line.
///
/// Kind keywords are tried in priority order (so `const fn new` resolves via
/// `fn ` to `new`). A keyword only counts when it starts the line or is
/// preceded by a non-identifier character; otherwise `fn ` would match inside
/// `my_fn ` and `class ` inside `Subclass `, yielding a wrong or empty name.
///
/// Returns the byte offset just past the matched keyword, or `None` when no
/// keyword occurs at a word boundary.
fn find_symbol_start(line: &str) -> Option<usize> {
    const KINDS: [&str; 14] = [
        "fn ",
        "struct ",
        "enum ",
        "trait ",
        "impl ",
        "mod ",
        "class ",
        "interface ",
        "type ",
        "const ",
        "static ",
        "function ",
        "method ",
        "def ",
    ];

    // Same identifier alphabet that name extraction uses.
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    KINDS.iter().find_map(|kind| {
        line.match_indices(*kind)
            // Reject hits that are merely the tail of a longer identifier.
            .find(|(pos, _)| !line[..*pos].chars().next_back().is_some_and(is_ident_char))
            .map(|(pos, _)| pos + kind.len())
    })
}
274
/// Return the leading identifier of `sig`.
///
/// The identifier is the longest prefix made of alphanumeric characters,
/// `_`, or `$`; the result is empty when `sig` does not start with one.
fn extract_name_from_sig(sig: &str) -> String {
    // Byte offset of the first character that cannot be part of a name.
    let end = sig
        .find(|c: char| !(c.is_alphanumeric() || c == '_' || c == '$'))
        .unwrap_or(sig.len());
    sig[..end].to_string()
}
281
282fn format_tree(
283    tree: &BTreeMap<PathBuf, Vec<FileEntry>>,
284    totals: &BTreeMap<PathBuf, u64>,
285    dir: &Path,
286    indent: usize,
287    out: &mut String,
288) {
289    // Show directories first, largest first, so truncated maps keep the
290    // highest-signal navigation scaffold near the top.
291    let mut subdirs: Vec<&PathBuf> = tree
292        .keys()
293        .filter(|k| k.parent() == Some(dir) && *k != dir)
294        .collect();
295    subdirs.sort_by(|a, b| {
296        let a_total = totals.get(*a).copied().unwrap_or(0);
297        let b_total = totals.get(*b).copied().unwrap_or(0);
298        b_total.cmp(&a_total).then_with(|| a.cmp(b))
299    });
300
301    let prefix = "  ".repeat(indent);
302
303    for subdir in subdirs {
304        let dir_name = subdir.file_name().and_then(|n| n.to_str()).unwrap_or("?");
305        let total = totals.get(subdir).copied().unwrap_or(0);
306        let _ = writeln!(out, "{prefix}{dir_name}/  ~{}", fmt_tokens(total));
307        format_tree(tree, totals, subdir, indent + 1, out);
308    }
309
310    if let Some(files) = tree.get(dir) {
311        let mut files: Vec<&FileEntry> = files.iter().collect();
312        files.sort_by(|a, b| b.tokens.cmp(&a.tokens).then_with(|| a.name.cmp(&b.name)));
313
314        for f in files {
315            if let Some(ref symbols) = f.symbols {
316                if symbols.is_empty() {
317                    let _ = writeln!(out, "{prefix}{}  ~{}", f.name, fmt_tokens(f.tokens));
318                } else {
319                    let syms = symbols.join(", ");
320                    let truncated = if syms.len() > 80 {
321                        format!("{}...", crate::types::truncate_str(&syms, 77))
322                    } else {
323                        syms
324                    };
325                    let _ = writeln!(out, "{prefix}{}: {truncated}", f.name);
326                }
327            } else {
328                let _ = writeln!(out, "{prefix}{}  ~{}", f.name, fmt_tokens(f.tokens));
329            }
330        }
331    }
332}
333
/// Format a token count compactly: values below 1000 verbatim, thousands
/// with one decimal and a `k` suffix (e.g. "12.3k"), and millions with one
/// decimal and an `M` suffix (e.g. "1.2M").
#[allow(clippy::cast_precision_loss)] // display-only; mantissa loss is fine for summaries
fn fmt_tokens(n: u64) -> String {
    match n {
        0..=999 => n.to_string(),
        1_000..=999_999 => format!("{:.1}k", n as f64 / 1_000.0),
        _ => format!("{:.1}M", n as f64 / 1_000_000.0),
    }
}