1use std::collections::BTreeMap;
2use std::io::Write;
3use std::path::{Path, PathBuf};
4
5use rayon::prelude::*;
6use serde::Serialize;
7
8use crate::args::{ExtractArgs, OutputFormat, OutputMode};
9use crate::cache;
10use crate::extract;
11use crate::format;
12use crate::predicate::{self, MatchSet, Predicate, RefMatch};
13use crate::tsconfig;
14use crate::walk;
15use code_moniker_core::core::code_graph::{DefRecord, RefRecord};
16use code_moniker_core::core::moniker::Moniker;
17use code_moniker_core::lang::Lang;
18
/// Number of definition kinds passed to `top_kinds` when rendering a file's
/// summary line (used by both the TSV and the tree summary writers).
const TOP_KINDS_DISPLAYED: usize = 3;
20
21pub fn run<W: Write>(
22 args: &ExtractArgs,
23 stdout: &mut W,
24 root: &Path,
25 scheme: &str,
26) -> anyhow::Result<bool> {
27 let files = walk::walk_lang_files(root);
28 let ctx = extract::Context {
29 ts: tsconfig::load(root),
30 project: args.project.clone(),
31 };
32 let has_filter = !args.kind.is_empty() || !args.shape.is_empty() || !args.where_.is_empty();
33 if has_filter {
34 run_filter(args, stdout, &files, root, scheme, &ctx)
35 } else {
36 run_summary(args, stdout, &files, root, &ctx)
37 }
38}
39
40fn run_summary<W: Write>(
41 args: &ExtractArgs,
42 stdout: &mut W,
43 files: &[walk::WalkedFile],
44 root: &Path,
45 ctx: &extract::Context,
46) -> anyhow::Result<bool> {
47 let cache_dir = args.cache.as_deref();
48 let summaries: Vec<FileSummary> = files
49 .par_iter()
50 .filter_map(|f| FileSummary::compute(&f.path, f.lang, root, cache_dir, ctx))
51 .collect();
52 let total_defs: usize = summaries.iter().map(|s| s.defs).sum();
53 let total_refs: usize = summaries.iter().map(|s| s.refs).sum();
54 let any = total_defs + total_refs > 0;
55 match args.mode() {
56 OutputMode::Default => match args.format {
57 OutputFormat::Tsv => write_summary_tsv(stdout, &summaries)?,
58 OutputFormat::Json => write_summary_json(stdout, &summaries)?,
59 #[cfg(feature = "pretty")]
60 OutputFormat::Tree => write_summary_tree(stdout, &summaries, args)?,
61 },
62 OutputMode::Count => writeln!(stdout, "{}", total_defs + total_refs)?,
63 OutputMode::Quiet => {}
64 }
65 Ok(any)
66}
67
68fn run_filter<W: Write>(
69 args: &ExtractArgs,
70 stdout: &mut W,
71 files: &[walk::WalkedFile],
72 root: &Path,
73 scheme: &str,
74 ctx: &extract::Context,
75) -> anyhow::Result<bool> {
76 let predicates = args.compiled_predicates(scheme)?;
77 let mut langs: Vec<Lang> = files.iter().map(|f| f.lang).collect();
78 langs.sort_by_key(|l| l.tag());
79 langs.dedup();
80 let known = predicate::known_kinds(langs.iter());
81 let unknown = predicate::unknown_kinds(&args.kind, &known);
82 if !unknown.is_empty() {
83 return Err(crate::unknown_kinds_error(&unknown, &langs, &known));
84 }
85 let cache_dir = args.cache.as_deref();
86 let rows: Vec<FilterRow> = files
87 .par_iter()
88 .filter_map(|f| {
89 FilterRow::compute(
90 &f.path,
91 f.lang,
92 root,
93 &predicates,
94 &args.kind,
95 &args.shape,
96 cache_dir,
97 ctx,
98 )
99 })
100 .collect();
101 let total_defs: usize = rows.iter().map(|r| r.defs.len()).sum();
102 let total_refs: usize = rows.iter().map(|r| r.refs.len()).sum();
103 let any = total_defs + total_refs > 0;
104 match args.mode() {
105 OutputMode::Default => match args.format {
106 OutputFormat::Tsv => write_filter_tsv(stdout, &rows, args, scheme)?,
107 OutputFormat::Json => write_filter_json(stdout, &rows, args, scheme)?,
108 #[cfg(feature = "pretty")]
109 OutputFormat::Tree => write_filter_tree(stdout, &rows, args, scheme)?,
110 },
111 OutputMode::Count => writeln!(stdout, "{}", total_defs + total_refs)?,
112 OutputMode::Quiet => {}
113 }
114 Ok(any)
115}
116
117#[derive(Serialize)]
118struct FileSummary {
119 file: String,
120 lang: &'static str,
121 defs: usize,
122 refs: usize,
123 by_def_kind: BTreeMap<String, usize>,
124 by_ref_kind: BTreeMap<String, usize>,
125}
126
127impl FileSummary {
128 fn compute(
129 path: &Path,
130 lang: Lang,
131 root: &Path,
132 cache_dir: Option<&Path>,
133 ctx: &extract::Context,
134 ) -> Option<Self> {
135 let rel = path.strip_prefix(root).unwrap_or(path);
136 let (graph, _) = cache::load_or_extract(path, rel, lang, cache_dir, ctx)?;
137 let mut by_def_kind: BTreeMap<String, usize> = BTreeMap::new();
138 let mut defs = 0usize;
139 for d in graph.defs() {
140 defs += 1;
141 bump_kind(&mut by_def_kind, &d.kind);
142 }
143 let mut by_ref_kind: BTreeMap<String, usize> = BTreeMap::new();
144 let mut refs = 0usize;
145 for r in graph.refs() {
146 refs += 1;
147 bump_kind(&mut by_ref_kind, &r.kind);
148 }
149 Some(Self {
150 file: rel.display().to_string(),
151 lang: lang.tag(),
152 defs,
153 refs,
154 by_def_kind,
155 by_ref_kind,
156 })
157 }
158}
159
160fn write_summary_tsv<W: Write>(w: &mut W, summaries: &[FileSummary]) -> std::io::Result<()> {
161 for s in summaries {
162 writeln!(
163 w,
164 "{file}\t{lang}\t{defs}\t{refs}\t{top}",
165 file = s.file,
166 lang = s.lang,
167 defs = s.defs,
168 refs = s.refs,
169 top = top_kinds(&s.by_def_kind, TOP_KINDS_DISPLAYED),
170 )?;
171 }
172 Ok(())
173}
174
/// Renders the summaries as a directory tree, labelling every file with its
/// language tag, def/ref counts and most frequent def kinds.
#[cfg(feature = "pretty")]
fn write_summary_tree<W: Write>(
    w: &mut W,
    summaries: &[FileSummary],
    args: &ExtractArgs,
) -> anyhow::Result<()> {
    let mut entries: Vec<(String, String)> = Vec::with_capacity(summaries.len());
    for summary in summaries {
        let top = top_kinds(&summary.by_def_kind, TOP_KINDS_DISPLAYED);
        let label = format!(
            "({lang}) defs:{defs} refs:{refs} [{top}]",
            lang = summary.lang,
            defs = summary.defs,
            refs = summary.refs,
        );
        entries.push((summary.file.clone(), label));
    }
    format::tree::render_dir_tree(w, &entries, args)?;
    Ok(())
}
197
198fn write_summary_json<W: Write>(w: &mut W, summaries: &[FileSummary]) -> anyhow::Result<()> {
199 #[derive(Serialize)]
200 struct Out<'a> {
201 total_files: usize,
202 total_defs: usize,
203 total_refs: usize,
204 files: &'a [FileSummary],
205 }
206 let total_defs = summaries.iter().map(|s| s.defs).sum();
207 let total_refs = summaries.iter().map(|s| s.refs).sum();
208 let out = Out {
209 total_files: summaries.len(),
210 total_defs,
211 total_refs,
212 files: summaries,
213 };
214 serde_json::to_writer_pretty(&mut *w, &out)?;
215 w.write_all(b"\n")?;
216 Ok(())
217}
218
/// Increments the counter stored in `map` for `kind`.
///
/// `kind` is decoded as UTF-8; bytes that are not valid UTF-8 are counted
/// under the empty-string key. The key is only allocated the first time a
/// kind is seen.
fn bump_kind(map: &mut BTreeMap<String, usize>, kind: &[u8]) {
    let name = std::str::from_utf8(kind).unwrap_or("");
    match map.get_mut(name) {
        Some(count) => *count += 1,
        None => {
            map.insert(name.to_owned(), 1);
        }
    }
}
227
/// Formats the `n` most frequent kinds of `map` as `kind:count` pairs joined
/// by `", "`.
///
/// Kinds are ranked by descending count; equal counts are ordered by kind
/// name. An empty map renders as `"-"`.
fn top_kinds(map: &BTreeMap<String, usize>, n: usize) -> String {
    if map.is_empty() {
        return String::from("-");
    }

    let mut ranked: Vec<(&String, usize)> =
        map.iter().map(|(kind, count)| (kind, *count)).collect();
    // Highest count first, then alphabetical by kind.
    ranked.sort_by_key(|&(kind, count)| (std::cmp::Reverse(count), kind));

    let mut rendered = String::new();
    for (position, (kind, count)) in ranked.iter().take(n).enumerate() {
        if position > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(&format!("{kind}:{count}"));
    }
    rendered
}
241
242struct FilterRow {
243 rel: PathBuf,
244 lang: Lang,
245 source: String,
246 defs: Vec<DefRecord>,
247 refs: Vec<(RefRecord, Moniker)>,
248}
249
250impl FilterRow {
251 #[allow(clippy::too_many_arguments)]
252 fn compute(
253 path: &Path,
254 lang: Lang,
255 root: &Path,
256 predicates: &[Predicate],
257 kinds: &[String],
258 shapes: &[code_moniker_core::core::shape::Shape],
259 cache_dir: Option<&Path>,
260 ctx: &extract::Context,
261 ) -> Option<Self> {
262 let rel = path.strip_prefix(root).unwrap_or(path).to_path_buf();
263 let (graph, extracted_source) = cache::load_or_extract(path, &rel, lang, cache_dir, ctx)?;
264 let matches = predicate::filter(&graph, predicates, kinds, shapes);
265 if matches.defs.is_empty() && matches.refs.is_empty() {
266 return None;
267 }
268 let source = match extracted_source {
269 Some(s) => s,
270 None => std::fs::read_to_string(path).ok()?,
271 };
272 let defs = matches.defs.into_iter().cloned().collect();
273 let refs = matches
274 .refs
275 .into_iter()
276 .map(|rm| (rm.record.clone(), rm.source.clone()))
277 .collect();
278 Some(Self {
279 rel,
280 lang,
281 source,
282 defs,
283 refs,
284 })
285 }
286
287 fn match_set(&self) -> MatchSet<'_> {
288 MatchSet {
289 defs: self.defs.iter().collect(),
290 refs: self
291 .refs
292 .iter()
293 .map(|(rec, src)| RefMatch {
294 record: rec,
295 source: src,
296 })
297 .collect(),
298 }
299 }
300}
301
302fn write_filter_tsv<W: Write>(
303 w: &mut W,
304 rows: &[FilterRow],
305 args: &ExtractArgs,
306 scheme: &str,
307) -> std::io::Result<()> {
308 for row in rows {
309 let matches = row.match_set();
310 let mut buf: Vec<u8> = Vec::new();
311 format::write_tsv(&mut buf, &matches, &row.source, args, scheme)?;
312 let prefix = row.rel.display().to_string();
313 for line in std::str::from_utf8(&buf).unwrap_or("").lines() {
314 writeln!(w, "{prefix}\t{line}")?;
315 }
316 }
317 Ok(())
318}
319
/// Renders the filtered matches as a tree: one `FileEntry` per row, carrying
/// the relative path, a borrowed match set and the file's source text.
#[cfg(feature = "pretty")]
fn write_filter_tree<W: Write>(
    w: &mut W,
    rows: &[FilterRow],
    args: &ExtractArgs,
    scheme: &str,
) -> anyhow::Result<()> {
    let mut entries: Vec<format::tree::FileEntry<'_>> = Vec::with_capacity(rows.len());
    for row in rows {
        entries.push(format::tree::FileEntry {
            rel_path: row.rel.to_string_lossy().into_owned(),
            matches: row.match_set(),
            source: row.source.as_str(),
        });
    }
    format::tree::write_files_tree(w, &entries, args, scheme)?;
    Ok(())
}
338
339fn write_filter_json<W: Write>(
340 w: &mut W,
341 rows: &[FilterRow],
342 args: &ExtractArgs,
343 scheme: &str,
344) -> anyhow::Result<()> {
345 #[derive(Serialize)]
346 struct Entry {
347 file: String,
348 lang: &'static str,
349 matches: serde_json::Value,
350 }
351 let entries: Vec<Entry> = rows
352 .iter()
353 .map(|row| {
354 let matches = row.match_set();
355 Entry {
356 file: row.rel.display().to_string(),
357 lang: row.lang.tag(),
358 matches: format::build_matches_value(&matches, &row.source, args, scheme),
359 }
360 })
361 .collect();
362 let total_defs: usize = rows.iter().map(|r| r.defs.len()).sum();
363 let total_refs: usize = rows.iter().map(|r| r.refs.len()).sum();
364 #[derive(Serialize)]
365 struct Out {
366 total_files: usize,
367 total_defs: usize,
368 total_refs: usize,
369 files: Vec<Entry>,
370 }
371 let out = Out {
372 total_files: entries.len(),
373 total_defs,
374 total_refs,
375 files: entries,
376 };
377 serde_json::to_writer_pretty(&mut *w, &out)?;
378 w.write_all(b"\n")?;
379 Ok(())
380}
381
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // Test-only `Debug` so assertion messages can print a summary; it shows
    // the path and the two counts and leaves the remaining fields out.
    impl std::fmt::Debug for FileSummary {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.debug_struct("FileSummary")
                .field("file", &self.file)
                .field("defs", &self.defs)
                .field("refs", &self.refs)
                .finish()
        }
    }

    /// Creates `rel` under `root` (including missing parent directories) with
    /// `body` as its contents.
    fn write_file(root: &Path, rel: &str, body: &str) {
        let target = root.join(rel);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir).unwrap();
        }
        fs::write(target, body).unwrap();
    }

    #[test]
    fn summary_aggregates_per_file_counts() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        write_file(root, "a.ts", "export class Foo {}\nfunction bar() {}\n");
        write_file(root, "b.ts", "import { x } from 'y';\n");

        let ctx = extract::Context::default();
        let mut summaries: Vec<FileSummary> = Vec::new();
        for walked in walk::walk_lang_files(root).iter() {
            if let Some(summary) =
                FileSummary::compute(&walked.path, walked.lang, root, None, &ctx)
            {
                summaries.push(summary);
            }
        }
        assert_eq!(summaries.len(), 2);

        let a = summaries.iter().find(|s| s.file.ends_with("a.ts")).unwrap();
        assert!(a.defs >= 2, "a.ts should have at least 2 defs: {a:?}");
        let b = summaries.iter().find(|s| s.file.ends_with("b.ts")).unwrap();
        assert!(b.refs >= 1, "b.ts should have at least 1 ref: {b:?}");
    }

    #[test]
    fn top_kinds_sorted_by_count_desc_then_name() {
        let counts = BTreeMap::from([
            ("function".to_string(), 5),
            ("class".to_string(), 5),
            ("comment".to_string(), 10),
        ]);
        assert_eq!(top_kinds(&counts, 3), "comment:10, class:5, function:5");
    }

    #[test]
    fn top_kinds_empty_renders_dash() {
        let empty = BTreeMap::new();
        assert_eq!(top_kinds(&empty, 3), "-");
    }
}