1use std::collections::BTreeMap;
2use std::io::Write;
3use std::path::{Path, PathBuf};
4
5use rayon::prelude::*;
6use serde::Serialize;
7
8use crate::args::{Args, OutputFormat, OutputMode};
9use crate::cache;
10use crate::format;
11use crate::predicate::{self, MatchSet, Predicate, RefMatch};
12use crate::walk;
13use code_moniker_core::core::code_graph::{DefRecord, RefRecord};
14use code_moniker_core::core::moniker::Moniker;
15use code_moniker_core::lang::Lang;
16
/// Maximum number of definition kinds listed in the last column of each TSV summary row.
const TOP_KINDS_DISPLAYED: usize = 3;
18
19pub fn run<W: Write>(
20 args: &Args,
21 stdout: &mut W,
22 root: &Path,
23 scheme: &str,
24) -> anyhow::Result<bool> {
25 let files = walk::walk_lang_files(root);
26 let has_filter = !args.kind.is_empty() || !args.where_.is_empty();
27 if has_filter {
28 run_filter(args, stdout, &files, root, scheme)
29 } else {
30 run_summary(args, stdout, &files, root)
31 }
32}
33
34fn run_summary<W: Write>(
35 args: &Args,
36 stdout: &mut W,
37 files: &[walk::WalkedFile],
38 root: &Path,
39) -> anyhow::Result<bool> {
40 let cache_dir = args.cache.as_deref();
41 let summaries: Vec<FileSummary> = files
42 .par_iter()
43 .filter_map(|f| FileSummary::compute(&f.path, f.lang, root, cache_dir))
44 .collect();
45 let total_defs: usize = summaries.iter().map(|s| s.defs).sum();
46 let total_refs: usize = summaries.iter().map(|s| s.refs).sum();
47 let any = total_defs + total_refs > 0;
48 match args.mode() {
49 OutputMode::Default => match args.format {
50 OutputFormat::Tsv => write_summary_tsv(stdout, &summaries)?,
51 OutputFormat::Json => write_summary_json(stdout, &summaries)?,
52 },
53 OutputMode::Count => writeln!(stdout, "{}", total_defs + total_refs)?,
54 OutputMode::Quiet => {}
55 }
56 Ok(any)
57}
58
59fn run_filter<W: Write>(
60 args: &Args,
61 stdout: &mut W,
62 files: &[walk::WalkedFile],
63 root: &Path,
64 scheme: &str,
65) -> anyhow::Result<bool> {
66 let predicates = args.compiled_predicates(scheme)?;
67 let mut langs: Vec<Lang> = files.iter().map(|f| f.lang).collect();
68 langs.sort_by_key(|l| l.tag());
69 langs.dedup();
70 let known = predicate::known_kinds(langs.iter());
71 let unknown = predicate::unknown_kinds(&args.kind, &known);
72 if !unknown.is_empty() {
73 return Err(crate::unknown_kinds_error(&unknown, &langs, &known));
74 }
75 let cache_dir = args.cache.as_deref();
76 let rows: Vec<FilterRow> = files
77 .par_iter()
78 .filter_map(|f| {
79 FilterRow::compute(&f.path, f.lang, root, &predicates, &args.kind, cache_dir)
80 })
81 .collect();
82 let total_defs: usize = rows.iter().map(|r| r.defs.len()).sum();
83 let total_refs: usize = rows.iter().map(|r| r.refs.len()).sum();
84 let any = total_defs + total_refs > 0;
85 match args.mode() {
86 OutputMode::Default => match args.format {
87 OutputFormat::Tsv => write_filter_tsv(stdout, &rows, args, scheme)?,
88 OutputFormat::Json => write_filter_json(stdout, &rows, args, scheme)?,
89 },
90 OutputMode::Count => writeln!(stdout, "{}", total_defs + total_refs)?,
91 OutputMode::Quiet => {}
92 }
93 Ok(any)
94}
95
/// Per-file tally of definitions and references; serialized as one element of
/// the `files` array in the JSON summary.
#[derive(Serialize)]
struct FileSummary {
    /// Display form of the file path, relative to the walk root when the file lies under it.
    file: String,
    /// Language tag, as returned by `Lang::tag`.
    lang: &'static str,
    /// Number of definitions found in the file.
    defs: usize,
    /// Number of references found in the file.
    refs: usize,
    /// Definition count per kind name (kind bytes that are not valid UTF-8 are counted under `""`).
    by_def_kind: BTreeMap<String, usize>,
    /// Reference count per kind name (same `""` fallback for non-UTF-8 kind bytes).
    by_ref_kind: BTreeMap<String, usize>,
}
105
106impl FileSummary {
107 fn compute(path: &Path, lang: Lang, root: &Path, cache_dir: Option<&Path>) -> Option<Self> {
108 let rel = path.strip_prefix(root).unwrap_or(path);
109 let (graph, _) = cache::load_or_extract(path, rel, lang, cache_dir)?;
110 let mut by_def_kind: BTreeMap<String, usize> = BTreeMap::new();
111 let mut defs = 0usize;
112 for d in graph.defs() {
113 defs += 1;
114 bump_kind(&mut by_def_kind, &d.kind);
115 }
116 let mut by_ref_kind: BTreeMap<String, usize> = BTreeMap::new();
117 let mut refs = 0usize;
118 for r in graph.refs() {
119 refs += 1;
120 bump_kind(&mut by_ref_kind, &r.kind);
121 }
122 Some(Self {
123 file: rel.display().to_string(),
124 lang: lang.tag(),
125 defs,
126 refs,
127 by_def_kind,
128 by_ref_kind,
129 })
130 }
131}
132
133fn write_summary_tsv<W: Write>(w: &mut W, summaries: &[FileSummary]) -> std::io::Result<()> {
134 for s in summaries {
135 writeln!(
136 w,
137 "{file}\t{lang}\t{defs}\t{refs}\t{top}",
138 file = s.file,
139 lang = s.lang,
140 defs = s.defs,
141 refs = s.refs,
142 top = top_kinds(&s.by_def_kind, TOP_KINDS_DISPLAYED),
143 )?;
144 }
145 Ok(())
146}
147
148fn write_summary_json<W: Write>(w: &mut W, summaries: &[FileSummary]) -> anyhow::Result<()> {
149 #[derive(Serialize)]
150 struct Out<'a> {
151 total_files: usize,
152 total_defs: usize,
153 total_refs: usize,
154 files: &'a [FileSummary],
155 }
156 let total_defs = summaries.iter().map(|s| s.defs).sum();
157 let total_refs = summaries.iter().map(|s| s.refs).sum();
158 let out = Out {
159 total_files: summaries.len(),
160 total_defs,
161 total_refs,
162 files: summaries,
163 };
164 serde_json::to_writer_pretty(&mut *w, &out)?;
165 w.write_all(b"\n")?;
166 Ok(())
167}
168
/// Increments the counter stored in `map` for the kind named by `kind`.
///
/// `kind` is interpreted as UTF-8; bytes that are not valid UTF-8 are counted
/// under the empty-string key. A key is only allocated the first time a kind
/// is seen.
fn bump_kind(map: &mut BTreeMap<String, usize>, kind: &[u8]) {
    let name = std::str::from_utf8(kind).unwrap_or_default();
    match map.get_mut(name) {
        Some(count) => *count += 1,
        None => {
            map.insert(name.to_owned(), 1);
        }
    }
}
177
/// Renders the `n` most frequent kinds of `map` as `kind:count` items joined by `", "`.
///
/// Items are ordered by descending count, ties broken by ascending kind name.
/// An empty map renders as `"-"`.
fn top_kinds(map: &BTreeMap<String, usize>, n: usize) -> String {
    if map.is_empty() {
        return "-".to_string();
    }

    let mut ranked: Vec<(&String, &usize)> = map.iter().collect();
    // Kind names are unique map keys, so this comparator is a total order and
    // an unstable sort produces the same ranking as a stable one.
    ranked.sort_unstable_by(|(name_a, count_a), (name_b, count_b)| {
        count_b.cmp(count_a).then_with(|| name_a.cmp(name_b))
    });

    let mut rendered = String::new();
    for (name, count) in ranked.into_iter().take(n) {
        if !rendered.is_empty() {
            rendered.push_str(", ");
        }
        rendered.push_str(&format!("{name}:{count}"));
    }
    rendered
}
191
/// Owned filter results for one file, detached from the graph they were found in.
struct FilterRow {
    /// File path relative to the walk root (the original path when it is not under the root).
    rel: PathBuf,
    /// Language the file was walked as.
    lang: Lang,
    /// Source text of the file: the text returned by `cache::load_or_extract`
    /// when it provides one, otherwise read from disk.
    source: String,
    /// Clones of the matching definition records.
    defs: Vec<DefRecord>,
    /// Clones of the matching reference records, each paired with the `source`
    /// moniker of its `RefMatch`.
    refs: Vec<(RefRecord, Moniker)>,
}
199
200impl FilterRow {
201 fn compute(
202 path: &Path,
203 lang: Lang,
204 root: &Path,
205 predicates: &[Predicate],
206 kinds: &[String],
207 cache_dir: Option<&Path>,
208 ) -> Option<Self> {
209 let rel = path.strip_prefix(root).unwrap_or(path).to_path_buf();
210 let (graph, extracted_source) = cache::load_or_extract(path, &rel, lang, cache_dir)?;
211 let matches = predicate::filter(&graph, predicates, kinds);
212 if matches.defs.is_empty() && matches.refs.is_empty() {
213 return None;
214 }
215 let source = match extracted_source {
216 Some(s) => s,
217 None => std::fs::read_to_string(path).ok()?,
218 };
219 let defs = matches.defs.into_iter().cloned().collect();
220 let refs = matches
221 .refs
222 .into_iter()
223 .map(|rm| (rm.record.clone(), rm.source.clone()))
224 .collect();
225 Some(Self {
226 rel,
227 lang,
228 source,
229 defs,
230 refs,
231 })
232 }
233
234 fn match_set(&self) -> MatchSet<'_> {
235 MatchSet {
236 defs: self.defs.iter().collect(),
237 refs: self
238 .refs
239 .iter()
240 .map(|(rec, src)| RefMatch {
241 record: rec,
242 source: src,
243 })
244 .collect(),
245 }
246 }
247}
248
249fn write_filter_tsv<W: Write>(
250 w: &mut W,
251 rows: &[FilterRow],
252 args: &Args,
253 scheme: &str,
254) -> std::io::Result<()> {
255 for row in rows {
256 let matches = row.match_set();
257 let mut buf: Vec<u8> = Vec::new();
258 format::write_tsv(&mut buf, &matches, &row.source, args, scheme)?;
259 let prefix = row.rel.display().to_string();
260 for line in std::str::from_utf8(&buf).unwrap_or("").lines() {
261 writeln!(w, "{prefix}\t{line}")?;
262 }
263 }
264 Ok(())
265}
266
267fn write_filter_json<W: Write>(
268 w: &mut W,
269 rows: &[FilterRow],
270 args: &Args,
271 scheme: &str,
272) -> anyhow::Result<()> {
273 #[derive(Serialize)]
274 struct Entry {
275 file: String,
276 lang: &'static str,
277 matches: serde_json::Value,
278 }
279 let entries: Vec<Entry> = rows
280 .iter()
281 .map(|row| {
282 let matches = row.match_set();
283 Entry {
284 file: row.rel.display().to_string(),
285 lang: row.lang.tag(),
286 matches: format::build_matches_value(&matches, &row.source, args, scheme),
287 }
288 })
289 .collect();
290 let total_defs: usize = rows.iter().map(|r| r.defs.len()).sum();
291 let total_refs: usize = rows.iter().map(|r| r.refs.len()).sum();
292 #[derive(Serialize)]
293 struct Out {
294 total_files: usize,
295 total_defs: usize,
296 total_refs: usize,
297 files: Vec<Entry>,
298 }
299 let out = Out {
300 total_files: entries.len(),
301 total_defs,
302 total_refs,
303 files: entries,
304 };
305 serde_json::to_writer_pretty(&mut *w, &out)?;
306 w.write_all(b"\n")?;
307 Ok(())
308}
309
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // Test-only `Debug` so assertion messages can print a summary; it shows
    // the path and the two counts only.
    impl std::fmt::Debug for FileSummary {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.debug_struct("FileSummary")
                .field("file", &self.file)
                .field("defs", &self.defs)
                .field("refs", &self.refs)
                .finish()
        }
    }

    /// Creates `rel` under `root` with `body` as content, creating parent directories as needed.
    fn write_file(root: &Path, rel: &str, body: &str) {
        let target = root.join(rel);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir).unwrap();
        }
        fs::write(target, body).unwrap();
    }

    #[test]
    fn summary_aggregates_per_file_counts() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        write_file(root, "a.ts", "export class Foo {}\nfunction bar() {}\n");
        write_file(root, "b.ts", "import { x } from 'y';\n");

        let mut summaries: Vec<FileSummary> = Vec::new();
        for walked in walk::walk_lang_files(root).iter() {
            if let Some(summary) = FileSummary::compute(&walked.path, walked.lang, root, None) {
                summaries.push(summary);
            }
        }
        assert_eq!(summaries.len(), 2);

        let a = summaries
            .iter()
            .find(|summary| summary.file.ends_with("a.ts"))
            .unwrap();
        assert!(a.defs >= 2, "a.ts should have at least 2 defs: {a:?}");

        let b = summaries
            .iter()
            .find(|summary| summary.file.ends_with("b.ts"))
            .unwrap();
        assert!(b.refs >= 1, "b.ts should have at least 1 ref: {b:?}");
    }

    #[test]
    fn top_kinds_sorted_by_count_desc_then_name() {
        let counts = BTreeMap::from([
            ("function".to_string(), 5),
            ("class".to_string(), 5),
            ("comment".to_string(), 10),
        ]);
        assert_eq!(top_kinds(&counts, 3), "comment:10, class:5, function:5");
    }

    #[test]
    fn top_kinds_empty_renders_dash() {
        let empty = BTreeMap::new();
        assert_eq!(top_kinds(&empty, 3), "-");
    }
}