1pub mod args;
5pub mod cache;
6pub mod check;
7pub mod dir;
8pub mod extract;
9pub mod format;
10pub mod lang;
11pub mod lines;
12pub mod manifest;
13pub mod predicate;
14pub mod tsconfig;
15pub mod walk;
16
17use std::io::Write;
18use std::path::{Path, PathBuf};
19use std::process::ExitCode;
20
21pub use args::{
22 CheckArgs, CheckFormat, Cli, Command, ExtractArgs, LangsArgs, LangsFormat, ManifestArgs,
23 ManifestFormat, OutputFormat, OutputMode, ShapesArgs,
24};
25pub use lang::{LangError, path_to_lang};
26pub use predicate::{MatchSet, Predicate};
27
/// Default moniker URI scheme applied when the user does not pass `--scheme`.
pub(crate) const DEFAULT_SCHEME: &str = "code+moniker://";
29
30pub(crate) fn unknown_kinds_error(
31 unknown: &[String],
32 langs: &[code_moniker_core::lang::Lang],
33 known: &std::collections::BTreeSet<&'static str>,
34) -> anyhow::Error {
35 let lang_tags: Vec<&str> = langs.iter().map(|l| l.tag()).collect();
36 let known_list: Vec<&str> = known.iter().copied().collect();
37 anyhow::anyhow!(
38 "unknown --kind {} (langs in scope: {}; known kinds: {})",
39 unknown.join(", "),
40 lang_tags.join(", "),
41 known_list.join(", "),
42 )
43}
44
45pub(crate) fn render_uri(
46 m: &code_moniker_core::core::moniker::Moniker,
47 cfg: &code_moniker_core::core::uri::UriConfig<'_>,
48) -> String {
49 code_moniker_core::core::uri::to_uri(m, cfg)
50 .unwrap_or_else(|_| format!("<non-utf8:{}b>", m.as_bytes().len()))
51}
52
/// Tri-state process outcome used by every subcommand runner.
/// Converted to the stable exit codes 0/1/2 via `From<Exit> for ExitCode`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Exit {
    /// Something matched / the run was clean — exit code 0.
    Match,
    /// Ran successfully but nothing matched (or violations were found) — exit code 1.
    NoMatch,
    /// Bad arguments or a runtime failure — exit code 2.
    UsageError,
}
59
60impl From<Exit> for ExitCode {
61 fn from(e: Exit) -> Self {
62 match e {
63 Exit::Match => ExitCode::SUCCESS,
64 Exit::NoMatch => ExitCode::from(1),
65 Exit::UsageError => ExitCode::from(2),
66 }
67 }
68}
69
70pub fn run<W1: Write, W2: Write>(cli: &Cli, stdout: &mut W1, stderr: &mut W2) -> Exit {
71 match &cli.command {
72 Command::Extract(args) => run_extract(args, stdout, stderr),
73 Command::Check(args) => run_check(args, stdout, stderr),
74 Command::Langs(args) => run_langs(args, stdout, stderr),
75 Command::Shapes(args) => run_shapes(args, stdout, stderr),
76 Command::Manifest(args) => run_manifest(args, stdout, stderr),
77 }
78}
79
80fn run_manifest<W1: Write, W2: Write>(
81 args: &ManifestArgs,
82 stdout: &mut W1,
83 stderr: &mut W2,
84) -> Exit {
85 match manifest::run(args, stdout, stderr) {
86 0 => Exit::Match,
87 1 => Exit::NoMatch,
88 _ => Exit::UsageError,
89 }
90}
91
92fn shape_description(shape: code_moniker_core::core::shape::Shape) -> &'static str {
93 use code_moniker_core::core::shape::Shape;
94 match shape {
95 Shape::Namespace => "container scopes (module, namespace, schema, impl)",
96 Shape::Type => {
97 "type-like declarations (class, struct, enum, interface, trait, table, view, …)"
98 }
99 Shape::Callable => {
100 "executable code (function, method, constructor, procedure, async_function)"
101 }
102 Shape::Value => "named bindings (field, const, static, enum_constant, param, local, …)",
103 Shape::Annotation => "attached metadata (comment) — not a structural scope",
104 Shape::Ref => {
105 "cross-record references (calls, imports_*, extends, uses_type, …) — marker shape for ref records"
106 }
107 }
108}
109
110fn run_shapes<W1: Write, W2: Write>(args: &ShapesArgs, stdout: &mut W1, stderr: &mut W2) -> Exit {
111 match shapes_inner(args, stdout) {
112 Ok(()) => Exit::Match,
113 Err(e) => {
114 let _ = writeln!(stderr, "code-moniker: {e:#}");
115 Exit::UsageError
116 }
117 }
118}
119
120fn shapes_inner<W: Write>(args: &ShapesArgs, stdout: &mut W) -> anyhow::Result<()> {
121 use code_moniker_core::core::shape::Shape;
122 match args.format {
123 LangsFormat::Text => {
124 writeln!(
125 stdout,
126 "Each def's `kind` maps to exactly one shape; refs share `ref` as marker."
127 )?;
128 writeln!(
129 stdout,
130 "Filter with `--shape <NAME>`; `code-moniker langs <TAG>` shows the kind↔shape map per language."
131 )?;
132 writeln!(stdout)?;
133 let width = Shape::ALL
134 .iter()
135 .map(|s| s.as_str().len())
136 .max()
137 .unwrap_or(0);
138 for shape in Shape::ALL {
139 writeln!(
140 stdout,
141 " {:<width$} {}",
142 shape.as_str(),
143 shape_description(*shape),
144 width = width
145 )?;
146 }
147 }
148 LangsFormat::Json => {
149 #[derive(serde::Serialize)]
150 struct Entry<'a> {
151 name: &'a str,
152 description: &'a str,
153 }
154 let entries: Vec<Entry> = Shape::ALL
155 .iter()
156 .map(|s| Entry {
157 name: s.as_str(),
158 description: shape_description(*s),
159 })
160 .collect();
161 serde_json::to_writer_pretty(&mut *stdout, &entries)?;
162 stdout.write_all(b"\n")?;
163 }
164 }
165 Ok(())
166}
167
168fn run_langs<W1: Write, W2: Write>(args: &LangsArgs, stdout: &mut W1, stderr: &mut W2) -> Exit {
169 match langs_inner(args, stdout) {
170 Ok(()) => Exit::Match,
171 Err(e) => {
172 let _ = writeln!(stderr, "code-moniker: {e:#}");
173 Exit::UsageError
174 }
175 }
176}
177
178fn collect_kinds(
179 lang: code_moniker_core::lang::Lang,
180) -> Vec<(&'static str, code_moniker_core::core::shape::Shape)> {
181 use code_moniker_core::core::shape::Shape;
182 predicate::known_kinds(std::iter::once(&lang))
183 .into_iter()
184 .map(|k| (k, Shape::for_kind(k.as_bytes())))
185 .collect()
186}
187
188fn langs_inner<W: Write>(args: &LangsArgs, stdout: &mut W) -> anyhow::Result<()> {
189 use code_moniker_core::lang::Lang;
190
191 match &args.lang {
192 None => match args.format {
193 LangsFormat::Text => {
194 for lang in Lang::ALL {
195 writeln!(stdout, "{}", lang.tag())?;
196 }
197 }
198 LangsFormat::Json => {
199 let tags: Vec<&str> = Lang::ALL.iter().map(|l| l.tag()).collect();
200 serde_json::to_writer_pretty(&mut *stdout, &tags)?;
201 stdout.write_all(b"\n")?;
202 }
203 },
204 Some(tag) => {
205 let lang = Lang::from_tag(tag).ok_or_else(|| {
206 let known: Vec<&str> = Lang::ALL.iter().map(|l| l.tag()).collect();
207 anyhow::anyhow!("unknown language `{tag}` (known: {})", known.join(", "))
208 })?;
209 let kinds = collect_kinds(lang);
210 let visibilities = lang.allowed_visibilities();
211 match args.format {
212 LangsFormat::Text => write_langs_text(stdout, lang.tag(), &kinds, visibilities)?,
213 LangsFormat::Json => write_langs_json(stdout, lang.tag(), &kinds, visibilities)?,
214 }
215 }
216 }
217 Ok(())
218}
219
220fn write_langs_text<W: Write>(
221 w: &mut W,
222 tag: &str,
223 kinds: &[(&'static str, code_moniker_core::core::shape::Shape)],
224 visibilities: &[&'static str],
225) -> std::io::Result<()> {
226 use code_moniker_core::core::shape::Shape;
227 writeln!(w, "lang: {tag}")?;
228 writeln!(w, "kinds:")?;
229 let width = Shape::ALL
230 .iter()
231 .map(|s| s.as_str().len() + 1)
232 .max()
233 .unwrap_or(0);
234 for shape in Shape::ALL {
235 let names: Vec<&str> = kinds
236 .iter()
237 .filter(|(_, s)| s == shape)
238 .map(|(n, _)| *n)
239 .collect();
240 if names.is_empty() {
241 continue;
242 }
243 writeln!(
244 w,
245 " {:<width$} {}",
246 format!("{}:", shape.as_str()),
247 names.join(", "),
248 width = width
249 )?;
250 }
251 if visibilities.is_empty() {
252 writeln!(w, "visibilities: (none — ignored by this language)")?;
253 } else {
254 writeln!(w, "visibilities: {}", visibilities.join(", "))?;
255 }
256 Ok(())
257}
258
259fn write_langs_json<W: Write>(
260 w: &mut W,
261 tag: &str,
262 kinds: &[(&'static str, code_moniker_core::core::shape::Shape)],
263 visibilities: &[&'static str],
264) -> anyhow::Result<()> {
265 #[derive(serde::Serialize)]
266 struct KindEntry<'a> {
267 name: &'a str,
268 shape: &'a str,
269 }
270 #[derive(serde::Serialize)]
271 struct Out<'a> {
272 lang: &'a str,
273 kinds: Vec<KindEntry<'a>>,
274 visibilities: &'a [&'static str],
275 }
276 let out = Out {
277 lang: tag,
278 kinds: kinds
279 .iter()
280 .map(|(n, s)| KindEntry {
281 name: n,
282 shape: s.as_str(),
283 })
284 .collect(),
285 visibilities,
286 };
287 serde_json::to_writer_pretty(&mut *w, &out)?;
288 w.write_all(b"\n")?;
289 Ok(())
290}
291
292fn run_extract<W1: Write, W2: Write>(args: &ExtractArgs, stdout: &mut W1, stderr: &mut W2) -> Exit {
293 match extract_inner(args, stdout) {
294 Ok(any) => {
295 if any {
296 Exit::Match
297 } else {
298 Exit::NoMatch
299 }
300 }
301 Err(e) => {
302 let _ = writeln!(stderr, "code-moniker: {e:#}");
303 Exit::UsageError
304 }
305 }
306}
307
/// Core of the `extract` subcommand for a single path.
///
/// Directories are delegated to `dir::run`; a single file goes through the
/// cache-aware extraction pipeline, is filtered by the CLI predicates, and is
/// rendered in the requested output mode/format. Returns `Ok(true)` when at
/// least one def or ref survived the filters (this drives the exit code).
fn extract_inner<W: Write>(args: &ExtractArgs, stdout: &mut W) -> anyhow::Result<bool> {
    let path: &Path = &args.path;
    // `--scheme` falls back to the crate-wide default moniker scheme.
    let scheme = args.scheme.as_deref().unwrap_or(DEFAULT_SCHEME).to_string();
    let meta = std::fs::metadata(path)
        .map_err(|e| anyhow::anyhow!("cannot stat {}: {e}", path.display()))?;
    if meta.is_dir() {
        // Directory mode: the dir module handles the recursive walk.
        return dir::run(args, stdout, path, &scheme);
    }
    let lang = path_to_lang(path)?;
    let predicates = args.compiled_predicates(&scheme)?;
    // Reject unknown --kind values up front so the user gets a clear error
    // instead of a silently empty result.
    let known = predicate::known_kinds(std::iter::once(&lang));
    let unknown = predicate::unknown_kinds(&args.kind, &known);
    if !unknown.is_empty() {
        return Err(unknown_kinds_error(&unknown, &[lang], &known));
    }
    let ctx = extract::Context {
        // tsconfig is looked up from the file's directory (CWD as fallback).
        ts: tsconfig::load(path.parent().unwrap_or_else(|| Path::new("."))),
        project: args.project.clone(),
    };
    let (graph, extracted_source) =
        cache::load_or_extract(path, path, lang, args.cache.as_deref(), &ctx)
            .ok_or_else(|| anyhow::anyhow!("cannot read {}", path.display()))?;
    // The cache may not return the source text; re-read the file when absent.
    let source = match extracted_source {
        Some(s) => s,
        None => std::fs::read_to_string(path)
            .map_err(|e| anyhow::anyhow!("cannot read {}: {e}", path.display()))?,
    };
    let matches = predicate::filter(&graph, &predicates, &args.kind, &args.shape);
    let any = !matches.defs.is_empty() || !matches.refs.is_empty();
    match args.mode() {
        OutputMode::Default => match args.format {
            OutputFormat::Tsv => format::write_tsv(stdout, &matches, &source, args, &scheme)?,
            OutputFormat::Json => {
                format::write_json(stdout, &matches, &source, args, lang, path, &scheme)?
            }
            // Tree output only exists when the `pretty` feature is compiled in.
            #[cfg(feature = "pretty")]
            OutputFormat::Tree => format::tree::write_tree(stdout, &matches, &source, args, &scheme)?,
        },
        OutputMode::Count => {
            let n = matches.defs.len() + matches.refs.len();
            writeln!(stdout, "{n}")?;
        }
        OutputMode::Quiet => {}
    }
    Ok(any)
}
354
355fn run_check<W1: Write, W2: Write>(args: &CheckArgs, stdout: &mut W1, stderr: &mut W2) -> Exit {
356 match check_inner(args, stdout, stderr) {
357 Ok(any_violation_or_error) => {
358 if any_violation_or_error {
359 Exit::NoMatch
360 } else {
361 Exit::Match
362 }
363 }
364 Err(e) => {
365 let _ = writeln!(stderr, "code-moniker: {e:#}");
366 Exit::UsageError
367 }
368 }
369}
370
/// Core of the `check` subcommand. Loads rules (plus an optional profile),
/// checks a single file or a whole directory tree, prints per-file read
/// errors to stderr, then renders the reports on stdout. Returns `Ok(true)`
/// when any violation or read error occurred.
fn check_inner<W: Write, E: Write>(
    args: &CheckArgs,
    stdout: &mut W,
    stderr: &mut E,
) -> anyhow::Result<bool> {
    let path: &Path = &args.path;
    let mut cfg = check::load_with_overrides(Some(&args.rules))?;
    if let Some(name) = &args.profile {
        cfg.apply_profile(name)?;
    }
    let meta = std::fs::metadata(path)
        .map_err(|e| anyhow::anyhow!("cannot stat {}: {e}", path.display()))?;
    let (reports, errors) = if meta.is_dir() {
        check_project(path, &cfg, args.report)?
    } else {
        match check_one_file(path, &cfg, args.report)? {
            Some(report) => (vec![report], Vec::new()),
            // Unsupported file type: nothing to check, treat as clean.
            None => return Ok(false),
        }
    };
    // Read errors go to stderr so stdout stays machine-parseable.
    for e in &errors {
        let _ = writeln!(
            stderr,
            "code-moniker: error reading {}: {}",
            e.path.display(),
            e.error
        );
    }
    let any_violation = reports.iter().any(|r| !r.violations.is_empty());
    match args.format {
        CheckFormat::Text => write_reports_text(stdout, &reports, &errors, args.report)?,
        CheckFormat::Json => write_reports_json(stdout, &reports, &errors, args.report)?,
    }
    Ok(any_violation || !errors.is_empty())
}
406
/// Per-file outcome of a check run: suppression-filtered violations plus
/// (only when `--report` is set) per-rule evaluation statistics.
struct FileReport {
    // File path as passed in (project-relative paths come from check_project).
    path: PathBuf,
    // Violations remaining after suppression comments are applied.
    violations: Vec<check::Violation>,
    // Empty unless the report flag was requested.
    rule_reports: Vec<check::RuleReport>,
}
412
/// A file that could not be processed, with its already-rendered error text.
struct FileError {
    path: PathBuf,
    error: String,
}
417
418fn check_one_file(
419 path: &Path,
420 cfg: &check::Config,
421 report: bool,
422) -> anyhow::Result<Option<FileReport>> {
423 let Ok(lang) = path_to_lang(path) else {
424 return Ok(None);
425 };
426 let compiled = check::compile_rules(cfg, lang, DEFAULT_SCHEME)?;
427 check_one_compiled(path, None, lang, &compiled, report).map(Some)
428}
429
430fn check_one_compiled(
434 fs_path: &Path,
435 moniker_anchor: Option<&Path>,
436 lang: code_moniker_core::lang::Lang,
437 compiled: &check::CompiledRules,
438 report: bool,
439) -> anyhow::Result<FileReport> {
440 let source = std::fs::read_to_string(fs_path)
441 .map_err(|e| anyhow::anyhow!("cannot read {}: {e}", fs_path.display()))?;
442 let graph = extract::extract(lang, &source, moniker_anchor.unwrap_or(fs_path));
443 let raw = check::evaluate_compiled(&graph, &source, lang, DEFAULT_SCHEME, compiled);
444 let violations = check::apply_suppressions(&graph, &source, raw);
445 let rule_reports = if report {
446 let mut rule_reports =
447 check::rule_report_compiled(&graph, &source, lang, DEFAULT_SCHEME, compiled);
448 align_report_violations_with_suppressions(&mut rule_reports, &violations);
449 rule_reports
450 } else {
451 Vec::new()
452 };
453 Ok(FileReport {
454 path: fs_path.to_path_buf(),
455 violations,
456 rule_reports,
457 })
458}
459
460fn check_project(
464 root: &Path,
465 cfg: &check::Config,
466 report: bool,
467) -> anyhow::Result<(Vec<FileReport>, Vec<FileError>)> {
468 use rayon::prelude::*;
469 use std::collections::HashMap;
470 let paths = walk::walk_lang_files(root);
471 let mut compiled: HashMap<code_moniker_core::lang::Lang, check::CompiledRules> = HashMap::new();
472 for f in &paths {
473 if compiled.contains_key(&f.lang) {
474 continue;
475 }
476 compiled.insert(f.lang, check::compile_rules(cfg, f.lang, DEFAULT_SCHEME)?);
477 }
478 let outcomes: Vec<Result<FileReport, FileError>> = paths
479 .par_iter()
480 .map(|f| {
481 let rules = &compiled[&f.lang];
482 let rel = f.path.strip_prefix(root).unwrap_or(&f.path);
483 check_one_compiled(&f.path, Some(rel), f.lang, rules, report).map_err(|e| FileError {
484 path: f.path.clone(),
485 error: format!("{e:#}"),
486 })
487 })
488 .collect();
489 let mut reports = Vec::new();
490 let mut errors = Vec::new();
491 for o in outcomes {
492 match o {
493 Ok(r) => reports.push(r),
494 Err(e) => errors.push(e),
495 }
496 }
497 reports.sort_by(|a, b| a.path.cmp(&b.path));
498 errors.sort_by(|a, b| a.path.cmp(&b.path));
499 Ok((reports, errors))
500}
501
502fn align_report_violations_with_suppressions(
503 rule_reports: &mut [check::RuleReport],
504 violations: &[check::Violation],
505) {
506 use std::collections::HashMap;
507 let mut counts: HashMap<&str, usize> = HashMap::new();
508 for v in violations {
509 *counts.entry(v.rule_id.as_str()).or_insert(0) += 1;
510 }
511 for report in rule_reports {
512 report.violations = counts.get(report.rule_id.as_str()).copied().unwrap_or(0);
513 }
514}
515
516fn write_reports_text<W: Write>(
520 w: &mut W,
521 reports: &[FileReport],
522 errors: &[FileError],
523 include_rule_report: bool,
524) -> std::io::Result<()> {
525 let mut total = 0usize;
526 let mut files_with = 0usize;
527 for r in reports {
528 if r.violations.is_empty() {
529 continue;
530 }
531 files_with += 1;
532 total += r.violations.len();
533 for v in &r.violations {
534 writeln!(
535 w,
536 "{}:L{}-L{} [{}] {}",
537 r.path.display(),
538 v.lines.0,
539 v.lines.1,
540 v.rule_id,
541 v.message
542 )?;
543 if let Some(explanation) = &v.explanation {
544 for line in explanation.trim().lines() {
545 writeln!(w, " → {line}")?;
546 }
547 }
548 }
549 }
550 let single_clean = reports.len() == 1 && files_with == 0 && errors.is_empty();
551 if !single_clean {
552 write!(
553 w,
554 "\n{total} violation(s) across {files_with} file(s) ({} scanned",
555 reports.len()
556 )?;
557 if !errors.is_empty() {
558 write!(w, ", {} file(s) errored", errors.len())?;
559 }
560 writeln!(w, ").")?;
561 }
562 if include_rule_report {
563 write_rule_report_text(w, reports)?;
564 }
565 Ok(())
566}
567
568fn write_rule_report_text<W: Write>(w: &mut W, reports: &[FileReport]) -> std::io::Result<()> {
569 let rule_reports = aggregate_rule_reports(reports);
570 if rule_reports.is_empty() {
571 return Ok(());
572 }
573 writeln!(w, "\nRule report:")?;
574 for r in rule_reports {
575 write!(
576 w,
577 "- {}: domain={}, evaluated={}, matches={}, violations={}",
578 r.rule_id, r.domain, r.evaluated, r.matches, r.violations
579 )?;
580 if let Some(n) = r.antecedent_matches {
581 write!(w, ", antecedent_matches={n}")?;
582 }
583 if let Some(warning) = r.warning {
584 write!(w, " warning: {warning}")?;
585 }
586 writeln!(w)?;
587 }
588 Ok(())
589}
590
591fn aggregate_rule_reports(reports: &[FileReport]) -> Vec<check::RuleReport> {
592 use std::collections::BTreeMap;
593 let mut by_rule: BTreeMap<String, check::RuleReport> = BTreeMap::new();
594 for report in reports {
595 for item in &report.rule_reports {
596 by_rule
597 .entry(item.rule_id.clone())
598 .and_modify(|acc| {
599 acc.evaluated += item.evaluated;
600 acc.matches += item.matches;
601 acc.violations += item.violations;
602 if let Some(n) = item.antecedent_matches {
603 acc.antecedent_matches = Some(acc.antecedent_matches.unwrap_or(0) + n);
604 }
605 })
606 .or_insert_with(|| item.clone());
607 }
608 }
609 let mut out: Vec<_> = by_rule.into_values().collect();
610 for r in &mut out {
611 if r.evaluated > 0 && r.antecedent_matches == Some(0) {
612 r.warning = Some("antecedent never matched".to_string());
613 } else {
614 r.warning = None;
615 }
616 }
617 out
618}
619
620fn write_reports_json<W: Write>(
621 w: &mut W,
622 reports: &[FileReport],
623 errors: &[FileError],
624 include_rule_report: bool,
625) -> anyhow::Result<()> {
626 #[derive(serde::Serialize)]
627 struct FileEntry<'a> {
628 file: String,
629 violations: &'a [check::Violation],
630 }
631 #[derive(serde::Serialize)]
632 struct ErrorEntry<'a> {
633 file: String,
634 error: &'a str,
635 }
636 #[derive(serde::Serialize)]
637 struct Summary {
638 files_scanned: usize,
639 files_with_violations: usize,
640 total_violations: usize,
641 files_with_errors: usize,
642 }
643 #[derive(serde::Serialize)]
644 struct Out<'a> {
645 summary: Summary,
646 files: Vec<FileEntry<'a>>,
647 #[serde(skip_serializing_if = "Vec::is_empty")]
648 errors: Vec<ErrorEntry<'a>>,
649 #[serde(skip_serializing_if = "Vec::is_empty")]
650 rule_report: Vec<check::RuleReport>,
651 }
652 let files: Vec<FileEntry> = reports
653 .iter()
654 .map(|r| FileEntry {
655 file: r.path.display().to_string(),
656 violations: &r.violations,
657 })
658 .collect();
659 let total = files.iter().map(|f| f.violations.len()).sum();
660 let files_with = files.iter().filter(|f| !f.violations.is_empty()).count();
661 let err_entries: Vec<ErrorEntry> = errors
662 .iter()
663 .map(|e| ErrorEntry {
664 file: e.path.display().to_string(),
665 error: &e.error,
666 })
667 .collect();
668 let out = Out {
669 summary: Summary {
670 files_scanned: files.len(),
671 files_with_violations: files_with,
672 total_violations: total,
673 files_with_errors: err_entries.len(),
674 },
675 files,
676 errors: err_entries,
677 rule_report: if include_rule_report {
678 aggregate_rule_reports(reports)
679 } else {
680 Vec::new()
681 },
682 };
683 serde_json::to_writer_pretty(&mut *w, &out)?;
684 w.write_all(b"\n")?;
685 Ok(())
686}
687
#[cfg(test)]
mod tests {
    use super::*;

    /// `std::process::ExitCode` implements `Debug` but not `PartialEq`, so
    /// `assert_eq!` on `ExitCode` values does not compile; compare the
    /// `Debug` renderings instead to pin the Exit → code mapping.
    fn code_repr(e: Exit) -> String {
        format!("{:?}", ExitCode::from(e))
    }

    #[test]
    fn exit_codes_are_stable() {
        assert_eq!(code_repr(Exit::Match), format!("{:?}", ExitCode::SUCCESS));
        assert_eq!(code_repr(Exit::NoMatch), format!("{:?}", ExitCode::from(1u8)));
        assert_eq!(code_repr(Exit::UsageError), format!("{:?}", ExitCode::from(2u8)));
    }

    #[test]
    fn shape_description_exists_for_every_canonical_shape() {
        for shape in code_moniker_core::core::shape::Shape::ALL {
            assert!(
                !shape_description(*shape).is_empty(),
                "missing description for {shape:?}"
            );
        }
    }
}