Skip to main content

code_moniker_cli/
lib.rs

//! Standalone CLI surface. See `docs/cli-extract.md` (per-file probe)
//! and `docs/cli-check.md` (project linter).
3
4pub mod args;
5pub mod cache;
6pub mod check;
7pub mod dir;
8pub mod extract;
9pub mod format;
10pub mod lang;
11pub mod lines;
12pub mod predicate;
13pub mod walk;
14
15use std::io::Write;
16use std::path::{Path, PathBuf};
17use std::process::ExitCode;
18
19pub use args::{Args, CheckArgs, CheckFormat, Cli, Command, OutputFormat, OutputMode};
20pub use lang::{LangError, path_to_lang};
21pub use predicate::{MatchSet, Predicate};
22
/// Moniker URI scheme prefix. Extract mode falls back to it when no
/// `--scheme` value is supplied; check mode always uses it.
pub(crate) const DEFAULT_SCHEME: &str = "code+moniker://";
24
25pub(crate) fn unknown_kinds_error(
26	unknown: &[String],
27	langs: &[code_moniker_core::lang::Lang],
28	known: &std::collections::BTreeSet<&'static str>,
29) -> anyhow::Error {
30	let lang_tags: Vec<&str> = langs.iter().map(|l| l.tag()).collect();
31	let known_list: Vec<&str> = known.iter().copied().collect();
32	anyhow::anyhow!(
33		"unknown --kind {} (langs in scope: {}; known kinds: {})",
34		unknown.join(", "),
35		lang_tags.join(", "),
36		known_list.join(", "),
37	)
38}
39
40pub(crate) fn render_uri(
41	m: &code_moniker_core::core::moniker::Moniker,
42	cfg: &code_moniker_core::core::uri::UriConfig<'_>,
43) -> String {
44	code_moniker_core::core::uri::to_uri(m, cfg)
45		.unwrap_or_else(|_| format!("<non-utf8:{}b>", m.as_bytes().len()))
46}
47
/// Outcome of a CLI run, convertible into the process [`ExitCode`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Exit {
	/// Maps to `ExitCode::SUCCESS`. Extract mode: at least one def or ref
	/// matched. Check mode: no violations and no per-file errors.
	Match,
	/// Maps to exit code 1. Extract mode: nothing matched. Check mode: at
	/// least one violation or per-file error.
	NoMatch,
	/// Maps to exit code 2. The run itself failed; the error has been
	/// written to stderr.
	UsageError,
}

impl From<Exit> for ExitCode {
	/// Converts the outcome into the numeric code handed back to the shell.
	fn from(e: Exit) -> Self {
		match e {
			Exit::Match => ExitCode::SUCCESS,
			Exit::NoMatch => ExitCode::from(1),
			Exit::UsageError => ExitCode::from(2),
		}
	}
}
64
65pub fn run<W1: Write, W2: Write>(cli: &Cli, stdout: &mut W1, stderr: &mut W2) -> Exit {
66	match &cli.command {
67		Some(Command::Check(args)) => run_check(args, stdout, stderr),
68		None => run_extract(&cli.extract, stdout, stderr),
69	}
70}
71
72fn run_extract<W1: Write, W2: Write>(args: &Args, stdout: &mut W1, stderr: &mut W2) -> Exit {
73	match extract_inner(args, stdout) {
74		Ok(any) => {
75			if any {
76				Exit::Match
77			} else {
78				Exit::NoMatch
79			}
80		}
81		Err(e) => {
82			let _ = writeln!(stderr, "code-moniker: {e:#}");
83			Exit::UsageError
84		}
85	}
86}
87
88fn extract_inner<W: Write>(args: &Args, stdout: &mut W) -> anyhow::Result<bool> {
89	let file = args
90		.file
91		.as_deref()
92		.ok_or_else(|| anyhow::anyhow!("missing FILE argument; run `code-moniker --help`"))?;
93	let path: &Path = file;
94	let scheme = args.scheme.as_deref().unwrap_or(DEFAULT_SCHEME).to_string();
95	let meta = std::fs::metadata(path)
96		.map_err(|e| anyhow::anyhow!("cannot stat {}: {e}", path.display()))?;
97	if meta.is_dir() {
98		return dir::run(args, stdout, path, &scheme);
99	}
100	let lang = path_to_lang(path)?;
101	let predicates = args.compiled_predicates(&scheme)?;
102	let known = predicate::known_kinds(std::iter::once(&lang));
103	let unknown = predicate::unknown_kinds(&args.kind, &known);
104	if !unknown.is_empty() {
105		return Err(unknown_kinds_error(&unknown, &[lang], &known));
106	}
107	let (graph, extracted_source) = cache::load_or_extract(path, path, lang, args.cache.as_deref())
108		.ok_or_else(|| anyhow::anyhow!("cannot read {}", path.display()))?;
109	let source = match extracted_source {
110		Some(s) => s,
111		None => std::fs::read_to_string(path)
112			.map_err(|e| anyhow::anyhow!("cannot read {}: {e}", path.display()))?,
113	};
114	let matches = predicate::filter(&graph, &predicates, &args.kind);
115	let any = !matches.defs.is_empty() || !matches.refs.is_empty();
116	match args.mode() {
117		OutputMode::Default => match args.format {
118			OutputFormat::Tsv => format::write_tsv(stdout, &matches, &source, args, &scheme)?,
119			OutputFormat::Json => {
120				format::write_json(stdout, &matches, &source, args, lang, path, &scheme)?
121			}
122		},
123		OutputMode::Count => {
124			let n = matches.defs.len() + matches.refs.len();
125			writeln!(stdout, "{n}")?;
126		}
127		OutputMode::Quiet => {}
128	}
129	Ok(any)
130}
131
132fn run_check<W1: Write, W2: Write>(args: &CheckArgs, stdout: &mut W1, stderr: &mut W2) -> Exit {
133	match check_inner(args, stdout, stderr) {
134		Ok(any_violation_or_error) => {
135			if any_violation_or_error {
136				Exit::NoMatch
137			} else {
138				Exit::Match
139			}
140		}
141		Err(e) => {
142			let _ = writeln!(stderr, "code-moniker: {e:#}");
143			Exit::UsageError
144		}
145	}
146}
147
148fn check_inner<W: Write, E: Write>(
149	args: &CheckArgs,
150	stdout: &mut W,
151	stderr: &mut E,
152) -> anyhow::Result<bool> {
153	let path: &Path = &args.file;
154	let mut cfg = check::load_with_overrides(Some(&args.rules))?;
155	if let Some(name) = &args.profile {
156		cfg.apply_profile(name)?;
157	}
158	let meta = std::fs::metadata(path)
159		.map_err(|e| anyhow::anyhow!("cannot stat {}: {e}", path.display()))?;
160	let (reports, errors) = if meta.is_dir() {
161		check_project(path, &cfg)?
162	} else {
163		match check_one_file(path, &cfg)? {
164			Some(report) => (vec![report], Vec::new()),
165			None => return Ok(false),
166		}
167	};
168	for e in &errors {
169		let _ = writeln!(
170			stderr,
171			"code-moniker: error reading {}: {}",
172			e.path.display(),
173			e.error
174		);
175	}
176	let any_violation = reports.iter().any(|r| !r.violations.is_empty());
177	match args.format {
178		CheckFormat::Text => write_reports_text(stdout, &reports, &errors)?,
179		CheckFormat::Json => write_reports_json(stdout, &reports, &errors)?,
180	}
181	Ok(any_violation || !errors.is_empty())
182}
183
184struct FileReport {
185	path: PathBuf,
186	violations: Vec<check::Violation>,
187}
188
/// A file that could not be checked during a project scan.
struct FileError {
	/// Filesystem path of the file that failed.
	path: PathBuf,
	/// The error, already rendered to text.
	error: String,
}
193
194fn check_one_file(path: &Path, cfg: &check::Config) -> anyhow::Result<Option<FileReport>> {
195	let Ok(lang) = path_to_lang(path) else {
196		return Ok(None);
197	};
198	let compiled = check::compile_rules(cfg, lang, DEFAULT_SCHEME)?;
199	check_one_compiled(path, None, lang, &compiled).map(Some)
200}
201
202/// `moniker_anchor` overrides the path passed to the extractor — used by
203/// project mode to anchor each file's moniker on its path relative to the
204/// scan root. `None` means "same as `fs_path`" (single-file mode).
205fn check_one_compiled(
206	fs_path: &Path,
207	moniker_anchor: Option<&Path>,
208	lang: code_moniker_core::lang::Lang,
209	compiled: &check::CompiledRules,
210) -> anyhow::Result<FileReport> {
211	let source = std::fs::read_to_string(fs_path)
212		.map_err(|e| anyhow::anyhow!("cannot read {}: {e}", fs_path.display()))?;
213	let graph = extract::extract(lang, &source, moniker_anchor.unwrap_or(fs_path));
214	let raw = check::evaluate_compiled(&graph, &source, lang, DEFAULT_SCHEME, compiled);
215	let violations = check::apply_suppressions(&graph, &source, raw);
216	Ok(FileReport {
217		path: fs_path.to_path_buf(),
218		violations,
219	})
220}
221
222/// Project-mode scan. Per-file I/O errors are accumulated in `Vec<FileError>`
223/// rather than aborting the scan. Rules are compiled once per language and
224/// shared across the parallel pool.
225fn check_project(
226	root: &Path,
227	cfg: &check::Config,
228) -> anyhow::Result<(Vec<FileReport>, Vec<FileError>)> {
229	use rayon::prelude::*;
230	use std::collections::HashMap;
231	let paths = walk::walk_lang_files(root);
232	let mut compiled: HashMap<code_moniker_core::lang::Lang, check::CompiledRules> = HashMap::new();
233	for f in &paths {
234		if compiled.contains_key(&f.lang) {
235			continue;
236		}
237		compiled.insert(f.lang, check::compile_rules(cfg, f.lang, DEFAULT_SCHEME)?);
238	}
239	let outcomes: Vec<Result<FileReport, FileError>> = paths
240		.par_iter()
241		.map(|f| {
242			let rules = &compiled[&f.lang];
243			let rel = f.path.strip_prefix(root).unwrap_or(&f.path);
244			check_one_compiled(&f.path, Some(rel), f.lang, rules).map_err(|e| FileError {
245				path: f.path.clone(),
246				error: format!("{e:#}"),
247			})
248		})
249		.collect();
250	let mut reports = Vec::new();
251	let mut errors = Vec::new();
252	for o in outcomes {
253		match o {
254			Ok(r) => reports.push(r),
255			Err(e) => errors.push(e),
256		}
257	}
258	reports.sort_by(|a, b| a.path.cmp(&b.path));
259	errors.sort_by(|a, b| a.path.cmp(&b.path));
260	Ok((reports, errors))
261}
262
263/// Single-file clean runs (one report, zero violations, zero errors) skip the
264/// trailing summary so per-edit PostToolUse hooks stay silent. Every other
265/// shape emits the `N violation(s) across M file(s) (K scanned)` footer.
266fn write_reports_text<W: Write>(
267	w: &mut W,
268	reports: &[FileReport],
269	errors: &[FileError],
270) -> std::io::Result<()> {
271	let mut total = 0usize;
272	let mut files_with = 0usize;
273	for r in reports {
274		if r.violations.is_empty() {
275			continue;
276		}
277		files_with += 1;
278		total += r.violations.len();
279		for v in &r.violations {
280			writeln!(
281				w,
282				"{}:L{}-L{} [{}] {}",
283				r.path.display(),
284				v.lines.0,
285				v.lines.1,
286				v.rule_id,
287				v.message
288			)?;
289			if let Some(explanation) = &v.explanation {
290				for line in explanation.trim().lines() {
291					writeln!(w, "  → {line}")?;
292				}
293			}
294		}
295	}
296	let single_clean = reports.len() == 1 && files_with == 0 && errors.is_empty();
297	if !single_clean {
298		write!(
299			w,
300			"\n{total} violation(s) across {files_with} file(s) ({} scanned",
301			reports.len()
302		)?;
303		if !errors.is_empty() {
304			write!(w, ", {} file(s) errored", errors.len())?;
305		}
306		writeln!(w, ").")?;
307	}
308	Ok(())
309}
310
311fn write_reports_json<W: Write>(
312	w: &mut W,
313	reports: &[FileReport],
314	errors: &[FileError],
315) -> anyhow::Result<()> {
316	#[derive(serde::Serialize)]
317	struct FileEntry<'a> {
318		file: String,
319		violations: &'a [check::Violation],
320	}
321	#[derive(serde::Serialize)]
322	struct ErrorEntry<'a> {
323		file: String,
324		error: &'a str,
325	}
326	#[derive(serde::Serialize)]
327	struct Summary {
328		files_scanned: usize,
329		files_with_violations: usize,
330		total_violations: usize,
331		files_with_errors: usize,
332	}
333	#[derive(serde::Serialize)]
334	struct Out<'a> {
335		summary: Summary,
336		files: Vec<FileEntry<'a>>,
337		#[serde(skip_serializing_if = "Vec::is_empty")]
338		errors: Vec<ErrorEntry<'a>>,
339	}
340	let files: Vec<FileEntry> = reports
341		.iter()
342		.map(|r| FileEntry {
343			file: r.path.display().to_string(),
344			violations: &r.violations,
345		})
346		.collect();
347	let total = files.iter().map(|f| f.violations.len()).sum();
348	let files_with = files.iter().filter(|f| !f.violations.is_empty()).count();
349	let err_entries: Vec<ErrorEntry> = errors
350		.iter()
351		.map(|e| ErrorEntry {
352			file: e.path.display().to_string(),
353			error: &e.error,
354		})
355		.collect();
356	let out = Out {
357		summary: Summary {
358			files_scanned: files.len(),
359			files_with_violations: files_with,
360			total_violations: total,
361			files_with_errors: err_entries.len(),
362		},
363		files,
364		errors: err_entries,
365	};
366	serde_json::to_writer_pretty(&mut *w, &out)?;
367	w.write_all(b"\n")?;
368	Ok(())
369}
370
#[cfg(test)]
mod tests {
	use super::*;

	// Pins the numeric exit codes each `Exit` variant converts to.
	#[test]
	fn exit_codes_are_stable() {
		assert_eq!(ExitCode::from(Exit::Match), ExitCode::SUCCESS);
		assert_eq!(ExitCode::from(Exit::NoMatch), ExitCode::from(1));
		assert_eq!(ExitCode::from(Exit::UsageError), ExitCode::from(2));
	}
}
381}