big_code_analysis/lib.rs
1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(clippy::doc_markdown, clippy::enum_glob_use, clippy::wildcard_imports)]
8
//! big-code-analysis is a library to analyze and extract information
//! from source code written in many different programming languages.
11//!
12//! You can find the source code of this software on
13//! <a href="https://github.com/dekobon/big-code-analysis/" target="_blank">GitHub</a>,
14//! while issues and feature requests can be posted on the respective
15//! <a href="https://github.com/dekobon/big-code-analysis/issues/" target="_blank">GitHub Issue Tracker</a>.
16//!
17//! ## Quick start
18//!
19//! Most callers want the recommended entry points exposed in
20//! [`prelude`]:
21//!
22//! ```no_run
23//! use big_code_analysis::prelude::*;
24//!
25//! let source = b"fn main() {}";
26//! let space = analyze(
27//! Source::new(LANG::Rust, source),
28//! MetricsOptions::default(),
29//! ).expect("Rust source parses");
30//! println!("cognitive sum: {}", space.metrics.cognitive.cognitive_sum());
31//! ```
32//!
33//! ## Supported Languages
34//!
35//! Each grammar is gated behind a per-language Cargo feature; the
36//! default `all-languages` feature enables every grammar so the
37//! historical "every language compiled in" behaviour is preserved.
38//! Library consumers that only need a subset can opt out of the
39//! defaults — see [Per-language Cargo features][feat] in the book.
40//!
41//! - Bash (`bash`)
42//! - C/C++ (`cpp`, also exposes the internal `Ccomment` / `Preproc` helpers)
43//! - C# (`csharp`)
44//! - Elixir (`elixir`)
45//! - Go (`go`)
46//! - Groovy (`groovy`)
47//! - Java (`java`)
48//! - JavaScript (`javascript`)
49//! - JavaScript, Firefox-internal "MozJS" (`mozjs`)
50//! - Kotlin (`kotlin`)
51//! - Lua (`lua`)
52//! - Perl (`perl`)
53//! - PHP (`php`)
54//! - Python (`python`)
55//! - Ruby (`ruby`)
56//! - Rust (`rust`)
57//! - Tcl (`tcl`)
58//! - TSX (`typescript`)
59//! - TypeScript (`typescript`)
60//!
61//! [feat]: https://dekobon.github.io/big-code-analysis/library/cargo-features.html
62//!
63//! ## Supported Metrics
64//!
65//! - ABC: it measures the size of a source code based on
66//! assignments, branches, and conditions.
67//! - CC: it calculates the code complexity examining the control flow of a
68//! program. Both standard and modified flavours are exposed: the
69//! modified variant collapses all case/match arms inside a single
70//! switch/match/when/select into one decision point.
71//! - Cognitive Complexity: it measures how difficult it is
72//! to understand a unit of code.
73//! - SLOC: it counts the number of lines in a source file.
74//! - PLOC: it counts the number of physical lines (instructions)
75//! contained in a source file.
76//! - LLOC: it counts the number of logical lines (statements)
77//! contained in a source file.
78//! - CLOC: it counts the number of comments in a source file.
79//! - BLANK: it counts the number of blank lines in a source file.
//! - HALSTEAD: it is a suite that provides a range of measures,
//!   such as the effort required to maintain the analyzed code,
//!   the size in bits needed to store the program, the difficulty of
//!   understanding the code, an estimate of the number of bugs present
//!   in the codebase, and an estimate of the time needed to implement
//!   the software.
//! - MI: it is a suite that makes it possible to evaluate the
//!   maintainability of a piece of software.
87//! - NOM: it counts the number of functions and closures
88//! in a file/trait/class.
89//! - NEXITS: it counts the number of possible exit points
90//! from a method/function.
91//! - NARGS: it counts the number of arguments of a function/method.
92//! - NPA: it counts the number of public attributes of a class.
93//! - NPM: it counts the number of public methods of a class.
94//! - WMC: it is the sum of the complexities of all methods
95//! in a class.
96
// NOTE(review): no rationale is recorded for this allow. It presumably
// exists for all-caps public names such as `LANG` — confirm, then
// either document the reason here or drop the allow.
#![allow(clippy::upper_case_acronyms)]
98
99// Internal-only modules. Nothing is re-exported from these.
100mod c_langs_macros;
101mod c_macro;
102mod cfg_predicate;
103mod checker;
104mod getter;
105mod languages;
106mod macros;
107
108// `langs` hosts the `mk_langs!` macro expansion. Every name produced
109// there — `LANG`, the `action` / `get_function_spaces` dispatch
110// helpers, per-language `<Lang>Code` tags and `<Lang>Parser` aliases —
111// is enumerated explicitly in the curated re-exports below.
112mod langs;
113pub use crate::langs::{
114 BashCode, BashParser, CcommentCode, CcommentParser, CppCode, CppParser, CsharpCode,
115 CsharpParser, ElixirCode, ElixirParser, GoCode, GoParser, GroovyCode, GroovyParser, JavaCode,
116 JavaParser, JavascriptCode, JavascriptParser, KotlinCode, KotlinParser, LANG, LuaCode,
117 LuaParser, MozjsCode, MozjsParser, PerlCode, PerlParser, PhpCode, PhpParser, PreprocCode,
118 PreprocParser, PythonCode, PythonParser, RubyCode, RubyParser, RustCode, RustParser, TclCode,
119 TclParser, TsxCode, TsxParser, TypescriptCode, TypescriptParser, action, analyze_dispatch,
120 get_from_emacs_mode, get_from_ext, get_ops, metrics_from_tree,
121};
122// The path-positional `get_function_spaces*` shims are `#[deprecated]`
123// at their definition sites; re-exporting them at the crate root keeps
124// the previously-globbed surface intact, scoped with
125// `#[allow(deprecated)]` so the re-export itself does not warn.
126#[allow(deprecated)]
127pub use crate::langs::{get_function_spaces, get_function_spaces_with_options};
128
129// Internal crate-root re-exports. Hand-written per-language modules
130// (`src/getter.rs`, `src/checker.rs`, `src/alterator.rs`, the
131// per-language metric impls) use `use crate::*` to bring the
132// macro-generated `<Lang>Code` token enums and per-language helper
133// types into scope; the per-language token enums in
134// `src/languages/language_*.rs` are also reached through the crate
135// root. Re-exporting these as `pub(crate)` keeps internal compilation
136// working without widening the published surface.
137pub(crate) use crate::checker::*;
138pub(crate) use crate::languages::*;
139
140// Hand-written modules (`src/spaces.rs`, `src/output/dump_metrics.rs`,
141// the metric macros) refer to per-metric submodules by their short
142// crate-root path (`crate::abc`, `crate::cognitive`, ...). Re-export
143// them under those names without widening the public surface.
144pub(crate) use crate::metrics::{
145 abc, cognitive, cyclomatic, exit, halstead, loc, mi, nargs, nom, npa, npm, tokens, wmc,
146};
147
148// Module declarations. Each `pub use` line below names exactly the
149// items intended to be part of the public API surface; anything not
150// listed stays out of the crate root. Per issue #255, glob re-exports
151// (`pub use module::*`) are no longer used here because every newly
152// `pub`-marked helper in any sub-module would silently leak into the
153// published API.
154
155// --- Core analysis entry points and result types (spaces.rs) ---
156mod spaces;
157pub use crate::spaces::{
158 Ast, CodeMetrics, FuncSpace, Metrics, MetricsCfg, MetricsOptions, Source, SpaceKind, analyze,
159};
160// The path-positional `metrics` / `metrics_with_options` shims are
161// `#[deprecated]` at their definition site; re-export them so the
162// previously-globbed API surface keeps working, scoped with
163// `#[allow(deprecated)]` to avoid lint noise at this seam.
164// `metrics_inner` is consumed by feature-gated arms in `mk_action!`.
165// With `--no-default-features` and no language feature, every arm
166// compiles out and the re-export becomes nominally unused; the
167// language-features that ship in the default set keep the symbol
168// live in any normal build.
169#[allow(unused_imports)]
170pub(crate) use crate::spaces::metrics_inner;
171#[allow(deprecated)]
172pub use crate::spaces::{metrics, metrics_with_options};
173#[cfg(test)]
174pub(crate) use crate::tools::check_func_space;
175
176/// Per-metric implementations.
177///
178/// Each sub-module owns one metric — its `Stats` accumulator, the
179/// per-language trait implementations, and any small helpers used
180/// only by tests. Most callers will not need these directly; reach
181/// through [`CodeMetrics`] on a [`FuncSpace`] instead.
182pub mod metrics;
183
184// --- Errors ---
185mod error;
186pub use crate::error::MetricsError;
187
188// --- Metric selection ---
189mod metric_set;
190pub use crate::metric_set::{Metric, MetricSet, ParseMetricError};
191
192// --- Suppression markers ---
193mod suppression;
194pub use crate::suppression::{MetricKind, SuppressionPolicy, SuppressionScope};
195
196/// Output formatters: CSV, SARIF, Checkstyle, clang/MSVC warning
197/// lines, and AST/metric pretty-dumps used by `bca` and the offender
198/// reporters.
199///
200/// The most commonly used writers (`write_csv`, `write_sarif`,
201/// `write_checkstyle`, `write_clang_warning`, `write_msvc_warning`)
202/// and shared types (`OffenderRecord`, `Severity`, `TOOL_ID`,
203/// `CSV_HEADER`, `CSV_EXTENSION`) are also re-exported at the crate
204/// root.
205pub mod output;
206pub use crate::output::{
207 CSV_EXTENSION, CSV_HEADER, Dump, DumpCfg, OffenderRecord, Severity, TOOL_ID, dump_node,
208 dump_ops, dump_root, write_checkstyle, write_clang_warning, write_csv, write_msvc_warning,
209 write_sarif,
210};
211
212// --- AST plumbing (Node, Cursor) ---
213mod node;
214pub use crate::node::{Cursor, Node};
215
216// --- Language detection / I/O helpers ---
217mod tools;
218pub use crate::tools::{
219 get_language_for_file, guess_language, is_generated, read_file, read_file_with_eol, write_file,
220};
221
222// --- Source walker ---
223mod concurrent_files;
224pub use crate::concurrent_files::{ConcurrentErrors, ConcurrentRunner, FilesData};
225
226// --- Comment removal ---
227mod comment_rm;
228pub use crate::comment_rm::{CommentRm, CommentRmCfg, rm_comments};
229
230// --- Per-function metric callbacks (CLI surface) ---
231mod count;
232pub use crate::count::{Count, CountCfg, count};
233
234mod find;
235pub use crate::find::{Find, FindCfg, find};
236
237mod function;
238pub use crate::function::{Function, FunctionCfg, FunctionSpan, function};
239
240// --- AST dump ---
241mod ast;
242pub use crate::ast::{AstCallback, AstCfg, AstNode, AstPayload, AstResponse, Span};
243
244// --- Halstead operator/operand callback ---
245mod ops;
246pub use crate::ops::{Ops, OpsCfg, OpsCode, operands_and_operators};
247
248// --- Preprocessor handling (C/C++) ---
249mod preproc;
250pub use crate::preproc::{PreprocFile, PreprocResults, fix_includes, get_macros, preprocess};
251
252// --- Alterator trait (per-language AST simplification) ---
253mod alterator;
254pub use crate::alterator::Alterator;
255
256// --- Generic parser plumbing ---
257//
258// `Parser`, `ParserTrait`, `Filter`, `LanguageInfo`, and `Callback`
259// are part of the value-not-stable surface — they are required for
260// callers that want to feed pre-parsed trees through the metric
261// pipeline or implement a custom `Callback`, but they are
262// `#[doc(hidden)]` at their definition sites so they do not clutter
263// the rendered rustdoc. See STABILITY.md.
264mod parser;
265pub use crate::parser::{Filter, Parser};
266
267mod traits;
268pub(crate) use crate::traits::Search;
269pub use crate::traits::{Callback, LanguageInfo, ParserTrait};
270
271/// Re-export of the underlying `tree-sitter` crate.
272///
273/// Lets callers build a [`tree_sitter::Tree`] (via
274/// [`tree_sitter::Parser`]) against the exact grammar version this
275/// library is pinned to, and feed it back through
276/// [`Parser::from_tree`] / [`metrics_from_tree`] without taking a
277/// separate `tree-sitter` dependency that may drift out of pin.
278///
279/// This is part of the value-not-stable surface: the underlying
280/// pin may bump in any minor release (see `STABILITY.md`).
281pub use ::tree_sitter;
282
283/// Recommended entry points for the 90% case.
284///
285/// Star-import this module to get the curated set of types and
286/// functions most callers need:
287///
288/// ```no_run
289/// use big_code_analysis::prelude::*;
290///
291/// let source = b"fn main() {}";
292/// let space = analyze(
293/// Source::new(LANG::Rust, source),
294/// MetricsOptions::default(),
295/// ).expect("Rust source parses");
296/// # let _ = space;
297/// ```
298///
299/// Anything not exposed here can still be imported with its
300/// fully-qualified name from the crate root (`use
301/// big_code_analysis::Something;`). Items deliberately omitted from
302/// the prelude are either deprecated, doc-hidden, or unlikely to
303/// appear in typical caller code.
304pub mod prelude {
305 pub use crate::{
306 // Parse-once handle
307 Ast,
308 // Result types
309 CodeMetrics,
310 FuncSpace,
311 // Language enum
312 LANG,
313 // Metric selection
314 Metric,
315 // Errors and options
316 MetricsError,
317 MetricsOptions,
318 Source,
319 SpaceKind,
320 // Core entry points
321 analyze,
322 metrics_from_tree,
323 };
324}