Skip to main content

sloc_config/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3
4use std::collections::BTreeMap;
5use std::fs;
6use std::path::{Path, PathBuf};
7
8use anyhow::{Context, Result};
9use clap::ValueEnum;
10use serde::{Deserialize, Serialize};
11
12#[derive(Debug, Clone, Copy, Serialize, Deserialize, ValueEnum, PartialEq, Eq, Default)]
13#[serde(rename_all = "snake_case")]
14pub enum MixedLinePolicy {
15    #[default]
16    CodeOnly,
17    CodeAndComment,
18    CommentOnly,
19    SeparateMixedCategory,
20}
21
22#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
23#[serde(rename_all = "snake_case")]
24pub enum BinaryFileBehavior {
25    #[default]
26    Skip,
27    Fail,
28}
29
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
31#[serde(rename_all = "snake_case")]
32pub enum FailureBehavior {
33    #[default]
34    WarnSkip,
35    Fail,
36}
37
38/// IEEE 1045-1992: how backslash line continuations are handled for physical SLOC counting.
39///
40/// Physical SLOC (the default) counts each physical line. Logical mode collapses a
41/// backslash-continued sequence into a single counted line, which is useful when measuring
42/// logical statements (e.g., multi-line C preprocessor macros).
43#[derive(Debug, Clone, Copy, Serialize, Deserialize, ValueEnum, PartialEq, Eq, Default)]
44#[serde(rename_all = "snake_case")]
45pub enum ContinuationLinePolicy {
46    #[default]
47    /// Count each physical line separately — the IEEE 1045-1992 default for physical SLOC.
48    EachPhysicalLine,
49    /// Collapse backslash-continued physical lines into a single logical line.
50    CollapseToLogical,
51}
52
53/// IEEE 1045-1992: how blank lines that fall inside a block comment are classified.
54///
55/// The standard aligns with counting them as comment lines (they are part of the comment
56/// body). The `CountAsBlank` variant preserves the legacy behaviour if required.
57#[derive(Debug, Clone, Copy, Serialize, Deserialize, ValueEnum, PartialEq, Eq, Default)]
58#[serde(rename_all = "snake_case")]
59pub enum BlankInBlockCommentPolicy {
60    #[default]
61    /// Blank lines inside /* */ (or equivalent) blocks count as comment lines — IEEE aligned.
62    CountAsComment,
63    /// Blank lines inside block comments count as blank lines.
64    CountAsBlank,
65}
66
67#[allow(clippy::struct_excessive_bools)]
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct DiscoveryConfig {
70    pub root_paths: Vec<PathBuf>,
71    pub include_globs: Vec<String>,
72    pub exclude_globs: Vec<String>,
73    pub excluded_directories: Vec<String>,
74    pub honor_ignore_files: bool,
75    pub ignore_hidden_files: bool,
76    pub follow_symlinks: bool,
77    pub max_file_size_bytes: u64,
78    pub parallelism_limit: Option<usize>,
79    /// When true, detect .gitmodules and produce a per-submodule summary alongside the overall run.
80    #[serde(default = "default_true")]
81    pub submodule_breakdown: bool,
82    #[serde(default)]
83    pub allowed_scan_roots: Vec<PathBuf>,
84}
85
86impl Default for DiscoveryConfig {
87    fn default() -> Self {
88        Self {
89            root_paths: Vec::new(),
90            include_globs: Vec::new(),
91            exclude_globs: Vec::new(),
92            excluded_directories: vec![".git".into(), "node_modules".into(), "target".into()],
93            honor_ignore_files: true,
94            ignore_hidden_files: true,
95            follow_symlinks: false,
96            max_file_size_bytes: 2 * 1024 * 1024,
97            parallelism_limit: None,
98            submodule_breakdown: true,
99            allowed_scan_roots: Vec::new(),
100        }
101    }
102}
103
104#[allow(clippy::struct_excessive_bools)]
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct AnalysisConfig {
107    pub enabled_languages: Vec<String>,
108    pub extension_overrides: BTreeMap<String, String>,
109    pub shebang_detection: bool,
110    pub mixed_line_policy: MixedLinePolicy,
111    pub python_docstrings_as_comments: bool,
112    pub generated_file_detection: bool,
113    pub minified_file_detection: bool,
114    pub vendor_directory_detection: bool,
115    pub include_lockfiles: bool,
116    pub binary_file_behavior: BinaryFileBehavior,
117    pub decode_failure_behavior: FailureBehavior,
118    pub parse_failure_behavior: FailureBehavior,
119    /// IEEE 1045-1992: how backslash line continuations (C macros, shell, Makefile) are counted.
120    #[serde(default)]
121    pub continuation_line_policy: ContinuationLinePolicy,
122    /// IEEE 1045-1992: whether blank lines inside block comments count as comment lines.
123    #[serde(default)]
124    pub blank_in_block_comment_policy: BlankInBlockCommentPolicy,
125    /// IEEE 1045-1992 §4.2: when false, preprocessor/compiler directives (#include, #define,
126    /// etc.) are excluded from code SLOC and tracked separately in `compiler_directive_lines`.
127    /// Applies to C, C++, and Objective-C. Default: true (directives count toward code SLOC).
128    #[serde(default = "default_true")]
129    pub count_compiler_directives: bool,
130    /// Optional SLOC budget thresholds. When set, `--fail-on-budget` exits non-zero if
131    /// any threshold is exceeded. Configured under `[analysis.budget]` in the TOML.
132    #[serde(default)]
133    pub budget: Option<BudgetConfig>,
134    /// Path to an LCOV `.info` file produced by lcov, gcov, cargo-llvm-cov, etc.
135    /// When set, oxide-sloc attaches per-file line/function coverage to each `FileRecord`.
136    /// Can also be set via the `SLOC_COVERAGE_FILE` environment variable.
137    #[serde(default, skip_serializing_if = "Option::is_none")]
138    pub coverage_file: Option<PathBuf>,
139}
140
/// Serde default provider for boolean fields that should be `true` when their key is
/// absent from the input.
///
/// It is referenced by name from `#[serde(default = "default_true")]` attributes, so the
/// function name must not change.
const fn default_true() -> bool {
    true
}
144
145/// Validates that `s` is a CSS hex colour: `#RGB` or `#RRGGBB`.
146///
147/// # Errors
148/// Returns an error if `s` does not start with `#` or is not a 3- or 6-digit hex colour.
149pub fn validate_hex_color(s: &str) -> Result<()> {
150    let hex = s
151        .strip_prefix('#')
152        .ok_or_else(|| anyhow::anyhow!("must start with '#'"))?;
153    if !matches!(hex.len(), 3 | 6) || !hex.chars().all(|c| c.is_ascii_hexdigit()) {
154        anyhow::bail!("must be a 3- or 6-digit hex colour (e.g. #3b82f6)");
155    }
156    Ok(())
157}
158
159/// Per-language and total SLOC thresholds. Used with `--fail-on-budget` in CI.
160///
161/// Keys in `per_language` are case-insensitive language display names
162/// (e.g. `"rust"`, `"typescript"`). Zero means unlimited.
163#[derive(Debug, Clone, Default, Serialize, Deserialize)]
164pub struct BudgetConfig {
165    /// Maximum total code lines across all languages (0 = unlimited).
166    #[serde(default)]
167    pub total_max: u64,
168    /// Per-language code-line ceilings. Key is the language display name, lowercase.
169    #[serde(default)]
170    pub per_language: BTreeMap<String, u64>,
171}
172
173impl BudgetConfig {
174    /// Returns `true` if no limits are configured.
175    #[must_use]
176    pub fn is_empty(&self) -> bool {
177        self.total_max == 0 && self.per_language.is_empty()
178    }
179
180    /// # Errors
181    ///
182    /// Returns an error if any budget threshold is zero (which would always fail).
183    pub fn validate(&self) -> Result<()> {
184        for (lang, &limit) in &self.per_language {
185            if limit == 0 {
186                anyhow::bail!("per_language[\"{lang}\"] limit must be > 0");
187            }
188        }
189        Ok(())
190    }
191}
192
193impl Default for AnalysisConfig {
194    fn default() -> Self {
195        Self {
196            enabled_languages: Vec::new(),
197            extension_overrides: BTreeMap::new(),
198            shebang_detection: true,
199            mixed_line_policy: MixedLinePolicy::CodeOnly,
200            python_docstrings_as_comments: true,
201            generated_file_detection: true,
202            minified_file_detection: true,
203            vendor_directory_detection: true,
204            include_lockfiles: false,
205            binary_file_behavior: BinaryFileBehavior::Skip,
206            decode_failure_behavior: FailureBehavior::WarnSkip,
207            parse_failure_behavior: FailureBehavior::WarnSkip,
208            continuation_line_policy: ContinuationLinePolicy::EachPhysicalLine,
209            blank_in_block_comment_policy: BlankInBlockCommentPolicy::CountAsComment,
210            count_compiler_directives: true,
211            budget: None,
212            coverage_file: None,
213        }
214    }
215}
216
217#[derive(Debug, Clone, Serialize, Deserialize)]
218pub struct ReportingConfig {
219    pub report_title: String,
220    pub output_formats: Vec<String>,
221    pub include_summary_charts: bool,
222    pub include_skipped_files_section: bool,
223    pub include_warnings_section: bool,
224    pub theme: String,
225    /// Optional company or team name shown in the report header instead of "`OxideSLOC`".
226    #[serde(default)]
227    pub company_name: Option<String>,
228    /// Path to a PNG/SVG logo file to embed in the report header.
229    /// If unset, the default `OxideSLOC` logo is used.
230    #[serde(default)]
231    pub logo_path: Option<std::path::PathBuf>,
232    /// CSS hex colour (e.g. `#3b82f6`) used as the primary accent throughout the report.
233    /// Must start with `#` and be a valid 3- or 6-digit hex colour.
234    #[serde(default)]
235    pub accent_color: Option<String>,
236    /// Text printed in a header and footer strip on every page of the HTML/PDF report.
237    /// Use for company name, project identifier, or scanner identification.
238    #[serde(default)]
239    pub report_header_footer: Option<String>,
240}
241
242impl Default for ReportingConfig {
243    fn default() -> Self {
244        Self {
245            report_title: "OxideSLOC Report".into(),
246            output_formats: vec!["cli".into(), "json".into(), "html".into()],
247            include_summary_charts: true,
248            include_skipped_files_section: true,
249            include_warnings_section: true,
250            theme: "auto".into(),
251            company_name: None,
252            logo_path: None,
253            accent_color: None,
254            report_header_footer: None,
255        }
256    }
257}
258
259#[derive(Debug, Clone, Serialize, Deserialize)]
260pub struct WebConfig {
261    pub bind_address: String,
262    /// When true the server binds to 0.0.0.0 by default, suppresses browser
263    /// auto-open, and disables desktop-only routes (pick-directory, open-path).
264    #[serde(default)]
265    pub server_mode: bool,
266}
267
268impl Default for WebConfig {
269    fn default() -> Self {
270        Self {
271            bind_address: "127.0.0.1:4317".into(),
272            server_mode: false,
273        }
274    }
275}
276
277/// A named configuration profile.
278///
279/// All sub-config sections are optional; any present section *replaces* the
280/// corresponding base config section in full. Commonly used to represent
281/// different scanning contexts in the same repo
282/// (e.g. `[profile.frontend]`, `[profile.backend]`).
283#[derive(Debug, Clone, Default, Serialize, Deserialize)]
284pub struct ProfileConfig {
285    #[serde(default)]
286    pub discovery: Option<DiscoveryConfig>,
287    #[serde(default)]
288    pub analysis: Option<AnalysisConfig>,
289    #[serde(default)]
290    pub reporting: Option<ReportingConfig>,
291}
292
293#[derive(Debug, Clone, Serialize, Deserialize, Default)]
294pub struct AppConfig {
295    pub discovery: DiscoveryConfig,
296    pub analysis: AnalysisConfig,
297    pub reporting: ReportingConfig,
298    pub web: WebConfig,
299    /// Named profiles that override base config sections when selected via `--profile`.
300    #[serde(default)]
301    pub profiles: BTreeMap<String, ProfileConfig>,
302}
303
304impl AppConfig {
305    /// Apply the named profile overrides on top of this config.
306    ///
307    /// # Errors
308    ///
309    /// Returns an error if no profile with that name exists or if the resulting
310    /// config fails validation.
311    pub fn apply_profile(&mut self, name: &str) -> Result<()> {
312        let profile = self
313            .profiles
314            .get(name)
315            .ok_or_else(|| anyhow::anyhow!("profile '{name}' not found in config"))?
316            .clone();
317        if let Some(d) = profile.discovery {
318            self.discovery = d;
319        }
320        if let Some(a) = profile.analysis {
321            self.analysis = a;
322        }
323        if let Some(r) = profile.reporting {
324            self.reporting = r;
325        }
326        self.validate()
327    }
328}
329
330impl AppConfig {
331    /// # Errors
332    ///
333    /// Returns an error if the file cannot be read, the TOML cannot be parsed, or the
334    /// resulting config fails validation.
335    pub fn load_from_file(path: &Path) -> Result<Self> {
336        let raw = fs::read_to_string(path)
337            .with_context(|| format!("failed to read config file {}", path.display()))?;
338        let config: Self = toml::from_str(&raw)
339            .with_context(|| format!("failed to parse TOML config {}", path.display()))?;
340        config.validate()?;
341        Ok(config)
342    }
343
344    /// # Errors
345    ///
346    /// Returns an error if any configuration field contains an invalid value.
347    pub fn validate(&self) -> Result<()> {
348        if self.discovery.max_file_size_bytes == 0 {
349            anyhow::bail!("discovery.max_file_size_bytes must be greater than zero");
350        }
351
352        if self.web.bind_address.trim().is_empty() {
353            anyhow::bail!("web.bind_address must not be empty");
354        }
355
356        if let Some(color) = &self.reporting.accent_color {
357            validate_hex_color(color)
358                .with_context(|| format!("reporting.accent_color is invalid: {color}"))?;
359        }
360
361        if let Some(budget) = &self.analysis.budget {
362            budget.validate().context("analysis.budget is invalid")?;
363        }
364
365        Ok(())
366    }
367}