pub mod findings;
pub mod util;
use std::collections::BTreeMap;
use std::fmt;
use serde::{Deserialize, Serialize};
use tokmd_types::{ScanStatus, ToolInfo};
pub use util::{
AnalysisLimits, empty_file_row, is_infra_lang, is_test_path, normalize_path, normalize_root,
now_ms, path_depth,
};
#[cfg(test)]
pub use tokmd_scan::{gini_coefficient, percentile, round_f64, safe_ratio};
pub const ANALYSIS_SCHEMA_VERSION: u32 = 9;
/// Top-level output of an analysis run.
///
/// Carries schema/tool metadata plus one optional section per analysis
/// feature; a section is `None` when that analysis was not requested or
/// could not be computed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisReceipt {
/// Version of this receipt's schema (see [`ANALYSIS_SCHEMA_VERSION`]).
pub schema_version: u32,
/// Wall-clock generation time, milliseconds since the Unix epoch.
pub generated_at_ms: u128,
pub tool: ToolInfo,
pub mode: String,
pub status: ScanStatus,
pub warnings: Vec<String>,
pub source: AnalysisSource,
pub args: AnalysisArgsMeta,
pub archetype: Option<Archetype>,
pub topics: Option<TopicClouds>,
pub entropy: Option<EntropyReport>,
pub predictive_churn: Option<PredictiveChurnReport>,
pub corporate_fingerprint: Option<CorporateFingerprint>,
pub license: Option<LicenseReport>,
pub derived: Option<DerivedReport>,
pub assets: Option<AssetReport>,
pub deps: Option<DependencyReport>,
pub git: Option<GitReport>,
pub imports: Option<ImportReport>,
pub dup: Option<DuplicateReport>,
pub complexity: Option<ComplexityReport>,
pub api_surface: Option<ApiSurfaceReport>,
pub effort: Option<EffortEstimateReport>,
pub fun: Option<FunReport>,
}
/// Provenance of the analyzed data: input paths, optional export/base
/// receipt files, and module-root configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisSource {
pub inputs: Vec<String>,
pub export_path: Option<String>,
pub base_receipt_path: Option<String>,
pub export_schema_version: Option<u32>,
pub export_generated_at_ms: Option<u128>,
pub base_signature: Option<String>,
pub module_roots: Vec<String>,
pub module_depth: usize,
// NOTE(review): child-handling mode stored as a free-form string; valid
// values are not visible in this file — confirm against the scan config.
pub children: String,
}
/// Echo of the arguments/limits the analysis was invoked with, so a receipt
/// is self-describing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisArgsMeta {
pub preset: String,
pub format: String,
pub window_tokens: Option<usize>,
pub git: Option<bool>,
pub max_files: Option<usize>,
pub max_bytes: Option<u64>,
pub max_commits: Option<usize>,
pub max_commit_files: Option<usize>,
pub max_file_bytes: Option<u64>,
pub import_granularity: String,
}
/// Detected project archetype plus the evidence strings that led to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Archetype {
pub kind: String,
pub evidence: Vec<String>,
}
/// Topic-term clouds, both per module and for the whole codebase.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicClouds {
pub per_module: BTreeMap<String, Vec<TopicTerm>>,
pub overall: Vec<TopicTerm>,
}
/// A single scored topic term with its term frequency (`tf`) and document
/// frequency (`df`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicTerm {
pub term: String,
pub score: f64,
pub tf: u32,
pub df: u32,
}
/// Files whose byte entropy looks unusual.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyReport {
pub suspects: Vec<EntropyFinding>,
}
/// Entropy measurement for one file, sampled over `sample_bytes` bytes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyFinding {
pub path: String,
pub module: String,
pub entropy_bits_per_byte: f32,
pub sample_bytes: u32,
pub class: EntropyClass,
}
/// Entropy classification bands (serialized as snake_case strings).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntropyClass {
Low,
Normal,
Suspicious,
High,
}
/// Per-module churn trend extrapolation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PredictiveChurnReport {
pub per_module: BTreeMap<String, ChurnTrend>,
}
/// Fitted churn trend for one module: line slope, fit quality (`r2`), the
/// most recent change delta, and a coarse classification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChurnTrend {
pub slope: f64,
pub r2: f64,
pub recent_change: i64,
pub classification: TrendClass,
}
/// Direction of a fitted trend (serialized as snake_case strings).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrendClass {
Rising,
Flat,
Falling,
}
/// Distribution of committer e-mail domains across the history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorporateFingerprint {
pub domains: Vec<DomainStat>,
}
/// One e-mail domain with its commit count and percentage share.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DomainStat {
pub domain: String,
pub commits: u32,
pub pct: f32,
}
/// License detection results plus the effective SPDX id, if one was chosen.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseReport {
pub findings: Vec<LicenseFinding>,
pub effective: Option<String>,
}
/// One detected license: SPDX id, detection confidence, and where it came from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseFinding {
pub spdx: String,
pub confidence: f32,
pub source_path: String,
pub source_kind: LicenseSourceKind,
}
/// Whether a license was found in package metadata or in license text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LicenseSourceKind {
Metadata,
Text,
}
/// Metrics derived from the raw scan counts (no repository access needed).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivedReport {
pub totals: DerivedTotals,
pub doc_density: RatioReport,
pub whitespace: RatioReport,
pub verbosity: RateReport,
pub max_file: MaxFileReport,
pub lang_purity: LangPurityReport,
pub nesting: NestingReport,
pub test_density: TestDensityReport,
pub boilerplate: BoilerplateReport,
pub polyglot: PolyglotReport,
pub distribution: DistributionReport,
pub histogram: Vec<HistogramBucket>,
pub top: TopOffenders,
pub tree: Option<String>,
pub reading_time: ReadingTimeReport,
pub context_window: Option<ContextWindowReport>,
pub cocomo: Option<CocomoReport>,
pub todo: Option<TodoReport>,
pub integrity: IntegrityReport,
}
/// Grand totals over all scanned files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivedTotals {
pub files: usize,
pub code: usize,
pub comments: usize,
pub blanks: usize,
pub lines: usize,
pub bytes: usize,
pub tokens: usize,
}
/// A ratio metric, overall plus broken down by language and by module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RatioReport {
pub total: RatioRow,
pub by_lang: Vec<RatioRow>,
pub by_module: Vec<RatioRow>,
}
/// One ratio sample: `numerator / denominator`, precomputed in `ratio`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RatioRow {
pub key: String,
pub numerator: usize,
pub denominator: usize,
pub ratio: f64,
}
/// A rate metric, overall plus broken down by language and by module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateReport {
pub total: RateRow,
pub by_lang: Vec<RateRow>,
pub by_module: Vec<RateRow>,
}
/// One rate sample: `numerator / denominator`, precomputed in `rate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateRow {
pub key: String,
pub numerator: usize,
pub denominator: usize,
pub rate: f64,
}
/// Largest file overall, plus the largest file per language and per module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaxFileReport {
pub overall: FileStatRow,
pub by_lang: Vec<MaxFileRow>,
pub by_module: Vec<MaxFileRow>,
}
/// The largest file for one grouping key (language or module).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaxFileRow {
pub key: String,
pub file: FileStatRow,
}
/// Per-file statistics row used throughout the derived report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileStatRow {
pub path: String,
pub module: String,
pub lang: String,
pub code: usize,
pub comments: usize,
pub blanks: usize,
pub lines: usize,
pub bytes: usize,
pub tokens: usize,
pub doc_pct: Option<f64>,
pub bytes_per_line: Option<f64>,
pub depth: usize,
}
/// How language-pure each module is.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangPurityReport {
pub rows: Vec<LangPurityRow>,
}
/// One module's language mix: number of languages and the dominant one.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangPurityRow {
pub module: String,
pub lang_count: usize,
pub dominant_lang: String,
pub dominant_lines: usize,
pub dominant_pct: f64,
}
/// Path-nesting depth statistics, overall and per module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NestingReport {
pub max: usize,
pub avg: f64,
pub by_module: Vec<NestingRow>,
}
/// Nesting stats for one grouping key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NestingRow {
pub key: String,
pub max: usize,
pub avg: f64,
}
/// Test vs. production code volume and the resulting ratio.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestDensityReport {
pub test_lines: usize,
pub prod_lines: usize,
pub test_files: usize,
pub prod_files: usize,
pub ratio: f64,
}
/// Infrastructure ("boilerplate") vs. logic code volume.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoilerplateReport {
pub infra_lines: usize,
pub logic_lines: usize,
pub ratio: f64,
pub infra_langs: Vec<String>,
}
/// How many languages the codebase uses and how dominant the top one is.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolyglotReport {
pub lang_count: usize,
pub entropy: f64,
pub dominant_lang: String,
pub dominant_lines: usize,
pub dominant_pct: f64,
}
/// Summary statistics of the file-size distribution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionReport {
pub count: usize,
pub min: usize,
pub max: usize,
pub mean: f64,
pub median: f64,
pub p90: f64,
pub p99: f64,
pub gini: f64,
}
/// One histogram bucket; `max` is `None` for the open-ended last bucket.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistogramBucket {
pub label: String,
pub min: usize,
pub max: Option<usize>,
pub files: usize,
pub pct: f64,
}
/// "Top offender" file lists by several size/quality criteria.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopOffenders {
pub largest_lines: Vec<FileStatRow>,
pub largest_tokens: Vec<FileStatRow>,
pub largest_bytes: Vec<FileStatRow>,
pub least_documented: Vec<FileStatRow>,
pub most_dense: Vec<FileStatRow>,
}
/// Estimated reading time derived from `basis_lines / lines_per_minute`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingTimeReport {
pub minutes: f64,
pub lines_per_minute: usize,
pub basis_lines: usize,
}
/// TODO/FIXME-style marker counts and their density per 1000 lines of code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoReport {
pub total: usize,
pub density_per_kloc: f64,
pub tags: Vec<TodoTagRow>,
}
/// Count for one marker tag (e.g. TODO, FIXME).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoTagRow {
pub tag: String,
pub count: usize,
}
/// Whether the codebase's tokens fit into a given LLM context window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextWindowReport {
pub window_tokens: usize,
pub total_tokens: usize,
pub pct: f64,
pub fits: bool,
}
/// Effort estimate produced by a COCOMO-style model, including the size
/// basis it was computed from, confidence, and the drivers that moved it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortEstimateReport {
pub model: EffortModel,
pub size_basis: EffortSizeBasis,
pub results: EffortResults,
pub confidence: EffortConfidence,
pub drivers: Vec<EffortDriver>,
pub assumptions: EffortAssumptions,
#[serde(skip_serializing_if = "Option::is_none")]
pub delta: Option<EffortDeltaReport>,
}
/// Line counts the effort model was fed, split into authored vs.
/// generated/vendored code, with KLOC conversions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortSizeBasis {
pub total_lines: usize,
pub authored_lines: usize,
pub generated_lines: usize,
pub vendored_lines: usize,
pub kloc_total: f64,
pub kloc_authored: f64,
pub generated_pct: f64,
pub vendored_pct: f64,
pub classification_confidence: EffortConfidenceLevel,
pub warnings: Vec<String>,
pub by_tag: Vec<EffortTagSizeRow>,
}
/// Size contribution of one classification tag.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortTagSizeRow {
pub tag: String,
pub lines: usize,
pub authored_lines: usize,
pub pct_of_total: f64,
}
/// Which estimation model produced the numbers (serialized kebab-case,
/// matching the `Display` impl below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum EffortModel {
Cocomo81Basic,
Cocomo2Early,
Ensemble,
}
impl fmt::Display for EffortModel {
    /// Writes the kebab-case name, mirroring the serde representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Cocomo81Basic => "cocomo81-basic",
            Self::Cocomo2Early => "cocomo2-early",
            Self::Ensemble => "ensemble",
        };
        f.write_str(name)
    }
}
/// Point and range estimates: person-months of effort, schedule in months,
/// and average staffing, each at low / p50 / p80.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortResults {
pub effort_pm_p50: f64,
pub schedule_months_p50: f64,
pub staff_p50: f64,
pub effort_pm_low: f64,
pub effort_pm_p80: f64,
pub schedule_months_low: f64,
pub schedule_months_p80: f64,
pub staff_low: f64,
pub staff_p80: f64,
}
/// Confidence in the estimate, with human-readable reasons.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortConfidence {
pub level: EffortConfidenceLevel,
pub reasons: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub data_coverage_pct: Option<f64>,
}
/// Three-level confidence scale (serialized snake_case, matching `Display`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortConfidenceLevel {
Low,
Medium,
High,
}
impl fmt::Display for EffortConfidenceLevel {
    /// Writes the snake_case name, mirroring the serde representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Low => "low",
            Self::Medium => "medium",
            Self::High => "high",
        };
        f.write_str(name)
    }
}
/// One factor that raised or lowered the estimate, with its weight and the
/// evidence string behind it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortDriver {
pub key: String,
pub label: String,
pub weight: f64,
pub direction: EffortDriverDirection,
pub evidence: String,
}
/// Whether a driver pushes the estimate up, down, or not at all.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortDriverDirection {
Raises,
Lowers,
Neutral,
}
/// Free-form notes and key/value overrides applied to the model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortAssumptions {
pub notes: Vec<String>,
pub overrides: BTreeMap<String, String>,
}
/// Incremental effort estimate for a change between two revisions
/// (`base` → `head`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortDeltaReport {
pub base: String,
pub head: String,
pub files_changed: usize,
pub modules_changed: usize,
pub langs_changed: usize,
pub hotspot_files_touched: usize,
pub coupled_neighbors_touched: usize,
pub blast_radius: f64,
pub classification: EffortDeltaClassification,
pub effort_pm_low: f64,
pub effort_pm_est: f64,
pub effort_pm_high: f64,
}
/// Severity classification for a delta (serialized snake_case, matching
/// the `Display` impl below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortDeltaClassification {
Low,
Medium,
High,
Critical,
}
impl fmt::Display for EffortDeltaClassification {
    /// Writes the snake_case name, mirroring the serde representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Low => "low",
            Self::Medium => "medium",
            Self::High => "high",
            Self::Critical => "critical",
        };
        f.write_str(name)
    }
}
/// Classic COCOMO output; `a`/`b`/`c`/`d` are the model coefficients used
/// to compute `effort_pm` and `duration_months` from `kloc`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CocomoReport {
pub mode: String,
pub kloc: f64,
pub effort_pm: f64,
pub duration_months: f64,
pub staff: f64,
pub a: f64,
pub b: f64,
pub c: f64,
pub d: f64,
}
/// Hash over the receipt contents so downstream tools can detect tampering.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityReport {
pub algo: String,
pub hash: String,
pub entries: usize,
}
/// Non-code asset inventory (images, data files, etc.).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetReport {
pub total_files: usize,
pub total_bytes: u64,
pub categories: Vec<AssetCategoryRow>,
pub top_files: Vec<AssetFileRow>,
}
/// Totals for one asset category and the extensions grouped into it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetCategoryRow {
pub category: String,
pub files: usize,
pub bytes: u64,
pub extensions: Vec<String>,
}
/// One (large) asset file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetFileRow {
pub path: String,
pub bytes: u64,
pub category: String,
pub extension: String,
}
/// Dependency counts aggregated across all detected lockfiles.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DependencyReport {
pub total: usize,
pub lockfiles: Vec<LockfileReport>,
}
/// One lockfile and how many dependencies it declares.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockfileReport {
pub path: String,
pub kind: String,
pub dependencies: usize,
}
/// Metrics mined from git history: hotspots, bus factor, freshness,
/// co-change coupling, and optional age/intent breakdowns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitReport {
pub commits_scanned: usize,
pub files_seen: usize,
pub hotspots: Vec<HotspotRow>,
pub bus_factor: Vec<BusFactorRow>,
pub freshness: FreshnessReport,
pub coupling: Vec<CouplingRow>,
#[serde(skip_serializing_if = "Option::is_none")]
pub age_distribution: Option<CodeAgeDistributionReport>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub intent: Option<CommitIntentReport>,
}
/// A frequently-changed file; `score` combines commit count and size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HotspotRow {
pub path: String,
pub commits: usize,
pub lines: usize,
pub score: usize,
}
/// Number of distinct authors who touched a module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BusFactorRow {
pub module: String,
pub authors: usize,
}
/// How stale the codebase is relative to `threshold_days`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FreshnessReport {
pub threshold_days: usize,
pub stale_files: usize,
pub total_files: usize,
pub stale_pct: f64,
pub by_module: Vec<ModuleFreshnessRow>,
}
/// Freshness statistics for one module (ages in days).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleFreshnessRow {
pub module: String,
pub avg_days: f64,
pub p90_days: f64,
pub stale_pct: f64,
}
/// Co-change coupling between two paths: `count` commits touched both.
/// The optional fields are newer similarity metrics, defaulted on
/// deserialization for backward compatibility with older receipts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CouplingRow {
pub left: String,
pub right: String,
pub count: usize,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub jaccard: Option<f64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub lift: Option<f64>,
// NOTE(review): presumably the per-side commit counts backing the
// jaccard/lift computation — confirm against the producer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub n_left: Option<usize>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub n_right: Option<usize>,
}
/// Bucketed distribution of file ages plus a refresh trend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeAgeDistributionReport {
pub buckets: Vec<CodeAgeBucket>,
pub recent_refreshes: usize,
pub prior_refreshes: usize,
pub refresh_trend: TrendClass,
}
/// One age bucket; `max_days` is `None` for the open-ended last bucket.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeAgeBucket {
pub label: String,
pub min_days: usize,
pub max_days: Option<usize>,
pub files: usize,
pub pct: f64,
}
pub use tokmd_types::CommitIntentKind;
/// Conventional-commit intent breakdown, overall and per module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitIntentReport {
pub overall: CommitIntentCounts,
pub by_module: Vec<ModuleIntentRow>,
pub unknown_pct: f64,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub corrective_ratio: Option<f64>,
}
/// Tally of commits per conventional-commit kind; `total` is the sum of
/// all the per-kind fields (maintained by [`CommitIntentCounts::increment`]).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CommitIntentCounts {
pub feat: usize,
pub fix: usize,
pub refactor: usize,
pub docs: usize,
pub test: usize,
pub chore: usize,
pub ci: usize,
pub build: usize,
pub perf: usize,
pub style: usize,
pub revert: usize,
pub other: usize,
pub total: usize,
}
impl CommitIntentCounts {
    /// Tally one commit under `kind` and bump the running `total`.
    pub fn increment(&mut self, kind: CommitIntentKind) {
        // Resolve the matching per-kind slot once, then bump it.
        let slot = match kind {
            CommitIntentKind::Feat => &mut self.feat,
            CommitIntentKind::Fix => &mut self.fix,
            CommitIntentKind::Refactor => &mut self.refactor,
            CommitIntentKind::Docs => &mut self.docs,
            CommitIntentKind::Test => &mut self.test,
            CommitIntentKind::Chore => &mut self.chore,
            CommitIntentKind::Ci => &mut self.ci,
            CommitIntentKind::Build => &mut self.build,
            CommitIntentKind::Perf => &mut self.perf,
            CommitIntentKind::Style => &mut self.style,
            CommitIntentKind::Revert => &mut self.revert,
            CommitIntentKind::Other => &mut self.other,
        };
        *slot += 1;
        self.total += 1;
    }
}
/// Intent counts for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleIntentRow {
pub module: String,
pub counts: CommitIntentCounts,
}
/// Scope within which near-duplicate pairs are searched (default: per module).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum NearDupScope {
#[default]
Module,
Lang,
Global,
}
/// Parameters the near-duplicate detector ran with. Optional fields are
/// defaulted on deserialization for backward compatibility.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupParams {
pub scope: NearDupScope,
pub threshold: f64,
pub max_files: usize,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub max_pairs: Option<usize>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub max_file_bytes: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub selection_method: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub algorithm: Option<NearDupAlgorithm>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub exclude_patterns: Vec<String>,
}
/// Fingerprinting parameters (k-gram/window sizes and posting-list cap).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NearDupAlgorithm {
pub k_gram_size: usize,
pub window_size: usize,
pub max_postings: usize,
}
/// Near-duplicate detection results. Most optional fields were added after
/// the initial schema and are defaulted on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDuplicateReport {
pub params: NearDupParams,
pub pairs: Vec<NearDupPairRow>,
pub files_analyzed: usize,
pub files_skipped: usize,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub eligible_files: Option<usize>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub clusters: Option<Vec<NearDupCluster>>,
/// True when result limits (e.g. `max_pairs`) cut the output short.
#[serde(default)]
pub truncated: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub excluded_by_pattern: Option<usize>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stats: Option<NearDupStats>,
}
/// A connected group of mutually-similar files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupCluster {
pub files: Vec<String>,
pub max_similarity: f64,
pub representative: String,
pub pair_count: usize,
}
/// Timing/volume counters for the near-duplicate pass.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NearDupStats {
pub fingerprinting_ms: u64,
pub pairing_ms: u64,
pub bytes_processed: u64,
}
/// One similar file pair with its fingerprint overlap counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupPairRow {
pub left: String,
pub right: String,
pub similarity: f64,
pub shared_fingerprints: usize,
pub left_fingerprints: usize,
pub right_fingerprints: usize,
}
/// Import graph at the configured granularity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportReport {
pub granularity: String,
pub edges: Vec<ImportEdge>,
}
/// One aggregated import edge (`from` imports `to`, `count` times).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportEdge {
pub from: String,
pub to: String,
pub count: usize,
}
/// Exact-duplicate detection results, with optional density and
/// near-duplicate extensions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateReport {
pub groups: Vec<DuplicateGroup>,
pub wasted_bytes: u64,
pub strategy: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub density: Option<DuplicationDensityReport>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub near: Option<NearDuplicateReport>,
}
/// Files sharing an identical content hash.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateGroup {
pub hash: String,
pub bytes: u64,
pub files: Vec<String>,
}
/// Duplication totals and how much of the codebase is "wasted" bytes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicationDensityReport {
pub duplicate_groups: usize,
pub duplicate_files: usize,
pub duplicated_bytes: u64,
pub wasted_bytes: u64,
pub wasted_pct_of_codebase: f64,
pub by_module: Vec<ModuleDuplicationDensityRow>,
}
/// Duplication density for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleDuplicationDensityRow {
pub module: String,
pub duplicate_files: usize,
pub wasted_files: usize,
pub duplicated_bytes: u64,
pub wasted_bytes: u64,
pub module_bytes: u64,
pub density: f64,
}
/// Aggregated Halstead software-science metrics (operator/operand counts
/// and the derived volume/difficulty/effort figures).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HalsteadMetrics {
pub distinct_operators: usize,
pub distinct_operands: usize,
pub total_operators: usize,
pub total_operands: usize,
pub vocabulary: usize,
pub length: usize,
pub volume: f64,
pub difficulty: f64,
pub effort: f64,
pub time_seconds: f64,
pub estimated_bugs: f64,
}
/// Maintainability index with the inputs it was computed from and a
/// letter `grade`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaintainabilityIndex {
pub score: f64,
pub avg_cyclomatic: f64,
pub avg_loc: f64,
#[serde(skip_serializing_if = "Option::is_none")]
pub avg_halstead_volume: Option<f64>,
pub grade: String,
}
/// Technical-debt ratio (complexity points per KLOC) and its severity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnicalDebtRatio {
pub ratio: f64,
pub complexity_points: usize,
pub code_kloc: f64,
pub level: TechnicalDebtLevel,
}
/// Severity bands for the technical-debt ratio.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TechnicalDebtLevel {
Low,
Moderate,
High,
Critical,
}
/// Codebase-wide complexity summary; cognitive/nesting/Halstead sections
/// are optional because not every language analyzer produces them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityReport {
pub total_functions: usize,
pub avg_function_length: f64,
pub max_function_length: usize,
pub avg_cyclomatic: f64,
pub max_cyclomatic: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub avg_cognitive: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_cognitive: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub avg_nesting_depth: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_nesting_depth: Option<usize>,
pub high_risk_files: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub histogram: Option<ComplexityHistogram>,
#[serde(skip_serializing_if = "Option::is_none")]
pub halstead: Option<HalsteadMetrics>,
#[serde(skip_serializing_if = "Option::is_none")]
pub maintainability_index: Option<MaintainabilityIndex>,
#[serde(skip_serializing_if = "Option::is_none")]
pub technical_debt: Option<TechnicalDebtRatio>,
pub files: Vec<FileComplexity>,
}
/// Complexity metrics for a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileComplexity {
pub path: String,
pub module: String,
pub function_count: usize,
pub max_function_length: usize,
pub cyclomatic_complexity: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub cognitive_complexity: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_nesting: Option<usize>,
pub risk_level: ComplexityRisk,
/// Per-function breakdown, present only when detail was requested.
#[serde(skip_serializing_if = "Option::is_none")]
pub functions: Option<Vec<FunctionComplexityDetail>>,
}
/// Complexity metrics for a single function/method.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionComplexityDetail {
pub name: String,
pub line_start: usize,
pub line_end: usize,
pub length: usize,
pub cyclomatic: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub cognitive: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_nesting: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub param_count: Option<usize>,
}
/// Risk bands for per-file complexity.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComplexityRisk {
Low,
Moderate,
High,
Critical,
}
/// Complexity histogram: `buckets` holds the bucket lower bounds and
/// `counts` the number of items per bucket (rendered by `to_ascii`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityHistogram {
pub buckets: Vec<u32>,
pub counts: Vec<u32>,
pub total: u32,
}
impl ComplexityHistogram {
    /// Render the histogram as an ASCII bar chart.
    ///
    /// Each row shows a bucket label (`lo-hi` for interior buckets, `n+`
    /// for the open-ended final bucket), a bar scaled so the largest count
    /// spans `width` block characters, and the raw count.
    pub fn to_ascii(&self, width: usize) -> String {
        use std::fmt::Write;
        // `.max(1)` guards the division below when every count is zero.
        let max_count = self.counts.iter().max().copied().unwrap_or(1).max(1);
        let mut output = String::with_capacity(self.counts.len() * (width + 20));
        for (i, count) in self.counts.iter().enumerate() {
            // Fix: `i + 1 < len` instead of the original `i < len - 1`,
            // which underflowed (and then panicked on indexing) whenever
            // `buckets` was shorter than `counts` (e.g. empty `buckets`).
            if i + 1 < self.buckets.len() {
                let _ = write!(
                    output,
                    "{:>2}-{:<2} |",
                    self.buckets[i],
                    // saturating_sub: a 0 upper bound must not underflow.
                    self.buckets[i + 1].saturating_sub(1)
                );
            } else {
                let _ = write!(
                    output,
                    "{:>2}+ |",
                    self.buckets.get(i).copied().unwrap_or(30)
                );
            }
            // Bar length proportional to this bucket's share of the maximum.
            let bar_len = (*count as f64 / max_count as f64 * width as f64) as usize;
            for _ in 0..bar_len {
                output.push('\u{2588}');
            }
            let _ = writeln!(output, " {}", count);
        }
        output
    }
}
pub const BASELINE_VERSION: u32 = 1;
/// Persisted complexity baseline used to compare later runs against
/// (versioned via `baseline_version` / [`BASELINE_VERSION`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityBaseline {
pub baseline_version: u32,
/// ISO-8601 timestamp string (see `chrono_timestamp_iso8601`).
pub generated_at: String,
pub commit: Option<String>,
pub metrics: BaselineMetrics,
pub files: Vec<FileBaselineEntry>,
#[serde(skip_serializing_if = "Option::is_none")]
pub complexity: Option<BaselineComplexitySection>,
#[serde(skip_serializing_if = "Option::is_none")]
pub determinism: Option<DeterminismBaseline>,
}
impl ComplexityBaseline {
    /// Empty baseline stamped with the current [`BASELINE_VERSION`].
    pub fn new() -> Self {
        Self {
            baseline_version: BASELINE_VERSION,
            generated_at: String::new(),
            commit: None,
            metrics: BaselineMetrics::default(),
            files: Vec::new(),
            complexity: None,
            determinism: None,
        }
    }
    /// Build a baseline snapshot from an analysis receipt.
    ///
    /// Totals come from `receipt.derived` when present (0 otherwise); the
    /// per-file rows and the complexity section are populated only when
    /// `receipt.complexity` exists.
    pub fn from_analysis(receipt: &AnalysisReceipt) -> Self {
        let totals = receipt.derived.as_ref().map(|d| &d.totals);
        let total_code_lines = totals.map_or(0, |t| t.code as u64);
        let total_files = totals.map_or(0, |t| t.files as u64);
        let (metrics, files, complexity) = match receipt.complexity.as_ref() {
            Some(report) => {
                let metrics = BaselineMetrics {
                    total_code_lines,
                    total_files,
                    avg_cyclomatic: report.avg_cyclomatic,
                    max_cyclomatic: report.max_cyclomatic as u32,
                    avg_cognitive: report.avg_cognitive.unwrap_or(0.0),
                    max_cognitive: report.max_cognitive.unwrap_or(0) as u32,
                    avg_nesting_depth: report.avg_nesting_depth.unwrap_or(0.0),
                    max_nesting_depth: report.max_nesting_depth.unwrap_or(0) as u32,
                    function_count: report.total_functions as u64,
                    avg_function_length: report.avg_function_length,
                };
                let files: Vec<FileBaselineEntry> = report
                    .files
                    .iter()
                    .map(|f| FileBaselineEntry {
                        path: f.path.clone(),
                        // NOTE(review): `FileComplexity` carries no per-file
                        // line count, so 0 is recorded — confirm intent.
                        code_lines: 0,
                        cyclomatic: f.cyclomatic_complexity as u32,
                        cognitive: f.cognitive_complexity.unwrap_or(0) as u32,
                        max_nesting: f.max_nesting.unwrap_or(0) as u32,
                        function_count: f.function_count as u32,
                        content_hash: None,
                    })
                    .collect();
                let section = BaselineComplexitySection {
                    total_functions: report.total_functions,
                    avg_function_length: report.avg_function_length,
                    max_function_length: report.max_function_length,
                    avg_cyclomatic: report.avg_cyclomatic,
                    max_cyclomatic: report.max_cyclomatic,
                    avg_cognitive: report.avg_cognitive,
                    max_cognitive: report.max_cognitive,
                    avg_nesting_depth: report.avg_nesting_depth,
                    max_nesting_depth: report.max_nesting_depth,
                    high_risk_files: report.high_risk_files,
                };
                (metrics, files, Some(section))
            }
            None => (
                // No complexity data: keep only the line/file totals.
                BaselineMetrics {
                    total_code_lines,
                    total_files,
                    ..Default::default()
                },
                Vec::new(),
                None,
            ),
        };
        Self {
            baseline_version: BASELINE_VERSION,
            generated_at: chrono_timestamp_iso8601(receipt.generated_at_ms),
            commit: None,
            metrics,
            files,
            complexity,
            determinism: None,
        }
    }
}
impl Default for ComplexityBaseline {
// Delegates to `new()` so the two constructors cannot drift apart.
fn default() -> Self {
Self::new()
}
}
/// Snapshot of the codebase-wide complexity summary stored in a baseline
/// (mirrors the corresponding fields of `ComplexityReport`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineComplexitySection {
pub total_functions: usize,
pub avg_function_length: f64,
pub max_function_length: usize,
pub avg_cyclomatic: f64,
pub max_cyclomatic: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub avg_cognitive: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_cognitive: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub avg_nesting_depth: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_nesting_depth: Option<usize>,
pub high_risk_files: usize,
}
/// Headline metrics stored in a baseline (non-optional counterparts of
/// the `ComplexityReport` fields, 0 when the source data was absent).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineMetrics {
pub total_code_lines: u64,
pub total_files: u64,
pub avg_cyclomatic: f64,
pub max_cyclomatic: u32,
pub avg_cognitive: f64,
pub max_cognitive: u32,
pub avg_nesting_depth: f64,
pub max_nesting_depth: u32,
pub function_count: u64,
pub avg_function_length: f64,
}
impl Default for BaselineMetrics {
// All-zero metrics. Hand-written but equivalent to a derived `Default`,
// since every field is numeric and defaults to zero.
fn default() -> Self {
Self {
total_code_lines: 0,
total_files: 0,
avg_cyclomatic: 0.0,
max_cyclomatic: 0,
avg_cognitive: 0.0,
max_cognitive: 0,
avg_nesting_depth: 0.0,
max_nesting_depth: 0,
function_count: 0,
avg_function_length: 0.0,
}
}
}
/// Per-file row stored in a baseline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileBaselineEntry {
pub path: String,
// NOTE(review): currently always 0 when built via `from_analysis` — the
// per-file line count is not available there.
pub code_lines: u64,
pub cyclomatic: u32,
pub cognitive: u32,
pub max_nesting: u32,
pub function_count: u32,
pub content_hash: Option<String>,
}
/// Hashes recorded to verify that later runs were produced by the same
/// build/sources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeterminismBaseline {
pub baseline_version: u32,
pub generated_at: String,
pub build_hash: String,
pub source_hash: String,
pub cargo_lock_hash: Option<String>,
}
/// Format a Unix timestamp in milliseconds as an RFC 3339 / ISO-8601 UTC
/// string with millisecond precision, e.g. `1970-01-01T00:00:00.000Z`.
///
/// Implemented without the `chrono` crate. The date part uses Howard
/// Hinnant's `civil_from_days` algorithm, which works in a calendar whose
/// years begin on 1 March so leap days fall at the end of a "year".
fn chrono_timestamp_iso8601(ms: u128) -> String {
    let total_secs = (ms / 1000) as i64;
    let millis = (ms % 1000) as u32;
    const SECS_PER_MIN: i64 = 60;
    const SECS_PER_HOUR: i64 = 3600;
    const SECS_PER_DAY: i64 = 86400;
    let days = total_secs / SECS_PER_DAY;
    let day_secs = total_secs % SECS_PER_DAY;
    // Normalize so 0 <= day_secs < SECS_PER_DAY. Defensive: `ms` is
    // unsigned, so this branch cannot trigger today.
    let (days, day_secs) = if day_secs < 0 {
        (days - 1, day_secs + SECS_PER_DAY)
    } else {
        (days, day_secs)
    };
    let hour = day_secs / SECS_PER_HOUR;
    let min = (day_secs % SECS_PER_HOUR) / SECS_PER_MIN;
    let sec = day_secs % SECS_PER_MIN;
    // --- civil_from_days (Hinnant) ---
    // Shift the epoch from 1970-01-01 to 0000-03-01, which lies 719468
    // days earlier; 146097 days is one 400-year Gregorian era.
    let z = days + 719_468;
    let era = (if z >= 0 { z } else { z - 146_096 }) / 146_097;
    // Day-of-era [0, 146096].
    let doe = (z - era * 146_097) as u32;
    // Year-of-era [0, 399]: subtract one day per 4-year leap cycle (1460),
    // add one back per century (36524), subtract one per era (146096).
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365;
    let y = yoe as i64 + era * 400;
    // Day-of-year in the March-based calendar [0, 365].
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
    // Month index with March = 0 [0, 11]; 153 days per 5-month group.
    let mp = (5 * doy + 2) / 153;
    // Day-of-month [1, 31] and civil month [1, 12].
    let d = doy - (153 * mp + 2) / 5 + 1;
    let m = if mp < 10 { mp + 3 } else { mp - 9 };
    // January and February belong to the following civil year.
    let y = if m <= 2 { y + 1 } else { y };
    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z",
        y, m, d, hour, min, sec, millis
    )
}
/// Public-API surface summary: how much of the codebase is exported and
/// how well the exported items are documented.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiSurfaceReport {
pub total_items: usize,
pub public_items: usize,
pub internal_items: usize,
pub public_ratio: f64,
pub documented_ratio: f64,
pub by_language: BTreeMap<String, LangApiSurface>,
pub by_module: Vec<ModuleApiRow>,
pub top_exporters: Vec<ApiExportItem>,
}
/// API-surface totals for one language.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangApiSurface {
pub total_items: usize,
pub public_items: usize,
pub internal_items: usize,
pub public_ratio: f64,
}
/// API-surface totals for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleApiRow {
pub module: String,
pub total_items: usize,
pub public_items: usize,
pub public_ratio: f64,
}
/// A file that exports many public items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiExportItem {
pub path: String,
pub lang: String,
pub public_items: usize,
pub total_items: usize,
}
/// Novelty metrics (currently just the eco label).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunReport {
pub eco_label: Option<EcoLabel>,
}
/// Playful "eco rating" derived from repository size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EcoLabel {
pub score: f64,
pub label: String,
pub bytes: u64,
pub notes: String,
}
pub const ENVELOPE_SCHEMA: &str = tokmd_envelope::SENSOR_REPORT_SCHEMA;
pub use tokmd_envelope::Artifact;
pub use tokmd_envelope::Finding;
pub use tokmd_envelope::FindingLocation;
pub use tokmd_envelope::FindingSeverity;
pub use tokmd_envelope::GateItem;
pub use tokmd_envelope::GateResults as GatesEnvelope;
pub use tokmd_envelope::SensorReport as Envelope;
pub use tokmd_envelope::ToolMeta as EnvelopeTool;
pub use tokmd_envelope::Verdict;
pub use tokmd_envelope::GateResults;
pub use tokmd_envelope::SensorReport;
pub use tokmd_envelope::ToolMeta;
// Unit tests for the receipt/baseline data model: serde representation pins,
// Display-string pins, default-value invariants, and property tests for the
// ISO-8601 timestamp renderer.
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{SecondsFormat, TimeZone, Utc};
    use proptest::prelude::*;

    // Pins the schema version so an accidental bump is caught in review.
    #[test]
    fn analysis_schema_version_constant() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(ANALYSIS_SCHEMA_VERSION, 9);
        Ok(())
    }

    // Pins the baseline file-format version.
    #[test]
    fn baseline_version_constant() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(BASELINE_VERSION, 1);
        Ok(())
    }

    // A default baseline carries the current version and is otherwise empty.
    #[test]
    fn complexity_baseline_default() -> Result<(), Box<dyn std::error::Error>> {
        let b = ComplexityBaseline::default();
        assert_eq!(b.baseline_version, BASELINE_VERSION);
        assert!(b.generated_at.is_empty());
        assert!(b.commit.is_none());
        assert!(b.files.is_empty());
        assert!(b.complexity.is_none());
        assert!(b.determinism.is_none());
        Ok(())
    }

    // `new()` and `Default` must agree on the fields compared here.
    #[test]
    fn complexity_baseline_new_equals_default() -> Result<(), Box<dyn std::error::Error>> {
        let a = ComplexityBaseline::new();
        let b = ComplexityBaseline::default();
        assert_eq!(a.baseline_version, b.baseline_version);
        assert_eq!(a.generated_at, b.generated_at);
        assert_eq!(a.files.len(), b.files.len());
        Ok(())
    }

    // Default metrics start zeroed across all counters and averages.
    #[test]
    fn baseline_metrics_default_is_zeroed() -> Result<(), Box<dyn std::error::Error>> {
        let m = BaselineMetrics::default();
        assert_eq!(m.total_code_lines, 0);
        assert_eq!(m.total_files, 0);
        assert_eq!(m.avg_cyclomatic, 0.0);
        assert_eq!(m.max_cyclomatic, 0);
        assert_eq!(m.avg_cognitive, 0.0);
        assert_eq!(m.function_count, 0);
        Ok(())
    }

    // Every EntropyClass variant survives a JSON round trip.
    #[test]
    fn entropy_class_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [
            EntropyClass::Low,
            EntropyClass::Normal,
            EntropyClass::Suspicious,
            EntropyClass::High,
        ] {
            let json = serde_json::to_string(&variant)?;
            let back: EntropyClass = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    // Every TrendClass variant survives a JSON round trip.
    #[test]
    fn trend_class_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [TrendClass::Rising, TrendClass::Flat, TrendClass::Falling] {
            let json = serde_json::to_string(&variant)?;
            let back: TrendClass = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    // Every LicenseSourceKind variant survives a JSON round trip.
    #[test]
    fn license_source_kind_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [LicenseSourceKind::Metadata, LicenseSourceKind::Text] {
            let json = serde_json::to_string(&variant)?;
            let back: LicenseSourceKind = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    // Every ComplexityRisk variant survives a JSON round trip.
    #[test]
    fn complexity_risk_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [
            ComplexityRisk::Low,
            ComplexityRisk::Moderate,
            ComplexityRisk::High,
            ComplexityRisk::Critical,
        ] {
            let json = serde_json::to_string(&variant)?;
            let back: ComplexityRisk = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    // Every TechnicalDebtLevel variant survives a JSON round trip.
    #[test]
    fn technical_debt_level_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [
            TechnicalDebtLevel::Low,
            TechnicalDebtLevel::Moderate,
            TechnicalDebtLevel::High,
            TechnicalDebtLevel::Critical,
        ] {
            let json = serde_json::to_string(&variant)?;
            let back: TechnicalDebtLevel = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    // Pins the snake_case JSON representation of EntropyClass.
    #[test]
    fn entropy_class_uses_snake_case() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(
            serde_json::to_string(&EntropyClass::Suspicious)?,
            "\"suspicious\""
        );
        Ok(())
    }

    // Pins the snake_case JSON representation of TrendClass.
    #[test]
    fn trend_class_uses_snake_case() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(serde_json::to_string(&TrendClass::Rising)?, "\"rising\"");
        Ok(())
    }

    // Display strings are part of the external contract; pin them exactly.
    #[test]
    fn effort_model_display_strings_are_stable() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(EffortModel::Cocomo81Basic.to_string(), "cocomo81-basic");
        assert_eq!(EffortModel::Cocomo2Early.to_string(), "cocomo2-early");
        assert_eq!(EffortModel::Ensemble.to_string(), "ensemble");
        Ok(())
    }

    // Pins the Display strings for confidence levels.
    #[test]
    fn effort_confidence_level_display_strings_are_stable() -> Result<(), Box<dyn std::error::Error>>
    {
        assert_eq!(EffortConfidenceLevel::Low.to_string(), "low");
        assert_eq!(EffortConfidenceLevel::Medium.to_string(), "medium");
        assert_eq!(EffortConfidenceLevel::High.to_string(), "high");
        Ok(())
    }

    // Pins the Display strings for delta classifications.
    #[test]
    fn effort_delta_classification_display_strings_are_stable()
    -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(EffortDeltaClassification::Low.to_string(), "low");
        assert_eq!(EffortDeltaClassification::Medium.to_string(), "medium");
        assert_eq!(EffortDeltaClassification::High.to_string(), "high");
        assert_eq!(EffortDeltaClassification::Critical.to_string(), "critical");
        Ok(())
    }

    // Pins the snake_case JSON representation of ComplexityRisk.
    #[test]
    fn complexity_risk_uses_snake_case() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(
            serde_json::to_string(&ComplexityRisk::Moderate)?,
            "\"moderate\""
        );
        Ok(())
    }

    // An EcoLabel survives a JSON round trip with fields intact.
    #[test]
    fn eco_label_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        let label = EcoLabel {
            score: 85.0,
            label: "A".into(),
            bytes: 1000,
            notes: "Good".into(),
        };
        let json = serde_json::to_string(&label)?;
        let back: EcoLabel = serde_json::from_str(&json)?;
        assert_eq!(back.label, "A");
        assert_eq!(back.bytes, 1000);
        Ok(())
    }

    // A TopicTerm survives a JSON round trip with fields intact.
    #[test]
    fn topic_term_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        let term = TopicTerm {
            term: "async".into(),
            score: 0.95,
            tf: 10,
            df: 3,
        };
        let json = serde_json::to_string(&term)?;
        let back: TopicTerm = serde_json::from_str(&json)?;
        assert_eq!(back.term, "async");
        assert_eq!(back.tf, 10);
        Ok(())
    }

    // A fully-populated baseline (one file entry) survives a JSON round trip.
    #[test]
    fn complexity_baseline_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        let b = ComplexityBaseline {
            baseline_version: BASELINE_VERSION,
            generated_at: "2025-01-01T00:00:00.000Z".into(),
            commit: Some("abc123".into()),
            metrics: BaselineMetrics::default(),
            files: vec![FileBaselineEntry {
                path: "src/lib.rs".into(),
                code_lines: 100,
                cyclomatic: 5,
                cognitive: 3,
                max_nesting: 2,
                function_count: 10,
                content_hash: Some("deadbeef".into()),
            }],
            complexity: None,
            determinism: None,
        };
        let json = serde_json::to_string(&b)?;
        let back: ComplexityBaseline = serde_json::from_str(&json)?;
        assert_eq!(back.baseline_version, BASELINE_VERSION);
        assert_eq!(back.commit.as_deref(), Some("abc123"));
        assert_eq!(back.files.len(), 1);
        assert_eq!(back.files[0].path, "src/lib.rs");
        Ok(())
    }

    // to_ascii renders one output line per bucket.
    #[test]
    fn complexity_histogram_to_ascii_basic() -> Result<(), Box<dyn std::error::Error>> {
        let h = ComplexityHistogram {
            buckets: vec![0, 5, 10],
            counts: vec![10, 5, 2],
            total: 17,
        };
        let ascii = h.to_ascii(20);
        assert!(!ascii.is_empty());
        assert_eq!(ascii.lines().count(), 3);
        Ok(())
    }

    // All-zero counts must still render something (no divide-by-zero blank).
    #[test]
    fn complexity_histogram_to_ascii_empty_counts() -> Result<(), Box<dyn std::error::Error>> {
        let h = ComplexityHistogram {
            buckets: vec![0, 5],
            counts: vec![0, 0],
            total: 0,
        };
        let ascii = h.to_ascii(20);
        assert!(!ascii.is_empty());
        Ok(())
    }

    // Epoch millisecond zero renders as the canonical Unix epoch string.
    #[test]
    fn timestamp_epoch() -> Result<(), Box<dyn std::error::Error>> {
        let result = chrono_timestamp_iso8601(0);
        assert_eq!(result, "1970-01-01T00:00:00.000Z");
        Ok(())
    }

    // Sub-second precision is preserved in the rendered timestamp.
    #[test]
    fn timestamp_with_millis() -> Result<(), Box<dyn std::error::Error>> {
        let result = chrono_timestamp_iso8601(1735689600500);
        assert!(result.ends_with(".500Z"));
        assert!(result.starts_with("2025-01-01"));
        Ok(())
    }

    proptest! {
        // chrono_timestamp_iso8601 must agree with chrono's own RFC 3339
        // millisecond rendering over the whole supported range (the upper
        // bound 253_402_300_799_000 ms is just before year 10000).
        #[test]
        fn chrono_timestamp_matches_chrono(ms in 0u128..253_402_300_799_000u128) {
            let chrono_dt = Utc
                .timestamp_millis_opt(ms as i64)
                .single()
                .expect("timestamp within supported range");
            let expected = chrono_dt.to_rfc3339_opts(SecondsFormat::Millis, true);
            prop_assert_eq!(chrono_timestamp_iso8601(ms), expected);
        }

        // Every rendered timestamp must itself parse back as valid RFC 3339.
        #[test]
        fn chrono_timestamp_is_rfc3339(ms in 0u128..253_402_300_799_000u128) {
            let rendered = chrono_timestamp_iso8601(ms);
            prop_assert!(chrono::DateTime::parse_from_rfc3339(&rendered).is_ok());
        }
    }
}