Skip to main content

tokmd_analysis_types/
lib.rs

//! # tokmd-analysis-types
//!
//! **Tier 0 (Analysis Contract)**
//!
//! Pure data structures for analysis receipts. No I/O or business logic.
//!
//! ## What belongs here
//! * Analysis-specific receipt types and findings
//! * Schema definitions for analysis outputs
//! * Type enums for classification results
//!
//! ## What does NOT belong here
//! * Analysis computation logic (use tokmd-analysis)
//! * Formatting logic (use tokmd-analysis-format)
//! * File I/O operations

17pub mod findings;
18
19use std::collections::BTreeMap;
20use std::fmt;
21
22use serde::{Deserialize, Serialize};
23use tokmd_types::{ScanStatus, ToolInfo};
24
/// Schema version for analysis receipts.
///
/// Version history:
/// * v7: Added coupling normalization (Jaccard/Lift), commit intent classification, near-duplicate detection.
/// * v8: Near-dup clusters, selection metadata, max_pairs guardrail, runtime stats.
/// * v9: Added effort estimation report.
pub const ANALYSIS_SCHEMA_VERSION: u32 = 9;
30
/// Top-level receipt emitted by an analysis run.
///
/// Each optional section is `None` when the corresponding analysis was not
/// run or produced nothing (no `skip_serializing_if` here, so absent
/// sections serialize as `null`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisReceipt {
    /// Should match [`ANALYSIS_SCHEMA_VERSION`] for receipts written by this crate.
    pub schema_version: u32,
    /// Generation timestamp in milliseconds (epoch basis not shown here —
    /// presumably Unix epoch; confirm at the writer).
    pub generated_at_ms: u128,
    pub tool: ToolInfo,
    pub mode: String,
    pub status: ScanStatus,
    /// Non-fatal issues encountered while analyzing.
    pub warnings: Vec<String>,
    pub source: AnalysisSource,
    pub args: AnalysisArgsMeta,
    pub archetype: Option<Archetype>,
    pub topics: Option<TopicClouds>,
    pub entropy: Option<EntropyReport>,
    pub predictive_churn: Option<PredictiveChurnReport>,
    pub corporate_fingerprint: Option<CorporateFingerprint>,
    pub license: Option<LicenseReport>,
    pub derived: Option<DerivedReport>,
    pub assets: Option<AssetReport>,
    pub deps: Option<DependencyReport>,
    pub git: Option<GitReport>,
    pub imports: Option<ImportReport>,
    pub dup: Option<DuplicateReport>,
    pub complexity: Option<ComplexityReport>,
    pub api_surface: Option<ApiSurfaceReport>,
    pub effort: Option<EffortEstimateReport>,
    pub fun: Option<FunReport>,
}
58
/// Where the analyzed data came from (export file, base receipt, raw inputs).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisSource {
    /// Raw input paths/arguments as given.
    pub inputs: Vec<String>,
    pub export_path: Option<String>,
    pub base_receipt_path: Option<String>,
    /// Schema version of the export file, when one was consumed.
    pub export_schema_version: Option<u32>,
    pub export_generated_at_ms: Option<u128>,
    /// Identity/signature of the base receipt — format defined by the producer.
    pub base_signature: Option<String>,
    /// Roots used to derive module names.
    pub module_roots: Vec<String>,
    /// Path depth at which files are grouped into modules.
    pub module_depth: usize,
    pub children: String,
}

/// Echo of the CLI/config arguments that shaped this analysis run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisArgsMeta {
    pub preset: String,
    pub format: String,
    /// Context window size used for the fit check, if requested.
    pub window_tokens: Option<usize>,
    /// Whether git analysis was explicitly enabled/disabled (`None` = default).
    pub git: Option<bool>,
    pub max_files: Option<usize>,
    pub max_bytes: Option<u64>,
    pub max_commits: Option<usize>,
    pub max_commit_files: Option<usize>,
    pub max_file_bytes: Option<u64>,
    pub import_granularity: String,
}
85
// ---------------
// Project context
// ---------------

/// Coarse classification of the scanned project, with supporting evidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Archetype {
    /// Archetype label (free-form string).
    pub kind: String,
    /// Observations that led to the classification.
    pub evidence: Vec<String>,
}

// -----------------
// Semantic topics
// -----------------

/// Weighted term clouds, per module and for the whole project.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicClouds {
    pub per_module: BTreeMap<String, Vec<TopicTerm>>,
    pub overall: Vec<TopicTerm>,
}

/// One scored term in a topic cloud.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicTerm {
    pub term: String,
    /// Relevance score (presumably derived from tf/df — confirm in tokmd-analysis).
    pub score: f64,
    /// Term frequency.
    pub tf: u32,
    /// Document frequency.
    pub df: u32,
}
113
// -----------------
// Entropy profiling
// -----------------

/// Files flagged by byte-entropy profiling.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyReport {
    pub suspects: Vec<EntropyFinding>,
}

/// Entropy measurement for a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyFinding {
    pub path: String,
    pub module: String,
    /// Entropy of the sampled bytes, in bits per byte (8.0 is the maximum).
    pub entropy_bits_per_byte: f32,
    /// How many bytes were sampled for the measurement.
    pub sample_bytes: u32,
    pub class: EntropyClass,
}

/// Bucketed interpretation of an entropy measurement.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntropyClass {
    Low,
    Normal,
    Suspicious,
    High,
}

// -----------------
// Predictive churn
// -----------------

/// Per-module churn trend projections.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PredictiveChurnReport {
    pub per_module: BTreeMap<String, ChurnTrend>,
}

/// Fitted trend summary for one module's churn.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChurnTrend {
    /// Slope of the fitted trend line.
    pub slope: f64,
    /// Goodness of fit (coefficient of determination, 0.0..=1.0).
    pub r2: f64,
    /// Signed change over the recent window.
    pub recent_change: i64,
    pub classification: TrendClass,
}

/// Direction of a trend.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrendClass {
    Rising,
    Flat,
    Falling,
}
165
// ---------------------
// Corporate fingerprint
// ---------------------

/// Commit-author email-domain breakdown.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorporateFingerprint {
    pub domains: Vec<DomainStat>,
}

/// Commit share for one email domain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DomainStat {
    pub domain: String,
    pub commits: u32,
    /// Share of commits attributed to this domain, as a percentage.
    pub pct: f32,
}

// -------------
// License radar
// -------------

/// Licenses detected in the project.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseReport {
    pub findings: Vec<LicenseFinding>,
    /// The license judged to apply overall, if one could be determined.
    pub effective: Option<String>,
}

/// One detected license.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseFinding {
    /// SPDX identifier of the detected license.
    pub spdx: String,
    /// Detection confidence (scale defined by the detector).
    pub confidence: f32,
    /// File the license was detected in.
    pub source_path: String,
    pub source_kind: LicenseSourceKind,
}

/// How the license was detected.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LicenseSourceKind {
    /// Declared in package metadata (e.g. a manifest field).
    Metadata,
    /// Matched from license text.
    Text,
}
206
// -----------------
// Derived analytics
// -----------------

/// Metrics derived from the base scan data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivedReport {
    pub totals: DerivedTotals,
    pub doc_density: RatioReport,
    pub whitespace: RatioReport,
    pub verbosity: RateReport,
    pub max_file: MaxFileReport,
    pub lang_purity: LangPurityReport,
    pub nesting: NestingReport,
    pub test_density: TestDensityReport,
    pub boilerplate: BoilerplateReport,
    pub polyglot: PolyglotReport,
    pub distribution: DistributionReport,
    pub histogram: Vec<HistogramBucket>,
    pub top: TopOffenders,
    /// Pre-rendered tree view, if one was produced.
    pub tree: Option<String>,
    pub reading_time: ReadingTimeReport,
    pub context_window: Option<ContextWindowReport>,
    pub cocomo: Option<CocomoReport>,
    pub todo: Option<TodoReport>,
    pub integrity: IntegrityReport,
}

/// Grand totals over all scanned files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivedTotals {
    pub files: usize,
    pub code: usize,
    pub comments: usize,
    pub blanks: usize,
    pub lines: usize,
    pub bytes: usize,
    pub tokens: usize,
}

/// A ratio, reported overall and broken down by language and module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RatioReport {
    pub total: RatioRow,
    pub by_lang: Vec<RatioRow>,
    pub by_module: Vec<RatioRow>,
}

/// One ratio measurement (`ratio` = numerator / denominator; see producer for rounding).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RatioRow {
    /// Grouping key ("total", a language, or a module).
    pub key: String,
    pub numerator: usize,
    pub denominator: usize,
    pub ratio: f64,
}

/// A rate, reported overall and broken down by language and module.
/// Structurally identical to [`RatioReport`] but serialized with a `rate` field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateReport {
    pub total: RateRow,
    pub by_lang: Vec<RateRow>,
    pub by_module: Vec<RateRow>,
}

/// One rate measurement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateRow {
    /// Grouping key ("total", a language, or a module).
    pub key: String,
    pub numerator: usize,
    pub denominator: usize,
    pub rate: f64,
}

/// Largest file overall and per language/module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaxFileReport {
    pub overall: FileStatRow,
    pub by_lang: Vec<MaxFileRow>,
    pub by_module: Vec<MaxFileRow>,
}

/// Largest file within one language or module group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaxFileRow {
    /// Group key (language or module name).
    pub key: String,
    pub file: FileStatRow,
}

/// Per-file statistics row used throughout the derived report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileStatRow {
    pub path: String,
    pub module: String,
    pub lang: String,
    pub code: usize,
    pub comments: usize,
    pub blanks: usize,
    pub lines: usize,
    pub bytes: usize,
    pub tokens: usize,
    /// Documentation percentage, when computable.
    pub doc_pct: Option<f64>,
    /// Average bytes per line, when computable.
    pub bytes_per_line: Option<f64>,
    /// Path depth of the file.
    pub depth: usize,
}
303
/// How language-homogeneous each module is.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangPurityReport {
    pub rows: Vec<LangPurityRow>,
}

/// Language mix of one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangPurityRow {
    pub module: String,
    /// Number of distinct languages in the module.
    pub lang_count: usize,
    pub dominant_lang: String,
    pub dominant_lines: usize,
    /// Share of lines held by the dominant language, as a percentage.
    pub dominant_pct: f64,
}

/// Indentation/nesting depth summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NestingReport {
    pub max: usize,
    pub avg: f64,
    pub by_module: Vec<NestingRow>,
}

/// Nesting summary for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NestingRow {
    pub key: String,
    pub max: usize,
    pub avg: f64,
}

/// Test code vs production code volume.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestDensityReport {
    pub test_lines: usize,
    pub prod_lines: usize,
    pub test_files: usize,
    pub prod_files: usize,
    /// Test-to-production ratio (see producer for the exact formula).
    pub ratio: f64,
}

/// Infrastructure/config code vs logic code volume.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoilerplateReport {
    pub infra_lines: usize,
    pub logic_lines: usize,
    pub ratio: f64,
    /// Languages counted as infrastructure.
    pub infra_langs: Vec<String>,
}

/// Language-diversity summary for the whole project.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolyglotReport {
    pub lang_count: usize,
    /// Diversity entropy over the language distribution.
    pub entropy: f64,
    pub dominant_lang: String,
    pub dominant_lines: usize,
    pub dominant_pct: f64,
}

/// Statistical distribution of file sizes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionReport {
    pub count: usize,
    pub min: usize,
    pub max: usize,
    pub mean: f64,
    pub median: f64,
    /// 90th percentile.
    pub p90: f64,
    /// 99th percentile.
    pub p99: f64,
    /// Gini coefficient of the size distribution (0.0 = equal, 1.0 = concentrated).
    pub gini: f64,
}

/// One bucket of the file-size histogram.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistogramBucket {
    pub label: String,
    pub min: usize,
    /// Upper bound; `None` for the open-ended last bucket.
    pub max: Option<usize>,
    pub files: usize,
    pub pct: f64,
}

/// "Worst offender" file lists by several criteria.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopOffenders {
    pub largest_lines: Vec<FileStatRow>,
    pub largest_tokens: Vec<FileStatRow>,
    pub largest_bytes: Vec<FileStatRow>,
    pub least_documented: Vec<FileStatRow>,
    pub most_dense: Vec<FileStatRow>,
}

/// Estimated human reading time for the codebase.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingTimeReport {
    pub minutes: f64,
    /// Reading speed assumption used for the estimate.
    pub lines_per_minute: usize,
    /// Line count the estimate was based on.
    pub basis_lines: usize,
}
394
/// TODO/FIXME-style marker counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoReport {
    pub total: usize,
    /// Markers per thousand lines of code.
    pub density_per_kloc: f64,
    pub tags: Vec<TodoTagRow>,
}

/// Count for one marker tag (e.g. "TODO", "FIXME").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoTagRow {
    pub tag: String,
    pub count: usize,
}

/// Whether the codebase fits in a given LLM context window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextWindowReport {
    pub window_tokens: usize,
    pub total_tokens: usize,
    /// total_tokens as a percentage of window_tokens.
    pub pct: f64,
    pub fits: bool,
}
415
/// Effort estimation report (schema v9).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortEstimateReport {
    pub model: EffortModel,
    pub size_basis: EffortSizeBasis,
    pub results: EffortResults,
    pub confidence: EffortConfidence,
    pub drivers: Vec<EffortDriver>,
    pub assumptions: EffortAssumptions,
    /// Delta estimate between two revisions, when one was computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub delta: Option<EffortDeltaReport>,
}

/// The size (lines/KLOC) inputs the estimate was computed from,
/// split by authorship classification (authored vs generated vs vendored).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortSizeBasis {
    pub total_lines: usize,
    pub authored_lines: usize,
    pub generated_lines: usize,
    pub vendored_lines: usize,
    pub kloc_total: f64,
    pub kloc_authored: f64,
    pub generated_pct: f64,
    pub vendored_pct: f64,
    /// Confidence in the authored/generated/vendored classification.
    pub classification_confidence: EffortConfidenceLevel,
    pub warnings: Vec<String>,
    pub by_tag: Vec<EffortTagSizeRow>,
}

/// Size contribution of one classification tag.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortTagSizeRow {
    pub tag: String,
    pub lines: usize,
    pub authored_lines: usize,
    pub pct_of_total: f64,
}

/// Which estimation model produced the results.
/// Serialized in kebab-case; keep [`fmt::Display`] below in sync.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum EffortModel {
    Cocomo81Basic,
    Cocomo2Early,
    Ensemble,
}
458
459impl fmt::Display for EffortModel {
460    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
461        match self {
462            Self::Cocomo81Basic => f.write_str("cocomo81-basic"),
463            Self::Cocomo2Early => f.write_str("cocomo2-early"),
464            Self::Ensemble => f.write_str("ensemble"),
465        }
466    }
467}
468
/// Point and range estimates for effort, schedule and staffing.
/// `_low`/`_p50`/`_p80` are the low, median and 80th-percentile values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortResults {
    /// Effort in person-months, median.
    pub effort_pm_p50: f64,
    /// Schedule in calendar months, median.
    pub schedule_months_p50: f64,
    /// Average staffing, median.
    pub staff_p50: f64,
    pub effort_pm_low: f64,
    pub effort_pm_p80: f64,
    pub schedule_months_low: f64,
    pub schedule_months_p80: f64,
    pub staff_low: f64,
    pub staff_p80: f64,
}

/// How trustworthy the estimate is, and why.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortConfidence {
    pub level: EffortConfidenceLevel,
    /// Human-readable reasons behind the confidence level.
    pub reasons: Vec<String>,
    /// Share of the codebase covered by usable data, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data_coverage_pct: Option<f64>,
}

/// Three-level confidence scale.
/// Serialized in snake_case; keep [`fmt::Display`] below in sync.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortConfidenceLevel {
    Low,
    Medium,
    High,
}
497
498impl fmt::Display for EffortConfidenceLevel {
499    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
500        match self {
501            Self::Low => f.write_str("low"),
502            Self::Medium => f.write_str("medium"),
503            Self::High => f.write_str("high"),
504        }
505    }
506}
507
/// One factor that pushed the effort estimate up or down.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortDriver {
    /// Stable machine key for the driver.
    pub key: String,
    /// Human-readable label.
    pub label: String,
    /// Magnitude of the driver's influence.
    pub weight: f64,
    pub direction: EffortDriverDirection,
    /// What was observed that triggered this driver.
    pub evidence: String,
}

/// Which way a driver moves the estimate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortDriverDirection {
    Raises,
    Lowers,
    Neutral,
}

/// Free-text assumptions and user-supplied overrides baked into the estimate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortAssumptions {
    pub notes: Vec<String>,
    /// Override key -> value, as provided by the user.
    pub overrides: BTreeMap<String, String>,
}

/// Effort estimate for the change between two revisions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortDeltaReport {
    /// Base revision identifier.
    pub base: String,
    /// Head revision identifier.
    pub head: String,
    pub files_changed: usize,
    pub modules_changed: usize,
    pub langs_changed: usize,
    pub hotspot_files_touched: usize,
    pub coupled_neighbors_touched: usize,
    /// Estimated breadth of impact of the change.
    pub blast_radius: f64,
    pub classification: EffortDeltaClassification,
    pub effort_pm_low: f64,
    pub effort_pm_est: f64,
    pub effort_pm_high: f64,
}

/// Severity bucket for a delta estimate.
/// Serialized in snake_case; keep [`fmt::Display`] below in sync.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortDeltaClassification {
    Low,
    Medium,
    High,
    Critical,
}
555
556impl fmt::Display for EffortDeltaClassification {
557    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
558        match self {
559            Self::Low => f.write_str("low"),
560            Self::Medium => f.write_str("medium"),
561            Self::High => f.write_str("high"),
562            Self::Critical => f.write_str("critical"),
563        }
564    }
565}
566
/// COCOMO cost-model output, including the coefficients used.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CocomoReport {
    /// COCOMO project mode (e.g. organic/semi-detached/embedded — confirm at producer).
    pub mode: String,
    /// Thousands of lines of code used as model input.
    pub kloc: f64,
    /// Estimated effort in person-months.
    pub effort_pm: f64,
    pub duration_months: f64,
    /// Implied average staffing (effort / duration).
    pub staff: f64,
    // Model coefficients; in basic COCOMO, effort = a * KLOC^b and
    // duration = c * effort^d — confirm against the producer.
    pub a: f64,
    pub b: f64,
    pub c: f64,
    pub d: f64,
}

/// Checksum over the receipt's entries, for tamper/drift detection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityReport {
    /// Hash algorithm name.
    pub algo: String,
    /// Hex/encoded digest value.
    pub hash: String,
    /// Number of entries covered by the hash.
    pub entries: usize,
}
586
// -------------
// Asset metrics
// -------------

/// Non-code (asset) file inventory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetReport {
    pub total_files: usize,
    pub total_bytes: u64,
    pub categories: Vec<AssetCategoryRow>,
    /// Largest asset files.
    pub top_files: Vec<AssetFileRow>,
}

/// Aggregate stats for one asset category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetCategoryRow {
    pub category: String,
    pub files: usize,
    pub bytes: u64,
    /// File extensions observed in this category.
    pub extensions: Vec<String>,
}

/// One asset file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetFileRow {
    pub path: String,
    pub bytes: u64,
    pub category: String,
    pub extension: String,
}

// -----------------
// Dependency metrics
// -----------------

/// Dependency counts discovered from lockfiles.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DependencyReport {
    /// Total dependencies across all lockfiles.
    pub total: usize,
    pub lockfiles: Vec<LockfileReport>,
}

/// Dependencies contributed by one lockfile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockfileReport {
    pub path: String,
    /// Lockfile ecosystem/kind (e.g. which package manager).
    pub kind: String,
    pub dependencies: usize,
}
631
632// ---------
633// Git report
634// ---------
635
636#[derive(Debug, Clone, Serialize, Deserialize)]
637pub struct GitReport {
638    pub commits_scanned: usize,
639    pub files_seen: usize,
640    pub hotspots: Vec<HotspotRow>,
641    pub bus_factor: Vec<BusFactorRow>,
642    pub freshness: FreshnessReport,
643    pub coupling: Vec<CouplingRow>,
644    /// Code age bucket distribution plus recent refresh trend.
645    #[serde(skip_serializing_if = "Option::is_none")]
646    pub age_distribution: Option<CodeAgeDistributionReport>,
647    /// Commit intent classification (feat/fix/refactor/etc.).
648    #[serde(default, skip_serializing_if = "Option::is_none")]
649    pub intent: Option<CommitIntentReport>,
650}
651
652#[derive(Debug, Clone, Serialize, Deserialize)]
653pub struct HotspotRow {
654    pub path: String,
655    pub commits: usize,
656    pub lines: usize,
657    pub score: usize,
658}
659
660#[derive(Debug, Clone, Serialize, Deserialize)]
661pub struct BusFactorRow {
662    pub module: String,
663    pub authors: usize,
664}
665
666#[derive(Debug, Clone, Serialize, Deserialize)]
667pub struct FreshnessReport {
668    pub threshold_days: usize,
669    pub stale_files: usize,
670    pub total_files: usize,
671    pub stale_pct: f64,
672    pub by_module: Vec<ModuleFreshnessRow>,
673}
674
675#[derive(Debug, Clone, Serialize, Deserialize)]
676pub struct ModuleFreshnessRow {
677    pub module: String,
678    pub avg_days: f64,
679    pub p90_days: f64,
680    pub stale_pct: f64,
681}
682
683#[derive(Debug, Clone, Serialize, Deserialize)]
684pub struct CouplingRow {
685    pub left: String,
686    pub right: String,
687    pub count: usize,
688    /// Jaccard similarity: count / (n_left + n_right - count). Range (0.0, 1.0].
689    #[serde(default, skip_serializing_if = "Option::is_none")]
690    pub jaccard: Option<f64>,
691    /// Lift: (count * N) / (n_left * n_right), where N = commits_considered.
692    #[serde(default, skip_serializing_if = "Option::is_none")]
693    pub lift: Option<f64>,
694    /// Commits touching left module (within commits_considered universe).
695    #[serde(default, skip_serializing_if = "Option::is_none")]
696    pub n_left: Option<usize>,
697    /// Commits touching right module (within commits_considered universe).
698    #[serde(default, skip_serializing_if = "Option::is_none")]
699    pub n_right: Option<usize>,
700}
701
702#[derive(Debug, Clone, Serialize, Deserialize)]
703pub struct CodeAgeDistributionReport {
704    pub buckets: Vec<CodeAgeBucket>,
705    pub recent_refreshes: usize,
706    pub prior_refreshes: usize,
707    pub refresh_trend: TrendClass,
708}
709
710#[derive(Debug, Clone, Serialize, Deserialize)]
711pub struct CodeAgeBucket {
712    pub label: String,
713    pub min_days: usize,
714    pub max_days: Option<usize>,
715    pub files: usize,
716    pub pct: f64,
717}
718
// --------------------------
// Commit intent classification
// --------------------------

// Re-export from tokmd-types (Tier 0) so existing consumers keep working.
pub use tokmd_types::CommitIntentKind;

/// Overall commit intent classification report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitIntentReport {
    /// Aggregate counts across all scanned commits.
    pub overall: CommitIntentCounts,
    /// Per-module intent breakdown.
    pub by_module: Vec<ModuleIntentRow>,
    /// Percentage of commits classified as "other" (unrecognized).
    pub unknown_pct: f64,
    /// Corrective ratio: (fix + revert) / total. Range [0.0, 1.0].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub corrective_ratio: Option<f64>,
}

/// Counts per intent kind. One field per [`CommitIntentKind`] variant,
/// plus a running `total`; see [`CommitIntentCounts::increment`].
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CommitIntentCounts {
    pub feat: usize,
    pub fix: usize,
    pub refactor: usize,
    pub docs: usize,
    pub test: usize,
    pub chore: usize,
    pub ci: usize,
    pub build: usize,
    pub perf: usize,
    pub style: usize,
    pub revert: usize,
    pub other: usize,
    /// Sum of all kind counters.
    pub total: usize,
}
757
758impl CommitIntentCounts {
759    /// Increment the count for a given intent kind.
760    pub fn increment(&mut self, kind: CommitIntentKind) {
761        match kind {
762            CommitIntentKind::Feat => self.feat += 1,
763            CommitIntentKind::Fix => self.fix += 1,
764            CommitIntentKind::Refactor => self.refactor += 1,
765            CommitIntentKind::Docs => self.docs += 1,
766            CommitIntentKind::Test => self.test += 1,
767            CommitIntentKind::Chore => self.chore += 1,
768            CommitIntentKind::Ci => self.ci += 1,
769            CommitIntentKind::Build => self.build += 1,
770            CommitIntentKind::Perf => self.perf += 1,
771            CommitIntentKind::Style => self.style += 1,
772            CommitIntentKind::Revert => self.revert += 1,
773            CommitIntentKind::Other => self.other += 1,
774        }
775        self.total += 1;
776    }
777}
778
/// Per-module intent breakdown row.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleIntentRow {
    pub module: String,
    pub counts: CommitIntentCounts,
}
785
// ----------------------------
// Near-duplicate detection
// ----------------------------

/// Scope for near-duplicate comparison partitioning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum NearDupScope {
    /// Compare files within the same module.
    #[default]
    Module,
    /// Compare files within the same language.
    Lang,
    /// Compare all files globally.
    Global,
}

/// Parameters for near-duplicate detection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupParams {
    pub scope: NearDupScope,
    /// Minimum similarity for a pair to be reported.
    pub threshold: f64,
    /// Cap on the number of files analyzed.
    pub max_files: usize,
    /// Maximum pairs to emit (truncation guardrail).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_pairs: Option<usize>,
    /// Effective per-file byte limit used for eligibility filtering.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_file_bytes: Option<u64>,
    /// How files were selected for analysis.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub selection_method: Option<String>,
    /// Algorithm constants used for fingerprinting.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub algorithm: Option<NearDupAlgorithm>,
    /// Glob patterns used to exclude files from near-dup analysis.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub exclude_patterns: Vec<String>,
}

/// Algorithm constants for near-duplicate fingerprinting.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NearDupAlgorithm {
    /// Number of tokens per k-gram shingle.
    pub k_gram_size: usize,
    /// Winnowing window size.
    pub window_size: usize,
    /// Skip fingerprints appearing in more than this many files.
    pub max_postings: usize,
}
836
/// Report of near-duplicate file pairs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDuplicateReport {
    /// The parameters the detection ran with.
    pub params: NearDupParams,
    /// Pairs at or above the similarity threshold.
    pub pairs: Vec<NearDupPairRow>,
    pub files_analyzed: usize,
    pub files_skipped: usize,
    /// Number of files eligible before the max_files cap.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub eligible_files: Option<usize>,
    /// Connected-component clusters derived from pairs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub clusters: Option<Vec<NearDupCluster>>,
    /// Whether the pairs list was truncated by `max_pairs`.
    /// Clusters are built from the complete pair set before truncation.
    #[serde(default)]
    pub truncated: bool,
    /// Number of files excluded by glob patterns.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excluded_by_pattern: Option<usize>,
    /// Runtime performance statistics.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stats: Option<NearDupStats>,
}

/// A connected component of near-duplicate files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupCluster {
    /// Files in this cluster, sorted alphabetically.
    pub files: Vec<String>,
    /// Maximum pairwise similarity in the cluster.
    pub max_similarity: f64,
    /// Most-connected file (tie-break alphabetical).
    pub representative: String,
    /// Number of pairs within this cluster.
    pub pair_count: usize,
}

/// Runtime statistics for near-duplicate detection.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NearDupStats {
    /// Time spent computing fingerprints (milliseconds).
    pub fingerprinting_ms: u64,
    /// Time spent computing pair similarities (milliseconds).
    pub pairing_ms: u64,
    /// Total bytes of source files processed.
    pub bytes_processed: u64,
}

/// A pair of near-duplicate files with similarity score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupPairRow {
    pub left: String,
    pub right: String,
    /// Similarity in [0.0, 1.0] (threshold..=1.0 for reported pairs).
    pub similarity: f64,
    /// Fingerprints common to both files.
    pub shared_fingerprints: usize,
    pub left_fingerprints: usize,
    pub right_fingerprints: usize,
}
896
897// -----------------
898// Import graph info
899// -----------------
900
901#[derive(Debug, Clone, Serialize, Deserialize)]
902pub struct ImportReport {
903    pub granularity: String,
904    pub edges: Vec<ImportEdge>,
905}
906
907#[derive(Debug, Clone, Serialize, Deserialize)]
908pub struct ImportEdge {
909    pub from: String,
910    pub to: String,
911    pub count: usize,
912}
913
914// -------------------
915// Duplication metrics
916// -------------------
917
918#[derive(Debug, Clone, Serialize, Deserialize)]
919pub struct DuplicateReport {
920    pub groups: Vec<DuplicateGroup>,
921    pub wasted_bytes: u64,
922    pub strategy: String,
923    /// Duplication density summary overall and by module.
924    #[serde(skip_serializing_if = "Option::is_none")]
925    pub density: Option<DuplicationDensityReport>,
926    /// Near-duplicate file pairs detected by fingerprint similarity.
927    #[serde(default, skip_serializing_if = "Option::is_none")]
928    pub near: Option<NearDuplicateReport>,
929}
930
931#[derive(Debug, Clone, Serialize, Deserialize)]
932pub struct DuplicateGroup {
933    pub hash: String,
934    pub bytes: u64,
935    pub files: Vec<String>,
936}
937
938#[derive(Debug, Clone, Serialize, Deserialize)]
939pub struct DuplicationDensityReport {
940    pub duplicate_groups: usize,
941    pub duplicate_files: usize,
942    pub duplicated_bytes: u64,
943    pub wasted_bytes: u64,
944    pub wasted_pct_of_codebase: f64,
945    pub by_module: Vec<ModuleDuplicationDensityRow>,
946}
947
948#[derive(Debug, Clone, Serialize, Deserialize)]
949pub struct ModuleDuplicationDensityRow {
950    pub module: String,
951    pub duplicate_files: usize,
952    pub wasted_files: usize,
953    pub duplicated_bytes: u64,
954    pub wasted_bytes: u64,
955    pub module_bytes: u64,
956    pub density: f64,
957}
958
// -------------------
// Halstead metrics
// -------------------

/// Halstead software science metrics computed from operator/operand token counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HalsteadMetrics {
    /// Number of distinct operators (n1).
    pub distinct_operators: usize,
    /// Number of distinct operands (n2).
    pub distinct_operands: usize,
    /// Total number of operators (N1).
    pub total_operators: usize,
    /// Total number of operands (N2).
    pub total_operands: usize,
    /// Program vocabulary: n1 + n2.
    pub vocabulary: usize,
    /// Program length: N1 + N2.
    pub length: usize,
    /// Volume: N * log2(n).
    pub volume: f64,
    /// Difficulty: (n1/2) * (N2/n2).
    pub difficulty: f64,
    /// Effort: D * V.
    pub effort: f64,
    /// Estimated programming time in seconds: E / 18.
    pub time_seconds: f64,
    /// Estimated number of bugs: V / 3000.
    pub estimated_bugs: f64,
}

// -------------------
// Maintainability Index
// -------------------

/// Composite maintainability index based on the SEI formula.
///
/// MI = 171 - 5.2 * ln(V) - 0.23 * CC - 16.2 * ln(LOC)
///
/// When Halstead volume is unavailable, a simplified formula is used.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaintainabilityIndex {
    /// Maintainability index score (0-171 scale, higher is better).
    pub score: f64,
    /// Average cyclomatic complexity used in calculation.
    pub avg_cyclomatic: f64,
    /// Average lines of code per file used in calculation.
    pub avg_loc: f64,
    /// Average Halstead volume (if Halstead metrics were computed).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_halstead_volume: Option<f64>,
    /// Letter grade: "A" (>=85), "B" (65-84), "C" (<65).
    pub grade: String,
}

/// Complexity-to-size ratio heuristic for technical debt estimation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnicalDebtRatio {
    /// Complexity points per KLOC (higher means denser debt).
    pub ratio: f64,
    /// Aggregate complexity points used in the ratio.
    pub complexity_points: usize,
    /// KLOC basis used in the ratio denominator.
    pub code_kloc: f64,
    /// Bucketed interpretation of debt ratio.
    pub level: TechnicalDebtLevel,
}

/// Severity bucket for the technical-debt ratio.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TechnicalDebtLevel {
    Low,
    Moderate,
    High,
    Critical,
}
1035
1036// -------------------
1037// Complexity metrics
1038// -------------------
1039
/// Aggregate complexity metrics for the scanned codebase, plus per-file rows.
///
/// `Option` fields are omitted from serialized output when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityReport {
    /// Total number of functions analyzed.
    pub total_functions: usize,
    /// Average function length in lines.
    pub avg_function_length: f64,
    /// Longest function found, in lines.
    pub max_function_length: usize,
    /// Average cyclomatic complexity across files.
    pub avg_cyclomatic: f64,
    /// Maximum cyclomatic complexity found in any file.
    pub max_cyclomatic: usize,
    /// Average cognitive complexity across files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_cognitive: Option<f64>,
    /// Maximum cognitive complexity found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_cognitive: Option<usize>,
    /// Average nesting depth across files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_nesting_depth: Option<f64>,
    /// Maximum nesting depth found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting_depth: Option<usize>,
    /// Number of high-risk files (see [`ComplexityRisk`]).
    pub high_risk_files: usize,
    /// Histogram of cyclomatic complexity distribution.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub histogram: Option<ComplexityHistogram>,
    /// Halstead software science metrics (requires `halstead` feature).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub halstead: Option<HalsteadMetrics>,
    /// Composite maintainability index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub maintainability_index: Option<MaintainabilityIndex>,
    /// Complexity-to-size debt heuristic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub technical_debt: Option<TechnicalDebtRatio>,
    /// Per-file complexity rows.
    pub files: Vec<FileComplexity>,
}
1074
/// Per-file complexity metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileComplexity {
    /// File path.
    pub path: String,
    /// Module this file belongs to.
    pub module: String,
    /// Number of functions detected in this file.
    pub function_count: usize,
    /// Longest function in this file, in lines.
    pub max_function_length: usize,
    /// Cyclomatic complexity for this file.
    pub cyclomatic_complexity: usize,
    /// Cognitive complexity for this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cognitive_complexity: Option<usize>,
    /// Maximum nesting depth in this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting: Option<usize>,
    /// Bucketed risk classification for this file.
    pub risk_level: ComplexityRisk,
    /// Function-level complexity details (only when --detail-functions is used).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub functions: Option<Vec<FunctionComplexityDetail>>,
}
1093
/// Function-level complexity details.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionComplexityDetail {
    /// Function name.
    pub name: String,
    /// Start line (1-indexed).
    pub line_start: usize,
    /// End line (1-indexed).
    // NOTE(review): presumably inclusive, matching `length` — confirm in producer.
    pub line_end: usize,
    /// Function length in lines.
    pub length: usize,
    /// Cyclomatic complexity.
    pub cyclomatic: usize,
    /// Cognitive complexity (if computed).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cognitive: Option<usize>,
    /// Maximum nesting depth within the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting: Option<usize>,
    /// Number of parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub param_count: Option<usize>,
}
1117
/// Bucketed risk classification for a file's complexity, ordered from
/// least to most severe. Serialized in snake_case (e.g. `"moderate"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComplexityRisk {
    Low,
    Moderate,
    High,
    Critical,
}
1126
1127/// Histogram of cyclomatic complexity distribution across files.
1128///
1129/// Used to visualize the distribution of complexity values in a codebase.
1130/// Default bucket boundaries are 0-4, 5-9, 10-14, 15-19, 20-24, 25-29, 30+.
1131#[derive(Debug, Clone, Serialize, Deserialize)]
1132pub struct ComplexityHistogram {
1133    /// Bucket boundaries (e.g., [0, 5, 10, 15, 20, 25, 30]).
1134    pub buckets: Vec<u32>,
1135    /// Count of files in each bucket.
1136    pub counts: Vec<u32>,
1137    /// Total files analyzed.
1138    pub total: u32,
1139}
1140
1141impl ComplexityHistogram {
1142    /// Generate an ASCII bar chart visualization of the histogram.
1143    ///
1144    /// # Arguments
1145    /// * `width` - Maximum width of the bars in characters
1146    ///
1147    /// # Returns
1148    /// A multi-line string with labeled bars showing distribution
1149    pub fn to_ascii(&self, width: usize) -> String {
1150        use std::fmt::Write;
1151        let max_count = self.counts.iter().max().copied().unwrap_or(1).max(1);
1152        let mut output = String::with_capacity(self.counts.len() * (width + 20));
1153        for (i, count) in self.counts.iter().enumerate() {
1154            if i < self.buckets.len() - 1 {
1155                let _ = write!(
1156                    output,
1157                    "{:>2}-{:<2} |",
1158                    self.buckets[i],
1159                    self.buckets[i + 1] - 1
1160                );
1161            } else {
1162                let _ = write!(
1163                    output,
1164                    "{:>2}+  |",
1165                    self.buckets.get(i).copied().unwrap_or(30)
1166                );
1167            }
1168
1169            let bar_len = (*count as f64 / max_count as f64 * width as f64) as usize;
1170            for _ in 0..bar_len {
1171                output.push('\u{2588}');
1172            }
1173            let _ = writeln!(output, " {}", count);
1174        }
1175        output
1176    }
1177}
1178
1179// -------------------
1180// Baseline/Ratchet types
1181// -------------------
1182
/// Schema version for baseline files.
/// v1: Initial baseline format with complexity and determinism tracking.
///
/// Bump this when the persisted baseline layout changes incompatibly.
pub const BASELINE_VERSION: u32 = 1;
1186
/// Complexity baseline for tracking trends over time.
///
/// Used by the ratchet system to enforce that complexity metrics
/// do not regress across commits. The baseline captures a snapshot
/// of complexity at a known-good state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityBaseline {
    /// Schema version for forward compatibility (see [`BASELINE_VERSION`]).
    pub baseline_version: u32,
    /// ISO 8601 timestamp when this baseline was generated.
    /// Empty when built via [`ComplexityBaseline::new`].
    pub generated_at: String,
    /// Git commit SHA at which this baseline was captured, if available.
    pub commit: Option<String>,
    /// Aggregate complexity metrics.
    pub metrics: BaselineMetrics,
    /// Per-file baseline entries for granular tracking.
    pub files: Vec<FileBaselineEntry>,
    /// Complexity section mirroring analysis receipt structure for ratchet compatibility.
    ///
    /// This allows using the same JSON pointers (e.g., `/complexity/avg_cyclomatic`)
    /// when comparing baselines against current analysis receipts.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub complexity: Option<BaselineComplexitySection>,
    /// Determinism baseline for reproducibility verification.
    ///
    /// Present when the baseline was generated with `--determinism`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub determinism: Option<DeterminismBaseline>,
}
1216
1217impl ComplexityBaseline {
1218    /// Creates a new empty baseline with default values.
1219    pub fn new() -> Self {
1220        Self {
1221            baseline_version: BASELINE_VERSION,
1222            generated_at: String::new(),
1223            commit: None,
1224            metrics: BaselineMetrics::default(),
1225            files: Vec::new(),
1226            complexity: None,
1227            determinism: None,
1228        }
1229    }
1230
1231    /// Creates a baseline from an analysis receipt.
1232    ///
1233    /// Extracts complexity information from the receipt's complexity report
1234    /// and derived totals to build a baseline snapshot.
1235    pub fn from_analysis(receipt: &AnalysisReceipt) -> Self {
1236        let generated_at = chrono_timestamp_iso8601(receipt.generated_at_ms);
1237
1238        let (metrics, files, complexity) = if let Some(ref complexity_report) = receipt.complexity {
1239            let total_code_lines = receipt
1240                .derived
1241                .as_ref()
1242                .map(|d| d.totals.code as u64)
1243                .unwrap_or(0);
1244            let total_files = receipt
1245                .derived
1246                .as_ref()
1247                .map(|d| d.totals.files as u64)
1248                .unwrap_or(0);
1249
1250            let metrics = BaselineMetrics {
1251                total_code_lines,
1252                total_files,
1253                avg_cyclomatic: complexity_report.avg_cyclomatic,
1254                max_cyclomatic: complexity_report.max_cyclomatic as u32,
1255                avg_cognitive: complexity_report.avg_cognitive.unwrap_or(0.0),
1256                max_cognitive: complexity_report.max_cognitive.unwrap_or(0) as u32,
1257                avg_nesting_depth: complexity_report.avg_nesting_depth.unwrap_or(0.0),
1258                max_nesting_depth: complexity_report.max_nesting_depth.unwrap_or(0) as u32,
1259                function_count: complexity_report.total_functions as u64,
1260                avg_function_length: complexity_report.avg_function_length,
1261            };
1262
1263            let files: Vec<FileBaselineEntry> = complexity_report
1264                .files
1265                .iter()
1266                .map(|f| FileBaselineEntry {
1267                    path: f.path.clone(),
1268                    code_lines: 0, // Not available in FileComplexity
1269                    cyclomatic: f.cyclomatic_complexity as u32,
1270                    cognitive: f.cognitive_complexity.unwrap_or(0) as u32,
1271                    max_nesting: f.max_nesting.unwrap_or(0) as u32,
1272                    function_count: f.function_count as u32,
1273                    content_hash: None,
1274                })
1275                .collect();
1276
1277            // Build complexity section mirroring analysis receipt structure
1278            let complexity_section = BaselineComplexitySection {
1279                total_functions: complexity_report.total_functions,
1280                avg_function_length: complexity_report.avg_function_length,
1281                max_function_length: complexity_report.max_function_length,
1282                avg_cyclomatic: complexity_report.avg_cyclomatic,
1283                max_cyclomatic: complexity_report.max_cyclomatic,
1284                avg_cognitive: complexity_report.avg_cognitive,
1285                max_cognitive: complexity_report.max_cognitive,
1286                avg_nesting_depth: complexity_report.avg_nesting_depth,
1287                max_nesting_depth: complexity_report.max_nesting_depth,
1288                high_risk_files: complexity_report.high_risk_files,
1289            };
1290
1291            (metrics, files, Some(complexity_section))
1292        } else {
1293            (BaselineMetrics::default(), Vec::new(), None)
1294        };
1295
1296        Self {
1297            baseline_version: BASELINE_VERSION,
1298            generated_at,
1299            commit: None,
1300            metrics,
1301            files,
1302            complexity,
1303            determinism: None,
1304        }
1305    }
1306}
1307
impl Default for ComplexityBaseline {
    /// Delegates to [`ComplexityBaseline::new`]: an empty baseline at the
    /// current [`BASELINE_VERSION`].
    fn default() -> Self {
        Self::new()
    }
}
1313
/// Complexity section mirroring analysis receipt structure for ratchet compatibility.
///
/// This provides the same field names as `ComplexityReport` so that JSON pointers
/// like `/complexity/avg_cyclomatic` work consistently across baselines and receipts.
/// `Option` fields are omitted from serialized output when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineComplexitySection {
    /// Total number of functions analyzed.
    pub total_functions: usize,
    /// Average function length in lines.
    pub avg_function_length: f64,
    /// Maximum function length found.
    pub max_function_length: usize,
    /// Average cyclomatic complexity across all files.
    pub avg_cyclomatic: f64,
    /// Maximum cyclomatic complexity found in any file.
    pub max_cyclomatic: usize,
    /// Average cognitive complexity across all files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_cognitive: Option<f64>,
    /// Maximum cognitive complexity found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_cognitive: Option<usize>,
    /// Average nesting depth across all files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_nesting_depth: Option<f64>,
    /// Maximum nesting depth found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting_depth: Option<usize>,
    /// Number of high-risk files.
    pub high_risk_files: usize,
}
1345
1346/// Aggregate baseline metrics for the entire codebase.
1347#[derive(Debug, Clone, Serialize, Deserialize)]
1348pub struct BaselineMetrics {
1349    /// Total lines of code across all files.
1350    pub total_code_lines: u64,
1351    /// Total number of source files.
1352    pub total_files: u64,
1353    /// Average cyclomatic complexity across all functions.
1354    pub avg_cyclomatic: f64,
1355    /// Maximum cyclomatic complexity found in any function.
1356    pub max_cyclomatic: u32,
1357    /// Average cognitive complexity across all functions.
1358    pub avg_cognitive: f64,
1359    /// Maximum cognitive complexity found in any function.
1360    pub max_cognitive: u32,
1361    /// Average nesting depth across all functions.
1362    pub avg_nesting_depth: f64,
1363    /// Maximum nesting depth found in any function.
1364    pub max_nesting_depth: u32,
1365    /// Total number of functions analyzed.
1366    pub function_count: u64,
1367    /// Average function length in lines.
1368    pub avg_function_length: f64,
1369}
1370
1371impl Default for BaselineMetrics {
1372    fn default() -> Self {
1373        Self {
1374            total_code_lines: 0,
1375            total_files: 0,
1376            avg_cyclomatic: 0.0,
1377            max_cyclomatic: 0,
1378            avg_cognitive: 0.0,
1379            max_cognitive: 0,
1380            avg_nesting_depth: 0.0,
1381            max_nesting_depth: 0,
1382            function_count: 0,
1383            avg_function_length: 0.0,
1384        }
1385    }
1386}
1387
/// Per-file baseline entry for granular complexity tracking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileBaselineEntry {
    /// Normalized file path (forward slashes).
    pub path: String,
    /// Lines of code in this file.
    /// Zero when built via `ComplexityBaseline::from_analysis` (per-file
    /// LOC is not carried on `FileComplexity`).
    pub code_lines: u64,
    /// Cyclomatic complexity for this file.
    pub cyclomatic: u32,
    /// Cognitive complexity for this file.
    pub cognitive: u32,
    /// Maximum nesting depth in this file.
    pub max_nesting: u32,
    /// Number of functions in this file.
    pub function_count: u32,
    /// BLAKE3 hash of file content for change detection.
    pub content_hash: Option<String>,
}
1406
/// Build determinism baseline for reproducibility verification.
///
/// Tracks hashes of build artifacts and source inputs to detect
/// non-deterministic builds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeterminismBaseline {
    /// Schema version for forward compatibility.
    pub baseline_version: u32,
    /// ISO 8601 timestamp when this baseline was generated.
    pub generated_at: String,
    /// Hash of the final build artifact.
    pub build_hash: String,
    /// Hash of all source files combined.
    pub source_hash: String,
    /// Hash of Cargo.lock if present (Rust projects).
    pub cargo_lock_hash: Option<String>,
}
1424
/// Helper to convert milliseconds timestamp to RFC 3339 / ISO 8601 string.
///
/// Pure-stdlib implementation: the epoch offset is split into a civil
/// calendar date using Howard Hinnant's `civil_from_days` algorithm and
/// formatted as `YYYY-MM-DDTHH:MM:SS.sssZ` (always UTC).
fn chrono_timestamp_iso8601(ms: u128) -> String {
    const SECS_PER_MIN: i64 = 60;
    const SECS_PER_HOUR: i64 = 3600;
    const SECS_PER_DAY: i64 = 86400;

    let total_secs = (ms / 1000) as i64;
    let millis = (ms % 1000) as u32;

    // Floor-divide into whole days since 1970-01-01 and seconds within the
    // day; Euclidean division keeps the remainder non-negative even for
    // pre-epoch instants.
    let epoch_days = total_secs.div_euclid(SECS_PER_DAY);
    let secs_of_day = total_secs.rem_euclid(SECS_PER_DAY);

    // Clock time within the day.
    let hour = secs_of_day / SECS_PER_HOUR;
    let minute = (secs_of_day % SECS_PER_HOUR) / SECS_PER_MIN;
    let second = secs_of_day % SECS_PER_MIN;

    // Civil date from the day count (Hinnant's date algorithms): shift the
    // epoch to 0000-03-01 so leap days fall at the end of the cycle year.
    let z = epoch_days + 719_468;
    let era = z.div_euclid(146_097);
    let doe = (z - era * 146_097) as u32; // day of era [0, 146096]
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365; // year of era
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of (Mar-based) year
    let mp = (5 * doy + 2) / 153; // month pseudo-index [0, 11]
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    let mut year = yoe as i64 + era * 400;
    if month <= 2 {
        year += 1; // Jan/Feb belong to the next civil year
    }

    // RFC 3339: YYYY-MM-DDTHH:MM:SS.sssZ
    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z",
        year, month, day, hour, minute, second, millis
    )
}
1471
1472// -------------------
1473// API Surface metrics
1474// -------------------
1475
/// Public API surface analysis report.
///
/// Computes public export ratios per language and module by scanning
/// source files for exported symbols (pub fn, export function, etc.).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiSurfaceReport {
    /// Total items discovered across all languages.
    pub total_items: usize,
    /// Items with public visibility.
    pub public_items: usize,
    /// Items with internal/private visibility.
    pub internal_items: usize,
    /// Ratio of public to total items (0.0-1.0).
    pub public_ratio: f64,
    /// Ratio of documented public items (0.0-1.0).
    pub documented_ratio: f64,
    /// Per-language breakdown, keyed by language name (sorted by key).
    pub by_language: BTreeMap<String, LangApiSurface>,
    /// Per-module breakdown.
    pub by_module: Vec<ModuleApiRow>,
    /// Top exporters (files with most public items).
    pub top_exporters: Vec<ApiExportItem>,
}
1499
/// Per-language API surface breakdown.
///
/// Mirrors the top-level counters of [`ApiSurfaceReport`] for one language.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangApiSurface {
    /// Total items in this language.
    pub total_items: usize,
    /// Public items in this language.
    pub public_items: usize,
    /// Internal items in this language.
    pub internal_items: usize,
    /// Public ratio for this language.
    pub public_ratio: f64,
}
1512
/// Per-module API surface row.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleApiRow {
    /// Module path.
    pub module: String,
    /// Total items in this module.
    pub total_items: usize,
    /// Public items in this module.
    pub public_items: usize,
    /// Public ratio for this module (0.0-1.0).
    pub public_ratio: f64,
}
1525
/// A file that exports many public items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiExportItem {
    /// File path.
    pub path: String,
    /// Language of the file.
    pub lang: String,
    /// Number of public items exported.
    pub public_items: usize,
    /// Total items in the file.
    pub total_items: usize,
}
1538
1539// ---------
1540// Fun stuff
1541// ---------
1542
/// Container for lighthearted, non-essential report extras.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunReport {
    /// Optional eco-label result, when computed.
    pub eco_label: Option<EcoLabel>,
}
1547
/// A playful "eco label" grade for the scanned codebase.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EcoLabel {
    /// Numeric score backing the label (e.g. 85.0).
    pub score: f64,
    /// Grade label (e.g. "A").
    pub label: String,
    /// Byte count the label was computed from.
    // NOTE(review): presumably total scanned bytes — confirm against the producer.
    pub bytes: u64,
    /// Free-form explanatory notes.
    pub notes: String,
}
1555
1556// =========================
1557// Ecosystem Envelope (v1) — re-exported from tokmd-envelope
1558// =========================
1559
1560/// Schema identifier for ecosystem envelope format.
1561/// v1: Initial envelope specification for multi-sensor integration.
1562pub const ENVELOPE_SCHEMA: &str = tokmd_envelope::SENSOR_REPORT_SCHEMA;
1563
1564// Re-export all envelope types with backwards-compatible aliases
1565pub use tokmd_envelope::Artifact;
1566pub use tokmd_envelope::Finding;
1567pub use tokmd_envelope::FindingLocation;
1568pub use tokmd_envelope::FindingSeverity;
1569pub use tokmd_envelope::GateItem;
1570pub use tokmd_envelope::GateResults as GatesEnvelope;
1571pub use tokmd_envelope::SensorReport as Envelope;
1572pub use tokmd_envelope::ToolMeta as EnvelopeTool;
1573pub use tokmd_envelope::Verdict;
1574
1575// Also re-export the canonical names for new code
1576pub use tokmd_envelope::GateResults;
1577pub use tokmd_envelope::SensorReport;
1578pub use tokmd_envelope::ToolMeta;
1579
#[cfg(test)]
mod tests {
    //! Unit tests for the analysis-types contract: schema-version
    //! constants, `Default` impls, serde round-trips and naming
    //! conventions, `Display` strings, histogram rendering, and the
    //! timestamp formatter.
    use super::*;

    // ── Schema version constant ───────────────────────────────────────
    #[test]
    fn analysis_schema_version_constant() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(ANALYSIS_SCHEMA_VERSION, 9);
        Ok(())
    }

    #[test]
    fn baseline_version_constant() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(BASELINE_VERSION, 1);
        Ok(())
    }

    // ── Default impls ─────────────────────────────────────────────────
    #[test]
    fn complexity_baseline_default() -> Result<(), Box<dyn std::error::Error>> {
        let b = ComplexityBaseline::default();
        assert_eq!(b.baseline_version, BASELINE_VERSION);
        assert!(b.generated_at.is_empty());
        assert!(b.commit.is_none());
        assert!(b.files.is_empty());
        assert!(b.complexity.is_none());
        assert!(b.determinism.is_none());
        Ok(())
    }

    #[test]
    fn complexity_baseline_new_equals_default() -> Result<(), Box<dyn std::error::Error>> {
        let a = ComplexityBaseline::new();
        let b = ComplexityBaseline::default();
        assert_eq!(a.baseline_version, b.baseline_version);
        assert_eq!(a.generated_at, b.generated_at);
        assert_eq!(a.files.len(), b.files.len());
        Ok(())
    }

    #[test]
    fn baseline_metrics_default_is_zeroed() -> Result<(), Box<dyn std::error::Error>> {
        let m = BaselineMetrics::default();
        assert_eq!(m.total_code_lines, 0);
        assert_eq!(m.total_files, 0);
        assert_eq!(m.avg_cyclomatic, 0.0);
        assert_eq!(m.max_cyclomatic, 0);
        assert_eq!(m.avg_cognitive, 0.0);
        assert_eq!(m.function_count, 0);
        Ok(())
    }

    // ── Enum serde roundtrips ─────────────────────────────────────────
    #[test]
    fn entropy_class_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [
            EntropyClass::Low,
            EntropyClass::Normal,
            EntropyClass::Suspicious,
            EntropyClass::High,
        ] {
            let json = serde_json::to_string(&variant)?;
            let back: EntropyClass = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    #[test]
    fn trend_class_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [TrendClass::Rising, TrendClass::Flat, TrendClass::Falling] {
            let json = serde_json::to_string(&variant)?;
            let back: TrendClass = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    #[test]
    fn license_source_kind_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [LicenseSourceKind::Metadata, LicenseSourceKind::Text] {
            let json = serde_json::to_string(&variant)?;
            let back: LicenseSourceKind = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    #[test]
    fn complexity_risk_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [
            ComplexityRisk::Low,
            ComplexityRisk::Moderate,
            ComplexityRisk::High,
            ComplexityRisk::Critical,
        ] {
            let json = serde_json::to_string(&variant)?;
            let back: ComplexityRisk = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    #[test]
    fn technical_debt_level_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        for variant in [
            TechnicalDebtLevel::Low,
            TechnicalDebtLevel::Moderate,
            TechnicalDebtLevel::High,
            TechnicalDebtLevel::Critical,
        ] {
            let json = serde_json::to_string(&variant)?;
            let back: TechnicalDebtLevel = serde_json::from_str(&json)?;
            assert_eq!(back, variant);
        }
        Ok(())
    }

    // ── Enum naming conventions ───────────────────────────────────────
    #[test]
    fn entropy_class_uses_snake_case() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(
            serde_json::to_string(&EntropyClass::Suspicious)?,
            "\"suspicious\""
        );
        Ok(())
    }

    #[test]
    fn trend_class_uses_snake_case() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(serde_json::to_string(&TrendClass::Rising)?, "\"rising\"");
        Ok(())
    }

    #[test]
    fn effort_model_display_strings_are_stable() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(EffortModel::Cocomo81Basic.to_string(), "cocomo81-basic");
        assert_eq!(EffortModel::Cocomo2Early.to_string(), "cocomo2-early");
        assert_eq!(EffortModel::Ensemble.to_string(), "ensemble");
        Ok(())
    }

    #[test]
    fn effort_confidence_level_display_strings_are_stable() -> Result<(), Box<dyn std::error::Error>>
    {
        assert_eq!(EffortConfidenceLevel::Low.to_string(), "low");
        assert_eq!(EffortConfidenceLevel::Medium.to_string(), "medium");
        assert_eq!(EffortConfidenceLevel::High.to_string(), "high");
        Ok(())
    }

    #[test]
    fn effort_delta_classification_display_strings_are_stable()
    -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(EffortDeltaClassification::Low.to_string(), "low");
        assert_eq!(EffortDeltaClassification::Medium.to_string(), "medium");
        assert_eq!(EffortDeltaClassification::High.to_string(), "high");
        assert_eq!(EffortDeltaClassification::Critical.to_string(), "critical");
        Ok(())
    }

    #[test]
    fn complexity_risk_uses_snake_case() -> Result<(), Box<dyn std::error::Error>> {
        assert_eq!(
            serde_json::to_string(&ComplexityRisk::Moderate)?,
            "\"moderate\""
        );
        Ok(())
    }

    // ── Struct serde roundtrips ───────────────────────────────────────
    #[test]
    fn eco_label_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        let label = EcoLabel {
            score: 85.0,
            label: "A".into(),
            bytes: 1000,
            notes: "Good".into(),
        };
        let json = serde_json::to_string(&label)?;
        let back: EcoLabel = serde_json::from_str(&json)?;
        assert_eq!(back.label, "A");
        assert_eq!(back.bytes, 1000);
        Ok(())
    }

    #[test]
    fn topic_term_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        let term = TopicTerm {
            term: "async".into(),
            score: 0.95,
            tf: 10,
            df: 3,
        };
        let json = serde_json::to_string(&term)?;
        let back: TopicTerm = serde_json::from_str(&json)?;
        assert_eq!(back.term, "async");
        assert_eq!(back.tf, 10);
        Ok(())
    }

    #[test]
    fn complexity_baseline_serde_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        let b = ComplexityBaseline {
            baseline_version: BASELINE_VERSION,
            generated_at: "2025-01-01T00:00:00.000Z".into(),
            commit: Some("abc123".into()),
            metrics: BaselineMetrics::default(),
            files: vec![FileBaselineEntry {
                path: "src/lib.rs".into(),
                code_lines: 100,
                cyclomatic: 5,
                cognitive: 3,
                max_nesting: 2,
                function_count: 10,
                content_hash: Some("deadbeef".into()),
            }],
            complexity: None,
            determinism: None,
        };
        let json = serde_json::to_string(&b)?;
        let back: ComplexityBaseline = serde_json::from_str(&json)?;
        assert_eq!(back.baseline_version, BASELINE_VERSION);
        assert_eq!(back.commit.as_deref(), Some("abc123"));
        assert_eq!(back.files.len(), 1);
        assert_eq!(back.files[0].path, "src/lib.rs");
        Ok(())
    }

    // ── ComplexityHistogram ───────────────────────────────────────────
    #[test]
    fn complexity_histogram_to_ascii_basic() -> Result<(), Box<dyn std::error::Error>> {
        let h = ComplexityHistogram {
            buckets: vec![0, 5, 10],
            counts: vec![10, 5, 2],
            total: 17,
        };
        let ascii = h.to_ascii(20);
        assert!(!ascii.is_empty());
        // Should have 3 lines (one per bucket)
        assert_eq!(ascii.lines().count(), 3);
        Ok(())
    }

    #[test]
    fn complexity_histogram_to_ascii_empty_counts() -> Result<(), Box<dyn std::error::Error>> {
        let h = ComplexityHistogram {
            buckets: vec![0, 5],
            counts: vec![0, 0],
            total: 0,
        };
        let ascii = h.to_ascii(20);
        assert!(!ascii.is_empty());
        Ok(())
    }

    // ── chrono_timestamp_iso8601 ──────────────────────────────────────
    #[test]
    fn timestamp_epoch() -> Result<(), Box<dyn std::error::Error>> {
        let result = chrono_timestamp_iso8601(0);
        assert_eq!(result, "1970-01-01T00:00:00.000Z");
        Ok(())
    }

    #[test]
    fn timestamp_with_millis() -> Result<(), Box<dyn std::error::Error>> {
        // 2025-01-01T00:00:00.500Z = 1735689600500 ms
        let result = chrono_timestamp_iso8601(1735689600500);
        assert!(result.ends_with(".500Z"));
        assert!(result.starts_with("2025-01-01"));
        Ok(())
    }
}