Skip to main content

tokmd_analysis_types/
lib.rs

1//! # tokmd-analysis-types
2//!
3//! **Tier 0 (Analysis Contract)**
4//!
5//! Pure data structures for analysis receipts. No I/O or business logic.
6//!
7//! ## What belongs here
8//! * Analysis-specific receipt types and findings
9//! * Schema definitions for analysis outputs
10//! * Type enums for classification results
11//!
12//! ## What does NOT belong here
13//! * Analysis computation logic (use tokmd-analysis)
14//! * Formatting logic (use tokmd-analysis-format)
15//! * File I/O operations
16
17pub mod findings;
18
19use std::collections::BTreeMap;
20use std::fmt;
21
22use serde::{Deserialize, Serialize};
23use tokmd_types::{ScanStatus, ToolInfo};
24
/// Schema version for analysis receipts.
///
/// Version history:
/// * v7: Added coupling normalization (Jaccard/Lift), commit intent classification, near-duplicate detection.
/// * v8: Near-dup clusters, selection metadata, max_pairs guardrail, runtime stats.
/// * v9: Added effort estimation report.
pub const ANALYSIS_SCHEMA_VERSION: u32 = 9;
30
/// Top-level receipt for one analysis run.
///
/// Every section beyond the header fields is optional: a section is `None`
/// when the corresponding analysis was not produced for this run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisReceipt {
    /// Receipt schema version; see [`ANALYSIS_SCHEMA_VERSION`].
    pub schema_version: u32,
    /// Generation timestamp in milliseconds (presumably Unix epoch — confirm with writer).
    pub generated_at_ms: u128,
    /// Metadata about the tool that produced the receipt.
    pub tool: ToolInfo,
    /// Analysis mode label.
    pub mode: String,
    /// Overall scan status.
    pub status: ScanStatus,
    /// Non-fatal problems encountered during the run.
    pub warnings: Vec<String>,
    /// Provenance of the analyzed data.
    pub source: AnalysisSource,
    /// Echo of the arguments the analysis ran with.
    pub args: AnalysisArgsMeta,
    pub archetype: Option<Archetype>,
    pub topics: Option<TopicClouds>,
    pub entropy: Option<EntropyReport>,
    pub predictive_churn: Option<PredictiveChurnReport>,
    pub corporate_fingerprint: Option<CorporateFingerprint>,
    pub license: Option<LicenseReport>,
    pub derived: Option<DerivedReport>,
    pub assets: Option<AssetReport>,
    pub deps: Option<DependencyReport>,
    pub git: Option<GitReport>,
    pub imports: Option<ImportReport>,
    pub dup: Option<DuplicateReport>,
    pub complexity: Option<ComplexityReport>,
    pub api_surface: Option<ApiSurfaceReport>,
    pub effort: Option<EffortEstimateReport>,
    pub fun: Option<FunReport>,
}
58
/// Provenance of the data an analysis run consumed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisSource {
    /// Input paths/specs handed to the analyzer.
    pub inputs: Vec<String>,
    /// Path of the export file that was read, if any.
    pub export_path: Option<String>,
    /// Path of the base receipt that was read, if any.
    pub base_receipt_path: Option<String>,
    /// Schema version of the consumed export, if known.
    pub export_schema_version: Option<u32>,
    /// Generation timestamp (ms) of the consumed export, if known.
    pub export_generated_at_ms: Option<u128>,
    /// Signature of the base receipt, if known.
    pub base_signature: Option<String>,
    /// Root prefixes used when mapping files to modules.
    pub module_roots: Vec<String>,
    /// Path depth at which files are grouped into modules.
    pub module_depth: usize,
    /// Child-handling mode label (free-form string).
    pub children: String,
}

/// Echo of the analysis arguments, recorded for reproducibility.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisArgsMeta {
    /// Named preset used for the run.
    pub preset: String,
    /// Output format name.
    pub format: String,
    /// Context window size in tokens, if supplied.
    pub window_tokens: Option<usize>,
    /// Whether git analysis was requested, if specified.
    pub git: Option<bool>,
    /// Guardrail: maximum files to consider, if set.
    pub max_files: Option<usize>,
    /// Guardrail: maximum total bytes, if set.
    pub max_bytes: Option<u64>,
    /// Guardrail: maximum commits to scan, if set.
    pub max_commits: Option<usize>,
    /// Guardrail: maximum files per commit, if set.
    pub max_commit_files: Option<usize>,
    /// Guardrail: maximum bytes per file, if set.
    pub max_file_bytes: Option<u64>,
    /// Granularity label for import-graph edges.
    pub import_granularity: String,
}
85
86// ---------------
87// Project context
88// ---------------
89
/// Detected project archetype with the observations that support it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Archetype {
    /// Archetype label.
    pub kind: String,
    /// Observations supporting the classification.
    pub evidence: Vec<String>,
}

// -----------------
// Semantic topics
// -----------------

/// Topic-term clouds, per module and overall.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicClouds {
    /// Top terms keyed by module path.
    pub per_module: BTreeMap<String, Vec<TopicTerm>>,
    /// Top terms across the whole corpus.
    pub overall: Vec<TopicTerm>,
}

/// One weighted topic term.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicTerm {
    pub term: String,
    /// Relevance score (higher is more salient).
    pub score: f64,
    /// Term frequency.
    pub tf: u32,
    /// Document frequency (number of documents containing the term).
    pub df: u32,
}

// -----------------
// Entropy profiling
// -----------------

/// Files flagged by byte-entropy profiling.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyReport {
    pub suspects: Vec<EntropyFinding>,
}

/// Entropy measurement for a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyFinding {
    pub path: String,
    pub module: String,
    /// Measured entropy of the sampled bytes, in bits per byte (8.0 is the
    /// maximum for byte data).
    pub entropy_bits_per_byte: f32,
    /// Number of bytes sampled for the measurement.
    pub sample_bytes: u32,
    pub class: EntropyClass,
}

/// Coarse entropy classification bucket.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntropyClass {
    Low,
    Normal,
    Suspicious,
    High,
}
140
141// -----------------
142// Predictive churn
143// -----------------
144
/// Per-module churn trend projections.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PredictiveChurnReport {
    pub per_module: BTreeMap<String, ChurnTrend>,
}

/// Trend fitted to a module's churn history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChurnTrend {
    /// Slope of the fitted trend line.
    pub slope: f64,
    /// Goodness of fit (coefficient of determination).
    pub r2: f64,
    /// Net change over the recent window; may be negative.
    pub recent_change: i64,
    pub classification: TrendClass,
}

/// Direction of a fitted trend.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrendClass {
    Rising,
    Flat,
    Falling,
}

// ---------------------
// Corporate fingerprint
// ---------------------

/// Distribution of commits across contributor domains
/// (presumably author email domains — confirm with the collector).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorporateFingerprint {
    pub domains: Vec<DomainStat>,
}

/// Commit share for one domain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DomainStat {
    pub domain: String,
    /// Commits attributed to this domain.
    pub commits: u32,
    /// Share of attributed commits, as a percentage.
    pub pct: f32,
}

// -------------
// License radar
// -------------

/// License detection findings plus the resolved effective license.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseReport {
    pub findings: Vec<LicenseFinding>,
    /// SPDX id of the effective license, when one could be resolved.
    pub effective: Option<String>,
}

/// One detected license.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseFinding {
    /// SPDX identifier of the detected license.
    pub spdx: String,
    /// Detection confidence (presumably in [0.0, 1.0] — confirm with detector).
    pub confidence: f32,
    /// File the license was detected in.
    pub source_path: String,
    pub source_kind: LicenseSourceKind,
}

/// Where a license finding came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LicenseSourceKind {
    /// Declared in package metadata.
    Metadata,
    /// Matched from license text.
    Text,
}
206
207// -----------------
208// Derived analytics
209// -----------------
210
/// Bundle of analytics derived from the base line/byte/token counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivedReport {
    /// Grand totals the other sections are computed from.
    pub totals: DerivedTotals,
    /// Comment-to-code ratios.
    pub doc_density: RatioReport,
    /// Blank-line ratios.
    pub whitespace: RatioReport,
    /// Verbosity rates (see [`RateReport`]).
    pub verbosity: RateReport,
    /// Largest file overall and per lang/module.
    pub max_file: MaxFileReport,
    /// Language purity per module.
    pub lang_purity: LangPurityReport,
    /// Nesting depth summary.
    pub nesting: NestingReport,
    /// Test vs. production code density.
    pub test_density: TestDensityReport,
    /// Infrastructure vs. logic split.
    pub boilerplate: BoilerplateReport,
    /// Language diversity summary.
    pub polyglot: PolyglotReport,
    /// File-size distribution statistics.
    pub distribution: DistributionReport,
    /// File-size histogram buckets.
    pub histogram: Vec<HistogramBucket>,
    /// Worst-offender file lists.
    pub top: TopOffenders,
    /// Rendered tree view, when one was produced.
    pub tree: Option<String>,
    /// Estimated reading time.
    pub reading_time: ReadingTimeReport,
    /// Context-window fit, when a window size was supplied.
    pub context_window: Option<ContextWindowReport>,
    /// Classic COCOMO estimate, when computed.
    pub cocomo: Option<CocomoReport>,
    /// TODO-marker summary, when computed.
    pub todo: Option<TodoReport>,
    /// Integrity hash over the analyzed entries.
    pub integrity: IntegrityReport,
}
233
/// Grand totals across all analyzed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivedTotals {
    pub files: usize,
    pub code: usize,
    pub comments: usize,
    pub blanks: usize,
    pub lines: usize,
    pub bytes: usize,
    pub tokens: usize,
}

/// Ratio breakdown: total plus per-language and per-module rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RatioReport {
    pub total: RatioRow,
    pub by_lang: Vec<RatioRow>,
    pub by_module: Vec<RatioRow>,
}

/// One ratio entry; `ratio` is `numerator / denominator`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RatioRow {
    /// Group key (language or module name; "total" context omits it).
    pub key: String,
    pub numerator: usize,
    pub denominator: usize,
    pub ratio: f64,
}

/// Rate breakdown: total plus per-language and per-module rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateReport {
    pub total: RateRow,
    pub by_lang: Vec<RateRow>,
    pub by_module: Vec<RateRow>,
}

/// One rate entry; `rate` is `numerator / denominator`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateRow {
    /// Group key (language or module name).
    pub key: String,
    pub numerator: usize,
    pub denominator: usize,
    pub rate: f64,
}

/// Largest-file summary: overall winner plus per-lang and per-module winners.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaxFileReport {
    pub overall: FileStatRow,
    pub by_lang: Vec<MaxFileRow>,
    pub by_module: Vec<MaxFileRow>,
}

/// Largest file within one group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaxFileRow {
    /// Group key (language or module name).
    pub key: String,
    pub file: FileStatRow,
}

/// Per-file statistics row used across derived reports.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileStatRow {
    pub path: String,
    pub module: String,
    pub lang: String,
    pub code: usize,
    pub comments: usize,
    pub blanks: usize,
    pub lines: usize,
    pub bytes: usize,
    pub tokens: usize,
    /// Documentation percentage, when computable (None when denominator is 0, presumably).
    pub doc_pct: Option<f64>,
    /// Average bytes per line, when computable.
    pub bytes_per_line: Option<f64>,
    /// Path depth of the file.
    pub depth: usize,
}
303
/// Per-module language purity rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangPurityReport {
    pub rows: Vec<LangPurityRow>,
}

/// Language mix within one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangPurityRow {
    pub module: String,
    /// Number of distinct languages in the module.
    pub lang_count: usize,
    /// Language with the most lines.
    pub dominant_lang: String,
    /// Lines in the dominant language.
    pub dominant_lines: usize,
    /// Dominant language's share of module lines, as a percentage.
    pub dominant_pct: f64,
}

/// Nesting depth summary: global max/avg plus per-module rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NestingReport {
    pub max: usize,
    pub avg: f64,
    pub by_module: Vec<NestingRow>,
}

/// Nesting depth for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NestingRow {
    /// Module key.
    pub key: String,
    pub max: usize,
    pub avg: f64,
}

/// Test vs. production code volume; `ratio` relates test to prod lines.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestDensityReport {
    pub test_lines: usize,
    pub prod_lines: usize,
    pub test_files: usize,
    pub prod_files: usize,
    pub ratio: f64,
}

/// Infrastructure ("boilerplate") vs. logic code split.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoilerplateReport {
    pub infra_lines: usize,
    pub logic_lines: usize,
    pub ratio: f64,
    /// Languages counted as infrastructure.
    pub infra_langs: Vec<String>,
}

/// Language diversity across the codebase.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolyglotReport {
    /// Number of distinct languages.
    pub lang_count: usize,
    /// Entropy of the language distribution (higher = more mixed).
    pub entropy: f64,
    pub dominant_lang: String,
    pub dominant_lines: usize,
    /// Dominant language's share of lines, as a percentage.
    pub dominant_pct: f64,
}

/// Summary statistics of the file-size distribution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionReport {
    pub count: usize,
    pub min: usize,
    pub max: usize,
    pub mean: f64,
    pub median: f64,
    /// 90th percentile.
    pub p90: f64,
    /// 99th percentile.
    pub p99: f64,
    /// Gini coefficient of the distribution (0 = equal, 1 = concentrated).
    pub gini: f64,
}
369
/// One histogram bucket; `max` is `None` for the open-ended top bucket.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistogramBucket {
    pub label: String,
    pub min: usize,
    pub max: Option<usize>,
    /// Files falling in this bucket.
    pub files: usize,
    /// Share of all files, as a percentage.
    pub pct: f64,
}

/// Worst-offender file lists, each sorted by its own criterion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopOffenders {
    pub largest_lines: Vec<FileStatRow>,
    pub largest_tokens: Vec<FileStatRow>,
    pub largest_bytes: Vec<FileStatRow>,
    pub least_documented: Vec<FileStatRow>,
    pub most_dense: Vec<FileStatRow>,
}

/// Estimated time to read the codebase at a fixed lines-per-minute rate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingTimeReport {
    pub minutes: f64,
    /// Reading speed assumed for the estimate.
    pub lines_per_minute: usize,
    /// Line count the estimate was based on.
    pub basis_lines: usize,
}

/// TODO-marker summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoReport {
    pub total: usize,
    /// Markers per thousand lines of code.
    pub density_per_kloc: f64,
    pub tags: Vec<TodoTagRow>,
}

/// Count for one marker tag (e.g. TODO, FIXME).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoTagRow {
    pub tag: String,
    pub count: usize,
}

/// Whether the codebase fits in a given model context window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextWindowReport {
    pub window_tokens: usize,
    pub total_tokens: usize,
    /// `total_tokens` as a percentage of `window_tokens`.
    pub pct: f64,
    pub fits: bool,
}
415
/// Effort estimation report (schema v9).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortEstimateReport {
    /// Estimation model used.
    pub model: EffortModel,
    /// Size inputs the estimate was computed from.
    pub size_basis: EffortSizeBasis,
    /// Point and range estimates.
    pub results: EffortResults,
    /// Confidence in the estimate, with reasons.
    pub confidence: EffortConfidence,
    /// Factors that raised or lowered the estimate.
    pub drivers: Vec<EffortDriver>,
    /// Assumptions and overrides in effect.
    pub assumptions: EffortAssumptions,
    /// Incremental (base..head) estimate, when a delta was requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub delta: Option<EffortDeltaReport>,
}

/// Size basis: total lines split into authored/generated/vendored.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortSizeBasis {
    pub total_lines: usize,
    pub authored_lines: usize,
    pub generated_lines: usize,
    pub vendored_lines: usize,
    /// Total size in thousands of lines.
    pub kloc_total: f64,
    /// Authored size in thousands of lines.
    pub kloc_authored: f64,
    /// Generated lines as a percentage of total.
    pub generated_pct: f64,
    /// Vendored lines as a percentage of total.
    pub vendored_pct: f64,
    /// Confidence in the authored/generated/vendored classification.
    pub classification_confidence: EffortConfidenceLevel,
    pub warnings: Vec<String>,
    /// Size breakdown by classification tag.
    pub by_tag: Vec<EffortTagSizeRow>,
}

/// Size contribution of one classification tag.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortTagSizeRow {
    pub tag: String,
    pub lines: usize,
    pub authored_lines: usize,
    pub pct_of_total: f64,
}

/// Estimation model selector; serialized in kebab-case
/// (matching the [`fmt::Display`] strings).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum EffortModel {
    /// COCOMO 81, basic form.
    Cocomo81Basic,
    /// COCOMO II, early-design form.
    Cocomo2Early,
    /// Combination of models.
    Ensemble,
}
458
459impl fmt::Display for EffortModel {
460    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
461        match self {
462            Self::Cocomo81Basic => f.write_str("cocomo81-basic"),
463            Self::Cocomo2Early => f.write_str("cocomo2-early"),
464            Self::Ensemble => f.write_str("ensemble"),
465        }
466    }
467}
468
/// Point and range estimates.
///
/// Field suffixes follow a low / p50 / p80 scheme (presumably percentile
/// estimates, p50 = median — confirm with the estimator implementation).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortResults {
    /// Effort in person-months, p50.
    pub effort_pm_p50: f64,
    /// Schedule in calendar months, p50.
    pub schedule_months_p50: f64,
    /// Average staff count, p50.
    pub staff_p50: f64,
    pub effort_pm_low: f64,
    pub effort_pm_p80: f64,
    pub schedule_months_low: f64,
    pub schedule_months_p80: f64,
    pub staff_low: f64,
    pub staff_p80: f64,
}

/// Confidence in the effort estimate, with supporting reasons.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortConfidence {
    pub level: EffortConfidenceLevel,
    pub reasons: Vec<String>,
    /// Share of the estimate's inputs backed by real data, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data_coverage_pct: Option<f64>,
}

/// Three-level confidence bucket.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortConfidenceLevel {
    Low,
    Medium,
    High,
}
497
498impl fmt::Display for EffortConfidenceLevel {
499    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
500        match self {
501            Self::Low => f.write_str("low"),
502            Self::Medium => f.write_str("medium"),
503            Self::High => f.write_str("high"),
504        }
505    }
506}
507
/// A factor that influenced the estimate, with its direction and evidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortDriver {
    /// Machine-readable driver id.
    pub key: String,
    /// Human-readable label.
    pub label: String,
    /// Relative strength of the driver's influence.
    pub weight: f64,
    pub direction: EffortDriverDirection,
    /// Evidence the driver was derived from.
    pub evidence: String,
}

/// Whether a driver pushed the estimate up, down, or neither.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortDriverDirection {
    Raises,
    Lowers,
    Neutral,
}

/// Free-form assumption notes plus explicit key/value overrides.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortAssumptions {
    pub notes: Vec<String>,
    pub overrides: BTreeMap<String, String>,
}

/// Incremental effort estimate for a base..head change set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffortDeltaReport {
    /// Base revision identifier.
    pub base: String,
    /// Head revision identifier.
    pub head: String,
    pub files_changed: usize,
    pub modules_changed: usize,
    pub langs_changed: usize,
    /// Changed files that are also churn hotspots.
    pub hotspot_files_touched: usize,
    /// Changed files whose coupled neighbors were also touched.
    pub coupled_neighbors_touched: usize,
    /// Composite impact score (derivation defined by the analyzer — confirm there).
    pub blast_radius: f64,
    pub classification: EffortDeltaClassification,
    pub effort_pm_low: f64,
    pub effort_pm_est: f64,
    pub effort_pm_high: f64,
}

/// Severity bucket for a change-set estimate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EffortDeltaClassification {
    Low,
    Medium,
    High,
    Critical,
}
555
556impl fmt::Display for EffortDeltaClassification {
557    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
558        match self {
559            Self::Low => f.write_str("low"),
560            Self::Medium => f.write_str("medium"),
561            Self::High => f.write_str("high"),
562            Self::Critical => f.write_str("critical"),
563        }
564    }
565}
566
/// Classic COCOMO estimate.
///
/// `a`/`b`/`c`/`d` are the model coefficients for the chosen `mode`
/// (standard COCOMO uses effort = a·KLOC^b, schedule = c·effort^d —
/// confirm against the computing code).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CocomoReport {
    /// COCOMO mode label (e.g. organic/semi-detached/embedded — confirm).
    pub mode: String,
    /// Size input in thousands of lines of code.
    pub kloc: f64,
    /// Estimated effort in person-months.
    pub effort_pm: f64,
    /// Estimated schedule in months.
    pub duration_months: f64,
    /// Implied average staffing.
    pub staff: f64,
    pub a: f64,
    pub b: f64,
    pub c: f64,
    pub d: f64,
}

/// Integrity hash over the analyzed entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityReport {
    /// Hash algorithm name.
    pub algo: String,
    /// Hex digest (format defined by the producer).
    pub hash: String,
    /// Number of entries hashed.
    pub entries: usize,
}
586
587// -------------
588// Asset metrics
589// -------------
590
/// Non-code asset summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetReport {
    pub total_files: usize,
    pub total_bytes: u64,
    /// Per-category aggregates.
    pub categories: Vec<AssetCategoryRow>,
    /// Largest individual asset files.
    pub top_files: Vec<AssetFileRow>,
}

/// Aggregate stats for one asset category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetCategoryRow {
    pub category: String,
    pub files: usize,
    pub bytes: u64,
    /// File extensions observed in this category.
    pub extensions: Vec<String>,
}

/// One asset file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetFileRow {
    pub path: String,
    pub bytes: u64,
    pub category: String,
    pub extension: String,
}

// -----------------
// Dependency metrics
// -----------------

/// Dependency counts aggregated across discovered lockfiles.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DependencyReport {
    /// Total dependencies across all lockfiles.
    pub total: usize,
    pub lockfiles: Vec<LockfileReport>,
}

/// One discovered lockfile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockfileReport {
    pub path: String,
    /// Lockfile kind label (e.g. its ecosystem).
    pub kind: String,
    /// Dependencies counted in this lockfile.
    pub dependencies: usize,
}
631
632// ---------
633// Git report
634// ---------
635
636#[derive(Debug, Clone, Serialize, Deserialize)]
637pub struct GitReport {
638    pub commits_scanned: usize,
639    pub files_seen: usize,
640    pub hotspots: Vec<HotspotRow>,
641    pub bus_factor: Vec<BusFactorRow>,
642    pub freshness: FreshnessReport,
643    pub coupling: Vec<CouplingRow>,
644    /// Code age bucket distribution plus recent refresh trend.
645    #[serde(skip_serializing_if = "Option::is_none")]
646    pub age_distribution: Option<CodeAgeDistributionReport>,
647    /// Commit intent classification (feat/fix/refactor/etc.).
648    #[serde(default, skip_serializing_if = "Option::is_none")]
649    pub intent: Option<CommitIntentReport>,
650}
651
/// Churn hotspot: a file ranked by commit activity and size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HotspotRow {
    pub path: String,
    /// Commits touching this file.
    pub commits: usize,
    /// Current line count of the file.
    pub lines: usize,
    /// Hotspot score (derivation defined by the analyzer — confirm there).
    pub score: usize,
}

/// Author concentration for one module (low = knowledge risk).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BusFactorRow {
    pub module: String,
    /// Distinct authors who touched the module.
    pub authors: usize,
}

/// Staleness summary relative to `threshold_days`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FreshnessReport {
    /// Age (days) beyond which a file counts as stale.
    pub threshold_days: usize,
    pub stale_files: usize,
    pub total_files: usize,
    /// Stale files as a percentage of total.
    pub stale_pct: f64,
    pub by_module: Vec<ModuleFreshnessRow>,
}

/// Freshness stats for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleFreshnessRow {
    pub module: String,
    /// Mean file age in days.
    pub avg_days: f64,
    /// 90th-percentile file age in days.
    pub p90_days: f64,
    pub stale_pct: f64,
}

/// Co-change coupling between two modules.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CouplingRow {
    pub left: String,
    pub right: String,
    /// Commits touching both modules.
    pub count: usize,
    /// Jaccard similarity: count / (n_left + n_right - count). Range (0.0, 1.0].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub jaccard: Option<f64>,
    /// Lift: (count * N) / (n_left * n_right), where N = commits_considered.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lift: Option<f64>,
    /// Commits touching left module (within commits_considered universe).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub n_left: Option<usize>,
    /// Commits touching right module (within commits_considered universe).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub n_right: Option<usize>,
}
701
/// Code-age bucket distribution with a recent-vs-prior refresh comparison.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeAgeDistributionReport {
    pub buckets: Vec<CodeAgeBucket>,
    /// Refresh count in the recent window.
    pub recent_refreshes: usize,
    /// Refresh count in the prior window.
    pub prior_refreshes: usize,
    /// Trend of recent vs. prior refreshes.
    pub refresh_trend: TrendClass,
}

/// One age bucket; `max_days` is `None` for the open-ended oldest bucket.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeAgeBucket {
    pub label: String,
    pub min_days: usize,
    pub max_days: Option<usize>,
    /// Files whose age falls in this bucket.
    pub files: usize,
    /// Share of all files, as a percentage.
    pub pct: f64,
}
718
719// --------------------------
720// Commit intent classification
721// --------------------------
722
// Re-export from tokmd-types (Tier 0) so existing consumers keep working.
pub use tokmd_types::CommitIntentKind;

/// Overall commit intent classification report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitIntentReport {
    /// Aggregate counts across all scanned commits.
    pub overall: CommitIntentCounts,
    /// Per-module intent breakdown.
    pub by_module: Vec<ModuleIntentRow>,
    /// Percentage of commits classified as "other" (unrecognized).
    pub unknown_pct: f64,
    /// Corrective ratio: (fix + revert) / total. Range [0.0, 1.0].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub corrective_ratio: Option<f64>,
}

/// Counts per intent kind.
///
/// `total` is the sum of all per-kind counters; [`CommitIntentCounts::increment`]
/// keeps it in sync.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CommitIntentCounts {
    pub feat: usize,
    pub fix: usize,
    pub refactor: usize,
    pub docs: usize,
    pub test: usize,
    pub chore: usize,
    pub ci: usize,
    pub build: usize,
    pub perf: usize,
    pub style: usize,
    pub revert: usize,
    pub other: usize,
    pub total: usize,
}
757
758impl CommitIntentCounts {
759    /// Increment the count for a given intent kind.
760    pub fn increment(&mut self, kind: CommitIntentKind) {
761        match kind {
762            CommitIntentKind::Feat => self.feat += 1,
763            CommitIntentKind::Fix => self.fix += 1,
764            CommitIntentKind::Refactor => self.refactor += 1,
765            CommitIntentKind::Docs => self.docs += 1,
766            CommitIntentKind::Test => self.test += 1,
767            CommitIntentKind::Chore => self.chore += 1,
768            CommitIntentKind::Ci => self.ci += 1,
769            CommitIntentKind::Build => self.build += 1,
770            CommitIntentKind::Perf => self.perf += 1,
771            CommitIntentKind::Style => self.style += 1,
772            CommitIntentKind::Revert => self.revert += 1,
773            CommitIntentKind::Other => self.other += 1,
774        }
775        self.total += 1;
776    }
777}
778
/// Per-module intent breakdown row.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleIntentRow {
    /// Module key.
    pub module: String,
    /// Intent counts for commits touching this module.
    pub counts: CommitIntentCounts,
}
785
786// ----------------------------
787// Near-duplicate detection
788// ----------------------------
789
/// Scope for near-duplicate comparison partitioning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum NearDupScope {
    /// Compare files within the same module.
    #[default]
    Module,
    /// Compare files within the same language.
    Lang,
    /// Compare all files globally.
    Global,
}

/// Parameters for near-duplicate detection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupParams {
    /// Partitioning scope used for comparisons.
    pub scope: NearDupScope,
    /// Minimum similarity for a pair to be reported (presumably in [0,1] — confirm).
    pub threshold: f64,
    /// Cap on files admitted to the analysis.
    pub max_files: usize,
    /// Maximum pairs to emit (truncation guardrail).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_pairs: Option<usize>,
    /// Effective per-file byte limit used for eligibility filtering.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_file_bytes: Option<u64>,
    /// How files were selected for analysis.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub selection_method: Option<String>,
    /// Algorithm constants used for fingerprinting.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub algorithm: Option<NearDupAlgorithm>,
    /// Glob patterns used to exclude files from near-dup analysis.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub exclude_patterns: Vec<String>,
}

/// Algorithm constants for near-duplicate fingerprinting.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NearDupAlgorithm {
    /// Number of tokens per k-gram shingle.
    pub k_gram_size: usize,
    /// Winnowing window size.
    pub window_size: usize,
    /// Skip fingerprints appearing in more than this many files.
    pub max_postings: usize,
}
836
/// Report of near-duplicate file pairs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDuplicateReport {
    /// Parameters the detection ran with.
    pub params: NearDupParams,
    /// Reported pairs (possibly truncated; see `truncated`).
    pub pairs: Vec<NearDupPairRow>,
    /// Files actually fingerprinted and compared.
    pub files_analyzed: usize,
    /// Files skipped before analysis.
    pub files_skipped: usize,
    /// Number of files eligible before the max_files cap.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub eligible_files: Option<usize>,
    /// Connected-component clusters derived from pairs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub clusters: Option<Vec<NearDupCluster>>,
    /// Whether the pairs list was truncated by `max_pairs`.
    /// Clusters are built from the complete pair set before truncation.
    #[serde(default)]
    pub truncated: bool,
    /// Number of files excluded by glob patterns.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excluded_by_pattern: Option<usize>,
    /// Runtime performance statistics.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stats: Option<NearDupStats>,
}

/// A connected component of near-duplicate files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupCluster {
    /// Files in this cluster, sorted alphabetically.
    pub files: Vec<String>,
    /// Maximum pairwise similarity in the cluster.
    pub max_similarity: f64,
    /// Most-connected file (tie-break alphabetical).
    pub representative: String,
    /// Number of pairs within this cluster.
    pub pair_count: usize,
}

/// Runtime statistics for near-duplicate detection.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NearDupStats {
    /// Time spent computing fingerprints (milliseconds).
    pub fingerprinting_ms: u64,
    /// Time spent computing pair similarities (milliseconds).
    pub pairing_ms: u64,
    /// Total bytes of source files processed.
    pub bytes_processed: u64,
}

/// A pair of near-duplicate files with similarity score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearDupPairRow {
    pub left: String,
    pub right: String,
    /// Similarity between the two files' fingerprint sets.
    pub similarity: f64,
    /// Fingerprints common to both files.
    pub shared_fingerprints: usize,
    /// Total fingerprints in the left file.
    pub left_fingerprints: usize,
    /// Total fingerprints in the right file.
    pub right_fingerprints: usize,
}
896
897// -----------------
898// Import graph info
899// -----------------
900
/// Import graph edges at the configured granularity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportReport {
    /// Granularity label the edges were aggregated at.
    pub granularity: String,
    pub edges: Vec<ImportEdge>,
}

/// One aggregated import edge.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportEdge {
    /// Importing unit.
    pub from: String,
    /// Imported unit.
    pub to: String,
    /// Number of imports aggregated into this edge.
    pub count: usize,
}
913
914// -------------------
915// Duplication metrics
916// -------------------
917
918#[derive(Debug, Clone, Serialize, Deserialize)]
919pub struct DuplicateReport {
920    pub groups: Vec<DuplicateGroup>,
921    pub wasted_bytes: u64,
922    pub strategy: String,
923    /// Duplication density summary overall and by module.
924    #[serde(skip_serializing_if = "Option::is_none")]
925    pub density: Option<DuplicationDensityReport>,
926    /// Near-duplicate file pairs detected by fingerprint similarity.
927    #[serde(default, skip_serializing_if = "Option::is_none")]
928    pub near: Option<NearDuplicateReport>,
929}
930
931#[derive(Debug, Clone, Serialize, Deserialize)]
932pub struct DuplicateGroup {
933    pub hash: String,
934    pub bytes: u64,
935    pub files: Vec<String>,
936}
937
938#[derive(Debug, Clone, Serialize, Deserialize)]
939pub struct DuplicationDensityReport {
940    pub duplicate_groups: usize,
941    pub duplicate_files: usize,
942    pub duplicated_bytes: u64,
943    pub wasted_bytes: u64,
944    pub wasted_pct_of_codebase: f64,
945    pub by_module: Vec<ModuleDuplicationDensityRow>,
946}
947
948#[derive(Debug, Clone, Serialize, Deserialize)]
949pub struct ModuleDuplicationDensityRow {
950    pub module: String,
951    pub duplicate_files: usize,
952    pub wasted_files: usize,
953    pub duplicated_bytes: u64,
954    pub wasted_bytes: u64,
955    pub module_bytes: u64,
956    pub density: f64,
957}
958
959// -------------------
960// Halstead metrics
961// -------------------
962
/// Halstead software science metrics computed from operator/operand token counts.
///
/// In the per-field formulas below: n = `vocabulary`, N = `length`,
/// V = `volume`, D = `difficulty`, E = `effort`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HalsteadMetrics {
    /// Number of distinct operators (n1).
    pub distinct_operators: usize,
    /// Number of distinct operands (n2).
    pub distinct_operands: usize,
    /// Total number of operators (N1).
    pub total_operators: usize,
    /// Total number of operands (N2).
    pub total_operands: usize,
    /// Program vocabulary: n1 + n2.
    pub vocabulary: usize,
    /// Program length: N1 + N2.
    pub length: usize,
    /// Volume: N * log2(n).
    pub volume: f64,
    /// Difficulty: (n1/2) * (N2/n2).
    pub difficulty: f64,
    /// Effort: D * V.
    pub effort: f64,
    /// Estimated programming time in seconds: E / 18.
    pub time_seconds: f64,
    /// Estimated number of bugs: V / 3000.
    pub estimated_bugs: f64,
}
989
990// -------------------
991// Maintainability Index
992// -------------------
993
/// Composite maintainability index based on the SEI formula.
///
/// MI = 171 - 5.2 * ln(V) - 0.23 * CC - 16.2 * ln(LOC)
///
/// When Halstead volume is unavailable, a simplified formula is used.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaintainabilityIndex {
    /// Maintainability index score (0-171 scale, higher is better).
    pub score: f64,
    /// Average cyclomatic complexity used in calculation.
    pub avg_cyclomatic: f64,
    /// Average lines of code per file used in calculation.
    pub avg_loc: f64,
    /// Average Halstead volume (if Halstead metrics were computed).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_halstead_volume: Option<f64>,
    /// Letter grade: "A" (>=85), "B" (65-84), "C" (<65).
    pub grade: String,
}

/// Complexity-to-size ratio heuristic for technical debt estimation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnicalDebtRatio {
    /// Complexity points per KLOC (higher means denser debt).
    pub ratio: f64,
    /// Aggregate complexity points used in the ratio.
    pub complexity_points: usize,
    /// KLOC basis used in the ratio denominator.
    pub code_kloc: f64,
    /// Bucketed interpretation of debt ratio.
    pub level: TechnicalDebtLevel,
}

/// Debt severity bucket derived from [`TechnicalDebtRatio::ratio`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TechnicalDebtLevel {
    Low,
    Moderate,
    High,
    Critical,
}
1035
1036// -------------------
1037// Complexity metrics
1038// -------------------
1039
/// Aggregate complexity metrics for the analyzed codebase, plus per-file
/// rows. Optional fields are omitted from serialized output when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityReport {
    /// Total number of functions analyzed.
    pub total_functions: usize,
    /// Average function length in lines.
    pub avg_function_length: f64,
    /// Maximum function length found.
    pub max_function_length: usize,
    /// Average cyclomatic complexity across files.
    pub avg_cyclomatic: f64,
    /// Maximum cyclomatic complexity found.
    pub max_cyclomatic: usize,
    /// Average cognitive complexity across files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_cognitive: Option<f64>,
    /// Maximum cognitive complexity found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_cognitive: Option<usize>,
    /// Average nesting depth across files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_nesting_depth: Option<f64>,
    /// Maximum nesting depth found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting_depth: Option<usize>,
    /// Number of files classified as high risk.
    pub high_risk_files: usize,
    /// Histogram of cyclomatic complexity distribution.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub histogram: Option<ComplexityHistogram>,
    /// Halstead software science metrics (requires `halstead` feature).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub halstead: Option<HalsteadMetrics>,
    /// Composite maintainability index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub maintainability_index: Option<MaintainabilityIndex>,
    /// Complexity-to-size debt heuristic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub technical_debt: Option<TechnicalDebtRatio>,
    /// Per-file complexity rows.
    pub files: Vec<FileComplexity>,
}
1074
/// Per-file complexity metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileComplexity {
    /// File path.
    pub path: String,
    /// Module the file belongs to.
    pub module: String,
    /// Number of functions in this file.
    pub function_count: usize,
    /// Longest function length in this file, in lines.
    pub max_function_length: usize,
    /// Cyclomatic complexity for this file.
    pub cyclomatic_complexity: usize,
    /// Cognitive complexity for this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cognitive_complexity: Option<usize>,
    /// Maximum nesting depth in this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting: Option<usize>,
    /// Bucketed risk classification for this file.
    pub risk_level: ComplexityRisk,
    /// Function-level complexity details (only when --detail-functions is used).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub functions: Option<Vec<FunctionComplexityDetail>>,
}
1093
/// Function-level complexity details.
///
/// Attached to [`FileComplexity::functions`] only when the user requests
/// per-function detail (`--detail-functions`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionComplexityDetail {
    /// Function name.
    pub name: String,
    /// Start line (1-indexed).
    pub line_start: usize,
    /// End line (1-indexed).
    pub line_end: usize,
    /// Function length in lines.
    pub length: usize,
    /// Cyclomatic complexity.
    pub cyclomatic: usize,
    /// Cognitive complexity (if computed).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cognitive: Option<usize>,
    /// Maximum nesting depth within the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting: Option<usize>,
    /// Number of parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub param_count: Option<usize>,
}
1117
/// Bucketed complexity risk classification for a file, ordered from least
/// to most severe. Serialized in snake_case (e.g. `"moderate"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComplexityRisk {
    /// Least risky bucket.
    Low,
    /// Moderately risky.
    Moderate,
    /// Highly risky.
    High,
    /// Most risky bucket.
    Critical,
}
1126
/// Histogram of cyclomatic complexity distribution across files.
///
/// Used to visualize the distribution of complexity values in a codebase.
/// Default bucket boundaries are 0-4, 5-9, 10-14, 15-19, 20-24, 25-29, 30+.
///
/// `counts` is indexed parallel to `buckets`: `counts[i]` is the number of
/// files whose complexity falls in the bucket starting at `buckets[i]`; the
/// final bucket is open-ended.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityHistogram {
    /// Bucket boundaries (e.g., [0, 5, 10, 15, 20, 25, 30]).
    pub buckets: Vec<u32>,
    /// Count of files in each bucket.
    pub counts: Vec<u32>,
    /// Total files analyzed.
    pub total: u32,
}
1140
1141impl ComplexityHistogram {
1142    /// Generate an ASCII bar chart visualization of the histogram.
1143    ///
1144    /// # Arguments
1145    /// * `width` - Maximum width of the bars in characters
1146    ///
1147    /// # Returns
1148    /// A multi-line string with labeled bars showing distribution
1149    pub fn to_ascii(&self, width: usize) -> String {
1150        let max_count = self.counts.iter().max().copied().unwrap_or(1).max(1);
1151        let mut output = String::new();
1152        for (i, count) in self.counts.iter().enumerate() {
1153            let label = if i < self.buckets.len() - 1 {
1154                format!("{:>2}-{:<2}", self.buckets[i], self.buckets[i + 1] - 1)
1155            } else {
1156                format!("{:>2}+ ", self.buckets.get(i).copied().unwrap_or(30))
1157            };
1158            let bar_len = (*count as f64 / max_count as f64 * width as f64) as usize;
1159            let bar = "\u{2588}".repeat(bar_len);
1160            output.push_str(&format!("{} |{} {}\n", label, bar, count));
1161        }
1162        output
1163    }
1164}
1165
1166// -------------------
1167// Baseline/Ratchet types
1168// -------------------
1169
/// Schema version for baseline files.
/// v1: Initial baseline format with complexity and determinism tracking.
///
/// Stored in [`ComplexityBaseline::baseline_version`].
pub const BASELINE_VERSION: u32 = 1;
1173
/// Complexity baseline for tracking trends over time.
///
/// Used by the ratchet system to enforce that complexity metrics
/// do not regress across commits. The baseline captures a snapshot
/// of complexity at a known-good state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityBaseline {
    /// Schema version for forward compatibility (see [`BASELINE_VERSION`]).
    pub baseline_version: u32,
    /// ISO 8601 timestamp when this baseline was generated.
    /// Empty for a freshly constructed baseline (see [`ComplexityBaseline::new`]).
    pub generated_at: String,
    /// Git commit SHA at which this baseline was captured, if available.
    pub commit: Option<String>,
    /// Aggregate complexity metrics.
    pub metrics: BaselineMetrics,
    /// Per-file baseline entries for granular tracking.
    pub files: Vec<FileBaselineEntry>,
    /// Complexity section mirroring analysis receipt structure for ratchet compatibility.
    ///
    /// This allows using the same JSON pointers (e.g., `/complexity/avg_cyclomatic`)
    /// when comparing baselines against current analysis receipts.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub complexity: Option<BaselineComplexitySection>,
    /// Determinism baseline for reproducibility verification.
    ///
    /// Present when the baseline was generated with `--determinism`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub determinism: Option<DeterminismBaseline>,
}
1203
1204impl ComplexityBaseline {
1205    /// Creates a new empty baseline with default values.
1206    pub fn new() -> Self {
1207        Self {
1208            baseline_version: BASELINE_VERSION,
1209            generated_at: String::new(),
1210            commit: None,
1211            metrics: BaselineMetrics::default(),
1212            files: Vec::new(),
1213            complexity: None,
1214            determinism: None,
1215        }
1216    }
1217
1218    /// Creates a baseline from an analysis receipt.
1219    ///
1220    /// Extracts complexity information from the receipt's complexity report
1221    /// and derived totals to build a baseline snapshot.
1222    pub fn from_analysis(receipt: &AnalysisReceipt) -> Self {
1223        let generated_at = chrono_timestamp_iso8601(receipt.generated_at_ms);
1224
1225        let (metrics, files, complexity) = if let Some(ref complexity_report) = receipt.complexity {
1226            let total_code_lines = receipt
1227                .derived
1228                .as_ref()
1229                .map(|d| d.totals.code as u64)
1230                .unwrap_or(0);
1231            let total_files = receipt
1232                .derived
1233                .as_ref()
1234                .map(|d| d.totals.files as u64)
1235                .unwrap_or(0);
1236
1237            let metrics = BaselineMetrics {
1238                total_code_lines,
1239                total_files,
1240                avg_cyclomatic: complexity_report.avg_cyclomatic,
1241                max_cyclomatic: complexity_report.max_cyclomatic as u32,
1242                avg_cognitive: complexity_report.avg_cognitive.unwrap_or(0.0),
1243                max_cognitive: complexity_report.max_cognitive.unwrap_or(0) as u32,
1244                avg_nesting_depth: complexity_report.avg_nesting_depth.unwrap_or(0.0),
1245                max_nesting_depth: complexity_report.max_nesting_depth.unwrap_or(0) as u32,
1246                function_count: complexity_report.total_functions as u64,
1247                avg_function_length: complexity_report.avg_function_length,
1248            };
1249
1250            let files: Vec<FileBaselineEntry> = complexity_report
1251                .files
1252                .iter()
1253                .map(|f| FileBaselineEntry {
1254                    path: f.path.clone(),
1255                    code_lines: 0, // Not available in FileComplexity
1256                    cyclomatic: f.cyclomatic_complexity as u32,
1257                    cognitive: f.cognitive_complexity.unwrap_or(0) as u32,
1258                    max_nesting: f.max_nesting.unwrap_or(0) as u32,
1259                    function_count: f.function_count as u32,
1260                    content_hash: None,
1261                })
1262                .collect();
1263
1264            // Build complexity section mirroring analysis receipt structure
1265            let complexity_section = BaselineComplexitySection {
1266                total_functions: complexity_report.total_functions,
1267                avg_function_length: complexity_report.avg_function_length,
1268                max_function_length: complexity_report.max_function_length,
1269                avg_cyclomatic: complexity_report.avg_cyclomatic,
1270                max_cyclomatic: complexity_report.max_cyclomatic,
1271                avg_cognitive: complexity_report.avg_cognitive,
1272                max_cognitive: complexity_report.max_cognitive,
1273                avg_nesting_depth: complexity_report.avg_nesting_depth,
1274                max_nesting_depth: complexity_report.max_nesting_depth,
1275                high_risk_files: complexity_report.high_risk_files,
1276            };
1277
1278            (metrics, files, Some(complexity_section))
1279        } else {
1280            (BaselineMetrics::default(), Vec::new(), None)
1281        };
1282
1283        Self {
1284            baseline_version: BASELINE_VERSION,
1285            generated_at,
1286            commit: None,
1287            metrics,
1288            files,
1289            complexity,
1290            determinism: None,
1291        }
1292    }
1293}
1294
1295impl Default for ComplexityBaseline {
1296    fn default() -> Self {
1297        Self::new()
1298    }
1299}
1300
/// Complexity section mirroring analysis receipt structure for ratchet compatibility.
///
/// This provides the same field names as `ComplexityReport` so that JSON pointers
/// like `/complexity/avg_cyclomatic` work consistently across baselines and receipts.
/// Field names here must stay in sync with [`ComplexityReport`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineComplexitySection {
    /// Total number of functions analyzed.
    pub total_functions: usize,
    /// Average function length in lines.
    pub avg_function_length: f64,
    /// Maximum function length found.
    pub max_function_length: usize,
    /// Average cyclomatic complexity across all files.
    pub avg_cyclomatic: f64,
    /// Maximum cyclomatic complexity found in any file.
    pub max_cyclomatic: usize,
    /// Average cognitive complexity across all files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_cognitive: Option<f64>,
    /// Maximum cognitive complexity found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_cognitive: Option<usize>,
    /// Average nesting depth across all files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_nesting_depth: Option<f64>,
    /// Maximum nesting depth found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting_depth: Option<usize>,
    /// Number of high-risk files.
    pub high_risk_files: usize,
}
1332
/// Aggregate baseline metrics for the entire codebase.
///
/// All-zero by default (see the `Default` impl), which is the state used
/// when a receipt carries no complexity report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineMetrics {
    /// Total lines of code across all files.
    pub total_code_lines: u64,
    /// Total number of source files.
    pub total_files: u64,
    /// Average cyclomatic complexity across all functions.
    pub avg_cyclomatic: f64,
    /// Maximum cyclomatic complexity found in any function.
    pub max_cyclomatic: u32,
    /// Average cognitive complexity across all functions.
    pub avg_cognitive: f64,
    /// Maximum cognitive complexity found in any function.
    pub max_cognitive: u32,
    /// Average nesting depth across all functions.
    pub avg_nesting_depth: f64,
    /// Maximum nesting depth found in any function.
    pub max_nesting_depth: u32,
    /// Total number of functions analyzed.
    pub function_count: u64,
    /// Average function length in lines.
    pub avg_function_length: f64,
}
1357
1358impl Default for BaselineMetrics {
1359    fn default() -> Self {
1360        Self {
1361            total_code_lines: 0,
1362            total_files: 0,
1363            avg_cyclomatic: 0.0,
1364            max_cyclomatic: 0,
1365            avg_cognitive: 0.0,
1366            max_cognitive: 0,
1367            avg_nesting_depth: 0.0,
1368            max_nesting_depth: 0,
1369            function_count: 0,
1370            avg_function_length: 0.0,
1371        }
1372    }
1373}
1374
/// Per-file baseline entry for granular complexity tracking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileBaselineEntry {
    /// Normalized file path (forward slashes).
    pub path: String,
    /// Lines of code in this file.
    /// NOTE: `ComplexityBaseline::from_analysis` sets this to 0 because
    /// `FileComplexity` does not carry a line count.
    pub code_lines: u64,
    /// Cyclomatic complexity for this file.
    pub cyclomatic: u32,
    /// Cognitive complexity for this file.
    pub cognitive: u32,
    /// Maximum nesting depth in this file.
    pub max_nesting: u32,
    /// Number of functions in this file.
    pub function_count: u32,
    /// BLAKE3 hash of file content for change detection.
    /// Currently left `None` when built via `ComplexityBaseline::from_analysis`.
    pub content_hash: Option<String>,
}
1393
/// Build determinism baseline for reproducibility verification.
///
/// Tracks hashes of build artifacts and source inputs to detect
/// non-deterministic builds. Stored in [`ComplexityBaseline::determinism`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeterminismBaseline {
    /// Schema version for forward compatibility.
    pub baseline_version: u32,
    /// ISO 8601 timestamp when this baseline was generated.
    pub generated_at: String,
    /// Hash of the final build artifact.
    pub build_hash: String,
    /// Hash of all source files combined.
    pub source_hash: String,
    /// Hash of Cargo.lock if present (Rust projects).
    pub cargo_lock_hash: Option<String>,
}
1411
/// Helper to convert a milliseconds-since-epoch timestamp to an
/// RFC 3339 / ISO 8601 string in UTC with millisecond precision,
/// e.g. `1970-01-01T00:00:00.000Z`. Dependency-free (no chrono).
fn chrono_timestamp_iso8601(ms: u128) -> String {
    const SECS_PER_MIN: i64 = 60;
    const SECS_PER_HOUR: i64 = 3600;
    const SECS_PER_DAY: i64 = 86400;

    // Split into whole seconds and the leftover millisecond fraction.
    let total_secs = (ms / 1000) as i64;
    let frac_millis = (ms % 1000) as u32;

    // Floor-divide into whole days since the Unix epoch plus seconds within
    // the day; the euclidean operations keep the in-day part non-negative
    // even for instants before the epoch.
    let epoch_days = total_secs.div_euclid(SECS_PER_DAY);
    let in_day = total_secs.rem_euclid(SECS_PER_DAY);

    // Clock time within the day.
    let hour = in_day / SECS_PER_HOUR;
    let minute = (in_day % SECS_PER_HOUR) / SECS_PER_MIN;
    let second = in_day % SECS_PER_MIN;

    // Civil date from the day count, using Howard Hinnant's
    // "civil_from_days" algorithm.
    let shifted = epoch_days + 719468; // rebase epoch to 0000-03-01
    let era = shifted.div_euclid(146097); // 400-year Gregorian cycles
    let day_of_era = shifted.rem_euclid(146097) as u32; // [0, 146096]
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let month_shifted = (5 * day_of_year + 2) / 153; // March-based month index [0, 11]
    let day = day_of_year - (153 * month_shifted + 2) / 5 + 1;
    let month = if month_shifted < 10 {
        month_shifted + 3
    } else {
        month_shifted - 9
    };
    let mut year = year_of_era as i64 + era * 400;
    if month <= 2 {
        // January and February belong to the *next* March-based year.
        year += 1;
    }

    // Format as RFC 3339: YYYY-MM-DDTHH:MM:SS.sssZ
    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z",
        year, month, day, hour, minute, second, frac_millis
    )
}
1458
1459// -------------------
1460// API Surface metrics
1461// -------------------
1462
/// Public API surface analysis report.
///
/// Computes public export ratios per language and module by scanning
/// source files for exported symbols (pub fn, export function, etc.).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiSurfaceReport {
    /// Total items discovered across all languages.
    pub total_items: usize,
    /// Items with public visibility.
    pub public_items: usize,
    /// Items with internal/private visibility.
    pub internal_items: usize,
    /// Ratio of public to total items (0.0-1.0).
    pub public_ratio: f64,
    /// Ratio of documented public items (0.0-1.0).
    pub documented_ratio: f64,
    /// Per-language breakdown; BTreeMap keys iterate in sorted order.
    pub by_language: BTreeMap<String, LangApiSurface>,
    /// Per-module breakdown.
    pub by_module: Vec<ModuleApiRow>,
    /// Top exporters (files with most public items).
    pub top_exporters: Vec<ApiExportItem>,
}
1486
/// Per-language API surface breakdown (values in [`ApiSurfaceReport::by_language`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangApiSurface {
    /// Total items in this language.
    pub total_items: usize,
    /// Public items in this language.
    pub public_items: usize,
    /// Internal items in this language.
    pub internal_items: usize,
    /// Public ratio for this language (0.0-1.0).
    pub public_ratio: f64,
}
1499
/// Per-module API surface row (entries in [`ApiSurfaceReport::by_module`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleApiRow {
    /// Module path.
    pub module: String,
    /// Total items in this module.
    pub total_items: usize,
    /// Public items in this module.
    pub public_items: usize,
    /// Public ratio for this module (0.0-1.0).
    pub public_ratio: f64,
}
1512
/// A file that exports many public items (entries in
/// [`ApiSurfaceReport::top_exporters`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiExportItem {
    /// File path.
    pub path: String,
    /// Language of the file.
    pub lang: String,
    /// Number of public items exported.
    pub public_items: usize,
    /// Total items in the file.
    pub total_items: usize,
}
1525
1526// ---------
1527// Fun stuff
1528// ---------
1529
/// Container for the optional "fun" analyses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunReport {
    /// Eco-label result, when computed.
    pub eco_label: Option<EcoLabel>,
}
1534
/// Eco-label result: a numeric score, a short label, a byte count, and notes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EcoLabel {
    /// Numeric score backing the label.
    pub score: f64,
    /// Label text (tests use letter grades such as "A").
    pub label: String,
    /// Byte count the label refers to — presumably total scanned bytes;
    /// confirm against the computation in tokmd-analysis.
    pub bytes: u64,
    /// Free-form explanatory notes.
    pub notes: String,
}
1542
1543// =========================
1544// Ecosystem Envelope (v1) — re-exported from tokmd-envelope
1545// =========================
1546
1547/// Schema identifier for ecosystem envelope format.
1548/// v1: Initial envelope specification for multi-sensor integration.
1549pub const ENVELOPE_SCHEMA: &str = tokmd_envelope::SENSOR_REPORT_SCHEMA;
1550
1551// Re-export all envelope types with backwards-compatible aliases
1552pub use tokmd_envelope::Artifact;
1553pub use tokmd_envelope::Finding;
1554pub use tokmd_envelope::FindingLocation;
1555pub use tokmd_envelope::FindingSeverity;
1556pub use tokmd_envelope::GateItem;
1557pub use tokmd_envelope::GateResults as GatesEnvelope;
1558pub use tokmd_envelope::SensorReport as Envelope;
1559pub use tokmd_envelope::ToolMeta as EnvelopeTool;
1560pub use tokmd_envelope::Verdict;
1561
1562// Also re-export the canonical names for new code
1563pub use tokmd_envelope::GateResults;
1564pub use tokmd_envelope::SensorReport;
1565pub use tokmd_envelope::ToolMeta;
1566
#[cfg(test)]
mod tests {
    use super::*;

    // Unit tests for the crate's pure data contracts: schema constants,
    // Default impls, serde roundtrips and naming conventions, histogram
    // rendering, and the dependency-free timestamp formatter.

    // ── Schema version constant ───────────────────────────────────────
    #[test]
    fn analysis_schema_version_constant() {
        assert_eq!(ANALYSIS_SCHEMA_VERSION, 9);
    }

    #[test]
    fn baseline_version_constant() {
        assert_eq!(BASELINE_VERSION, 1);
    }

    // ── Default impls ─────────────────────────────────────────────────
    #[test]
    fn complexity_baseline_default() {
        let b = ComplexityBaseline::default();
        assert_eq!(b.baseline_version, BASELINE_VERSION);
        assert!(b.generated_at.is_empty());
        assert!(b.commit.is_none());
        assert!(b.files.is_empty());
        assert!(b.complexity.is_none());
        assert!(b.determinism.is_none());
    }

    #[test]
    fn complexity_baseline_new_equals_default() {
        let a = ComplexityBaseline::new();
        let b = ComplexityBaseline::default();
        assert_eq!(a.baseline_version, b.baseline_version);
        assert_eq!(a.generated_at, b.generated_at);
        assert_eq!(a.files.len(), b.files.len());
    }

    #[test]
    fn baseline_metrics_default_is_zeroed() {
        let m = BaselineMetrics::default();
        assert_eq!(m.total_code_lines, 0);
        assert_eq!(m.total_files, 0);
        assert_eq!(m.avg_cyclomatic, 0.0);
        assert_eq!(m.max_cyclomatic, 0);
        assert_eq!(m.avg_cognitive, 0.0);
        assert_eq!(m.function_count, 0);
    }

    // ── Enum serde roundtrips ─────────────────────────────────────────
    #[test]
    fn entropy_class_serde_roundtrip() {
        for variant in [
            EntropyClass::Low,
            EntropyClass::Normal,
            EntropyClass::Suspicious,
            EntropyClass::High,
        ] {
            let json = serde_json::to_string(&variant).unwrap();
            let back: EntropyClass = serde_json::from_str(&json).unwrap();
            assert_eq!(back, variant);
        }
    }

    #[test]
    fn trend_class_serde_roundtrip() {
        for variant in [TrendClass::Rising, TrendClass::Flat, TrendClass::Falling] {
            let json = serde_json::to_string(&variant).unwrap();
            let back: TrendClass = serde_json::from_str(&json).unwrap();
            assert_eq!(back, variant);
        }
    }

    #[test]
    fn license_source_kind_serde_roundtrip() {
        for variant in [LicenseSourceKind::Metadata, LicenseSourceKind::Text] {
            let json = serde_json::to_string(&variant).unwrap();
            let back: LicenseSourceKind = serde_json::from_str(&json).unwrap();
            assert_eq!(back, variant);
        }
    }

    #[test]
    fn complexity_risk_serde_roundtrip() {
        for variant in [
            ComplexityRisk::Low,
            ComplexityRisk::Moderate,
            ComplexityRisk::High,
            ComplexityRisk::Critical,
        ] {
            let json = serde_json::to_string(&variant).unwrap();
            let back: ComplexityRisk = serde_json::from_str(&json).unwrap();
            assert_eq!(back, variant);
        }
    }

    #[test]
    fn technical_debt_level_serde_roundtrip() {
        for variant in [
            TechnicalDebtLevel::Low,
            TechnicalDebtLevel::Moderate,
            TechnicalDebtLevel::High,
            TechnicalDebtLevel::Critical,
        ] {
            let json = serde_json::to_string(&variant).unwrap();
            let back: TechnicalDebtLevel = serde_json::from_str(&json).unwrap();
            assert_eq!(back, variant);
        }
    }

    // ── Enum naming conventions ───────────────────────────────────────
    // These pin the on-disk snake_case spelling so renames are caught.
    #[test]
    fn entropy_class_uses_snake_case() {
        assert_eq!(
            serde_json::to_string(&EntropyClass::Suspicious).unwrap(),
            "\"suspicious\""
        );
    }

    #[test]
    fn trend_class_uses_snake_case() {
        assert_eq!(
            serde_json::to_string(&TrendClass::Rising).unwrap(),
            "\"rising\""
        );
    }

    #[test]
    fn effort_model_display_strings_are_stable() {
        assert_eq!(EffortModel::Cocomo81Basic.to_string(), "cocomo81-basic");
        assert_eq!(EffortModel::Cocomo2Early.to_string(), "cocomo2-early");
        assert_eq!(EffortModel::Ensemble.to_string(), "ensemble");
    }

    #[test]
    fn effort_confidence_level_display_strings_are_stable() {
        assert_eq!(EffortConfidenceLevel::Low.to_string(), "low");
        assert_eq!(EffortConfidenceLevel::Medium.to_string(), "medium");
        assert_eq!(EffortConfidenceLevel::High.to_string(), "high");
    }

    #[test]
    fn effort_delta_classification_display_strings_are_stable() {
        assert_eq!(EffortDeltaClassification::Low.to_string(), "low");
        assert_eq!(EffortDeltaClassification::Medium.to_string(), "medium");
        assert_eq!(EffortDeltaClassification::High.to_string(), "high");
        assert_eq!(EffortDeltaClassification::Critical.to_string(), "critical");
    }

    #[test]
    fn complexity_risk_uses_snake_case() {
        assert_eq!(
            serde_json::to_string(&ComplexityRisk::Moderate).unwrap(),
            "\"moderate\""
        );
    }

    // ── Struct serde roundtrips ───────────────────────────────────────
    #[test]
    fn eco_label_serde_roundtrip() {
        let label = EcoLabel {
            score: 85.0,
            label: "A".into(),
            bytes: 1000,
            notes: "Good".into(),
        };
        let json = serde_json::to_string(&label).unwrap();
        let back: EcoLabel = serde_json::from_str(&json).unwrap();
        assert_eq!(back.label, "A");
        assert_eq!(back.bytes, 1000);
    }

    #[test]
    fn topic_term_serde_roundtrip() {
        let term = TopicTerm {
            term: "async".into(),
            score: 0.95,
            tf: 10,
            df: 3,
        };
        let json = serde_json::to_string(&term).unwrap();
        let back: TopicTerm = serde_json::from_str(&json).unwrap();
        assert_eq!(back.term, "async");
        assert_eq!(back.tf, 10);
    }

    #[test]
    fn complexity_baseline_serde_roundtrip() {
        let b = ComplexityBaseline {
            baseline_version: BASELINE_VERSION,
            generated_at: "2025-01-01T00:00:00.000Z".into(),
            commit: Some("abc123".into()),
            metrics: BaselineMetrics::default(),
            files: vec![FileBaselineEntry {
                path: "src/lib.rs".into(),
                code_lines: 100,
                cyclomatic: 5,
                cognitive: 3,
                max_nesting: 2,
                function_count: 10,
                content_hash: Some("deadbeef".into()),
            }],
            complexity: None,
            determinism: None,
        };
        let json = serde_json::to_string(&b).unwrap();
        let back: ComplexityBaseline = serde_json::from_str(&json).unwrap();
        assert_eq!(back.baseline_version, BASELINE_VERSION);
        assert_eq!(back.commit.as_deref(), Some("abc123"));
        assert_eq!(back.files.len(), 1);
        assert_eq!(back.files[0].path, "src/lib.rs");
    }

    // ── ComplexityHistogram ───────────────────────────────────────────
    #[test]
    fn complexity_histogram_to_ascii_basic() {
        let h = ComplexityHistogram {
            buckets: vec![0, 5, 10],
            counts: vec![10, 5, 2],
            total: 17,
        };
        let ascii = h.to_ascii(20);
        assert!(!ascii.is_empty());
        // Should have 3 lines (one per bucket)
        assert_eq!(ascii.lines().count(), 3);
    }

    #[test]
    fn complexity_histogram_to_ascii_empty_counts() {
        let h = ComplexityHistogram {
            buckets: vec![0, 5],
            counts: vec![0, 0],
            total: 0,
        };
        // All-zero counts exercise the divide-by-zero guard in to_ascii.
        let ascii = h.to_ascii(20);
        assert!(!ascii.is_empty());
    }

    // ── chrono_timestamp_iso8601 ──────────────────────────────────────
    /// The Unix epoch must format as the canonical zero instant.
    #[test]
    fn timestamp_epoch() {
        let result = chrono_timestamp_iso8601(0);
        assert_eq!(result, "1970-01-01T00:00:00.000Z");
    }

    #[test]
    fn timestamp_with_millis() {
        // 2025-01-01T00:00:00.500Z = 1735689600500 ms
        let result = chrono_timestamp_iso8601(1735689600500);
        assert!(result.ends_with(".500Z"));
        assert!(result.starts_with("2025-01-01"));
    }
}