// tokmd_analysis_types — lib.rs
1//! # tokmd-analysis-types
2//!
3//! **Tier 0 (Analysis Contract)**
4//!
5//! Pure data structures for analysis receipts. No I/O or business logic.
6//!
7//! ## What belongs here
8//! * Analysis-specific receipt types and findings
9//! * Schema definitions for analysis outputs
10//! * Type enums for classification results
11//!
12//! ## What does NOT belong here
13//! * Analysis computation logic (use tokmd-analysis)
14//! * Formatting logic (use tokmd-analysis-format)
15//! * File I/O operations
16
17pub mod findings;
18
19use std::collections::BTreeMap;
20
21use serde::{Deserialize, Serialize};
22use tokmd_types::{ScanStatus, ToolInfo};
23
/// Schema version for analysis receipts.
///
/// Bump this whenever [`AnalysisReceipt`] changes shape. History:
/// v5: Added complexity enrichers (Halstead, maintainability, histogram) with additive
/// debt/duplication/age signals.
pub const ANALYSIS_SCHEMA_VERSION: u32 = 5;
28
/// Top-level receipt for a single analysis run.
///
/// All enricher sections are `Option`al: `None` means that section was not
/// computed for this run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisReceipt {
    /// Receipt schema version (see [`ANALYSIS_SCHEMA_VERSION`]).
    pub schema_version: u32,
    /// Generation timestamp in milliseconds (epoch basis set by the producer).
    pub generated_at_ms: u128,
    /// Producing tool metadata (see `tokmd_types::ToolInfo`).
    pub tool: ToolInfo,
    /// Analysis mode label.
    pub mode: String,
    /// Overall scan status.
    pub status: ScanStatus,
    /// Non-fatal warnings collected during the run.
    pub warnings: Vec<String>,
    /// Provenance of the analyzed data.
    pub source: AnalysisSource,
    /// Echo of the arguments the analysis was invoked with.
    pub args: AnalysisArgsMeta,
    /// Project archetype classification, when computed.
    pub archetype: Option<Archetype>,
    /// Semantic topic clouds, when computed.
    pub topics: Option<TopicClouds>,
    /// Entropy profiling, when computed.
    pub entropy: Option<EntropyReport>,
    /// Predictive churn trends, when computed.
    pub predictive_churn: Option<PredictiveChurnReport>,
    /// Commit author domain fingerprint, when computed.
    pub corporate_fingerprint: Option<CorporateFingerprint>,
    /// License detection, when computed.
    pub license: Option<LicenseReport>,
    /// Derived analytics, when computed.
    pub derived: Option<DerivedReport>,
    /// Asset metrics, when computed.
    pub assets: Option<AssetReport>,
    /// Dependency/lockfile metrics, when computed.
    pub deps: Option<DependencyReport>,
    /// Git history metrics, when computed.
    pub git: Option<GitReport>,
    /// Import graph, when computed.
    pub imports: Option<ImportReport>,
    /// Duplicate-content report, when computed.
    pub dup: Option<DuplicateReport>,
    /// Complexity metrics, when computed.
    pub complexity: Option<ComplexityReport>,
    /// Fun/novelty stats, when computed (defined in `findings`).
    pub fun: Option<FunReport>,
}
54
/// Provenance of the data an analysis was computed from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisSource {
    /// Raw input specifiers as given by the caller.
    pub inputs: Vec<String>,
    /// Path to the export file consumed, if any.
    pub export_path: Option<String>,
    /// Path to the base receipt consumed, if any.
    pub base_receipt_path: Option<String>,
    /// Schema version of the consumed export, if known.
    pub export_schema_version: Option<u32>,
    /// Generation timestamp (ms) of the consumed export, if known.
    pub export_generated_at_ms: Option<u128>,
    /// Signature of the base receipt, if known.
    pub base_signature: Option<String>,
    /// Root directories used when grouping paths into modules.
    pub module_roots: Vec<String>,
    /// Path depth used for module grouping.
    pub module_depth: usize,
    /// Child-handling mode label (semantics defined by the producer).
    pub children: String,
}
67
/// Echo of the invocation arguments that shaped the analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisArgsMeta {
    /// Named preset applied to the run.
    pub preset: String,
    /// Requested output format.
    pub format: String,
    /// Context-window size in tokens, if provided.
    pub window_tokens: Option<usize>,
    /// Explicit git on/off override, if provided.
    pub git: Option<bool>,
    /// Cap on number of files, if provided.
    pub max_files: Option<usize>,
    /// Cap on total bytes, if provided.
    pub max_bytes: Option<u64>,
    /// Cap on commits scanned, if provided.
    pub max_commits: Option<usize>,
    /// Cap on files inspected per commit, if provided.
    pub max_commit_files: Option<usize>,
    /// Cap on bytes read per file, if provided.
    pub max_file_bytes: Option<u64>,
    /// Granularity label for the import graph (see [`ImportReport::granularity`]).
    pub import_granularity: String,
}
81
82// ---------------
83// Project context
84// ---------------
85
/// Project archetype classification with supporting evidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Archetype {
    /// Archetype label assigned to the project.
    pub kind: String,
    /// Evidence strings supporting the classification.
    pub evidence: Vec<String>,
}
91
92// -----------------
93// Semantic topics
94// -----------------
95
/// Weighted topic terms, per module and overall.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicClouds {
    /// Top terms keyed by module (`BTreeMap` keeps serialization order deterministic).
    pub per_module: BTreeMap<String, Vec<TopicTerm>>,
    /// Top terms across the whole codebase.
    pub overall: Vec<TopicTerm>,
}
101
/// A single scored topic term.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicTerm {
    /// The term itself.
    pub term: String,
    /// Relevance score (presumably TF-IDF-style given `tf`/`df` — confirm in tokmd-analysis).
    pub score: f64,
    /// Term frequency.
    pub tf: u32,
    /// Document frequency.
    pub df: u32,
}
109
110// -----------------
111// Entropy profiling
112// -----------------
113
/// Files flagged by entropy profiling.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyReport {
    /// Findings surfaced by the profiler (name suggests pre-filtered to suspects).
    pub suspects: Vec<EntropyFinding>,
}
118
/// Entropy measurement for a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyFinding {
    /// File path.
    pub path: String,
    /// Module the file belongs to.
    pub module: String,
    /// Measured entropy in bits per byte.
    pub entropy_bits_per_byte: f32,
    /// Number of bytes sampled for the measurement.
    pub sample_bytes: u32,
    /// Classification bucket derived from the measurement.
    pub class: EntropyClass,
}
127
/// Entropy classification buckets, ordered from lowest to highest.
/// Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntropyClass {
    /// Below-normal entropy.
    Low,
    /// Typical entropy.
    Normal,
    /// Elevated entropy worth review.
    Suspicious,
    /// Highest bucket.
    High,
}
136
137// -----------------
138// Predictive churn
139// -----------------
140
/// Per-module churn trend projections.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PredictiveChurnReport {
    /// Fitted churn trend keyed by module (deterministic ordering via `BTreeMap`).
    pub per_module: BTreeMap<String, ChurnTrend>,
}
145
/// Linear-trend summary of one module's churn.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChurnTrend {
    /// Slope of the fitted trend line.
    pub slope: f64,
    /// Coefficient of determination (goodness of fit) for the trend.
    pub r2: f64,
    /// Net change over the recent window (units defined by the producer).
    pub recent_change: i64,
    /// Bucketed trend direction.
    pub classification: TrendClass,
}
153
/// Direction of a fitted trend. Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrendClass {
    /// Trend is increasing.
    Rising,
    /// No meaningful trend either way.
    Flat,
    /// Trend is decreasing.
    Falling,
}
161
162// ---------------------
163// Corporate fingerprint
164// ---------------------
165
/// Distribution of commit contributions across email domains.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorporateFingerprint {
    /// Per-domain commit statistics.
    pub domains: Vec<DomainStat>,
}
170
/// Commit share for one email domain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DomainStat {
    /// Email domain (presumably from commit author addresses — confirm in producer).
    pub domain: String,
    /// Commits attributed to this domain.
    pub commits: u32,
    /// Share of commits, as a percentage.
    pub pct: f32,
}
177
178// -------------
179// License radar
180// -------------
181
/// License detection results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseReport {
    /// Individual license detections.
    pub findings: Vec<LicenseFinding>,
    /// Effective license conclusion (SPDX identifier), if one was determined.
    pub effective: Option<String>,
}
187
/// A single license detection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseFinding {
    /// SPDX identifier of the detected license.
    pub spdx: String,
    /// Detection confidence (scale defined by the producer).
    pub confidence: f32,
    /// File the license was detected in.
    pub source_path: String,
    /// How the license was detected.
    pub source_kind: LicenseSourceKind,
}
195
/// How a license finding was obtained. Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LicenseSourceKind {
    /// Declared in package metadata.
    Metadata,
    /// Matched from license text.
    Text,
}
202
203// -----------------
204// Derived analytics
205// -----------------
206
/// Analytics derived from the base scan (no extra I/O required).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivedReport {
    /// Codebase-wide line/byte/token totals.
    pub totals: DerivedTotals,
    /// Comment-to-code ratios.
    pub doc_density: RatioReport,
    /// Blank-line ratios.
    pub whitespace: RatioReport,
    /// Verbosity rates (see [`RateReport`]).
    pub verbosity: RateReport,
    /// Largest file overall and per language/module.
    pub max_file: MaxFileReport,
    /// Language homogeneity per module.
    pub lang_purity: LangPurityReport,
    /// Directory nesting statistics.
    pub nesting: NestingReport,
    /// Test vs production code proportions.
    pub test_density: TestDensityReport,
    /// Infrastructure vs logic code proportions.
    pub boilerplate: BoilerplateReport,
    /// Language diversity summary.
    pub polyglot: PolyglotReport,
    /// File-size distribution statistics.
    pub distribution: DistributionReport,
    /// File-size histogram buckets.
    pub histogram: Vec<HistogramBucket>,
    /// Worst offenders by several size/density criteria.
    pub top: TopOffenders,
    /// Rendered tree view, when produced.
    pub tree: Option<String>,
    /// Estimated reading time.
    pub reading_time: ReadingTimeReport,
    /// Fit against a model context window, when a window size was given.
    pub context_window: Option<ContextWindowReport>,
    /// COCOMO effort estimate, when computed.
    pub cocomo: Option<CocomoReport>,
    /// TODO/FIXME tag counts, when computed.
    pub todo: Option<TodoReport>,
    /// Hash over the analyzed entries for integrity checking.
    pub integrity: IntegrityReport,
}
229
/// Codebase-wide totals.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivedTotals {
    /// Number of files.
    pub files: usize,
    /// Code lines.
    pub code: usize,
    /// Comment lines.
    pub comments: usize,
    /// Blank lines.
    pub blanks: usize,
    /// Total lines.
    pub lines: usize,
    /// Total bytes.
    pub bytes: usize,
    /// Total tokens.
    pub tokens: usize,
}
240
/// A ratio metric broken down overall, by language, and by module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RatioReport {
    /// Codebase-wide ratio.
    pub total: RatioRow,
    /// Per-language rows.
    pub by_lang: Vec<RatioRow>,
    /// Per-module rows.
    pub by_module: Vec<RatioRow>,
}
247
/// One ratio observation: `ratio` relates `numerator` to `denominator`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RatioRow {
    /// Grouping key (language, module, or "total").
    pub key: String,
    /// Ratio numerator.
    pub numerator: usize,
    /// Ratio denominator.
    pub denominator: usize,
    /// Precomputed numerator/denominator value.
    pub ratio: f64,
}
255
/// A rate metric broken down overall, by language, and by module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateReport {
    /// Codebase-wide rate.
    pub total: RateRow,
    /// Per-language rows.
    pub by_lang: Vec<RateRow>,
    /// Per-module rows.
    pub by_module: Vec<RateRow>,
}
262
/// One rate observation; mirrors [`RatioRow`] but names the value `rate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateRow {
    /// Grouping key (language, module, or "total").
    pub key: String,
    /// Rate numerator.
    pub numerator: usize,
    /// Rate denominator.
    pub denominator: usize,
    /// Precomputed numerator/denominator value.
    pub rate: f64,
}
270
/// Largest file overall and within each language/module group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaxFileReport {
    /// Largest file in the whole codebase.
    pub overall: FileStatRow,
    /// Largest file per language.
    pub by_lang: Vec<MaxFileRow>,
    /// Largest file per module.
    pub by_module: Vec<MaxFileRow>,
}
277
/// Largest file within one group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaxFileRow {
    /// Group key (language or module).
    pub key: String,
    /// Stats for the group's largest file.
    pub file: FileStatRow,
}
283
/// Per-file statistics row used by size/density rankings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileStatRow {
    /// File path.
    pub path: String,
    /// Module the file belongs to.
    pub module: String,
    /// Detected language.
    pub lang: String,
    /// Code lines.
    pub code: usize,
    /// Comment lines.
    pub comments: usize,
    /// Blank lines.
    pub blanks: usize,
    /// Total lines.
    pub lines: usize,
    /// File size in bytes.
    pub bytes: usize,
    /// Token count.
    pub tokens: usize,
    /// Documentation percentage, when computable.
    pub doc_pct: Option<f64>,
    /// Average bytes per line, when computable.
    pub bytes_per_line: Option<f64>,
    /// Path depth of the file.
    pub depth: usize,
}
299
/// Per-module language homogeneity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangPurityReport {
    /// One row per module.
    pub rows: Vec<LangPurityRow>,
}
304
/// Language mix for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LangPurityRow {
    /// Module key.
    pub module: String,
    /// Number of distinct languages in the module.
    pub lang_count: usize,
    /// Language with the most lines.
    pub dominant_lang: String,
    /// Line count of the dominant language.
    pub dominant_lines: usize,
    /// Dominant language's share, as a percentage.
    pub dominant_pct: f64,
}
313
/// Path-nesting depth statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NestingReport {
    /// Maximum depth observed.
    pub max: usize,
    /// Average depth.
    pub avg: f64,
    /// Per-module breakdown.
    pub by_module: Vec<NestingRow>,
}
320
/// Nesting statistics for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NestingRow {
    /// Module key.
    pub key: String,
    /// Maximum depth within the module.
    pub max: usize,
    /// Average depth within the module.
    pub avg: f64,
}
327
/// Proportion of test code relative to production code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestDensityReport {
    /// Lines classified as test code.
    pub test_lines: usize,
    /// Lines classified as production code.
    pub prod_lines: usize,
    /// Files classified as test files.
    pub test_files: usize,
    /// Files classified as production files.
    pub prod_files: usize,
    /// Test-to-production ratio.
    pub ratio: f64,
}
336
/// Infrastructure ("boilerplate") vs logic code proportions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoilerplateReport {
    /// Lines classified as infrastructure/config.
    pub infra_lines: usize,
    /// Lines classified as logic.
    pub logic_lines: usize,
    /// Infra-to-logic ratio.
    pub ratio: f64,
    /// Languages that were counted as infrastructure.
    pub infra_langs: Vec<String>,
}
344
/// Language diversity summary for the codebase.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolyglotReport {
    /// Number of distinct languages.
    pub lang_count: usize,
    /// Entropy of the language distribution (higher = more mixed).
    pub entropy: f64,
    /// Language with the most lines.
    pub dominant_lang: String,
    /// Line count of the dominant language.
    pub dominant_lines: usize,
    /// Dominant language's share, as a percentage.
    pub dominant_pct: f64,
}
353
/// Summary statistics of a size distribution (presumably per-file sizes —
/// confirm against the producer in tokmd-analysis).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionReport {
    /// Sample count.
    pub count: usize,
    /// Minimum value.
    pub min: usize,
    /// Maximum value.
    pub max: usize,
    /// Arithmetic mean.
    pub mean: f64,
    /// Median.
    pub median: f64,
    /// 90th percentile.
    pub p90: f64,
    /// 99th percentile.
    pub p99: f64,
    /// Gini coefficient (0 = uniform, 1 = maximally concentrated).
    pub gini: f64,
}
365
/// One bucket of a file-size histogram.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistogramBucket {
    /// Human-readable bucket label.
    pub label: String,
    /// Inclusive lower bound.
    pub min: usize,
    /// Upper bound; `None` for the open-ended final bucket.
    pub max: Option<usize>,
    /// Number of files in the bucket.
    pub files: usize,
    /// Share of files, as a percentage.
    pub pct: f64,
}
374
/// Worst-offender file rankings by several criteria.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopOffenders {
    /// Files with the most lines.
    pub largest_lines: Vec<FileStatRow>,
    /// Files with the most tokens.
    pub largest_tokens: Vec<FileStatRow>,
    /// Files with the most bytes.
    pub largest_bytes: Vec<FileStatRow>,
    /// Files with the lowest documentation share.
    pub least_documented: Vec<FileStatRow>,
    /// Files with the highest density (criterion defined by the producer).
    pub most_dense: Vec<FileStatRow>,
}
383
/// Estimated time to read the codebase.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingTimeReport {
    /// Estimated minutes (`basis_lines` / `lines_per_minute`).
    pub minutes: f64,
    /// Assumed reading speed in lines per minute.
    pub lines_per_minute: usize,
    /// Line count the estimate was based on.
    pub basis_lines: usize,
}
390
/// TODO/FIXME-style tag counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoReport {
    /// Total tags found.
    pub total: usize,
    /// Tags per thousand lines of code.
    pub density_per_kloc: f64,
    /// Per-tag breakdown.
    pub tags: Vec<TodoTagRow>,
}
397
/// Count for one tag kind (e.g. TODO, FIXME).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoTagRow {
    /// Tag text.
    pub tag: String,
    /// Occurrences of this tag.
    pub count: usize,
}
403
/// Whether the codebase fits in a model context window of a given size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextWindowReport {
    /// Window size in tokens.
    pub window_tokens: usize,
    /// Total tokens in the codebase.
    pub total_tokens: usize,
    /// `total_tokens` as a percentage of the window.
    pub pct: f64,
    /// True when the codebase fits inside the window.
    pub fits: bool,
}
411
/// COCOMO effort/schedule estimate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CocomoReport {
    /// COCOMO project mode (e.g. organic/semi-detached/embedded — confirm producer).
    pub mode: String,
    /// Thousands of lines of code used as input.
    pub kloc: f64,
    /// Estimated effort in person-months.
    pub effort_pm: f64,
    /// Estimated schedule in months.
    pub duration_months: f64,
    /// Estimated average staffing.
    pub staff: f64,
    /// Model coefficient `a` (effort multiplier).
    pub a: f64,
    /// Model exponent `b` (effort scale).
    pub b: f64,
    /// Model coefficient `c` (schedule multiplier).
    pub c: f64,
    /// Model exponent `d` (schedule scale).
    pub d: f64,
}
424
/// Integrity digest over the analyzed entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityReport {
    /// Hash algorithm name.
    pub algo: String,
    /// Hex digest.
    pub hash: String,
    /// Number of entries covered by the hash.
    pub entries: usize,
}
431
432// -------------
433// Asset metrics
434// -------------
435
/// Non-source asset statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetReport {
    /// Total asset files.
    pub total_files: usize,
    /// Total asset bytes.
    pub total_bytes: u64,
    /// Per-category breakdown.
    pub categories: Vec<AssetCategoryRow>,
    /// Largest individual assets.
    pub top_files: Vec<AssetFileRow>,
}
443
/// Aggregated stats for one asset category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetCategoryRow {
    /// Category label.
    pub category: String,
    /// Files in the category.
    pub files: usize,
    /// Bytes in the category.
    pub bytes: u64,
    /// File extensions observed in the category.
    pub extensions: Vec<String>,
}
451
/// One asset file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetFileRow {
    /// File path.
    pub path: String,
    /// File size in bytes.
    pub bytes: u64,
    /// Category the file was assigned to.
    pub category: String,
    /// File extension.
    pub extension: String,
}
459
460// -----------------
461// Dependency metrics
462// -----------------
463
/// Dependency counts aggregated from lockfiles.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DependencyReport {
    /// Total dependencies across all lockfiles.
    pub total: usize,
    /// Per-lockfile breakdown.
    pub lockfiles: Vec<LockfileReport>,
}
469
/// Dependency count for one lockfile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockfileReport {
    /// Lockfile path.
    pub path: String,
    /// Lockfile kind label (e.g. ecosystem name — confirm producer).
    pub kind: String,
    /// Number of dependencies listed.
    pub dependencies: usize,
}
476
477// ---------
478// Git report
479// ---------
480
/// Metrics derived from git history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitReport {
    /// Number of commits scanned.
    pub commits_scanned: usize,
    /// Number of distinct files seen in those commits.
    pub files_seen: usize,
    /// Most-changed files (see [`HotspotRow`]).
    pub hotspots: Vec<HotspotRow>,
    /// Author counts per module (bus-factor proxy).
    pub bus_factor: Vec<BusFactorRow>,
    /// Staleness summary.
    pub freshness: FreshnessReport,
    /// File pairs that change together.
    pub coupling: Vec<CouplingRow>,
    /// Code age bucket distribution plus recent refresh trend.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub age_distribution: Option<CodeAgeDistributionReport>,
}
493
/// A frequently-changed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HotspotRow {
    /// File path.
    pub path: String,
    /// Commits touching the file.
    pub commits: usize,
    /// Current line count.
    pub lines: usize,
    /// Hotspot score (formula defined by the producer).
    pub score: usize,
}
501
/// Distinct-author count for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BusFactorRow {
    /// Module key.
    pub module: String,
    /// Number of distinct authors.
    pub authors: usize,
}
507
/// How much of the codebase has not been touched recently.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FreshnessReport {
    /// Age threshold (days) beyond which a file counts as stale.
    pub threshold_days: usize,
    /// Files older than the threshold.
    pub stale_files: usize,
    /// Files considered in total.
    pub total_files: usize,
    /// Stale share, as a percentage.
    pub stale_pct: f64,
    /// Per-module breakdown.
    pub by_module: Vec<ModuleFreshnessRow>,
}
516
/// Staleness statistics for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleFreshnessRow {
    /// Module key.
    pub module: String,
    /// Average file age in days.
    pub avg_days: f64,
    /// 90th-percentile file age in days.
    pub p90_days: f64,
    /// Stale share, as a percentage.
    pub stale_pct: f64,
}
524
/// A pair of paths that changed together.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CouplingRow {
    /// First path of the pair.
    pub left: String,
    /// Second path of the pair.
    pub right: String,
    /// Number of commits in which both changed.
    pub count: usize,
}
531
/// Code age bucket distribution plus a recent-vs-prior refresh trend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeAgeDistributionReport {
    /// Age buckets, from newest to oldest.
    pub buckets: Vec<CodeAgeBucket>,
    /// Refresh count in the recent window.
    pub recent_refreshes: usize,
    /// Refresh count in the prior window.
    pub prior_refreshes: usize,
    /// Direction of recent vs prior refresh activity.
    pub refresh_trend: TrendClass,
}
539
/// One age bucket in the code age distribution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeAgeBucket {
    /// Human-readable bucket label.
    pub label: String,
    /// Inclusive lower bound in days.
    pub min_days: usize,
    /// Upper bound in days; `None` for the open-ended final bucket.
    pub max_days: Option<usize>,
    /// Files in the bucket.
    pub files: usize,
    /// Share of files, as a percentage.
    pub pct: f64,
}
548
549// -----------------
550// Import graph info
551// -----------------
552
/// Import dependency graph as an edge list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportReport {
    /// Node granularity label (mirrors [`AnalysisArgsMeta::import_granularity`]).
    pub granularity: String,
    /// Directed import edges.
    pub edges: Vec<ImportEdge>,
}
558
/// One directed import edge.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportEdge {
    /// Importing node.
    pub from: String,
    /// Imported node.
    pub to: String,
    /// Number of imports along this edge.
    pub count: usize,
}
565
566// -------------------
567// Duplication metrics
568// -------------------
569
/// Duplicate-content findings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateReport {
    /// Groups of files with identical content hashes.
    pub groups: Vec<DuplicateGroup>,
    /// Bytes that could be saved by deduplicating (copies beyond the first).
    pub wasted_bytes: u64,
    /// Detection strategy label.
    pub strategy: String,
    /// Duplication density summary overall and by module.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub density: Option<DuplicationDensityReport>,
}
579
/// A set of files sharing one content hash.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateGroup {
    /// Shared content hash.
    pub hash: String,
    /// Size of one copy, in bytes.
    pub bytes: u64,
    /// Paths of all files in the group.
    pub files: Vec<String>,
}
586
/// Duplication density summary, overall and per module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicationDensityReport {
    /// Number of duplicate groups.
    pub duplicate_groups: usize,
    /// Files participating in any duplicate group.
    pub duplicate_files: usize,
    /// Total bytes across all duplicated files.
    pub duplicated_bytes: u64,
    /// Bytes wasted on redundant copies.
    pub wasted_bytes: u64,
    /// Wasted bytes as a percentage of the codebase.
    pub wasted_pct_of_codebase: f64,
    /// Per-module breakdown.
    pub by_module: Vec<ModuleDuplicationDensityRow>,
}
596
/// Duplication density for one module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleDuplicationDensityRow {
    /// Module key.
    pub module: String,
    /// Files in the module participating in duplicate groups.
    pub duplicate_files: usize,
    /// Redundant copies within the module.
    pub wasted_files: usize,
    /// Total bytes of the module's duplicated files.
    pub duplicated_bytes: u64,
    /// Bytes wasted on redundant copies within the module.
    pub wasted_bytes: u64,
    /// Total bytes in the module.
    pub module_bytes: u64,
    /// Duplication density (relation of wasted to module bytes — confirm producer).
    pub density: f64,
}
607
608// -------------------
609// Halstead metrics
610// -------------------
611
/// Halstead software science metrics computed from operator/operand token counts.
///
/// Field docs use the classic Halstead notation: lowercase `n1`/`n2` are
/// distinct counts, uppercase `N1`/`N2` are totals.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HalsteadMetrics {
    /// Number of distinct operators (n1).
    pub distinct_operators: usize,
    /// Number of distinct operands (n2).
    pub distinct_operands: usize,
    /// Total number of operators (N1).
    pub total_operators: usize,
    /// Total number of operands (N2).
    pub total_operands: usize,
    /// Program vocabulary: n1 + n2.
    pub vocabulary: usize,
    /// Program length: N1 + N2.
    pub length: usize,
    /// Volume: N * log2(n), where N is `length` and n is `vocabulary`.
    pub volume: f64,
    /// Difficulty: (n1/2) * (N2/n2).
    pub difficulty: f64,
    /// Effort: D * V.
    pub effort: f64,
    /// Estimated programming time in seconds: E / 18.
    pub time_seconds: f64,
    /// Estimated number of bugs: V / 3000.
    pub estimated_bugs: f64,
}
638
639// -------------------
640// Maintainability Index
641// -------------------
642
/// Composite maintainability index based on the SEI formula.
///
/// MI = 171 - 5.2 * ln(V) - 0.23 * CC - 16.2 * ln(LOC)
///
/// where V is Halstead volume, CC is cyclomatic complexity, and LOC is lines
/// of code. When Halstead volume is unavailable, a simplified formula is used.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaintainabilityIndex {
    /// Maintainability index score (0-171 scale, higher is better).
    pub score: f64,
    /// Average cyclomatic complexity used in calculation.
    pub avg_cyclomatic: f64,
    /// Average lines of code per file used in calculation.
    pub avg_loc: f64,
    /// Average Halstead volume (if Halstead metrics were computed);
    /// omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_halstead_volume: Option<f64>,
    /// Letter grade: "A" (>=85), "B" (65-84), "C" (<65).
    pub grade: String,
}
662
/// Complexity-to-size ratio heuristic for technical debt estimation.
///
/// `ratio` is `complexity_points / code_kloc`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnicalDebtRatio {
    /// Complexity points per KLOC (higher means denser debt).
    pub ratio: f64,
    /// Aggregate complexity points used in the ratio.
    pub complexity_points: usize,
    /// KLOC basis used in the ratio denominator.
    pub code_kloc: f64,
    /// Bucketed interpretation of debt ratio.
    pub level: TechnicalDebtLevel,
}
675
/// Debt-ratio buckets, ordered from best to worst. Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TechnicalDebtLevel {
    /// Little debt.
    Low,
    /// Noticeable but manageable debt.
    Moderate,
    /// Significant debt.
    High,
    /// Worst bucket.
    Critical,
}
684
685// -------------------
686// Complexity metrics
687// -------------------
688
/// Aggregate complexity metrics plus per-file detail rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityReport {
    /// Total number of functions analyzed.
    pub total_functions: usize,
    /// Average function length in lines.
    pub avg_function_length: f64,
    /// Longest function length in lines.
    pub max_function_length: usize,
    /// Average cyclomatic complexity across files.
    pub avg_cyclomatic: f64,
    /// Maximum cyclomatic complexity found.
    pub max_cyclomatic: usize,
    /// Average cognitive complexity across files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_cognitive: Option<f64>,
    /// Maximum cognitive complexity found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_cognitive: Option<usize>,
    /// Average nesting depth across files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_nesting_depth: Option<f64>,
    /// Maximum nesting depth found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting_depth: Option<usize>,
    /// Number of files whose risk level is high (threshold defined by producer).
    pub high_risk_files: usize,
    /// Histogram of cyclomatic complexity distribution.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub histogram: Option<ComplexityHistogram>,
    /// Halstead software science metrics (requires `halstead` feature).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub halstead: Option<HalsteadMetrics>,
    /// Composite maintainability index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub maintainability_index: Option<MaintainabilityIndex>,
    /// Complexity-to-size debt heuristic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub technical_debt: Option<TechnicalDebtRatio>,
    /// Per-file complexity rows.
    pub files: Vec<FileComplexity>,
}
723
/// Complexity metrics for one file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileComplexity {
    /// File path.
    pub path: String,
    /// Module the file belongs to.
    pub module: String,
    /// Number of functions in the file.
    pub function_count: usize,
    /// Longest function length in lines.
    pub max_function_length: usize,
    /// Cyclomatic complexity for this file.
    pub cyclomatic_complexity: usize,
    /// Cognitive complexity for this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cognitive_complexity: Option<usize>,
    /// Maximum nesting depth in this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting: Option<usize>,
    /// Bucketed risk classification.
    pub risk_level: ComplexityRisk,
    /// Function-level complexity details (only when --detail-functions is used).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub functions: Option<Vec<FunctionComplexityDetail>>,
}
742
/// Function-level complexity details.
///
/// Optional fields are omitted from serialized output when not computed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionComplexityDetail {
    /// Function name.
    pub name: String,
    /// Start line (1-indexed).
    pub line_start: usize,
    /// End line (1-indexed, inclusive of the function body — confirm producer).
    pub line_end: usize,
    /// Function length in lines.
    pub length: usize,
    /// Cyclomatic complexity.
    pub cyclomatic: usize,
    /// Cognitive complexity (if computed).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cognitive: Option<usize>,
    /// Maximum nesting depth within the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting: Option<usize>,
    /// Number of parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub param_count: Option<usize>,
}
766
/// Per-file complexity risk buckets, ordered from best to worst.
/// Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComplexityRisk {
    /// Low risk.
    Low,
    /// Moderate risk.
    Moderate,
    /// High risk.
    High,
    /// Worst bucket.
    Critical,
}
775
/// Histogram of cyclomatic complexity distribution across files.
///
/// Used to visualize the distribution of complexity values in a codebase.
/// Default bucket boundaries are 0-4, 5-9, 10-14, 15-19, 20-24, 25-29, 30+.
/// `buckets` holds the lower boundaries; `counts` is expected to have the
/// same length, with the last entry counting the open-ended final bucket.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityHistogram {
    /// Bucket boundaries (e.g., [0, 5, 10, 15, 20, 25, 30]).
    pub buckets: Vec<u32>,
    /// Count of files in each bucket.
    pub counts: Vec<u32>,
    /// Total files analyzed.
    pub total: u32,
}
789
790impl ComplexityHistogram {
791    /// Generate an ASCII bar chart visualization of the histogram.
792    ///
793    /// # Arguments
794    /// * `width` - Maximum width of the bars in characters
795    ///
796    /// # Returns
797    /// A multi-line string with labeled bars showing distribution
798    pub fn to_ascii(&self, width: usize) -> String {
799        let max_count = self.counts.iter().max().copied().unwrap_or(1).max(1);
800        let mut output = String::new();
801        for (i, count) in self.counts.iter().enumerate() {
802            let label = if i < self.buckets.len() - 1 {
803                format!("{:>2}-{:<2}", self.buckets[i], self.buckets[i + 1] - 1)
804            } else {
805                format!("{:>2}+ ", self.buckets.get(i).copied().unwrap_or(30))
806            };
807            let bar_len = (*count as f64 / max_count as f64 * width as f64) as usize;
808            let bar = "\u{2588}".repeat(bar_len);
809            output.push_str(&format!("{} |{} {}\n", label, bar, count));
810        }
811        output
812    }
813}
814
815// -------------------
816// Baseline/Ratchet types
817// -------------------
818
/// Schema version for baseline files.
///
/// v1: Initial baseline format with complexity and determinism tracking.
pub const BASELINE_VERSION: u32 = 1;
822
/// Complexity baseline for tracking trends over time.
///
/// Used by the ratchet system to enforce that complexity metrics
/// do not regress across commits. The baseline captures a snapshot
/// of complexity at a known-good state. Construct via
/// [`ComplexityBaseline::new`] or [`ComplexityBaseline::from_analysis`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityBaseline {
    /// Schema version for forward compatibility.
    pub baseline_version: u32,
    /// ISO 8601 timestamp when this baseline was generated.
    pub generated_at: String,
    /// Git commit SHA at which this baseline was captured, if available.
    pub commit: Option<String>,
    /// Aggregate complexity metrics.
    pub metrics: BaselineMetrics,
    /// Per-file baseline entries for granular tracking.
    pub files: Vec<FileBaselineEntry>,
    /// Complexity section mirroring analysis receipt structure for ratchet compatibility.
    ///
    /// This allows using the same JSON pointers (e.g., `/complexity/avg_cyclomatic`)
    /// when comparing baselines against current analysis receipts.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub complexity: Option<BaselineComplexitySection>,
    /// Determinism baseline for reproducibility verification.
    ///
    /// Present when the baseline was generated with `--determinism`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub determinism: Option<DeterminismBaseline>,
}
852
853impl ComplexityBaseline {
854    /// Creates a new empty baseline with default values.
855    pub fn new() -> Self {
856        Self {
857            baseline_version: BASELINE_VERSION,
858            generated_at: String::new(),
859            commit: None,
860            metrics: BaselineMetrics::default(),
861            files: Vec::new(),
862            complexity: None,
863            determinism: None,
864        }
865    }
866
867    /// Creates a baseline from an analysis receipt.
868    ///
869    /// Extracts complexity information from the receipt's complexity report
870    /// and derived totals to build a baseline snapshot.
871    pub fn from_analysis(receipt: &AnalysisReceipt) -> Self {
872        let generated_at = chrono_timestamp_iso8601(receipt.generated_at_ms);
873
874        let (metrics, files, complexity) = if let Some(ref complexity_report) = receipt.complexity {
875            let total_code_lines = receipt
876                .derived
877                .as_ref()
878                .map(|d| d.totals.code as u64)
879                .unwrap_or(0);
880            let total_files = receipt
881                .derived
882                .as_ref()
883                .map(|d| d.totals.files as u64)
884                .unwrap_or(0);
885
886            let metrics = BaselineMetrics {
887                total_code_lines,
888                total_files,
889                avg_cyclomatic: complexity_report.avg_cyclomatic,
890                max_cyclomatic: complexity_report.max_cyclomatic as u32,
891                avg_cognitive: complexity_report.avg_cognitive.unwrap_or(0.0),
892                max_cognitive: complexity_report.max_cognitive.unwrap_or(0) as u32,
893                avg_nesting_depth: complexity_report.avg_nesting_depth.unwrap_or(0.0),
894                max_nesting_depth: complexity_report.max_nesting_depth.unwrap_or(0) as u32,
895                function_count: complexity_report.total_functions as u64,
896                avg_function_length: complexity_report.avg_function_length,
897            };
898
899            let files: Vec<FileBaselineEntry> = complexity_report
900                .files
901                .iter()
902                .map(|f| FileBaselineEntry {
903                    path: f.path.clone(),
904                    code_lines: 0, // Not available in FileComplexity
905                    cyclomatic: f.cyclomatic_complexity as u32,
906                    cognitive: f.cognitive_complexity.unwrap_or(0) as u32,
907                    max_nesting: f.max_nesting.unwrap_or(0) as u32,
908                    function_count: f.function_count as u32,
909                    content_hash: None,
910                })
911                .collect();
912
913            // Build complexity section mirroring analysis receipt structure
914            let complexity_section = BaselineComplexitySection {
915                total_functions: complexity_report.total_functions,
916                avg_function_length: complexity_report.avg_function_length,
917                max_function_length: complexity_report.max_function_length,
918                avg_cyclomatic: complexity_report.avg_cyclomatic,
919                max_cyclomatic: complexity_report.max_cyclomatic,
920                avg_cognitive: complexity_report.avg_cognitive,
921                max_cognitive: complexity_report.max_cognitive,
922                avg_nesting_depth: complexity_report.avg_nesting_depth,
923                max_nesting_depth: complexity_report.max_nesting_depth,
924                high_risk_files: complexity_report.high_risk_files,
925            };
926
927            (metrics, files, Some(complexity_section))
928        } else {
929            (BaselineMetrics::default(), Vec::new(), None)
930        };
931
932        Self {
933            baseline_version: BASELINE_VERSION,
934            generated_at,
935            commit: None,
936            metrics,
937            files,
938            complexity,
939            determinism: None,
940        }
941    }
942}
943
944impl Default for ComplexityBaseline {
945    fn default() -> Self {
946        Self::new()
947    }
948}
949
/// Complexity section mirroring analysis receipt structure for ratchet compatibility.
///
/// This provides the same field names as `ComplexityReport` so that JSON pointers
/// like `/complexity/avg_cyclomatic` work consistently across baselines and receipts.
/// Optional fields are omitted from serialized output when `None`, matching the
/// receipt's serialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineComplexitySection {
    /// Total number of functions analyzed.
    pub total_functions: usize,
    /// Average function length in lines.
    pub avg_function_length: f64,
    /// Maximum function length found.
    pub max_function_length: usize,
    /// Average cyclomatic complexity across all files.
    pub avg_cyclomatic: f64,
    /// Maximum cyclomatic complexity found in any file.
    pub max_cyclomatic: usize,
    /// Average cognitive complexity across all files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_cognitive: Option<f64>,
    /// Maximum cognitive complexity found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_cognitive: Option<usize>,
    /// Average nesting depth across all files.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub avg_nesting_depth: Option<f64>,
    /// Maximum nesting depth found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_nesting_depth: Option<usize>,
    /// Number of high-risk files.
    pub high_risk_files: usize,
}
981
/// Aggregate baseline metrics for the entire codebase.
///
/// All fields default to zero (see the manual `Default` impl below).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineMetrics {
    /// Total lines of code across all files.
    pub total_code_lines: u64,
    /// Total number of source files.
    pub total_files: u64,
    /// Average cyclomatic complexity across all functions.
    pub avg_cyclomatic: f64,
    /// Maximum cyclomatic complexity found in any function.
    pub max_cyclomatic: u32,
    /// Average cognitive complexity across all functions.
    pub avg_cognitive: f64,
    /// Maximum cognitive complexity found in any function.
    pub max_cognitive: u32,
    /// Average nesting depth across all functions.
    pub avg_nesting_depth: f64,
    /// Maximum nesting depth found in any function.
    pub max_nesting_depth: u32,
    /// Total number of functions analyzed.
    pub function_count: u64,
    /// Average function length in lines.
    pub avg_function_length: f64,
}
1006
1007impl Default for BaselineMetrics {
1008    fn default() -> Self {
1009        Self {
1010            total_code_lines: 0,
1011            total_files: 0,
1012            avg_cyclomatic: 0.0,
1013            max_cyclomatic: 0,
1014            avg_cognitive: 0.0,
1015            max_cognitive: 0,
1016            avg_nesting_depth: 0.0,
1017            max_nesting_depth: 0,
1018            function_count: 0,
1019            avg_function_length: 0.0,
1020        }
1021    }
1022}
1023
/// Per-file baseline entry for granular complexity tracking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileBaselineEntry {
    /// Normalized file path (forward slashes).
    pub path: String,
    /// Lines of code in this file.
    pub code_lines: u64,
    /// Cyclomatic complexity for this file.
    pub cyclomatic: u32,
    /// Cognitive complexity for this file.
    pub cognitive: u32,
    /// Maximum nesting depth in this file.
    pub max_nesting: u32,
    /// Number of functions in this file.
    pub function_count: u32,
    /// BLAKE3 hash of file content for change detection.
    ///
    /// `None` when no hash was recorded. Unlike the optional fields of
    /// `BaselineComplexitySection`, this field has no `skip_serializing_if`,
    /// so `None` serializes as JSON `null` rather than being omitted.
    pub content_hash: Option<String>,
}
1042
/// Build determinism baseline for reproducibility verification.
///
/// Tracks hashes of build artifacts and source inputs to detect
/// non-deterministic builds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeterminismBaseline {
    /// Schema version for forward compatibility.
    pub baseline_version: u32,
    /// ISO 8601 timestamp when this baseline was generated.
    /// NOTE(review): presumably produced via `chrono_timestamp_iso8601` —
    /// confirm at the call site that writes this baseline.
    pub generated_at: String,
    /// Hash of the final build artifact.
    pub build_hash: String,
    /// Hash of all source files combined.
    pub source_hash: String,
    /// Hash of Cargo.lock if present (Rust projects).
    /// `None` (serialized as JSON `null`) when the file is absent.
    pub cargo_lock_hash: Option<String>,
}
1060
/// Helper to convert a Unix timestamp in milliseconds to an RFC 3339 / ISO 8601 string.
///
/// Output shape is `YYYY-MM-DDTHH:MM:SS.sssZ` (always UTC, millisecond precision).
///
/// Because `ms` is unsigned (`u128`), pre-epoch timestamps cannot occur, so the
/// whole computation stays in unsigned arithmetic. (The previous version cast the
/// seconds to `i64` — silently truncating absurdly large inputs — and carried an
/// unreachable "before epoch" branch.)
fn chrono_timestamp_iso8601(ms: u128) -> String {
    const SECS_PER_MIN: u128 = 60;
    const SECS_PER_HOUR: u128 = 3600;
    const SECS_PER_DAY: u128 = 86400;

    // Split into whole seconds and leftover milliseconds.
    let total_secs = ms / 1000;
    let millis = (ms % 1000) as u32;

    // Days since the Unix epoch (1970-01-01) and seconds within the day.
    let days = total_secs / SECS_PER_DAY;
    let day_secs = total_secs % SECS_PER_DAY;

    // Time of day.
    let hour = (day_secs / SECS_PER_HOUR) as u32;
    let min = ((day_secs % SECS_PER_HOUR) / SECS_PER_MIN) as u32;
    let sec = (day_secs % SECS_PER_MIN) as u32;

    // Convert days since epoch to year/month/day using civil_from_days from
    // Howard Hinnant's date algorithms: shift the epoch to 0000-03-01 so leap
    // days fall at the end of the cycle, then decompose by 400-year eras.
    let z = days + 719_468;
    let era = z / 146_097;
    let doe = (z - era * 146_097) as u32; // day of era [0, 146096]
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365; // year of era [0, 399]
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of March-based year [0, 365]
    let mp = (5 * doy + 2) / 153; // March-based month [0, 11]
    let d = doy - (153 * mp + 2) / 5 + 1; // day of month [1, 31]
    let m = if mp < 10 { mp + 3 } else { mp - 9 }; // calendar month [1, 12]
    // Jan/Feb belong to the *next* calendar year in the March-based reckoning.
    let y = yoe as u128 + era * 400 + u128::from(m <= 2);

    // Format as RFC 3339: YYYY-MM-DDTHH:MM:SS.sssZ
    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z",
        y, m, d, hour, min, sec, millis
    )
}
1107
1108// ---------
1109// Fun stuff
1110// ---------
1111
/// Container for the optional "fun" extras section of an analysis receipt
/// (the `fun` field on `AnalysisReceipt`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunReport {
    /// Eco-label rating; `None` when it was not computed.
    pub eco_label: Option<EcoLabel>,
}
1116
/// A lighthearted eco-style rating attached to the fun report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EcoLabel {
    /// Numeric score backing the label. Scale/units are not defined in this
    /// crate — see the computation in the analysis crate.
    pub score: f64,
    /// Human-readable label for the score; exact values set by the analyzer.
    pub label: String,
    /// Byte count the rating was derived from — presumably total scanned
    /// bytes; confirm against the producer in tokmd-analysis.
    pub bytes: u64,
    /// Free-form explanatory notes.
    pub notes: String,
}
1124
1125// =========================
1126// Ecosystem Envelope (v1) — re-exported from tokmd-envelope
1127// =========================
1128
/// Schema identifier for ecosystem envelope format.
/// v1: Initial envelope specification for multi-sensor integration.
///
/// Backwards-compatible alias for [`tokmd_envelope::SENSOR_REPORT_SCHEMA`];
/// new code should prefer the canonical name from `tokmd_envelope`.
pub const ENVELOPE_SCHEMA: &str = tokmd_envelope::SENSOR_REPORT_SCHEMA;
1132
1133// Re-export all envelope types with backwards-compatible aliases
1134pub use tokmd_envelope::Artifact;
1135pub use tokmd_envelope::Finding;
1136pub use tokmd_envelope::FindingLocation;
1137pub use tokmd_envelope::FindingSeverity;
1138pub use tokmd_envelope::GateItem;
1139pub use tokmd_envelope::GateResults as GatesEnvelope;
1140pub use tokmd_envelope::SensorReport as Envelope;
1141pub use tokmd_envelope::ToolMeta as EnvelopeTool;
1142pub use tokmd_envelope::Verdict;
1143
1144// Also re-export the canonical names for new code
1145pub use tokmd_envelope::GateResults;
1146pub use tokmd_envelope::SensorReport;
1147pub use tokmd_envelope::ToolMeta;