Skip to main content

tokmd_analysis_types/
lib.rs

1//! # tokmd-analysis-types
2//!
3//! **Tier 0 (Analysis Contract)**
4//!
5//! Pure data structures for analysis receipts. No I/O or business logic.
6//!
7//! ## What belongs here
8//! * Analysis-specific receipt types and findings
9//! * Schema definitions for analysis outputs
10//! * Type enums for classification results
11//!
12//! ## What does NOT belong here
13//! * Analysis computation logic (use tokmd-analysis)
14//! * Formatting logic (use tokmd-analysis-format)
15//! * File I/O operations
16
17use std::collections::BTreeMap;
18
19use serde::{Deserialize, Serialize};
20use tokmd_types::{ScanStatus, ToolInfo};
21
22/// Schema version for analysis receipts.
///
/// NOTE(review): presumably the value written into
/// `AnalysisReceipt::schema_version`; confirm where receipts are built.
23pub const ANALYSIS_SCHEMA_VERSION: u32 = 2;
24
/// Top-level receipt for one analysis run.
///
/// The fields before `archetype` are run metadata. Every field from
/// `archetype` onward is an `Option`-wrapped report section, so a receipt may
/// carry any subset of them.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct AnalysisReceipt {
    // NOTE(review): presumably populated from `ANALYSIS_SCHEMA_VERSION`; confirm at the construction site.
27    pub schema_version: u32,
    // NOTE(review): the `_ms` suffix suggests a millisecond timestamp; the epoch is not stated in this file.
28    pub generated_at_ms: u128,
    /// Tool metadata; the type is imported from `tokmd_types`.
29    pub tool: ToolInfo,
30    pub mode: String,
    /// Scan status; the type is imported from `tokmd_types`.
31    pub status: ScanStatus,
32    pub warnings: Vec<String>,
    /// Description of the inputs the analysis was computed from.
33    pub source: AnalysisSource,
    /// Argument metadata recorded for the run.
34    pub args: AnalysisArgsMeta,
    // Optional report sections follow; each may be `None`.
35    pub archetype: Option<Archetype>,
36    pub topics: Option<TopicClouds>,
37    pub entropy: Option<EntropyReport>,
38    pub predictive_churn: Option<PredictiveChurnReport>,
39    pub corporate_fingerprint: Option<CorporateFingerprint>,
40    pub license: Option<LicenseReport>,
41    pub derived: Option<DerivedReport>,
42    pub assets: Option<AssetReport>,
43    pub deps: Option<DependencyReport>,
44    pub git: Option<GitReport>,
45    pub imports: Option<ImportReport>,
46    pub dup: Option<DuplicateReport>,
47    pub fun: Option<FunReport>,
48}
49
/// Input provenance stored in `AnalysisReceipt::source`.
///
/// All `Option` fields may be `None`; this file does not say when each one is
/// populated.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct AnalysisSource {
52    pub inputs: Vec<String>,
53    pub export_path: Option<String>,
54    pub base_receipt_path: Option<String>,
    // NOTE(review): the two `export_*` fields below look like values copied from an
    // upstream export's own header; confirm against the producer.
55    pub export_schema_version: Option<u32>,
56    pub export_generated_at_ms: Option<u128>,
57    pub base_signature: Option<String>,
58    pub module_roots: Vec<String>,
59    pub module_depth: usize,
    // NOTE(review): stored as a free-form string; the set of valid values is not defined here.
60    pub children: String,
61}
62
/// Argument metadata stored in `AnalysisReceipt::args`.
///
/// NOTE(review): presumably mirrors the options the analysis was invoked with;
/// the limit fields are optional and their defaults are not defined in this file.
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct AnalysisArgsMeta {
    // `preset`, `format` and `import_granularity` are plain strings; valid values are not listed here.
65    pub preset: String,
66    pub format: String,
67    pub window_tokens: Option<usize>,
68    pub git: Option<bool>,
69    pub max_files: Option<usize>,
70    pub max_bytes: Option<u64>,
71    pub max_commits: Option<usize>,
72    pub max_commit_files: Option<usize>,
73    pub max_file_bytes: Option<u64>,
74    pub import_granularity: String,
75}
76
77// ---------------
78// Project context
79// ---------------
80
/// Project archetype section of a receipt (`AnalysisReceipt::archetype`).
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct Archetype {
    /// Archetype identifier as a free-form string; valid values are not listed here.
83    pub kind: String,
    // NOTE(review): presumably the signals that led to `kind`; confirm against the detector.
84    pub evidence: Vec<String>,
85}
86
87// -----------------
88// Semantic topics
89// -----------------
90
/// Topic terms section of a receipt (`AnalysisReceipt::topics`).
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct TopicClouds {
    /// Terms keyed by string (a module name, going by the field name). `BTreeMap` keeps
    /// keys in sorted order.
93    pub per_module: BTreeMap<String, Vec<TopicTerm>>,
    /// Terms for the input as a whole.
94    pub overall: Vec<TopicTerm>,
95}
96
/// One term entry inside `TopicClouds`.
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct TopicTerm {
99    pub term: String,
    // NOTE(review): the scoring formula is not defined in this crate.
100    pub score: f64,
    // NOTE(review): `tf` / `df` presumably mean term frequency and document frequency; confirm.
101    pub tf: u32,
102    pub df: u32,
103}
104
105// -----------------
106// Entropy profiling
107// -----------------
108
/// Entropy profiling section of a receipt (`AnalysisReceipt::entropy`).
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct EntropyReport {
    /// Findings, one `EntropyFinding` per entry.
111    pub suspects: Vec<EntropyFinding>,
112}
113
/// One entry of `EntropyReport::suspects`.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct EntropyFinding {
116    pub path: String,
117    pub module: String,
    /// Entropy value; the field name gives the unit as bits per byte.
118    pub entropy_bits_per_byte: f32,
    // NOTE(review): presumably the number of bytes sampled to compute the entropy; confirm.
119    pub sample_bytes: u32,
    /// Classification bucket for this finding.
120    pub class: EntropyClass,
121}
122
/// Classification bucket used by `EntropyFinding::class`.
///
/// Because of `rename_all = "snake_case"`, serde writes the variants as
/// `low`, `normal`, `suspicious` and `high`. The thresholds separating the
/// buckets are not defined in this file.
123#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
124#[serde(rename_all = "snake_case")]
125pub enum EntropyClass {
126    Low,
127    Normal,
128    Suspicious,
129    High,
130}
131
132// -----------------
133// Predictive churn
134// -----------------
135
/// Predictive churn section of a receipt (`AnalysisReceipt::predictive_churn`).
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct PredictiveChurnReport {
    /// One `ChurnTrend` per string key (a module name, going by the field name);
    /// `BTreeMap` keeps keys in sorted order.
138    pub per_module: BTreeMap<String, ChurnTrend>,
139}
140
/// Trend values stored per key in `PredictiveChurnReport::per_module`.
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ChurnTrend {
    // NOTE(review): `slope` and `r2` look like the slope and R-squared of a fitted
    // trend line; the fitting method and units are not defined here.
143    pub slope: f64,
144    pub r2: f64,
    /// Signed value (`i64`), so it can be negative.
145    pub recent_change: i64,
146    pub classification: TrendClass,
147}
148
/// Direction label used by `ChurnTrend::classification`.
///
/// Because of `rename_all = "snake_case"`, serde writes the variants as
/// `rising`, `flat` and `falling`. The rule that picks a variant is not
/// defined in this file.
149#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
150#[serde(rename_all = "snake_case")]
151pub enum TrendClass {
152    Rising,
153    Flat,
154    Falling,
155}
156
157// ---------------------
158// Corporate fingerprint
159// ---------------------
160
/// Corporate fingerprint section of a receipt
/// (`AnalysisReceipt::corporate_fingerprint`).
161#[derive(Debug, Clone, Serialize, Deserialize)]
162pub struct CorporateFingerprint {
    /// Per-domain statistics, one `DomainStat` per entry.
163    pub domains: Vec<DomainStat>,
164}
165
/// One entry of `CorporateFingerprint::domains`.
166#[derive(Debug, Clone, Serialize, Deserialize)]
167pub struct DomainStat {
    // NOTE(review): presumably an e-mail domain taken from commit authors; confirm.
168    pub domain: String,
169    pub commits: u32,
    // NOTE(review): scale (0..1 vs 0..100) is not stated in this file.
170    pub pct: f32,
171}
172
173// -------------
174// License radar
175// -------------
176
/// License section of a receipt (`AnalysisReceipt::license`).
177#[derive(Debug, Clone, Serialize, Deserialize)]
178pub struct LicenseReport {
    /// Individual findings, one `LicenseFinding` per entry.
179    pub findings: Vec<LicenseFinding>,
    // NOTE(review): presumably the single license chosen from `findings`; `None` when
    // none was chosen. Confirm the selection rule with the producer.
180    pub effective: Option<String>,
181}
182
/// One entry of `LicenseReport::findings`.
183#[derive(Debug, Clone, Serialize, Deserialize)]
184pub struct LicenseFinding {
    // NOTE(review): the name suggests an SPDX license identifier; not validated by this type.
185    pub spdx: String,
    // NOTE(review): value range is not stated in this file.
186    pub confidence: f32,
187    pub source_path: String,
    /// Kind of source the finding came from.
188    pub source_kind: LicenseSourceKind,
189}
190
/// Source kind used by `LicenseFinding::source_kind`.
///
/// Because of `rename_all = "snake_case"`, serde writes the variants as
/// `metadata` and `text`.
191#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
192#[serde(rename_all = "snake_case")]
193pub enum LicenseSourceKind {
194    Metadata,
195    Text,
196}
197
198// -----------------
199// Derived analytics
200// -----------------
201
/// Derived analytics section of a receipt (`AnalysisReceipt::derived`).
///
/// Aggregates the individual derived sub-reports. Only `tree`,
/// `context_window`, `cocomo` and `todo` are optional; every other
/// sub-report is always present.
202#[derive(Debug, Clone, Serialize, Deserialize)]
203pub struct DerivedReport {
204    pub totals: DerivedTotals,
    // `doc_density` and `whitespace` share the `RatioReport` shape.
205    pub doc_density: RatioReport,
206    pub whitespace: RatioReport,
207    pub verbosity: RateReport,
208    pub max_file: MaxFileReport,
209    pub lang_purity: LangPurityReport,
210    pub nesting: NestingReport,
211    pub test_density: TestDensityReport,
212    pub boilerplate: BoilerplateReport,
213    pub polyglot: PolyglotReport,
214    pub distribution: DistributionReport,
215    pub histogram: Vec<HistogramBucket>,
216    pub top: TopOffenders,
    // NOTE(review): presumably a pre-rendered text tree; format is not defined here.
217    pub tree: Option<String>,
218    pub reading_time: ReadingTimeReport,
219    pub context_window: Option<ContextWindowReport>,
220    pub cocomo: Option<CocomoReport>,
221    pub todo: Option<TodoReport>,
222    pub integrity: IntegrityReport,
223}
224
/// Aggregate counters stored in `DerivedReport::totals`.
///
/// NOTE(review): how `lines` relates to `code`, `comments` and `blanks` is not
/// stated in this file.
225#[derive(Debug, Clone, Serialize, Deserialize)]
226pub struct DerivedTotals {
227    pub files: usize,
228    pub code: usize,
229    pub comments: usize,
230    pub blanks: usize,
231    pub lines: usize,
232    pub bytes: usize,
233    pub tokens: usize,
234}
235
/// A ratio metric reported as one total row plus per-language and
/// per-module breakdowns.
///
/// Used by `DerivedReport::doc_density` and `DerivedReport::whitespace`.
236#[derive(Debug, Clone, Serialize, Deserialize)]
237pub struct RatioReport {
238    pub total: RatioRow,
239    pub by_lang: Vec<RatioRow>,
240    pub by_module: Vec<RatioRow>,
241}
242
/// One row of a `RatioReport`.
243#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct RatioRow {
    /// Row label; the containing field (`total`, `by_lang`, `by_module`) determines its meaning.
245    pub key: String,
246    pub numerator: usize,
247    pub denominator: usize,
    // NOTE(review): presumably `numerator / denominator`; zero-denominator handling
    // is up to the producer.
248    pub ratio: f64,
249}
250
/// A rate metric reported as one total row plus per-language and
/// per-module breakdowns.
///
/// Used by `DerivedReport::verbosity`.
251#[derive(Debug, Clone, Serialize, Deserialize)]
252pub struct RateReport {
253    pub total: RateRow,
254    pub by_lang: Vec<RateRow>,
255    pub by_module: Vec<RateRow>,
256}
257
/// One row of a `RateReport`.
///
/// Same shape as `RatioRow`, except the computed value is named `rate`.
258#[derive(Debug, Clone, Serialize, Deserialize)]
259pub struct RateRow {
    /// Row label; the containing field (`total`, `by_lang`, `by_module`) determines its meaning.
260    pub key: String,
261    pub numerator: usize,
262    pub denominator: usize,
    // NOTE(review): presumably derived from `numerator` and `denominator`; exact formula not shown here.
263    pub rate: f64,
264}
265
/// Max-file sub-report stored in `DerivedReport::max_file`.
///
/// Holds one overall file row plus keyed rows per language and per module.
/// NOTE(review): the metric that decides which file is "max" is not stated here.
266#[derive(Debug, Clone, Serialize, Deserialize)]
267pub struct MaxFileReport {
268    pub overall: FileStatRow,
269    pub by_lang: Vec<MaxFileRow>,
270    pub by_module: Vec<MaxFileRow>,
271}
272
/// A keyed file row used in `MaxFileReport::by_lang` and
/// `MaxFileReport::by_module`.
273#[derive(Debug, Clone, Serialize, Deserialize)]
274pub struct MaxFileRow {
    /// Row label (a language or a module, depending on the containing list).
275    pub key: String,
    /// Statistics of the file selected for this key.
276    pub file: FileStatRow,
277}
278
/// Per-file statistics row.
///
/// Reused by `MaxFileReport`, `MaxFileRow` and every list in `TopOffenders`.
279#[derive(Debug, Clone, Serialize, Deserialize)]
280pub struct FileStatRow {
281    pub path: String,
282    pub module: String,
283    pub lang: String,
284    pub code: usize,
285    pub comments: usize,
286    pub blanks: usize,
287    pub lines: usize,
288    pub bytes: usize,
289    pub tokens: usize,
    // The two derived values below are optional; NOTE(review): presumably `None` when
    // the divisor would be zero. Confirm with the producer.
290    pub doc_pct: Option<f64>,
291    pub bytes_per_line: Option<f64>,
    // NOTE(review): likely directory depth of `path`; confirm.
292    pub depth: usize,
293}
294
/// Language purity sub-report stored in `DerivedReport::lang_purity`.
295#[derive(Debug, Clone, Serialize, Deserialize)]
296pub struct LangPurityReport {
    /// One `LangPurityRow` per entry; each row names a module.
297    pub rows: Vec<LangPurityRow>,
298}
299
/// One row of `LangPurityReport::rows`, describing a single module.
300#[derive(Debug, Clone, Serialize, Deserialize)]
301pub struct LangPurityRow {
302    pub module: String,
303    pub lang_count: usize,
304    pub dominant_lang: String,
305    pub dominant_lines: usize,
    // NOTE(review): scale (0..1 vs 0..100) is not stated in this file.
306    pub dominant_pct: f64,
307}
308
/// Nesting sub-report stored in `DerivedReport::nesting`.
///
/// NOTE(review): what is being nested (directories, code blocks) is not
/// stated in this file.
309#[derive(Debug, Clone, Serialize, Deserialize)]
310pub struct NestingReport {
311    pub max: usize,
312    pub avg: f64,
    /// Keyed breakdown using the same `max` / `avg` pair.
313    pub by_module: Vec<NestingRow>,
314}
315
/// One row of `NestingReport::by_module`.
316#[derive(Debug, Clone, Serialize, Deserialize)]
317pub struct NestingRow {
    /// Row label (a module, going by the containing field name).
318    pub key: String,
319    pub max: usize,
320    pub avg: f64,
321}
322
/// Test density sub-report stored in `DerivedReport::test_density`.
323#[derive(Debug, Clone, Serialize, Deserialize)]
324pub struct TestDensityReport {
325    pub test_lines: usize,
326    pub prod_lines: usize,
327    pub test_files: usize,
328    pub prod_files: usize,
    // NOTE(review): which pair of counters forms the ratio (and in which direction)
    // is not stated in this file.
329    pub ratio: f64,
330}
331
/// Boilerplate sub-report stored in `DerivedReport::boilerplate`.
332#[derive(Debug, Clone, Serialize, Deserialize)]
333pub struct BoilerplateReport {
334    pub infra_lines: usize,
335    pub logic_lines: usize,
    // NOTE(review): exact formula relating `infra_lines` and `logic_lines` is not shown here.
336    pub ratio: f64,
    // NOTE(review): presumably the languages counted as infrastructure; confirm.
337    pub infra_langs: Vec<String>,
338}
339
/// Polyglot sub-report stored in `DerivedReport::polyglot`.
340#[derive(Debug, Clone, Serialize, Deserialize)]
341pub struct PolyglotReport {
342    pub lang_count: usize,
    // NOTE(review): presumably an entropy over the language distribution; base and
    // weighting are not defined here.
343    pub entropy: f64,
344    pub dominant_lang: String,
345    pub dominant_lines: usize,
    // NOTE(review): scale (0..1 vs 0..100) is not stated in this file.
346    pub dominant_pct: f64,
347}
348
/// Summary statistics stored in `DerivedReport::distribution`.
///
/// NOTE(review): the measured quantity (e.g. lines per file) is not stated
/// in this file.
349#[derive(Debug, Clone, Serialize, Deserialize)]
350pub struct DistributionReport {
351    pub count: usize,
352    pub min: usize,
353    pub max: usize,
354    pub mean: f64,
355    pub median: f64,
    // NOTE(review): `p90` / `p99` read as 90th and 99th percentiles; interpolation method not shown.
356    pub p90: f64,
357    pub p99: f64,
    // NOTE(review): presumably a Gini coefficient of the same distribution; confirm.
358    pub gini: f64,
359}
360
/// One bucket of `DerivedReport::histogram`.
361#[derive(Debug, Clone, Serialize, Deserialize)]
362pub struct HistogramBucket {
363    pub label: String,
364    pub min: usize,
    // NOTE(review): optional upper bound; `None` presumably marks an open-ended last
    // bucket. Inclusive/exclusive bounds are not stated here.
365    pub max: Option<usize>,
366    pub files: usize,
    // NOTE(review): scale (0..1 vs 0..100) is not stated in this file.
367    pub pct: f64,
368}
369
/// Ranked file lists stored in `DerivedReport::top`.
///
/// Every list holds `FileStatRow` entries; list length and ordering are
/// decided by the producer and not fixed by this type.
370#[derive(Debug, Clone, Serialize, Deserialize)]
371pub struct TopOffenders {
372    pub largest_lines: Vec<FileStatRow>,
373    pub largest_tokens: Vec<FileStatRow>,
374    pub largest_bytes: Vec<FileStatRow>,
375    pub least_documented: Vec<FileStatRow>,
376    pub most_dense: Vec<FileStatRow>,
377}
378
/// Reading time sub-report stored in `DerivedReport::reading_time`.
379#[derive(Debug, Clone, Serialize, Deserialize)]
380pub struct ReadingTimeReport {
    // NOTE(review): presumably `basis_lines / lines_per_minute`; confirm with the producer.
381    pub minutes: f64,
382    pub lines_per_minute: usize,
383    pub basis_lines: usize,
384}
385
/// TODO-marker sub-report stored in `DerivedReport::todo`.
386#[derive(Debug, Clone, Serialize, Deserialize)]
387pub struct TodoReport {
388    pub total: usize,
    // NOTE(review): the name reads as markers per thousand lines of code; the line
    // basis is not stated here.
389    pub density_per_kloc: f64,
    /// Per-tag counts.
390    pub tags: Vec<TodoTagRow>,
391}
392
/// One row of `TodoReport::tags`: a tag and how often it was counted.
393#[derive(Debug, Clone, Serialize, Deserialize)]
394pub struct TodoTagRow {
    /// Tag text; the set of recognised tags is not defined in this file.
395    pub tag: String,
396    pub count: usize,
397}
398
/// Context window sub-report stored in `DerivedReport::context_window`.
399#[derive(Debug, Clone, Serialize, Deserialize)]
400pub struct ContextWindowReport {
401    pub window_tokens: usize,
402    pub total_tokens: usize,
    // NOTE(review): presumably `total_tokens` relative to `window_tokens`; scale not stated.
403    pub pct: f64,
    // NOTE(review): presumably true when `total_tokens` fits within `window_tokens`; confirm.
404    pub fits: bool,
405}
406
/// COCOMO estimate stored in `DerivedReport::cocomo`.
///
/// NOTE(review): `a`..`d` look like the model coefficients used for the
/// estimate and `effort_pm` like person-months; confirm against the code
/// that fills this struct.
407#[derive(Debug, Clone, Serialize, Deserialize)]
408pub struct CocomoReport {
    /// Model mode as a free-form string; valid values are not listed here.
409    pub mode: String,
410    pub kloc: f64,
411    pub effort_pm: f64,
412    pub duration_months: f64,
413    pub staff: f64,
414    pub a: f64,
415    pub b: f64,
416    pub c: f64,
417    pub d: f64,
418}
419
/// Integrity sub-report stored in `DerivedReport::integrity`.
420#[derive(Debug, Clone, Serialize, Deserialize)]
421pub struct IntegrityReport {
    /// Hash algorithm name as a free-form string.
422    pub algo: String,
    // NOTE(review): text encoding of the digest (hex, base64) is not stated here.
423    pub hash: String,
    // NOTE(review): presumably the number of items that were hashed; confirm.
424    pub entries: usize,
425}
426
427// -------------
428// Asset metrics
429// -------------
430
/// Asset metrics section of a receipt (`AnalysisReceipt::assets`).
431#[derive(Debug, Clone, Serialize, Deserialize)]
432pub struct AssetReport {
433    pub total_files: usize,
434    pub total_bytes: u64,
    /// Per-category rollup.
435    pub categories: Vec<AssetCategoryRow>,
    /// Individual file rows; selection and ordering are up to the producer.
436    pub top_files: Vec<AssetFileRow>,
437}
438
/// One row of `AssetReport::categories`.
439#[derive(Debug, Clone, Serialize, Deserialize)]
440pub struct AssetCategoryRow {
    /// Category name as a free-form string; the category set is not defined here.
441    pub category: String,
442    pub files: usize,
443    pub bytes: u64,
444    pub extensions: Vec<String>,
445}
446
/// One row of `AssetReport::top_files`.
447#[derive(Debug, Clone, Serialize, Deserialize)]
448pub struct AssetFileRow {
449    pub path: String,
450    pub bytes: u64,
451    pub category: String,
    // NOTE(review): whether the extension includes a leading dot is not stated here.
452    pub extension: String,
453}
454
455// -----------------
456// Dependency metrics
457// -----------------
458
/// Dependency metrics section of a receipt (`AnalysisReceipt::deps`).
459#[derive(Debug, Clone, Serialize, Deserialize)]
460pub struct DependencyReport {
    // NOTE(review): presumably the sum of `dependencies` over `lockfiles`; confirm.
461    pub total: usize,
    /// One `LockfileReport` per entry.
462    pub lockfiles: Vec<LockfileReport>,
463}
464
/// One entry of `DependencyReport::lockfiles`.
465#[derive(Debug, Clone, Serialize, Deserialize)]
466pub struct LockfileReport {
467    pub path: String,
    /// Lockfile kind as a free-form string; valid values are not listed here.
468    pub kind: String,
    /// Dependency count for this lockfile.
469    pub dependencies: usize,
470}
471
472// ---------
473// Git report
474// ---------
475
/// Git section of a receipt (`AnalysisReceipt::git`).
///
/// Combines two scan counters with the hotspot, bus-factor, freshness and
/// coupling sub-reports.
476#[derive(Debug, Clone, Serialize, Deserialize)]
477pub struct GitReport {
478    pub commits_scanned: usize,
479    pub files_seen: usize,
480    pub hotspots: Vec<HotspotRow>,
481    pub bus_factor: Vec<BusFactorRow>,
482    pub freshness: FreshnessReport,
483    pub coupling: Vec<CouplingRow>,
484}
485
/// One entry of `GitReport::hotspots`.
486#[derive(Debug, Clone, Serialize, Deserialize)]
487pub struct HotspotRow {
488    pub path: String,
489    pub commits: usize,
490    pub lines: usize,
    // NOTE(review): integer score; how it is derived from `commits` and `lines` is
    // not defined in this file.
491    pub score: usize,
492}
493
/// One entry of `GitReport::bus_factor`: an author count for a module.
494#[derive(Debug, Clone, Serialize, Deserialize)]
495pub struct BusFactorRow {
496    pub module: String,
    // NOTE(review): presumably the number of distinct authors; confirm.
497    pub authors: usize,
498}
499
/// Freshness sub-report stored in `GitReport::freshness`.
500#[derive(Debug, Clone, Serialize, Deserialize)]
501pub struct FreshnessReport {
    // NOTE(review): presumably the age in days beyond which a file counts as stale; confirm.
502    pub threshold_days: usize,
503    pub stale_files: usize,
504    pub total_files: usize,
    // NOTE(review): scale (0..1 vs 0..100) is not stated in this file.
505    pub stale_pct: f64,
    /// Per-module breakdown.
506    pub by_module: Vec<ModuleFreshnessRow>,
507}
508
/// One row of `FreshnessReport::by_module`.
509#[derive(Debug, Clone, Serialize, Deserialize)]
510pub struct ModuleFreshnessRow {
511    pub module: String,
    // NOTE(review): `avg_days` / `p90_days` read as mean and 90th-percentile age in
    // days; what the age is measured from is not stated here.
512    pub avg_days: f64,
513    pub p90_days: f64,
514    pub stale_pct: f64,
515}
516
/// One entry of `GitReport::coupling`: a pair of names and a count.
517#[derive(Debug, Clone, Serialize, Deserialize)]
518pub struct CouplingRow {
    // NOTE(review): whether `left` / `right` are files or modules, and whether the
    // pair is ordered, is not stated in this file.
519    pub left: String,
520    pub right: String,
    // NOTE(review): presumably how often the pair changed together; confirm.
521    pub count: usize,
522}
523
524// -----------------
525// Import graph info
526// -----------------
527
/// Import graph section of a receipt (`AnalysisReceipt::imports`).
528#[derive(Debug, Clone, Serialize, Deserialize)]
529pub struct ImportReport {
    /// Granularity label as a free-form string; `AnalysisArgsMeta` has an
    /// `import_granularity` field of the same type.
530    pub granularity: String,
    /// Graph edges, one `ImportEdge` per entry.
531    pub edges: Vec<ImportEdge>,
532}
533
/// One directed edge of `ImportReport::edges`, from `from` to `to`.
534#[derive(Debug, Clone, Serialize, Deserialize)]
535pub struct ImportEdge {
536    pub from: String,
537    pub to: String,
    // NOTE(review): presumably the number of imports collapsed into this edge; confirm.
538    pub count: usize,
539}
540
541// -------------------
542// Duplication metrics
543// -------------------
544
/// Duplication section of a receipt (`AnalysisReceipt::dup`).
545#[derive(Debug, Clone, Serialize, Deserialize)]
546pub struct DuplicateReport {
    /// Groups of duplicates, one `DuplicateGroup` per entry.
547    pub groups: Vec<DuplicateGroup>,
    // NOTE(review): how wasted bytes are computed from the groups is not defined here.
548    pub wasted_bytes: u64,
    /// Detection strategy as a free-form string; valid values are not listed here.
549    pub strategy: String,
550}
551
/// One entry of `DuplicateReport::groups`.
552#[derive(Debug, Clone, Serialize, Deserialize)]
553pub struct DuplicateGroup {
    // NOTE(review): presumably the content hash shared by `files`; algorithm and
    // encoding are not stated here.
554    pub hash: String,
    // NOTE(review): presumably the size of one copy, not the group total; confirm.
555    pub bytes: u64,
556    pub files: Vec<String>,
557}
558
559// ---------
560// Fun stuff
561// ---------
562
/// "Fun" section of a receipt (`AnalysisReceipt::fun`).
563#[derive(Debug, Clone, Serialize, Deserialize)]
564pub struct FunReport {
    /// Optional eco label; may be `None`.
565    pub eco_label: Option<EcoLabel>,
566}
567
/// Eco label stored in `FunReport::eco_label`.
568#[derive(Debug, Clone, Serialize, Deserialize)]
569pub struct EcoLabel {
    // NOTE(review): score range and how it maps to `label` are not defined in this file.
570    pub score: f64,
571    pub label: String,
572    pub bytes: u64,
573    pub notes: String,
574}