Skip to main content

tokmd_analysis/
analysis.rs

1use std::path::PathBuf;
2
3use anyhow::Result;
4use tokmd_analysis_types::{
5    AnalysisArgsMeta, AnalysisReceipt, AnalysisSource, Archetype, AssetReport, ComplexityReport,
6    CorporateFingerprint, DependencyReport, DuplicateReport, EntropyReport, FunReport, GitReport,
7    ImportReport, LicenseReport, PredictiveChurnReport, TopicClouds,
8};
9use tokmd_types::{ExportData, ScanStatus, ToolInfo};
10
11use crate::archetype::detect_archetype;
12#[cfg(feature = "walk")]
13use crate::assets::{build_assets_report, build_dependency_report};
14#[cfg(feature = "git")]
15use crate::churn::build_predictive_churn_report;
16#[cfg(feature = "content")]
17use crate::content::{build_duplicate_report, build_import_report, build_todo_report};
18use crate::derived::{build_tree, derive_report};
19#[cfg(feature = "git")]
20use crate::fingerprint::build_corporate_fingerprint;
21use crate::fun::build_fun_report;
22#[cfg(feature = "git")]
23use crate::git::build_git_report;
24#[cfg(all(feature = "content", feature = "walk"))]
25use crate::license::build_license_report;
26use crate::topics::build_topic_clouds;
27use crate::util::now_ms;
28
/// Named bundles of analysis sections; `plan_for` turns a preset into the
/// concrete set of sections that `analyze` runs.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AnalysisPreset {
    /// No optional sections enabled.
    Receipt,
    /// TODO scan and complexity (plus Halstead when that feature is compiled in).
    Health,
    /// Git report and complexity (plus Halstead when that feature is compiled in).
    Risk,
    /// Asset and dependency reports.
    Supply,
    /// Import report.
    Architecture,
    /// Topic clouds.
    Topics,
    /// Entropy and license reports.
    Security,
    /// Git report, archetype detection and corporate fingerprint.
    Identity,
    /// Git report and predictive churn.
    Git,
    /// Every section except the fun report.
    Deep,
    /// Fun report only.
    Fun,
}
43
/// Granularity selector forwarded by `analyze` to the import-report builder.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ImportGranularity {
    /// Presumably aggregates imports per module — confirm against the import scanner.
    Module,
    /// Presumably keeps imports per file — confirm against the import scanner.
    File,
}
49
/// Optional caps applied while scanning; `None` leaves the corresponding
/// limit unset (this is also what `Default` produces for every field).
#[derive(Clone, Debug, Default)]
pub struct AnalysisLimits {
    /// Passed to the file walker as its file-count limit.
    pub max_files: Option<usize>,
    /// Byte budget consumed by the content scanners (exact semantics live in
    /// those scanners — not visible here).
    pub max_bytes: Option<u64>,
    /// Per-file byte budget consumed by the content scanners (exact semantics
    /// live in those scanners — not visible here).
    pub max_file_bytes: Option<u64>,
    /// Passed to git history collection as its commit limit.
    pub max_commits: Option<usize>,
    /// Passed to git history collection as its per-commit file limit.
    pub max_commit_files: Option<usize>,
}
58
59#[derive(Debug, Clone)]
60pub struct AnalysisContext {
61    pub export: ExportData,
62    pub root: PathBuf,
63    pub source: AnalysisSource,
64}
65
66#[derive(Debug, Clone)]
67pub struct AnalysisRequest {
68    pub preset: AnalysisPreset,
69    pub args: AnalysisArgsMeta,
70    pub limits: AnalysisLimits,
71    pub window_tokens: Option<usize>,
72    pub git: Option<bool>,
73    pub import_granularity: ImportGranularity,
74    pub detail_functions: bool,
75}
76
/// Internal switchboard: one flag per analysis section that `analyze` may run.
///
/// Some flags only exist when the cargo features backing them are compiled in.
#[derive(Clone, Copy, Debug)]
struct AnalysisPlan {
    /// Asset report.
    assets: bool,
    /// Dependency report.
    deps: bool,
    /// TODO scan (stored on the derived report).
    todo: bool,
    /// Duplicate report.
    dup: bool,
    /// Import report.
    imports: bool,
    /// Git report; also the default for git collection when the request has no override.
    git: bool,
    /// Fun report.
    fun: bool,
    /// Archetype detection.
    archetype: bool,
    /// Topic clouds.
    topics: bool,
    /// Entropy report.
    entropy: bool,
    /// License report.
    license: bool,
    /// Complexity report.
    complexity: bool,
    /// Halstead metrics, merged into the complexity report.
    #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
    halstead: bool,
    /// Predictive churn report.
    #[cfg(feature = "git")]
    churn: bool,
    /// Corporate fingerprint.
    #[cfg(feature = "git")]
    fingerprint: bool,
}
98
99impl AnalysisPlan {
100    #[cfg_attr(
101        not(all(feature = "halstead", feature = "content", feature = "walk")),
102        allow(unused_mut)
103    )]
104    fn needs_files(&self) -> bool {
105        let mut needs = self.assets
106            || self.deps
107            || self.todo
108            || self.dup
109            || self.imports
110            || self.entropy
111            || self.license
112            || self.complexity;
113        #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
114        {
115            needs = needs || self.halstead;
116        }
117        needs
118    }
119}
120
121fn plan_for(preset: AnalysisPreset) -> AnalysisPlan {
122    match preset {
123        AnalysisPreset::Receipt => AnalysisPlan {
124            assets: false,
125            deps: false,
126            todo: false,
127            dup: false,
128            imports: false,
129            git: false,
130            fun: false,
131            archetype: false,
132            topics: false,
133            entropy: false,
134            license: false,
135            complexity: false,
136            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
137            halstead: false,
138            #[cfg(feature = "git")]
139            churn: false,
140            #[cfg(feature = "git")]
141            fingerprint: false,
142        },
143        AnalysisPreset::Health => AnalysisPlan {
144            assets: false,
145            deps: false,
146            todo: true,
147            dup: false,
148            imports: false,
149            git: false,
150            fun: false,
151            archetype: false,
152            topics: false,
153            entropy: false,
154            license: false,
155            complexity: true,
156            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
157            halstead: true,
158            #[cfg(feature = "git")]
159            churn: false,
160            #[cfg(feature = "git")]
161            fingerprint: false,
162        },
163        AnalysisPreset::Risk => AnalysisPlan {
164            assets: false,
165            deps: false,
166            todo: false,
167            dup: false,
168            imports: false,
169            git: true,
170            fun: false,
171            archetype: false,
172            topics: false,
173            entropy: false,
174            license: false,
175            complexity: true,
176            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
177            halstead: true,
178            #[cfg(feature = "git")]
179            churn: false,
180            #[cfg(feature = "git")]
181            fingerprint: false,
182        },
183        AnalysisPreset::Supply => AnalysisPlan {
184            assets: true,
185            deps: true,
186            todo: false,
187            dup: false,
188            imports: false,
189            git: false,
190            fun: false,
191            archetype: false,
192            topics: false,
193            entropy: false,
194            license: false,
195            complexity: false,
196            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
197            halstead: false,
198            #[cfg(feature = "git")]
199            churn: false,
200            #[cfg(feature = "git")]
201            fingerprint: false,
202        },
203        AnalysisPreset::Architecture => AnalysisPlan {
204            assets: false,
205            deps: false,
206            todo: false,
207            dup: false,
208            imports: true,
209            git: false,
210            fun: false,
211            archetype: false,
212            topics: false,
213            entropy: false,
214            license: false,
215            complexity: false,
216            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
217            halstead: false,
218            #[cfg(feature = "git")]
219            churn: false,
220            #[cfg(feature = "git")]
221            fingerprint: false,
222        },
223        AnalysisPreset::Topics => AnalysisPlan {
224            assets: false,
225            deps: false,
226            todo: false,
227            dup: false,
228            imports: false,
229            git: false,
230            fun: false,
231            archetype: false,
232            topics: true,
233            entropy: false,
234            license: false,
235            complexity: false,
236            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
237            halstead: false,
238            #[cfg(feature = "git")]
239            churn: false,
240            #[cfg(feature = "git")]
241            fingerprint: false,
242        },
243        AnalysisPreset::Security => AnalysisPlan {
244            assets: false,
245            deps: false,
246            todo: false,
247            dup: false,
248            imports: false,
249            git: false,
250            fun: false,
251            archetype: false,
252            topics: false,
253            entropy: true,
254            license: true,
255            complexity: false,
256            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
257            halstead: false,
258            #[cfg(feature = "git")]
259            churn: false,
260            #[cfg(feature = "git")]
261            fingerprint: false,
262        },
263        AnalysisPreset::Identity => AnalysisPlan {
264            assets: false,
265            deps: false,
266            todo: false,
267            dup: false,
268            imports: false,
269            git: true,
270            fun: false,
271            archetype: true,
272            topics: false,
273            entropy: false,
274            license: false,
275            complexity: false,
276            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
277            halstead: false,
278            #[cfg(feature = "git")]
279            churn: false,
280            #[cfg(feature = "git")]
281            fingerprint: true,
282        },
283        AnalysisPreset::Git => AnalysisPlan {
284            assets: false,
285            deps: false,
286            todo: false,
287            dup: false,
288            imports: false,
289            git: true,
290            fun: false,
291            archetype: false,
292            topics: false,
293            entropy: false,
294            license: false,
295            complexity: false,
296            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
297            halstead: false,
298            #[cfg(feature = "git")]
299            churn: true,
300            #[cfg(feature = "git")]
301            fingerprint: false,
302        },
303        AnalysisPreset::Deep => AnalysisPlan {
304            assets: true,
305            deps: true,
306            todo: true,
307            dup: true,
308            imports: true,
309            git: true,
310            fun: false,
311            archetype: true,
312            topics: true,
313            entropy: true,
314            license: true,
315            complexity: true,
316            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
317            halstead: true,
318            #[cfg(feature = "git")]
319            churn: true,
320            #[cfg(feature = "git")]
321            fingerprint: true,
322        },
323        AnalysisPreset::Fun => AnalysisPlan {
324            assets: false,
325            deps: false,
326            todo: false,
327            dup: false,
328            imports: false,
329            git: false,
330            fun: true,
331            archetype: false,
332            topics: false,
333            entropy: false,
334            license: false,
335            complexity: false,
336            #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
337            halstead: false,
338            #[cfg(feature = "git")]
339            churn: false,
340            #[cfg(feature = "git")]
341            fingerprint: false,
342        },
343    }
344}
345
346pub fn analyze(ctx: AnalysisContext, req: AnalysisRequest) -> Result<AnalysisReceipt> {
347    let mut warnings: Vec<String> = Vec::new();
348    #[cfg_attr(not(feature = "content"), allow(unused_mut))]
349    let mut derived = derive_report(&ctx.export, req.window_tokens);
350    if req.args.format.contains("tree") {
351        derived.tree = Some(build_tree(&ctx.export));
352    }
353
354    let mut source = ctx.source.clone();
355    if source.base_signature.is_none() {
356        source.base_signature = Some(derived.integrity.hash.clone());
357    }
358
359    let plan = plan_for(req.preset);
360    let include_git = match req.git {
361        Some(flag) => flag,
362        None => plan.git,
363    };
364
365    #[cfg(feature = "walk")]
366    let mut assets: Option<AssetReport> = None;
367    #[cfg(not(feature = "walk"))]
368    let assets: Option<AssetReport> = None;
369
370    #[cfg(feature = "walk")]
371    let mut deps: Option<DependencyReport> = None;
372    #[cfg(not(feature = "walk"))]
373    let deps: Option<DependencyReport> = None;
374
375    #[cfg(feature = "content")]
376    let mut imports: Option<ImportReport> = None;
377    #[cfg(not(feature = "content"))]
378    let imports: Option<ImportReport> = None;
379
380    #[cfg(feature = "content")]
381    let mut dup: Option<DuplicateReport> = None;
382    #[cfg(not(feature = "content"))]
383    let dup: Option<DuplicateReport> = None;
384
385    #[cfg(feature = "git")]
386    let mut git: Option<GitReport> = None;
387    #[cfg(not(feature = "git"))]
388    let git: Option<GitReport> = None;
389
390    #[cfg(feature = "git")]
391    let mut churn: Option<PredictiveChurnReport> = None;
392    #[cfg(not(feature = "git"))]
393    let churn: Option<PredictiveChurnReport> = None;
394
395    #[cfg(feature = "git")]
396    let mut fingerprint: Option<CorporateFingerprint> = None;
397    #[cfg(not(feature = "git"))]
398    let fingerprint: Option<CorporateFingerprint> = None;
399
400    #[cfg(all(feature = "content", feature = "walk"))]
401    let mut entropy: Option<EntropyReport> = None;
402    #[cfg(not(all(feature = "content", feature = "walk")))]
403    let entropy: Option<EntropyReport> = None;
404
405    #[cfg(all(feature = "content", feature = "walk"))]
406    let mut license: Option<LicenseReport> = None;
407    #[cfg(not(all(feature = "content", feature = "walk")))]
408    let license: Option<LicenseReport> = None;
409
410    #[cfg(all(feature = "content", feature = "walk"))]
411    let mut complexity: Option<ComplexityReport> = None;
412    #[cfg(not(all(feature = "content", feature = "walk")))]
413    let complexity: Option<ComplexityReport> = None;
414
415    let mut archetype: Option<Archetype> = None;
416    let mut topics: Option<TopicClouds> = None;
417
418    let mut fun: Option<FunReport> = None;
419
420    #[cfg(any(feature = "walk", feature = "content"))]
421    let mut files: Option<Vec<PathBuf>> = None;
422    #[cfg(not(any(feature = "walk", feature = "content")))]
423    let _files: Option<Vec<PathBuf>> = None;
424
425    if plan.needs_files() {
426        #[cfg(feature = "walk")]
427        match tokmd_walk::list_files(&ctx.root, req.limits.max_files) {
428            Ok(list) => files = Some(list),
429            Err(err) => warnings.push(format!("walk failed: {}", err)),
430        }
431        #[cfg(not(feature = "walk"))]
432        {
433            warnings.push("walk feature disabled; skipping file inventory".to_string());
434        }
435    }
436
437    if plan.assets {
438        #[cfg(feature = "walk")]
439        {
440            if let Some(list) = files.as_deref() {
441                match build_assets_report(&ctx.root, list) {
442                    Ok(report) => assets = Some(report),
443                    Err(err) => warnings.push(format!("asset scan failed: {}", err)),
444                }
445            }
446        }
447    }
448
449    if plan.deps {
450        #[cfg(feature = "walk")]
451        {
452            if let Some(list) = files.as_deref() {
453                match build_dependency_report(&ctx.root, list) {
454                    Ok(report) => deps = Some(report),
455                    Err(err) => warnings.push(format!("dependency scan failed: {}", err)),
456                }
457            }
458        }
459    }
460
461    if plan.todo {
462        #[cfg(feature = "content")]
463        {
464            if let Some(list) = files.as_deref() {
465                match build_todo_report(&ctx.root, list, &req.limits, derived.totals.code) {
466                    Ok(report) => derived.todo = Some(report),
467                    Err(err) => warnings.push(format!("todo scan failed: {}", err)),
468                }
469            }
470        }
471        #[cfg(not(feature = "content"))]
472        warnings.push("content feature disabled; skipping TODO scan".to_string());
473    }
474
475    if plan.dup {
476        #[cfg(feature = "content")]
477        {
478            if let Some(list) = files.as_deref() {
479                match build_duplicate_report(&ctx.root, list, &ctx.export, &req.limits) {
480                    Ok(report) => dup = Some(report),
481                    Err(err) => warnings.push(format!("dup scan failed: {}", err)),
482                }
483            }
484        }
485        #[cfg(not(feature = "content"))]
486        warnings.push("content feature disabled; skipping duplication scan".to_string());
487    }
488
489    if plan.imports {
490        #[cfg(feature = "content")]
491        {
492            if let Some(list) = files.as_deref() {
493                match build_import_report(
494                    &ctx.root,
495                    list,
496                    &ctx.export,
497                    req.import_granularity,
498                    &req.limits,
499                ) {
500                    Ok(report) => imports = Some(report),
501                    Err(err) => warnings.push(format!("import scan failed: {}", err)),
502                }
503            }
504        }
505        #[cfg(not(feature = "content"))]
506        warnings.push("content feature disabled; skipping import scan".to_string());
507    }
508
509    if include_git {
510        #[cfg(feature = "git")]
511        {
512            let repo_root = match tokmd_git::repo_root(&ctx.root) {
513                Some(root) => root,
514                None => {
515                    warnings.push("git scan failed: not a git repo".to_string());
516                    PathBuf::new()
517                }
518            };
519            if !repo_root.as_os_str().is_empty() {
520                match tokmd_git::collect_history(
521                    &repo_root,
522                    req.limits.max_commits,
523                    req.limits.max_commit_files,
524                ) {
525                    Ok(commits) => {
526                        if plan.git {
527                            match build_git_report(&repo_root, &ctx.export, &commits) {
528                                Ok(report) => git = Some(report),
529                                Err(err) => warnings.push(format!("git scan failed: {}", err)),
530                            }
531                        }
532                        if plan.churn {
533                            churn = Some(build_predictive_churn_report(
534                                &ctx.export,
535                                &commits,
536                                &repo_root,
537                            ));
538                        }
539                        if plan.fingerprint {
540                            fingerprint = Some(build_corporate_fingerprint(&commits));
541                        }
542                    }
543                    Err(err) => warnings.push(format!("git scan failed: {}", err)),
544                }
545            }
546        }
547        #[cfg(not(feature = "git"))]
548        warnings.push("git feature disabled; skipping git metrics".to_string());
549    }
550
551    if plan.archetype {
552        archetype = detect_archetype(&ctx.export);
553    }
554
555    if plan.topics {
556        topics = Some(build_topic_clouds(&ctx.export));
557    }
558
559    if plan.entropy {
560        #[cfg(all(feature = "content", feature = "walk"))]
561        {
562            if let Some(list) = files.as_deref() {
563                match crate::entropy::build_entropy_report(
564                    &ctx.root,
565                    list,
566                    &ctx.export,
567                    &req.limits,
568                ) {
569                    Ok(report) => entropy = Some(report),
570                    Err(err) => warnings.push(format!("entropy scan failed: {}", err)),
571                }
572            }
573        }
574        #[cfg(not(all(feature = "content", feature = "walk")))]
575        warnings.push("content/walk feature disabled; skipping entropy profiling".to_string());
576    }
577
578    if plan.license {
579        #[cfg(all(feature = "content", feature = "walk"))]
580        {
581            if let Some(list) = files.as_deref() {
582                match build_license_report(&ctx.root, list, &req.limits) {
583                    Ok(report) => license = Some(report),
584                    Err(err) => warnings.push(format!("license scan failed: {}", err)),
585                }
586            }
587        }
588        #[cfg(not(all(feature = "content", feature = "walk")))]
589        warnings.push("content/walk feature disabled; skipping license radar".to_string());
590    }
591
592    if plan.complexity {
593        #[cfg(all(feature = "content", feature = "walk"))]
594        {
595            if let Some(list) = files.as_deref() {
596                match crate::complexity::build_complexity_report(
597                    &ctx.root,
598                    list,
599                    &ctx.export,
600                    &req.limits,
601                    req.detail_functions,
602                ) {
603                    Ok(report) => complexity = Some(report),
604                    Err(err) => warnings.push(format!("complexity scan failed: {}", err)),
605                }
606            }
607        }
608        #[cfg(not(all(feature = "content", feature = "walk")))]
609        warnings.push("content/walk feature disabled; skipping complexity analysis".to_string());
610    }
611
612    // Halstead metrics (feature-gated)
613    #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
614    if plan.halstead
615        && let Some(list) = files.as_deref()
616    {
617        match crate::halstead::build_halstead_report(&ctx.root, list, &ctx.export, &req.limits) {
618            Ok(halstead_report) => {
619                // Wire Halstead into complexity report if available
620                if let Some(ref mut cx) = complexity {
621                    // Update maintainability index with Halstead volume
622                    if let Some(ref mut mi) = cx.maintainability_index {
623                        let vol = halstead_report.volume;
624                        if vol > 0.0 {
625                            mi.avg_halstead_volume = Some(vol);
626                            // Recompute with full SEI formula
627                            let score = (171.0
628                                - 5.2 * vol.ln()
629                                - 0.23 * mi.avg_cyclomatic
630                                - 16.2 * mi.avg_loc.ln())
631                            .max(0.0);
632                            let factor = 100.0;
633                            mi.score = (score * factor).round() / factor;
634                            mi.grade = if mi.score >= 85.0 {
635                                "A".to_string()
636                            } else if mi.score >= 65.0 {
637                                "B".to_string()
638                            } else {
639                                "C".to_string()
640                            };
641                        }
642                    }
643                    cx.halstead = Some(halstead_report);
644                }
645            }
646            Err(err) => warnings.push(format!("halstead scan failed: {}", err)),
647        }
648    }
649
650    if plan.fun {
651        fun = Some(build_fun_report(&derived));
652    }
653
654    let status = if warnings.is_empty() {
655        ScanStatus::Complete
656    } else {
657        ScanStatus::Partial
658    };
659
660    let receipt = AnalysisReceipt {
661        schema_version: tokmd_analysis_types::ANALYSIS_SCHEMA_VERSION,
662        generated_at_ms: now_ms(),
663        tool: ToolInfo::current(),
664        mode: "analysis".to_string(),
665        status,
666        warnings,
667        source,
668        args: req.args,
669        archetype,
670        topics,
671        entropy,
672        predictive_churn: churn,
673        corporate_fingerprint: fingerprint,
674        license,
675        derived: Some(derived),
676        assets,
677        deps,
678        git,
679        imports,
680        dup,
681        complexity,
682        fun,
683    };
684
685    Ok(receipt)
686}
687
688// Optional enrichers are implemented in later stages.
689#[allow(dead_code)]
690fn _unused_sections(
691    _assets: Option<AssetReport>,
692    _deps: Option<DependencyReport>,
693    _git: Option<GitReport>,
694    _imports: Option<ImportReport>,
695    _dup: Option<DuplicateReport>,
696    _fun: Option<FunReport>,
697) {
698}