//! `tokmd_analysis/analysis.rs`
//!
//! Preset-driven orchestration of the optional analysis sections.

1use std::path::PathBuf;
2
3use anyhow::Result;
4use tokmd_analysis_types::{
5    AnalysisArgsMeta, AnalysisReceipt, AnalysisSource, Archetype, AssetReport,
6    CorporateFingerprint, DependencyReport, DuplicateReport, EntropyReport, FunReport, GitReport,
7    ImportReport, LicenseReport, PredictiveChurnReport, TopicClouds,
8};
9use tokmd_types::{ExportData, ScanStatus, ToolInfo};
10
11use crate::archetype::detect_archetype;
12#[cfg(feature = "walk")]
13use crate::assets::{build_assets_report, build_dependency_report};
14#[cfg(feature = "git")]
15use crate::churn::build_predictive_churn_report;
16#[cfg(feature = "content")]
17use crate::content::{build_duplicate_report, build_import_report, build_todo_report};
18use crate::derived::{build_tree, derive_report};
19#[cfg(feature = "git")]
20use crate::fingerprint::build_corporate_fingerprint;
21use crate::fun::build_fun_report;
22#[cfg(feature = "git")]
23use crate::git::build_git_report;
24#[cfg(all(feature = "content", feature = "walk"))]
25use crate::license::build_license_report;
26use crate::topics::build_topic_clouds;
27use crate::util::now_ms;
28
/// Named bundle of analysis sections a caller can request.
///
/// `plan_for` translates each preset into the set of sections that `analyze`
/// attempts to build. The derived report is always produced regardless of
/// preset.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnalysisPreset {
    /// No optional sections.
    Receipt,
    /// TODO scan.
    Health,
    /// Git report.
    Risk,
    /// Asset and dependency reports.
    Supply,
    /// Import report.
    Architecture,
    /// Topic clouds.
    Topics,
    /// Entropy and license reports.
    Security,
    /// Git report and archetype detection, plus the corporate fingerprint
    /// when the `git` feature is compiled in.
    Identity,
    /// Git report, plus predictive churn when the `git` feature is compiled in.
    Git,
    /// Every section except the fun report.
    Deep,
    /// Fun report only.
    Fun,
}
43
/// Granularity selector forwarded by `analyze` to `build_import_report`.
///
/// NOTE(review): presumably controls whether imports are aggregated per
/// module or per file; the interpretation lives in the import builder, not in
/// this file. Confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImportGranularity {
    Module,
    File,
}
49
/// Optional caps applied while gathering analysis inputs.
///
/// Every field defaults to `None`.
/// NOTE(review): `None` presumably means "no limit"; the enforcement lives in
/// the callees, so confirm there.
#[derive(Debug, Clone, Default)]
pub struct AnalysisLimits {
    /// Forwarded to `tokmd_walk::list_files` when the file inventory is built.
    pub max_files: Option<usize>,
    /// Not read directly in this file; the whole struct is handed to the
    /// content-based builders (TODO, duplicate, import, entropy, license).
    pub max_bytes: Option<u64>,
    /// Not read directly in this file; see `max_bytes`.
    pub max_file_bytes: Option<u64>,
    /// Forwarded to `tokmd_git::collect_history`.
    pub max_commits: Option<usize>,
    /// Forwarded to `tokmd_git::collect_history`.
    pub max_commit_files: Option<usize>,
}
58
59#[derive(Debug, Clone)]
60pub struct AnalysisContext {
61    pub export: ExportData,
62    pub root: PathBuf,
63    pub source: AnalysisSource,
64}
65
66#[derive(Debug, Clone)]
67pub struct AnalysisRequest {
68    pub preset: AnalysisPreset,
69    pub args: AnalysisArgsMeta,
70    pub limits: AnalysisLimits,
71    pub window_tokens: Option<usize>,
72    pub git: Option<bool>,
73    pub import_granularity: ImportGranularity,
74}
75
/// Per-section switches that `plan_for` derives from an `AnalysisPreset`.
///
/// Each flag enables the section of the same name in `analyze`. `churn` and
/// `fingerprint` only exist when the `git` feature is compiled in.
#[derive(Debug, Clone, Copy)]
struct AnalysisPlan {
    assets: bool,
    deps: bool,
    todo: bool,
    dup: bool,
    imports: bool,
    git: bool,
    fun: bool,
    archetype: bool,
    topics: bool,
    entropy: bool,
    license: bool,
    #[cfg(feature = "git")]
    churn: bool,
    #[cfg(feature = "git")]
    fingerprint: bool,
}

impl AnalysisPlan {
    /// Returns `true` when at least one enabled section consumes the walked
    /// file list (assets, deps, todo, dup, imports, entropy or license).
    ///
    /// Git, fun, archetype and topics do not take part in this decision.
    fn needs_files(&self) -> bool {
        [
            self.assets,
            self.deps,
            self.todo,
            self.dup,
            self.imports,
            self.entropy,
            self.license,
        ]
        .into_iter()
        .any(|enabled| enabled)
    }
}
106
107fn plan_for(preset: AnalysisPreset) -> AnalysisPlan {
108    match preset {
109        AnalysisPreset::Receipt => AnalysisPlan {
110            assets: false,
111            deps: false,
112            todo: false,
113            dup: false,
114            imports: false,
115            git: false,
116            fun: false,
117            archetype: false,
118            topics: false,
119            entropy: false,
120            license: false,
121            #[cfg(feature = "git")]
122            churn: false,
123            #[cfg(feature = "git")]
124            fingerprint: false,
125        },
126        AnalysisPreset::Health => AnalysisPlan {
127            assets: false,
128            deps: false,
129            todo: true,
130            dup: false,
131            imports: false,
132            git: false,
133            fun: false,
134            archetype: false,
135            topics: false,
136            entropy: false,
137            license: false,
138            #[cfg(feature = "git")]
139            churn: false,
140            #[cfg(feature = "git")]
141            fingerprint: false,
142        },
143        AnalysisPreset::Risk => AnalysisPlan {
144            assets: false,
145            deps: false,
146            todo: false,
147            dup: false,
148            imports: false,
149            git: true,
150            fun: false,
151            archetype: false,
152            topics: false,
153            entropy: false,
154            license: false,
155            #[cfg(feature = "git")]
156            churn: false,
157            #[cfg(feature = "git")]
158            fingerprint: false,
159        },
160        AnalysisPreset::Supply => AnalysisPlan {
161            assets: true,
162            deps: true,
163            todo: false,
164            dup: false,
165            imports: false,
166            git: false,
167            fun: false,
168            archetype: false,
169            topics: false,
170            entropy: false,
171            license: false,
172            #[cfg(feature = "git")]
173            churn: false,
174            #[cfg(feature = "git")]
175            fingerprint: false,
176        },
177        AnalysisPreset::Architecture => AnalysisPlan {
178            assets: false,
179            deps: false,
180            todo: false,
181            dup: false,
182            imports: true,
183            git: false,
184            fun: false,
185            archetype: false,
186            topics: false,
187            entropy: false,
188            license: false,
189            #[cfg(feature = "git")]
190            churn: false,
191            #[cfg(feature = "git")]
192            fingerprint: false,
193        },
194        AnalysisPreset::Topics => AnalysisPlan {
195            assets: false,
196            deps: false,
197            todo: false,
198            dup: false,
199            imports: false,
200            git: false,
201            fun: false,
202            archetype: false,
203            topics: true,
204            entropy: false,
205            license: false,
206            #[cfg(feature = "git")]
207            churn: false,
208            #[cfg(feature = "git")]
209            fingerprint: false,
210        },
211        AnalysisPreset::Security => AnalysisPlan {
212            assets: false,
213            deps: false,
214            todo: false,
215            dup: false,
216            imports: false,
217            git: false,
218            fun: false,
219            archetype: false,
220            topics: false,
221            entropy: true,
222            license: true,
223            #[cfg(feature = "git")]
224            churn: false,
225            #[cfg(feature = "git")]
226            fingerprint: false,
227        },
228        AnalysisPreset::Identity => AnalysisPlan {
229            assets: false,
230            deps: false,
231            todo: false,
232            dup: false,
233            imports: false,
234            git: true,
235            fun: false,
236            archetype: true,
237            topics: false,
238            entropy: false,
239            license: false,
240            #[cfg(feature = "git")]
241            churn: false,
242            #[cfg(feature = "git")]
243            fingerprint: true,
244        },
245        AnalysisPreset::Git => AnalysisPlan {
246            assets: false,
247            deps: false,
248            todo: false,
249            dup: false,
250            imports: false,
251            git: true,
252            fun: false,
253            archetype: false,
254            topics: false,
255            entropy: false,
256            license: false,
257            #[cfg(feature = "git")]
258            churn: true,
259            #[cfg(feature = "git")]
260            fingerprint: false,
261        },
262        AnalysisPreset::Deep => AnalysisPlan {
263            assets: true,
264            deps: true,
265            todo: true,
266            dup: true,
267            imports: true,
268            git: true,
269            fun: false,
270            archetype: true,
271            topics: true,
272            entropy: true,
273            license: true,
274            #[cfg(feature = "git")]
275            churn: true,
276            #[cfg(feature = "git")]
277            fingerprint: true,
278        },
279        AnalysisPreset::Fun => AnalysisPlan {
280            assets: false,
281            deps: false,
282            todo: false,
283            dup: false,
284            imports: false,
285            git: false,
286            fun: true,
287            archetype: false,
288            topics: false,
289            entropy: false,
290            license: false,
291            #[cfg(feature = "git")]
292            churn: false,
293            #[cfg(feature = "git")]
294            fingerprint: false,
295        },
296    }
297}
298
299pub fn analyze(ctx: AnalysisContext, req: AnalysisRequest) -> Result<AnalysisReceipt> {
300    let mut warnings: Vec<String> = Vec::new();
301    #[cfg_attr(not(feature = "content"), allow(unused_mut))]
302    let mut derived = derive_report(&ctx.export, req.window_tokens);
303    if req.args.format.contains("tree") {
304        derived.tree = Some(build_tree(&ctx.export));
305    }
306
307    let mut source = ctx.source.clone();
308    if source.base_signature.is_none() {
309        source.base_signature = Some(derived.integrity.hash.clone());
310    }
311
312    let plan = plan_for(req.preset);
313    let include_git = match req.git {
314        Some(flag) => flag,
315        None => plan.git,
316    };
317
318    #[cfg(feature = "walk")]
319    let mut assets: Option<AssetReport> = None;
320    #[cfg(not(feature = "walk"))]
321    let assets: Option<AssetReport> = None;
322
323    #[cfg(feature = "walk")]
324    let mut deps: Option<DependencyReport> = None;
325    #[cfg(not(feature = "walk"))]
326    let deps: Option<DependencyReport> = None;
327
328    #[cfg(feature = "content")]
329    let mut imports: Option<ImportReport> = None;
330    #[cfg(not(feature = "content"))]
331    let imports: Option<ImportReport> = None;
332
333    #[cfg(feature = "content")]
334    let mut dup: Option<DuplicateReport> = None;
335    #[cfg(not(feature = "content"))]
336    let dup: Option<DuplicateReport> = None;
337
338    #[cfg(feature = "git")]
339    let mut git: Option<GitReport> = None;
340    #[cfg(not(feature = "git"))]
341    let git: Option<GitReport> = None;
342
343    #[cfg(feature = "git")]
344    let mut churn: Option<PredictiveChurnReport> = None;
345    #[cfg(not(feature = "git"))]
346    let churn: Option<PredictiveChurnReport> = None;
347
348    #[cfg(feature = "git")]
349    let mut fingerprint: Option<CorporateFingerprint> = None;
350    #[cfg(not(feature = "git"))]
351    let fingerprint: Option<CorporateFingerprint> = None;
352
353    #[cfg(all(feature = "content", feature = "walk"))]
354    let mut entropy: Option<EntropyReport> = None;
355    #[cfg(not(all(feature = "content", feature = "walk")))]
356    let entropy: Option<EntropyReport> = None;
357
358    #[cfg(all(feature = "content", feature = "walk"))]
359    let mut license: Option<LicenseReport> = None;
360    #[cfg(not(all(feature = "content", feature = "walk")))]
361    let license: Option<LicenseReport> = None;
362
363    let mut archetype: Option<Archetype> = None;
364    let mut topics: Option<TopicClouds> = None;
365
366    let mut fun: Option<FunReport> = None;
367
368    #[cfg(any(feature = "walk", feature = "content"))]
369    let mut files: Option<Vec<PathBuf>> = None;
370    #[cfg(not(any(feature = "walk", feature = "content")))]
371    let _files: Option<Vec<PathBuf>> = None;
372
373    if plan.needs_files() {
374        #[cfg(feature = "walk")]
375        match tokmd_walk::list_files(&ctx.root, req.limits.max_files) {
376            Ok(list) => files = Some(list),
377            Err(err) => warnings.push(format!("walk failed: {}", err)),
378        }
379        #[cfg(not(feature = "walk"))]
380        {
381            warnings.push("walk feature disabled; skipping file inventory".to_string());
382        }
383    }
384
385    if plan.assets {
386        #[cfg(feature = "walk")]
387        {
388            if let Some(list) = files.as_deref() {
389                match build_assets_report(&ctx.root, list) {
390                    Ok(report) => assets = Some(report),
391                    Err(err) => warnings.push(format!("asset scan failed: {}", err)),
392                }
393            }
394        }
395    }
396
397    if plan.deps {
398        #[cfg(feature = "walk")]
399        {
400            if let Some(list) = files.as_deref() {
401                match build_dependency_report(&ctx.root, list) {
402                    Ok(report) => deps = Some(report),
403                    Err(err) => warnings.push(format!("dependency scan failed: {}", err)),
404                }
405            }
406        }
407    }
408
409    if plan.todo {
410        #[cfg(feature = "content")]
411        {
412            if let Some(list) = files.as_deref() {
413                match build_todo_report(&ctx.root, list, &req.limits, derived.totals.code) {
414                    Ok(report) => derived.todo = Some(report),
415                    Err(err) => warnings.push(format!("todo scan failed: {}", err)),
416                }
417            }
418        }
419        #[cfg(not(feature = "content"))]
420        warnings.push("content feature disabled; skipping TODO scan".to_string());
421    }
422
423    if plan.dup {
424        #[cfg(feature = "content")]
425        {
426            if let Some(list) = files.as_deref() {
427                match build_duplicate_report(&ctx.root, list, &req.limits) {
428                    Ok(report) => dup = Some(report),
429                    Err(err) => warnings.push(format!("dup scan failed: {}", err)),
430                }
431            }
432        }
433        #[cfg(not(feature = "content"))]
434        warnings.push("content feature disabled; skipping duplication scan".to_string());
435    }
436
437    if plan.imports {
438        #[cfg(feature = "content")]
439        {
440            if let Some(list) = files.as_deref() {
441                match build_import_report(
442                    &ctx.root,
443                    list,
444                    &ctx.export,
445                    req.import_granularity,
446                    &req.limits,
447                ) {
448                    Ok(report) => imports = Some(report),
449                    Err(err) => warnings.push(format!("import scan failed: {}", err)),
450                }
451            }
452        }
453        #[cfg(not(feature = "content"))]
454        warnings.push("content feature disabled; skipping import scan".to_string());
455    }
456
457    if include_git {
458        #[cfg(feature = "git")]
459        {
460            let repo_root = match tokmd_git::repo_root(&ctx.root) {
461                Some(root) => root,
462                None => {
463                    warnings.push("git scan failed: not a git repo".to_string());
464                    PathBuf::new()
465                }
466            };
467            if !repo_root.as_os_str().is_empty() {
468                match tokmd_git::collect_history(
469                    &repo_root,
470                    req.limits.max_commits,
471                    req.limits.max_commit_files,
472                ) {
473                    Ok(commits) => {
474                        if plan.git {
475                            match build_git_report(&repo_root, &ctx.export, &commits) {
476                                Ok(report) => git = Some(report),
477                                Err(err) => warnings.push(format!("git scan failed: {}", err)),
478                            }
479                        }
480                        if plan.churn {
481                            churn = Some(build_predictive_churn_report(
482                                &ctx.export,
483                                &commits,
484                                &repo_root,
485                            ));
486                        }
487                        if plan.fingerprint {
488                            fingerprint = Some(build_corporate_fingerprint(&commits));
489                        }
490                    }
491                    Err(err) => warnings.push(format!("git scan failed: {}", err)),
492                }
493            }
494        }
495        #[cfg(not(feature = "git"))]
496        warnings.push("git feature disabled; skipping git metrics".to_string());
497    }
498
499    if plan.archetype {
500        archetype = detect_archetype(&ctx.export);
501    }
502
503    if plan.topics {
504        topics = Some(build_topic_clouds(&ctx.export));
505    }
506
507    if plan.entropy {
508        #[cfg(all(feature = "content", feature = "walk"))]
509        {
510            if let Some(list) = files.as_deref() {
511                match crate::entropy::build_entropy_report(
512                    &ctx.root,
513                    list,
514                    &ctx.export,
515                    &req.limits,
516                ) {
517                    Ok(report) => entropy = Some(report),
518                    Err(err) => warnings.push(format!("entropy scan failed: {}", err)),
519                }
520            }
521        }
522        #[cfg(not(all(feature = "content", feature = "walk")))]
523        warnings.push("content/walk feature disabled; skipping entropy profiling".to_string());
524    }
525
526    if plan.license {
527        #[cfg(all(feature = "content", feature = "walk"))]
528        {
529            if let Some(list) = files.as_deref() {
530                match build_license_report(&ctx.root, list, &req.limits) {
531                    Ok(report) => license = Some(report),
532                    Err(err) => warnings.push(format!("license scan failed: {}", err)),
533                }
534            }
535        }
536        #[cfg(not(all(feature = "content", feature = "walk")))]
537        warnings.push("content/walk feature disabled; skipping license radar".to_string());
538    }
539
540    if plan.fun {
541        fun = Some(build_fun_report(&derived));
542    }
543
544    let status = if warnings.is_empty() {
545        ScanStatus::Complete
546    } else {
547        ScanStatus::Partial
548    };
549
550    let receipt = AnalysisReceipt {
551        schema_version: tokmd_analysis_types::ANALYSIS_SCHEMA_VERSION,
552        generated_at_ms: now_ms(),
553        tool: ToolInfo::current(),
554        mode: "analysis".to_string(),
555        status,
556        warnings,
557        source,
558        args: req.args,
559        archetype,
560        topics,
561        entropy,
562        predictive_churn: churn,
563        corporate_fingerprint: fingerprint,
564        license,
565        derived: Some(derived),
566        assets,
567        deps,
568        git,
569        imports,
570        dup,
571        fun,
572    };
573
574    Ok(receipt)
575}
576
577// Optional enrichers are implemented in later stages.
578#[allow(dead_code)]
579fn _unused_sections(
580    _assets: Option<AssetReport>,
581    _deps: Option<DependencyReport>,
582    _git: Option<GitReport>,
583    _imports: Option<ImportReport>,
584    _dup: Option<DuplicateReport>,
585    _fun: Option<FunReport>,
586) {
587}