Skip to main content

tokmd_analysis/
analysis.rs

1use std::path::PathBuf;
2
3use anyhow::Result;
4use tokmd_analysis_grid::{PresetKind, PresetPlan, preset_plan_for};
5use tokmd_analysis_types::{
6    AnalysisArgsMeta, AnalysisReceipt, AnalysisSource, ApiSurfaceReport, Archetype, AssetReport,
7    ComplexityReport, CorporateFingerprint, DependencyReport, DuplicateReport, EntropyReport,
8    FunReport, GitReport, ImportReport, LicenseReport, NearDupScope, PredictiveChurnReport,
9    TopicClouds,
10};
11use tokmd_analysis_util::AnalysisLimits;
12use tokmd_types::{ExportData, ScanStatus, ToolInfo};
13
14#[cfg(feature = "git")]
15use crate::churn::build_predictive_churn_report;
16#[cfg(feature = "content")]
17use crate::content::{build_duplicate_report, build_import_report, build_todo_report};
18use crate::derived::{build_tree, derive_report};
19#[cfg(feature = "git")]
20use crate::git::build_git_report;
21use crate::util::now_ms;
22#[cfg(all(feature = "content", feature = "walk"))]
23use tokmd_analysis_api_surface::build_api_surface_report;
24#[cfg(feature = "archetype")]
25use tokmd_analysis_archetype::detect_archetype;
26#[cfg(feature = "walk")]
27use tokmd_analysis_assets::{build_assets_report, build_dependency_report};
28#[cfg(all(feature = "content", feature = "walk"))]
29use tokmd_analysis_complexity::build_complexity_report;
30#[cfg(all(feature = "content", feature = "walk"))]
31use tokmd_analysis_entropy::build_entropy_report;
32#[cfg(feature = "git")]
33use tokmd_analysis_fingerprint::build_corporate_fingerprint;
34#[cfg(feature = "fun")]
35use tokmd_analysis_fun::build_fun_report;
36#[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
37use tokmd_analysis_halstead::build_halstead_report;
38#[cfg(all(feature = "content", feature = "walk"))]
39use tokmd_analysis_license::build_license_report;
40#[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
41use tokmd_analysis_maintainability::attach_halstead_metrics;
42#[cfg(feature = "content")]
43use tokmd_analysis_near_dup::{NearDupLimits, build_near_dup_report};
44#[cfg(feature = "topics")]
45use tokmd_analysis_topics::build_topic_clouds;
46
47/// Canonical preset enum for analysis orchestration.
48pub type AnalysisPreset = PresetKind;
49
/// Granularity setting that [`analyze`] forwards to the import scan.
///
/// NOTE(review): the variant names suggest import edges are grouped either per
/// module or per file; the exact semantics live in `build_import_report` and
/// should be confirmed there.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ImportGranularity {
    /// Module-level granularity.
    Module,
    /// File-level granularity.
    File,
}
55
56#[derive(Debug, Clone)]
57pub struct AnalysisContext {
58    pub export: ExportData,
59    pub root: PathBuf,
60    pub source: AnalysisSource,
61}
62
63#[derive(Debug, Clone)]
64pub struct AnalysisRequest {
65    pub preset: AnalysisPreset,
66    pub args: AnalysisArgsMeta,
67    pub limits: AnalysisLimits,
68    pub window_tokens: Option<usize>,
69    pub git: Option<bool>,
70    pub import_granularity: ImportGranularity,
71    pub detail_functions: bool,
72    /// Enable near-duplicate detection.
73    pub near_dup: bool,
74    /// Near-duplicate similarity threshold (0.0–1.0).
75    pub near_dup_threshold: f64,
76    /// Maximum files to analyze for near-duplicates.
77    pub near_dup_max_files: usize,
78    /// Near-duplicate comparison scope.
79    pub near_dup_scope: NearDupScope,
80    /// Maximum near-duplicate pairs to emit (truncation guardrail).
81    pub near_dup_max_pairs: Option<usize>,
82    /// Glob patterns to exclude from near-duplicate analysis.
83    pub near_dup_exclude: Vec<String>,
84}
85
86fn preset_plan(preset: AnalysisPreset) -> PresetPlan {
87    preset_plan_for(preset)
88}
89
90pub fn analyze(ctx: AnalysisContext, req: AnalysisRequest) -> Result<AnalysisReceipt> {
91    let mut warnings: Vec<String> = Vec::new();
92    #[cfg_attr(not(feature = "content"), allow(unused_mut))]
93    let mut derived = derive_report(&ctx.export, req.window_tokens);
94    if req.args.format.contains("tree") {
95        derived.tree = Some(build_tree(&ctx.export));
96    }
97
98    let mut source = ctx.source.clone();
99    if source.base_signature.is_none() {
100        source.base_signature = Some(derived.integrity.hash.clone());
101    }
102
103    let plan = preset_plan(req.preset);
104    let include_git = match req.git {
105        Some(flag) => flag,
106        None => plan.git,
107    };
108
109    #[cfg(feature = "walk")]
110    let mut assets: Option<AssetReport> = None;
111    #[cfg(not(feature = "walk"))]
112    let assets: Option<AssetReport> = None;
113
114    #[cfg(feature = "walk")]
115    let mut deps: Option<DependencyReport> = None;
116    #[cfg(not(feature = "walk"))]
117    let deps: Option<DependencyReport> = None;
118
119    #[cfg(feature = "content")]
120    let mut imports: Option<ImportReport> = None;
121    #[cfg(not(feature = "content"))]
122    let imports: Option<ImportReport> = None;
123
124    #[cfg(feature = "content")]
125    let mut dup: Option<DuplicateReport> = None;
126    #[cfg(not(feature = "content"))]
127    let dup: Option<DuplicateReport> = None;
128
129    #[cfg(feature = "git")]
130    let mut git: Option<GitReport> = None;
131    #[cfg(not(feature = "git"))]
132    let git: Option<GitReport> = None;
133
134    #[cfg(feature = "git")]
135    let mut churn: Option<PredictiveChurnReport> = None;
136    #[cfg(not(feature = "git"))]
137    let churn: Option<PredictiveChurnReport> = None;
138
139    #[cfg(feature = "git")]
140    let mut fingerprint: Option<CorporateFingerprint> = None;
141    #[cfg(not(feature = "git"))]
142    let fingerprint: Option<CorporateFingerprint> = None;
143
144    #[cfg(all(feature = "content", feature = "walk"))]
145    let mut entropy: Option<EntropyReport> = None;
146    #[cfg(not(all(feature = "content", feature = "walk")))]
147    let entropy: Option<EntropyReport> = None;
148
149    #[cfg(all(feature = "content", feature = "walk"))]
150    let mut license: Option<LicenseReport> = None;
151    #[cfg(not(all(feature = "content", feature = "walk")))]
152    let license: Option<LicenseReport> = None;
153
154    #[cfg(all(feature = "content", feature = "walk"))]
155    let mut complexity: Option<ComplexityReport> = None;
156    #[cfg(not(all(feature = "content", feature = "walk")))]
157    let complexity: Option<ComplexityReport> = None;
158
159    #[cfg(all(feature = "content", feature = "walk"))]
160    let mut api_surface: Option<ApiSurfaceReport> = None;
161    #[cfg(not(all(feature = "content", feature = "walk")))]
162    let api_surface: Option<ApiSurfaceReport> = None;
163
164    #[cfg(feature = "archetype")]
165    let mut archetype: Option<Archetype> = None;
166    #[cfg(not(feature = "archetype"))]
167    let archetype: Option<Archetype> = None;
168    #[cfg(feature = "topics")]
169    let mut topics: Option<TopicClouds> = None;
170    #[cfg(not(feature = "topics"))]
171    let topics: Option<TopicClouds> = None;
172
173    let fun: Option<FunReport>;
174
175    #[cfg(any(feature = "walk", feature = "content"))]
176    let mut files: Option<Vec<PathBuf>> = None;
177    #[cfg(not(any(feature = "walk", feature = "content")))]
178    let _files: Option<Vec<PathBuf>> = None;
179
180    if plan.needs_files() {
181        #[cfg(feature = "walk")]
182        match tokmd_walk::list_files(&ctx.root, req.limits.max_files) {
183            Ok(list) => files = Some(list),
184            Err(err) => warnings.push(format!("walk failed: {}", err)),
185        }
186        #[cfg(not(feature = "walk"))]
187        {
188            warnings.push(
189                tokmd_analysis_grid::DisabledFeature::FileInventory
190                    .warning()
191                    .to_string(),
192            );
193        }
194    }
195
196    if plan.assets {
197        #[cfg(feature = "walk")]
198        {
199            if let Some(list) = files.as_deref() {
200                match build_assets_report(&ctx.root, list) {
201                    Ok(report) => assets = Some(report),
202                    Err(err) => warnings.push(format!("asset scan failed: {}", err)),
203                }
204            }
205        }
206    }
207
208    if plan.deps {
209        #[cfg(feature = "walk")]
210        {
211            if let Some(list) = files.as_deref() {
212                match build_dependency_report(&ctx.root, list) {
213                    Ok(report) => deps = Some(report),
214                    Err(err) => warnings.push(format!("dependency scan failed: {}", err)),
215                }
216            }
217        }
218    }
219
220    if plan.todo {
221        #[cfg(feature = "content")]
222        {
223            if let Some(list) = files.as_deref() {
224                match build_todo_report(&ctx.root, list, &req.limits, derived.totals.code) {
225                    Ok(report) => derived.todo = Some(report),
226                    Err(err) => warnings.push(format!("todo scan failed: {}", err)),
227                }
228            }
229        }
230        #[cfg(not(feature = "content"))]
231        warnings.push(
232            tokmd_analysis_grid::DisabledFeature::TodoScan
233                .warning()
234                .to_string(),
235        );
236    }
237
238    if plan.dup {
239        #[cfg(feature = "content")]
240        {
241            if let Some(list) = files.as_deref() {
242                match build_duplicate_report(&ctx.root, list, &ctx.export, &req.limits) {
243                    Ok(report) => dup = Some(report),
244                    Err(err) => warnings.push(format!("dup scan failed: {}", err)),
245                }
246            }
247        }
248        #[cfg(not(feature = "content"))]
249        warnings.push(
250            tokmd_analysis_grid::DisabledFeature::DuplicationScan
251                .warning()
252                .to_string(),
253        );
254    }
255
256    // Near-duplicate detection (opt-in via --near-dup)
257    if req.near_dup {
258        #[cfg(feature = "content")]
259        {
260            let near_dup_limits = NearDupLimits {
261                max_bytes: req.limits.max_bytes,
262                max_file_bytes: req.limits.max_file_bytes,
263            };
264            match build_near_dup_report(
265                &ctx.root,
266                &ctx.export,
267                req.near_dup_scope,
268                req.near_dup_threshold,
269                req.near_dup_max_files,
270                req.near_dup_max_pairs,
271                &near_dup_limits,
272                &req.near_dup_exclude,
273            ) {
274                Ok(report) => {
275                    // Attach to existing dup report or create a minimal one
276                    if let Some(ref mut d) = dup {
277                        d.near = Some(report);
278                    } else {
279                        dup = Some(DuplicateReport {
280                            groups: Vec::new(),
281                            wasted_bytes: 0,
282                            strategy: "none".to_string(),
283                            density: None,
284                            near: Some(report),
285                        });
286                    }
287                }
288                Err(err) => warnings.push(format!("near-dup scan failed: {}", err)),
289            }
290        }
291        #[cfg(not(feature = "content"))]
292        warnings.push(
293            tokmd_analysis_grid::DisabledFeature::NearDuplicateScan
294                .warning()
295                .to_string(),
296        );
297    }
298
299    if plan.imports {
300        #[cfg(feature = "content")]
301        {
302            if let Some(list) = files.as_deref() {
303                match build_import_report(
304                    &ctx.root,
305                    list,
306                    &ctx.export,
307                    req.import_granularity,
308                    &req.limits,
309                ) {
310                    Ok(report) => imports = Some(report),
311                    Err(err) => warnings.push(format!("import scan failed: {}", err)),
312                }
313            }
314        }
315        #[cfg(not(feature = "content"))]
316        warnings.push(
317            tokmd_analysis_grid::DisabledFeature::ImportScan
318                .warning()
319                .to_string(),
320        );
321    }
322
323    if include_git {
324        #[cfg(feature = "git")]
325        {
326            let repo_root = match tokmd_git::repo_root(&ctx.root) {
327                Some(root) => root,
328                None => {
329                    warnings.push("git scan failed: not a git repo".to_string());
330                    PathBuf::new()
331                }
332            };
333            if !repo_root.as_os_str().is_empty() {
334                match tokmd_git::collect_history(
335                    &repo_root,
336                    req.limits.max_commits,
337                    req.limits.max_commit_files,
338                ) {
339                    Ok(commits) => {
340                        if plan.git {
341                            match build_git_report(&repo_root, &ctx.export, &commits) {
342                                Ok(report) => git = Some(report),
343                                Err(err) => warnings.push(format!("git scan failed: {}", err)),
344                            }
345                        }
346                        if plan.churn {
347                            churn = Some(build_predictive_churn_report(
348                                &ctx.export,
349                                &commits,
350                                &repo_root,
351                            ));
352                        }
353                        if plan.fingerprint {
354                            fingerprint = Some(build_corporate_fingerprint(&commits));
355                        }
356                    }
357                    Err(err) => warnings.push(format!("git scan failed: {}", err)),
358                }
359            }
360        }
361        #[cfg(not(feature = "git"))]
362        warnings.push(
363            tokmd_analysis_grid::DisabledFeature::GitMetrics
364                .warning()
365                .to_string(),
366        );
367    }
368
369    if plan.archetype {
370        #[cfg(feature = "archetype")]
371        {
372            archetype = detect_archetype(&ctx.export);
373        }
374        #[cfg(not(feature = "archetype"))]
375        {
376            warnings.push(
377                tokmd_analysis_grid::DisabledFeature::Archetype
378                    .warning()
379                    .to_string(),
380            );
381        }
382    }
383
384    if plan.topics {
385        #[cfg(feature = "topics")]
386        {
387            topics = Some(build_topic_clouds(&ctx.export));
388        }
389        #[cfg(not(feature = "topics"))]
390        {
391            warnings.push(
392                tokmd_analysis_grid::DisabledFeature::Topics
393                    .warning()
394                    .to_string(),
395            );
396        }
397    }
398
399    if plan.entropy {
400        #[cfg(all(feature = "content", feature = "walk"))]
401        {
402            if let Some(list) = files.as_deref() {
403                match build_entropy_report(&ctx.root, list, &ctx.export, &req.limits) {
404                    Ok(report) => entropy = Some(report),
405                    Err(err) => warnings.push(format!("entropy scan failed: {}", err)),
406                }
407            }
408        }
409        #[cfg(not(all(feature = "content", feature = "walk")))]
410        warnings.push(
411            tokmd_analysis_grid::DisabledFeature::EntropyProfiling
412                .warning()
413                .to_string(),
414        );
415    }
416
417    if plan.license {
418        #[cfg(all(feature = "content", feature = "walk"))]
419        {
420            if let Some(list) = files.as_deref() {
421                match build_license_report(&ctx.root, list, &req.limits) {
422                    Ok(report) => license = Some(report),
423                    Err(err) => warnings.push(format!("license scan failed: {}", err)),
424                }
425            }
426        }
427        #[cfg(not(all(feature = "content", feature = "walk")))]
428        warnings.push(
429            tokmd_analysis_grid::DisabledFeature::LicenseRadar
430                .warning()
431                .to_string(),
432        );
433    }
434
435    if plan.complexity {
436        #[cfg(all(feature = "content", feature = "walk"))]
437        {
438            if let Some(list) = files.as_deref() {
439                match build_complexity_report(
440                    &ctx.root,
441                    list,
442                    &ctx.export,
443                    &req.limits,
444                    req.detail_functions,
445                ) {
446                    Ok(report) => complexity = Some(report),
447                    Err(err) => warnings.push(format!("complexity scan failed: {}", err)),
448                }
449            }
450        }
451        #[cfg(not(all(feature = "content", feature = "walk")))]
452        warnings.push(
453            tokmd_analysis_grid::DisabledFeature::ComplexityAnalysis
454                .warning()
455                .to_string(),
456        );
457    }
458
459    if plan.api_surface {
460        #[cfg(all(feature = "content", feature = "walk"))]
461        {
462            if let Some(list) = files.as_deref() {
463                match build_api_surface_report(&ctx.root, list, &ctx.export, &req.limits) {
464                    Ok(report) => api_surface = Some(report),
465                    Err(err) => warnings.push(format!("api surface scan failed: {}", err)),
466                }
467            }
468        }
469        #[cfg(not(all(feature = "content", feature = "walk")))]
470        warnings.push(
471            tokmd_analysis_grid::DisabledFeature::ApiSurfaceAnalysis
472                .warning()
473                .to_string(),
474        );
475    }
476
477    // Halstead metrics (feature-gated)
478    #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
479    if plan.halstead
480        && let Some(list) = files.as_deref()
481    {
482        match build_halstead_report(&ctx.root, list, &ctx.export, &req.limits) {
483            Ok(halstead_report) => {
484                // Wire Halstead into complexity report if available
485                if let Some(ref mut cx) = complexity {
486                    attach_halstead_metrics(cx, halstead_report);
487                }
488            }
489            Err(err) => warnings.push(format!("halstead scan failed: {}", err)),
490        }
491    }
492
493    if plan.fun {
494        #[cfg(feature = "fun")]
495        {
496            fun = Some(build_fun_report(&derived));
497        }
498        #[cfg(not(feature = "fun"))]
499        {
500            warnings.push(
501                tokmd_analysis_grid::DisabledFeature::Fun
502                    .warning()
503                    .to_string(),
504            );
505            fun = None;
506        }
507    } else {
508        fun = None;
509    }
510
511    let status = if warnings.is_empty() {
512        ScanStatus::Complete
513    } else {
514        ScanStatus::Partial
515    };
516
517    let receipt = AnalysisReceipt {
518        schema_version: tokmd_analysis_types::ANALYSIS_SCHEMA_VERSION,
519        generated_at_ms: now_ms(),
520        tool: ToolInfo::current(),
521        mode: "analysis".to_string(),
522        status,
523        warnings,
524        source,
525        args: req.args,
526        archetype,
527        topics,
528        entropy,
529        predictive_churn: churn,
530        corporate_fingerprint: fingerprint,
531        license,
532        derived: Some(derived),
533        assets,
534        deps,
535        git,
536        imports,
537        dup,
538        complexity,
539        api_surface,
540        fun,
541    };
542
543    Ok(receipt)
544}