Skip to main content

tokmd_analysis/
analysis.rs

1use std::path::PathBuf;
2
3use anyhow::Result;
4#[cfg(feature = "effort")]
5use tokmd_analysis_effort::{EffortRequest, build_effort_report};
6use tokmd_analysis_grid::{PresetKind, PresetPlan, preset_plan_for};
7use tokmd_analysis_types::{
8    AnalysisArgsMeta, AnalysisReceipt, AnalysisSource, ApiSurfaceReport, Archetype, AssetReport,
9    ComplexityReport, CorporateFingerprint, DependencyReport, DuplicateReport, EntropyReport,
10    FunReport, GitReport, ImportReport, LicenseReport, NearDupScope, PredictiveChurnReport,
11    TopicClouds,
12};
13use tokmd_analysis_util::AnalysisLimits;
14use tokmd_types::{ExportData, ScanStatus, ToolInfo};
15
16#[cfg(feature = "git")]
17use crate::churn::build_predictive_churn_report;
18#[cfg(feature = "content")]
19use crate::content::{build_duplicate_report, build_import_report, build_todo_report};
20use crate::derived::{build_tree, derive_report};
21#[cfg(feature = "git")]
22use crate::git::build_git_report;
23use crate::util::now_ms;
24#[cfg(all(feature = "content", feature = "walk"))]
25use tokmd_analysis_api_surface::build_api_surface_report;
26#[cfg(feature = "archetype")]
27use tokmd_analysis_archetype::detect_archetype;
28#[cfg(feature = "walk")]
29use tokmd_analysis_assets::{build_assets_report, build_dependency_report};
30#[cfg(all(feature = "content", feature = "walk"))]
31use tokmd_analysis_complexity::build_complexity_report;
32#[cfg(all(feature = "content", feature = "walk"))]
33use tokmd_analysis_entropy::build_entropy_report;
34#[cfg(feature = "git")]
35use tokmd_analysis_fingerprint::build_corporate_fingerprint;
36#[cfg(feature = "fun")]
37use tokmd_analysis_fun::build_fun_report;
38#[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
39use tokmd_analysis_halstead::build_halstead_report;
40#[cfg(all(feature = "content", feature = "walk"))]
41use tokmd_analysis_license::build_license_report;
42#[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
43use tokmd_analysis_maintainability::attach_halstead_metrics;
44#[cfg(feature = "content")]
45use tokmd_analysis_near_dup::{NearDupLimits, build_near_dup_report};
46#[cfg(feature = "topics")]
47use tokmd_analysis_topics::build_topic_clouds;
48
49/// Canonical preset enum for analysis orchestration.
///
/// This is a plain alias for `PresetKind` from `tokmd_analysis_grid`; `analyze`
/// turns a value of this type into a `PresetPlan` through `preset_plan_for`.
50pub type AnalysisPreset = PresetKind;
51
/// Granularity option for the import scan.
///
/// `analyze` forwards the value from `AnalysisRequest::import_granularity` to
/// `build_import_report` without interpreting it.
// NOTE(review): the exact meaning of each variant lives in `build_import_report`,
// which is not visible here — presumably imports are grouped per module vs.
// per file; confirm before relying on it.
52#[derive(Debug, Clone, Copy, PartialEq, Eq)]
53pub enum ImportGranularity {
    // NOTE(review): presumably "group by module" — confirm.
54    Module,
    // NOTE(review): presumably "group by file" — confirm.
55    File,
56}
57
/// Inputs describing *what* is analyzed; consumed by value by `analyze`.
58#[derive(Debug, Clone)]
59pub struct AnalysisContext {
    /// Scan export data; read by `derive_report`, `build_tree` and most of the
    /// report builders called from `analyze`.
60    pub export: ExportData,
    /// Root directory handed to the file walk, to the file-based report
    /// builders and to `tokmd_git::repo_root`.
61    pub root: PathBuf,
    /// Source description copied into the receipt. If its `base_signature` is
    /// `None`, `analyze` fills it with the derived integrity hash.
62    pub source: AnalysisSource,
63}
64
/// Per-invocation options describing *how* `analyze` should run.
65#[derive(Debug, Clone)]
66pub struct AnalysisRequest {
    /// Preset resolved to a `PresetPlan`; the plan's flags select which steps run.
67    pub preset: AnalysisPreset,
    /// Argument metadata. `analyze` checks whether `format` contains the
    /// substring `"tree"` and then moves the value into the receipt.
68    pub args: AnalysisArgsMeta,
    /// Limits passed to the scanners: `max_files` goes to the file walk,
    /// `max_commits` / `max_commit_files` to git history collection,
    /// `max_bytes` / `max_file_bytes` seed the near-duplicate limits, and the
    /// whole struct is handed to the content-based report builders.
69    pub limits: AnalysisLimits,
    /// Effort-estimate request; when `Some`, `build_effort_report` is run.
    /// The field only exists when the `effort` feature is enabled.
70    #[cfg(feature = "effort")]
71    pub effort: Option<EffortRequest>,
    /// Forwarded to `derive_report` as its second argument.
    // NOTE(review): presumably a context-window size in tokens — confirm in `derive_report`.
72    pub window_tokens: Option<usize>,
    /// Git override: `Some(flag)` decides whether git history is collected,
    /// `None` defers to the preset plan's `git` flag.
73    pub git: Option<bool>,
    /// Forwarded to `build_import_report`.
74    pub import_granularity: ImportGranularity,
    /// Forwarded to `build_complexity_report` as its last argument.
    // NOTE(review): presumably enables per-function detail — confirm in the complexity crate.
75    pub detail_functions: bool,
76    /// Enable near-duplicate detection.
77    pub near_dup: bool,
78    /// Near-duplicate similarity threshold (0.0–1.0).
79    pub near_dup_threshold: f64,
80    /// Maximum files to analyze for near-duplicates.
81    pub near_dup_max_files: usize,
82    /// Near-duplicate comparison scope.
83    pub near_dup_scope: NearDupScope,
84    /// Maximum near-duplicate pairs to emit (truncation guardrail).
85    pub near_dup_max_pairs: Option<usize>,
86    /// Glob patterns to exclude from near-duplicate analysis.
87    pub near_dup_exclude: Vec<String>,
88}
89
/// Resolves `preset` to its `PresetPlan` by delegating to
/// `tokmd_analysis_grid::preset_plan_for`; no additional logic is applied here.
90fn preset_plan(preset: AnalysisPreset) -> PresetPlan {
91    preset_plan_for(preset)
92}
93
/// Runs every analysis step selected for `req` and bundles the results into an
/// `AnalysisReceipt`.
///
/// The preset in `req.preset` is resolved to a `PresetPlan`; each plan flag
/// (plus the request-level `git`, `near_dup` and `effort` options) gates one
/// step. Steps are compiled in or out through Cargo features (`walk`,
/// `content`, `git`, `archetype`, `topics`, `fun`, `halstead`, `effort`).
///
/// Failure handling is best-effort: a step that returns an error pushes a
/// message onto `warnings` and leaves its report slot as `None`; most steps
/// whose feature is compiled out push the corresponding
/// `tokmd_analysis_grid::DisabledFeature` warning instead. The receipt status
/// is `ScanStatus::Complete` only when no warning was recorded, otherwise
/// `ScanStatus::Partial`.
///
/// # Errors
///
/// As written, this body contains no `?` or `Err` return: every failure is
/// downgraded to a warning and the function always returns `Ok`.
94pub fn analyze(ctx: AnalysisContext, req: AnalysisRequest) -> Result<AnalysisReceipt> {
    // Messages for failed or compiled-out steps; any entry makes the receipt `Partial`.
95    let mut warnings: Vec<String> = Vec::new();
    // NOTE(review): `derived.tree` is assigned just below without a feature
    // gate, so `mut` appears to be used in every configuration — confirm
    // whether this `allow(unused_mut)` is still required.
96    #[cfg_attr(not(feature = "content"), allow(unused_mut))]
97    let mut derived = derive_report(&ctx.export, req.window_tokens);
    // Substring match on the format string: any format containing "tree" gets a tree.
98    if req.args.format.contains("tree") {
99        derived.tree = Some(build_tree(&ctx.export));
100    }
101
    // Default the base signature to the derived integrity hash when the caller
    // did not provide one.
102    let mut source = ctx.source.clone();
103    if source.base_signature.is_none() {
104        source.base_signature = Some(derived.integrity.hash.clone());
105    }
106
    // An explicit `req.git` wins over the preset's `git` flag.
107    let plan = preset_plan(req.preset);
108    let include_git = match req.git {
109        Some(flag) => flag,
110        None => plan.git,
111    };
112
    // Report slots. Each one is `let mut` only when the feature that can fill
    // it is compiled in, and an immutable `None` otherwise, so the receipt
    // literal at the end compiles under every feature combination.
113    #[cfg(feature = "walk")]
114    let mut assets: Option<AssetReport> = None;
115    #[cfg(not(feature = "walk"))]
116    let assets: Option<AssetReport> = None;
117
118    #[cfg(feature = "walk")]
119    let mut deps: Option<DependencyReport> = None;
120    #[cfg(not(feature = "walk"))]
121    let deps: Option<DependencyReport> = None;
122
123    #[cfg(feature = "content")]
124    let mut imports: Option<ImportReport> = None;
125    #[cfg(not(feature = "content"))]
126    let imports: Option<ImportReport> = None;
127
128    #[cfg(feature = "content")]
129    let mut dup: Option<DuplicateReport> = None;
130    #[cfg(not(feature = "content"))]
131    let dup: Option<DuplicateReport> = None;
132
133    #[cfg(feature = "git")]
134    let mut git: Option<GitReport> = None;
135    #[cfg(not(feature = "git"))]
136    let git: Option<GitReport> = None;
137
138    #[cfg(feature = "git")]
139    let mut churn: Option<PredictiveChurnReport> = None;
140    #[cfg(not(feature = "git"))]
141    let churn: Option<PredictiveChurnReport> = None;
142
143    #[cfg(feature = "git")]
144    let mut fingerprint: Option<CorporateFingerprint> = None;
145    #[cfg(not(feature = "git"))]
146    let fingerprint: Option<CorporateFingerprint> = None;
147
148    #[cfg(all(feature = "content", feature = "walk"))]
149    let mut entropy: Option<EntropyReport> = None;
150    #[cfg(not(all(feature = "content", feature = "walk")))]
151    let entropy: Option<EntropyReport> = None;
152
153    #[cfg(all(feature = "content", feature = "walk"))]
154    let mut license: Option<LicenseReport> = None;
155    #[cfg(not(all(feature = "content", feature = "walk")))]
156    let license: Option<LicenseReport> = None;
157
158    #[cfg(all(feature = "content", feature = "walk"))]
159    let mut complexity: Option<ComplexityReport> = None;
160    #[cfg(not(all(feature = "content", feature = "walk")))]
161    let complexity: Option<ComplexityReport> = None;
162
163    #[cfg(all(feature = "content", feature = "walk"))]
164    let mut api_surface: Option<ApiSurfaceReport> = None;
165    #[cfg(not(all(feature = "content", feature = "walk")))]
166    let api_surface: Option<ApiSurfaceReport> = None;
167
168    #[cfg(feature = "archetype")]
169    let mut archetype: Option<Archetype> = None;
170    #[cfg(not(feature = "archetype"))]
171    let archetype: Option<Archetype> = None;
172    #[cfg(feature = "topics")]
173    let mut topics: Option<TopicClouds> = None;
174    #[cfg(not(feature = "topics"))]
175    let topics: Option<TopicClouds> = None;
176
    // Declared without an initializer; the `plan.fun` branch further down
    // assigns it exactly once on every path.
177    let fun: Option<FunReport>;
178
    // File inventory shared by the file-based steps.
    // NOTE(review): the only assignment to `files` sits behind the `walk`
    // feature, so with `content` enabled but `walk` disabled it stays `None`
    // and the content scans that need a file list are skipped — confirm this
    // feature combination is intended/supported.
179    #[cfg(any(feature = "walk", feature = "content"))]
180    let mut files: Option<Vec<PathBuf>> = None;
181    #[cfg(not(any(feature = "walk", feature = "content")))]
182    let _files: Option<Vec<PathBuf>> = None;
183
    // Walk the tree once, only if the plan needs a file list. On failure
    // `files` stays `None`, so the file-based steps below skip without adding
    // their own warnings.
184    if plan.needs_files() {
185        #[cfg(feature = "walk")]
186        match tokmd_walk::list_files(&ctx.root, req.limits.max_files) {
187            Ok(list) => files = Some(list),
188            Err(err) => warnings.push(format!("walk failed: {}", err)),
189        }
190        #[cfg(not(feature = "walk"))]
191        {
192            warnings.push(
193                tokmd_analysis_grid::DisabledFeature::FileInventory
194                    .warning()
195                    .to_string(),
196            );
197        }
198    }
199
    // Asset inventory. Without the `walk` feature this branch is empty and
    // pushes no warning of its own.
200    if plan.assets {
201        #[cfg(feature = "walk")]
202        {
203            if let Some(list) = files.as_deref() {
204                match build_assets_report(&ctx.root, list) {
205                    Ok(report) => assets = Some(report),
206                    Err(err) => warnings.push(format!("asset scan failed: {}", err)),
207                }
208            }
209        }
210    }
211
    // Dependency report. Same shape as the asset step: no warning here when
    // `walk` is compiled out.
212    if plan.deps {
213        #[cfg(feature = "walk")]
214        {
215            if let Some(list) = files.as_deref() {
216                match build_dependency_report(&ctx.root, list) {
217                    Ok(report) => deps = Some(report),
218                    Err(err) => warnings.push(format!("dependency scan failed: {}", err)),
219                }
220            }
221        }
222    }
223
    // TODO scan; the result is stored on `derived` rather than in its own slot.
224    if plan.todo {
225        #[cfg(feature = "content")]
226        {
227            if let Some(list) = files.as_deref() {
228                match build_todo_report(&ctx.root, list, &req.limits, derived.totals.code) {
229                    Ok(report) => derived.todo = Some(report),
230                    Err(err) => warnings.push(format!("todo scan failed: {}", err)),
231                }
232            }
233        }
234        #[cfg(not(feature = "content"))]
235        warnings.push(
236            tokmd_analysis_grid::DisabledFeature::TodoScan
237                .warning()
238                .to_string(),
239        );
240    }
241
    // Duplicate scan over the walked file list.
242    if plan.dup {
243        #[cfg(feature = "content")]
244        {
245            if let Some(list) = files.as_deref() {
246                match build_duplicate_report(&ctx.root, list, &ctx.export, &req.limits) {
247                    Ok(report) => dup = Some(report),
248                    Err(err) => warnings.push(format!("dup scan failed: {}", err)),
249                }
250            }
251        }
252        #[cfg(not(feature = "content"))]
253        warnings.push(
254            tokmd_analysis_grid::DisabledFeature::DuplicationScan
255                .warning()
256                .to_string(),
257        );
258    }
259
    // This step is driven by the request flag, not by the preset plan, and it
    // works from `ctx.export` rather than from the walked `files` list.
260    // Near-duplicate detection (opt-in via --near-dup)
261    if req.near_dup {
262        #[cfg(feature = "content")]
263        {
264            let near_dup_limits = NearDupLimits {
265                max_bytes: req.limits.max_bytes,
266                max_file_bytes: req.limits.max_file_bytes,
267            };
268            match build_near_dup_report(
269                &ctx.root,
270                &ctx.export,
271                req.near_dup_scope,
272                req.near_dup_threshold,
273                req.near_dup_max_files,
274                req.near_dup_max_pairs,
275                &near_dup_limits,
276                &req.near_dup_exclude,
277            ) {
278                Ok(report) => {
279                    // Attach to existing dup report or create a minimal one
280                    if let Some(ref mut d) = dup {
281                        d.near = Some(report);
282                    } else {
                        // Placeholder report: no exact-duplicate groups,
                        // strategy "none", carrying only the near-dup result.
283                        dup = Some(DuplicateReport {
284                            groups: Vec::new(),
285                            wasted_bytes: 0,
286                            strategy: "none".to_string(),
287                            density: None,
288                            near: Some(report),
289                        });
290                    }
291                }
292                Err(err) => warnings.push(format!("near-dup scan failed: {}", err)),
293            }
294        }
295        #[cfg(not(feature = "content"))]
296        warnings.push(
297            tokmd_analysis_grid::DisabledFeature::NearDuplicateScan
298                .warning()
299                .to_string(),
300        );
301    }
302
    // Import scan, using the caller-selected granularity.
303    if plan.imports {
304        #[cfg(feature = "content")]
305        {
306            if let Some(list) = files.as_deref() {
307                match build_import_report(
308                    &ctx.root,
309                    list,
310                    &ctx.export,
311                    req.import_granularity,
312                    &req.limits,
313                ) {
314                    Ok(report) => imports = Some(report),
315                    Err(err) => warnings.push(format!("import scan failed: {}", err)),
316                }
317            }
318        }
319        #[cfg(not(feature = "content"))]
320        warnings.push(
321            tokmd_analysis_grid::DisabledFeature::ImportScan
322                .warning()
323                .to_string(),
324        );
325    }
326
    // Git-derived reports. History is collected once and shared by the git,
    // churn and fingerprint builders. Because everything sits under
    // `include_git`, `req.git = Some(false)` also disables churn and
    // fingerprint.
    // NOTE(review): the git report itself is additionally gated on `plan.git`,
    // so `req.git = Some(true)` with a preset whose `git` flag is false
    // collects history but builds no `GitReport` — confirm this is intended.
327    if include_git {
328        #[cfg(feature = "git")]
329        {
            // An empty path serves as an in-band "no repository" marker; the
            // emptiness check below then skips history collection.
330            let repo_root = match tokmd_git::repo_root(&ctx.root) {
331                Some(root) => root,
332                None => {
333                    warnings.push("git scan failed: not a git repo".to_string());
334                    PathBuf::new()
335                }
336            };
337            if !repo_root.as_os_str().is_empty() {
338                match tokmd_git::collect_history(
339                    &repo_root,
340                    req.limits.max_commits,
341                    req.limits.max_commit_files,
342                ) {
343                    Ok(commits) => {
344                        if plan.git {
345                            match build_git_report(&repo_root, &ctx.export, &commits) {
346                                Ok(report) => git = Some(report),
347                                Err(err) => warnings.push(format!("git scan failed: {}", err)),
348                            }
349                        }
                        // The churn and fingerprint builders are used as
                        // infallible here: their return values are wrapped in
                        // `Some` directly.
350                        if plan.churn {
351                            churn = Some(build_predictive_churn_report(
352                                &ctx.export,
353                                &commits,
354                                &repo_root,
355                            ));
356                        }
357                        if plan.fingerprint {
358                            fingerprint = Some(build_corporate_fingerprint(&commits));
359                        }
360                    }
361                    Err(err) => warnings.push(format!("git scan failed: {}", err)),
362                }
363            }
364        }
365        #[cfg(not(feature = "git"))]
366        warnings.push(
367            tokmd_analysis_grid::DisabledFeature::GitMetrics
368                .warning()
369                .to_string(),
370        );
371    }
372
    // Archetype detection works from the export alone; the detector's return
    // value is stored as-is, so it may legitimately be `None`.
373    if plan.archetype {
374        #[cfg(feature = "archetype")]
375        {
376            archetype = detect_archetype(&ctx.export);
377        }
378        #[cfg(not(feature = "archetype"))]
379        {
380            warnings.push(
381                tokmd_analysis_grid::DisabledFeature::Archetype
382                    .warning()
383                    .to_string(),
384            );
385        }
386    }
387
    // Topic clouds are likewise built from the export alone.
388    if plan.topics {
389        #[cfg(feature = "topics")]
390        {
391            topics = Some(build_topic_clouds(&ctx.export));
392        }
393        #[cfg(not(feature = "topics"))]
394        {
395            warnings.push(
396                tokmd_analysis_grid::DisabledFeature::Topics
397                    .warning()
398                    .to_string(),
399            );
400        }
401    }
402
    // The next four steps (entropy, license, complexity, API surface) all
    // require both `content` and `walk`, and all skip silently when the file
    // list is unavailable.
403    if plan.entropy {
404        #[cfg(all(feature = "content", feature = "walk"))]
405        {
406            if let Some(list) = files.as_deref() {
407                match build_entropy_report(&ctx.root, list, &ctx.export, &req.limits) {
408                    Ok(report) => entropy = Some(report),
409                    Err(err) => warnings.push(format!("entropy scan failed: {}", err)),
410                }
411            }
412        }
413        #[cfg(not(all(feature = "content", feature = "walk")))]
414        warnings.push(
415            tokmd_analysis_grid::DisabledFeature::EntropyProfiling
416                .warning()
417                .to_string(),
418        );
419    }
420
421    if plan.license {
422        #[cfg(all(feature = "content", feature = "walk"))]
423        {
424            if let Some(list) = files.as_deref() {
425                match build_license_report(&ctx.root, list, &req.limits) {
426                    Ok(report) => license = Some(report),
427                    Err(err) => warnings.push(format!("license scan failed: {}", err)),
428                }
429            }
430        }
431        #[cfg(not(all(feature = "content", feature = "walk")))]
432        warnings.push(
433            tokmd_analysis_grid::DisabledFeature::LicenseRadar
434                .warning()
435                .to_string(),
436        );
437    }
438
439    if plan.complexity {
440        #[cfg(all(feature = "content", feature = "walk"))]
441        {
442            if let Some(list) = files.as_deref() {
443                match build_complexity_report(
444                    &ctx.root,
445                    list,
446                    &ctx.export,
447                    &req.limits,
448                    req.detail_functions,
449                ) {
450                    Ok(report) => complexity = Some(report),
451                    Err(err) => warnings.push(format!("complexity scan failed: {}", err)),
452                }
453            }
454        }
455        #[cfg(not(all(feature = "content", feature = "walk")))]
456        warnings.push(
457            tokmd_analysis_grid::DisabledFeature::ComplexityAnalysis
458                .warning()
459                .to_string(),
460        );
461    }
462
463    if plan.api_surface {
464        #[cfg(all(feature = "content", feature = "walk"))]
465        {
466            if let Some(list) = files.as_deref() {
467                match build_api_surface_report(&ctx.root, list, &ctx.export, &req.limits) {
468                    Ok(report) => api_surface = Some(report),
469                    Err(err) => warnings.push(format!("api surface scan failed: {}", err)),
470                }
471            }
472        }
473        #[cfg(not(all(feature = "content", feature = "walk")))]
474        warnings.push(
475            tokmd_analysis_grid::DisabledFeature::ApiSurfaceAnalysis
476                .warning()
477                .to_string(),
478        );
479    }
480
    // Must run after the complexity step: the Halstead result has no slot of
    // its own and is only attached to an existing complexity report; if
    // `complexity` is `None` the computed report is dropped.
    // NOTE(review): unlike the other steps, there is no disabled-feature
    // warning when `plan.halstead` is set but the features are compiled out —
    // confirm whether that is deliberate.
481    // Halstead metrics (feature-gated)
482    #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
483    if plan.halstead
484        && let Some(list) = files.as_deref()
485    {
486        match build_halstead_report(&ctx.root, list, &ctx.export, &req.limits) {
487            Ok(halstead_report) => {
488                // Wire Halstead into complexity report if available
489                if let Some(ref mut cx) = complexity {
490                    attach_halstead_metrics(cx, halstead_report);
491                }
492            }
493            Err(err) => warnings.push(format!("halstead scan failed: {}", err)),
494        }
495    }
496
    // Every path through this branch assigns `fun` exactly once, which is what
    // allows the initializer-free declaration above.
497    if plan.fun {
498        #[cfg(feature = "fun")]
499        {
500            fun = Some(build_fun_report(&derived));
501        }
502        #[cfg(not(feature = "fun"))]
503        {
504            warnings.push(
505                tokmd_analysis_grid::DisabledFeature::Fun
506                    .warning()
507                    .to_string(),
508            );
509            fun = None;
510        }
511    } else {
512        fun = None;
513    }
514
    // Effort estimate: request-driven (not plan-driven) and run last because
    // it takes the git, complexity, API-surface and duplicate reports built
    // above as optional inputs.
515    #[cfg(feature = "effort")]
516    let effort = if let Some(effort_request) = &req.effort {
517        match build_effort_report(
518            &ctx.root,
519            &ctx.export,
520            &derived,
521            git.as_ref(),
522            complexity.as_ref(),
523            api_surface.as_ref(),
524            dup.as_ref(),
525            effort_request,
526        ) {
527            Ok(report) => Some(report),
528            Err(err) => {
529                warnings.push(format!("effort estimate failed: {}", err));
530                None
531            }
532        }
533    } else {
534        None
535    };
536    #[cfg(not(feature = "effort"))]
537    let effort: Option<tokmd_analysis_types::EffortEstimateReport> = None;
538
    // Any recorded warning (failed step or compiled-out feature) marks the
    // receipt as partial.
539    let status = if warnings.is_empty() {
540        ScanStatus::Complete
541    } else {
542        ScanStatus::Partial
543    };
544
    // `derived` is always present in the receipt; every other report slot is
    // `None` unless its step ran successfully.
545    let receipt = AnalysisReceipt {
546        schema_version: tokmd_analysis_types::ANALYSIS_SCHEMA_VERSION,
547        generated_at_ms: now_ms(),
548        tool: ToolInfo::current(),
549        mode: "analysis".to_string(),
550        status,
551        warnings,
552        source,
553        args: req.args,
554        archetype,
555        topics,
556        entropy,
557        predictive_churn: churn,
558        corporate_fingerprint: fingerprint,
559        license,
560        derived: Some(derived),
561        assets,
562        deps,
563        git,
564        imports,
565        dup,
566        complexity,
567        api_surface,
568        effort,
569        fun,
570    };
571
572    Ok(receipt)
573}