Skip to main content

tokmd_analysis/
analysis.rs

1#[cfg(any(feature = "walk", feature = "content", feature = "git"))]
2use std::path::Path;
3use std::path::PathBuf;
4
5use anyhow::Result;
6#[cfg(feature = "effort")]
7use tokmd_analysis_effort::{EffortRequest, build_effort_report};
8use tokmd_analysis_grid::{PresetKind, PresetPlan, preset_plan_for};
9use tokmd_analysis_types::{
10    AnalysisArgsMeta, AnalysisReceipt, AnalysisSource, ApiSurfaceReport, Archetype, AssetReport,
11    ComplexityReport, CorporateFingerprint, DependencyReport, DuplicateReport, EntropyReport,
12    FunReport, GitReport, ImportReport, LicenseReport, NearDupScope, PredictiveChurnReport,
13    TopicClouds,
14};
15use tokmd_analysis_util::AnalysisLimits;
16use tokmd_types::{ExportData, ScanStatus, ToolInfo};
17
18#[cfg(feature = "git")]
19use crate::churn::build_predictive_churn_report;
20#[cfg(feature = "content")]
21use crate::content::{build_duplicate_report, build_import_report, build_todo_report};
22use crate::derived::{build_tree, derive_report};
23#[cfg(feature = "git")]
24use crate::git::build_git_report;
25use crate::util::now_ms;
26#[cfg(all(feature = "content", feature = "walk"))]
27use tokmd_analysis_api_surface::build_api_surface_report;
28#[cfg(feature = "archetype")]
29use tokmd_analysis_archetype::detect_archetype;
30#[cfg(feature = "walk")]
31use tokmd_analysis_assets::{build_assets_report, build_dependency_report};
32#[cfg(all(feature = "content", feature = "walk"))]
33use tokmd_analysis_complexity::build_complexity_report;
34#[cfg(all(feature = "content", feature = "walk"))]
35use tokmd_analysis_entropy::build_entropy_report;
36#[cfg(feature = "git")]
37use tokmd_analysis_fingerprint::build_corporate_fingerprint;
38#[cfg(feature = "fun")]
39use tokmd_analysis_fun::build_fun_report;
40#[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
41use tokmd_analysis_halstead::build_halstead_report;
42#[cfg(all(feature = "content", feature = "walk"))]
43use tokmd_analysis_license::build_license_report;
44#[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
45use tokmd_analysis_maintainability::attach_halstead_metrics;
46#[cfg(feature = "content")]
47use tokmd_analysis_near_dup::{NearDupLimits, build_near_dup_report};
48#[cfg(feature = "topics")]
49use tokmd_analysis_topics::build_topic_clouds;
50
51/// Canonical preset enum for analysis orchestration.
52pub type AnalysisPreset = PresetKind;
53
/// Granularity selector for the import report.
///
/// `analyze` forwards this value unchanged to the import report builder; the
/// exact grouping rules live in that builder, not in this file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ImportGranularity {
    /// Presumably groups import edges per module — confirm against the import
    /// report builder.
    Module,
    /// Presumably keeps import edges per individual file — confirm against the
    /// import report builder.
    File,
}
59
60#[derive(Debug, Clone)]
61pub struct AnalysisContext {
62    pub export: ExportData,
63    pub root: PathBuf,
64    pub source: AnalysisSource,
65}
66
67#[derive(Debug, Clone)]
68pub struct AnalysisRequest {
69    pub preset: AnalysisPreset,
70    pub args: AnalysisArgsMeta,
71    pub limits: AnalysisLimits,
72    #[cfg(feature = "effort")]
73    pub effort: Option<EffortRequest>,
74    pub window_tokens: Option<usize>,
75    pub git: Option<bool>,
76    pub import_granularity: ImportGranularity,
77    pub detail_functions: bool,
78    /// Enable near-duplicate detection.
79    pub near_dup: bool,
80    /// Near-duplicate similarity threshold (0.0–1.0).
81    pub near_dup_threshold: f64,
82    /// Maximum files to analyze for near-duplicates.
83    pub near_dup_max_files: usize,
84    /// Near-duplicate comparison scope.
85    pub near_dup_scope: NearDupScope,
86    /// Maximum near-duplicate pairs to emit (truncation guardrail).
87    pub near_dup_max_pairs: Option<usize>,
88    /// Glob patterns to exclude from near-duplicate analysis.
89    pub near_dup_exclude: Vec<String>,
90}
91
92fn preset_plan(preset: AnalysisPreset) -> PresetPlan {
93    preset_plan_for(preset)
94}
95
96#[cfg(any(feature = "walk", feature = "content", feature = "git"))]
/// Warning recorded when file-backed enrichers are requested but the analysis
/// context has an empty root path.
const ROOTLESS_FILE_ANALYSIS_WARNING: &str =
    "in-memory analysis has no host root; skipping file-backed enrichers";
99#[cfg(any(feature = "walk", feature = "content", feature = "git"))]
/// Warning recorded when git-backed enrichers are requested but the analysis
/// context has an empty root path.
const ROOTLESS_GIT_ANALYSIS_WARNING: &str =
    "in-memory analysis has no host root; skipping git-backed enrichers";
102
103#[cfg(any(feature = "walk", feature = "content", feature = "git"))]
/// Reports whether `root` names a host directory.
///
/// Returns `false` only for the empty path, which this file treats as the
/// marker for in-memory analysis.
fn has_host_root(root: &Path) -> bool {
    let raw = root.as_os_str();
    !raw.is_empty()
}
107
108#[cfg(any(feature = "walk", feature = "content", feature = "git"))]
/// Appends `warning` to `warnings` unless an identical entry is already there.
///
/// Existing entries are compared by exact string equality, so the same
/// message is never recorded twice.
fn push_warning_once(warnings: &mut Vec<String>, warning: &str) {
    let already_recorded = warnings.iter().any(|seen| seen == warning);
    if already_recorded {
        return;
    }
    warnings.push(warning.to_owned());
}
114
115pub fn analyze(ctx: AnalysisContext, req: AnalysisRequest) -> Result<AnalysisReceipt> {
116    let mut warnings: Vec<String> = Vec::new();
117    #[cfg_attr(not(feature = "content"), allow(unused_mut))]
118    let mut derived = derive_report(&ctx.export, req.window_tokens);
119    if req.args.format.contains("tree") {
120        derived.tree = Some(build_tree(&ctx.export));
121    }
122
123    let mut source = ctx.source.clone();
124    if source.base_signature.is_none() {
125        source.base_signature = Some(derived.integrity.hash.clone());
126    }
127
128    let plan = preset_plan(req.preset);
129    let include_git = match req.git {
130        Some(flag) => flag,
131        None => plan.git,
132    };
133    #[cfg(any(feature = "walk", feature = "content", feature = "git"))]
134    let has_host_root = has_host_root(&ctx.root);
135
136    #[cfg(feature = "walk")]
137    let mut assets: Option<AssetReport> = None;
138    #[cfg(not(feature = "walk"))]
139    let assets: Option<AssetReport> = None;
140
141    #[cfg(feature = "walk")]
142    let mut deps: Option<DependencyReport> = None;
143    #[cfg(not(feature = "walk"))]
144    let deps: Option<DependencyReport> = None;
145
146    #[cfg(feature = "content")]
147    let mut imports: Option<ImportReport> = None;
148    #[cfg(not(feature = "content"))]
149    let imports: Option<ImportReport> = None;
150
151    #[cfg(feature = "content")]
152    let mut dup: Option<DuplicateReport> = None;
153    #[cfg(not(feature = "content"))]
154    let dup: Option<DuplicateReport> = None;
155
156    #[cfg(feature = "git")]
157    let mut git: Option<GitReport> = None;
158    #[cfg(not(feature = "git"))]
159    let git: Option<GitReport> = None;
160
161    #[cfg(feature = "git")]
162    let mut churn: Option<PredictiveChurnReport> = None;
163    #[cfg(not(feature = "git"))]
164    let churn: Option<PredictiveChurnReport> = None;
165
166    #[cfg(feature = "git")]
167    let mut fingerprint: Option<CorporateFingerprint> = None;
168    #[cfg(not(feature = "git"))]
169    let fingerprint: Option<CorporateFingerprint> = None;
170
171    #[cfg(all(feature = "content", feature = "walk"))]
172    let mut entropy: Option<EntropyReport> = None;
173    #[cfg(not(all(feature = "content", feature = "walk")))]
174    let entropy: Option<EntropyReport> = None;
175
176    #[cfg(all(feature = "content", feature = "walk"))]
177    let mut license: Option<LicenseReport> = None;
178    #[cfg(not(all(feature = "content", feature = "walk")))]
179    let license: Option<LicenseReport> = None;
180
181    #[cfg(all(feature = "content", feature = "walk"))]
182    let mut complexity: Option<ComplexityReport> = None;
183    #[cfg(not(all(feature = "content", feature = "walk")))]
184    let complexity: Option<ComplexityReport> = None;
185
186    #[cfg(all(feature = "content", feature = "walk"))]
187    let mut api_surface: Option<ApiSurfaceReport> = None;
188    #[cfg(not(all(feature = "content", feature = "walk")))]
189    let api_surface: Option<ApiSurfaceReport> = None;
190
191    #[cfg(feature = "archetype")]
192    let mut archetype: Option<Archetype> = None;
193    #[cfg(not(feature = "archetype"))]
194    let archetype: Option<Archetype> = None;
195    #[cfg(feature = "topics")]
196    let mut topics: Option<TopicClouds> = None;
197    #[cfg(not(feature = "topics"))]
198    let topics: Option<TopicClouds> = None;
199
200    let fun: Option<FunReport>;
201
202    #[cfg(any(feature = "walk", feature = "content"))]
203    let mut files: Option<Vec<PathBuf>> = None;
204    #[cfg(not(any(feature = "walk", feature = "content")))]
205    let _files: Option<Vec<PathBuf>> = None;
206
207    if plan.needs_files() {
208        #[cfg(feature = "walk")]
209        if has_host_root {
210            match tokmd_walk::list_files(&ctx.root, req.limits.max_files) {
211                Ok(list) => files = Some(list),
212                Err(err) => warnings.push(format!("walk failed: {}", err)),
213            }
214        } else {
215            push_warning_once(&mut warnings, ROOTLESS_FILE_ANALYSIS_WARNING);
216        }
217        #[cfg(not(feature = "walk"))]
218        {
219            warnings.push(
220                tokmd_analysis_grid::DisabledFeature::FileInventory
221                    .warning()
222                    .to_string(),
223            );
224        }
225    }
226
227    if plan.assets {
228        #[cfg(feature = "walk")]
229        {
230            if let Some(list) = files.as_deref() {
231                match build_assets_report(&ctx.root, list) {
232                    Ok(report) => assets = Some(report),
233                    Err(err) => warnings.push(format!("asset scan failed: {}", err)),
234                }
235            }
236        }
237    }
238
239    if plan.deps {
240        #[cfg(feature = "walk")]
241        {
242            if let Some(list) = files.as_deref() {
243                match build_dependency_report(&ctx.root, list) {
244                    Ok(report) => deps = Some(report),
245                    Err(err) => warnings.push(format!("dependency scan failed: {}", err)),
246                }
247            }
248        }
249    }
250
251    if plan.todo {
252        #[cfg(feature = "content")]
253        {
254            if let Some(list) = files.as_deref() {
255                match build_todo_report(&ctx.root, list, &req.limits, derived.totals.code) {
256                    Ok(report) => derived.todo = Some(report),
257                    Err(err) => warnings.push(format!("todo scan failed: {}", err)),
258                }
259            }
260        }
261        #[cfg(not(feature = "content"))]
262        warnings.push(
263            tokmd_analysis_grid::DisabledFeature::TodoScan
264                .warning()
265                .to_string(),
266        );
267    }
268
269    if plan.dup {
270        #[cfg(feature = "content")]
271        {
272            if let Some(list) = files.as_deref() {
273                match build_duplicate_report(&ctx.root, list, &ctx.export, &req.limits) {
274                    Ok(report) => dup = Some(report),
275                    Err(err) => warnings.push(format!("dup scan failed: {}", err)),
276                }
277            }
278        }
279        #[cfg(not(feature = "content"))]
280        warnings.push(
281            tokmd_analysis_grid::DisabledFeature::DuplicationScan
282                .warning()
283                .to_string(),
284        );
285    }
286
287    // Near-duplicate detection (opt-in via --near-dup)
288    if req.near_dup {
289        #[cfg(feature = "content")]
290        {
291            if has_host_root {
292                let near_dup_limits = NearDupLimits {
293                    max_bytes: req.limits.max_bytes,
294                    max_file_bytes: req.limits.max_file_bytes,
295                };
296                match build_near_dup_report(
297                    &ctx.root,
298                    &ctx.export,
299                    req.near_dup_scope,
300                    req.near_dup_threshold,
301                    req.near_dup_max_files,
302                    req.near_dup_max_pairs,
303                    &near_dup_limits,
304                    &req.near_dup_exclude,
305                ) {
306                    Ok(report) => {
307                        // Attach to existing dup report or create a minimal one
308                        if let Some(ref mut d) = dup {
309                            d.near = Some(report);
310                        } else {
311                            dup = Some(DuplicateReport {
312                                groups: Vec::new(),
313                                wasted_bytes: 0,
314                                strategy: "none".to_string(),
315                                density: None,
316                                near: Some(report),
317                            });
318                        }
319                    }
320                    Err(err) => warnings.push(format!("near-dup scan failed: {}", err)),
321                }
322            } else {
323                push_warning_once(&mut warnings, ROOTLESS_FILE_ANALYSIS_WARNING);
324            }
325        }
326        #[cfg(not(feature = "content"))]
327        warnings.push(
328            tokmd_analysis_grid::DisabledFeature::NearDuplicateScan
329                .warning()
330                .to_string(),
331        );
332    }
333
334    if plan.imports {
335        #[cfg(feature = "content")]
336        {
337            if let Some(list) = files.as_deref() {
338                match build_import_report(
339                    &ctx.root,
340                    list,
341                    &ctx.export,
342                    req.import_granularity,
343                    &req.limits,
344                ) {
345                    Ok(report) => imports = Some(report),
346                    Err(err) => warnings.push(format!("import scan failed: {}", err)),
347                }
348            }
349        }
350        #[cfg(not(feature = "content"))]
351        warnings.push(
352            tokmd_analysis_grid::DisabledFeature::ImportScan
353                .warning()
354                .to_string(),
355        );
356    }
357
358    if include_git {
359        #[cfg(feature = "git")]
360        {
361            if has_host_root {
362                let repo_root = match tokmd_git::repo_root(&ctx.root) {
363                    Some(root) => root,
364                    None => {
365                        warnings.push("git scan failed: not a git repo".to_string());
366                        PathBuf::new()
367                    }
368                };
369                if !repo_root.as_os_str().is_empty() {
370                    match tokmd_git::collect_history(
371                        &repo_root,
372                        req.limits.max_commits,
373                        req.limits.max_commit_files,
374                    ) {
375                        Ok(commits) => {
376                            if plan.git {
377                                match build_git_report(&repo_root, &ctx.export, &commits) {
378                                    Ok(report) => git = Some(report),
379                                    Err(err) => warnings.push(format!("git scan failed: {}", err)),
380                                }
381                            }
382                            if plan.churn {
383                                churn = Some(build_predictive_churn_report(
384                                    &ctx.export,
385                                    &commits,
386                                    &repo_root,
387                                ));
388                            }
389                            if plan.fingerprint {
390                                fingerprint = Some(build_corporate_fingerprint(&commits));
391                            }
392                        }
393                        Err(err) => warnings.push(format!("git scan failed: {}", err)),
394                    }
395                }
396            } else {
397                push_warning_once(&mut warnings, ROOTLESS_GIT_ANALYSIS_WARNING);
398            }
399        }
400        #[cfg(not(feature = "git"))]
401        warnings.push(
402            tokmd_analysis_grid::DisabledFeature::GitMetrics
403                .warning()
404                .to_string(),
405        );
406    }
407
408    if plan.archetype {
409        #[cfg(feature = "archetype")]
410        {
411            archetype = detect_archetype(&ctx.export);
412        }
413        #[cfg(not(feature = "archetype"))]
414        {
415            warnings.push(
416                tokmd_analysis_grid::DisabledFeature::Archetype
417                    .warning()
418                    .to_string(),
419            );
420        }
421    }
422
423    if plan.topics {
424        #[cfg(feature = "topics")]
425        {
426            topics = Some(build_topic_clouds(&ctx.export));
427        }
428        #[cfg(not(feature = "topics"))]
429        {
430            warnings.push(
431                tokmd_analysis_grid::DisabledFeature::Topics
432                    .warning()
433                    .to_string(),
434            );
435        }
436    }
437
438    if plan.entropy {
439        #[cfg(all(feature = "content", feature = "walk"))]
440        {
441            if let Some(list) = files.as_deref() {
442                match build_entropy_report(&ctx.root, list, &ctx.export, &req.limits) {
443                    Ok(report) => entropy = Some(report),
444                    Err(err) => warnings.push(format!("entropy scan failed: {}", err)),
445                }
446            }
447        }
448        #[cfg(not(all(feature = "content", feature = "walk")))]
449        warnings.push(
450            tokmd_analysis_grid::DisabledFeature::EntropyProfiling
451                .warning()
452                .to_string(),
453        );
454    }
455
456    if plan.license {
457        #[cfg(all(feature = "content", feature = "walk"))]
458        {
459            if let Some(list) = files.as_deref() {
460                match build_license_report(&ctx.root, list, &req.limits) {
461                    Ok(report) => license = Some(report),
462                    Err(err) => warnings.push(format!("license scan failed: {}", err)),
463                }
464            }
465        }
466        #[cfg(not(all(feature = "content", feature = "walk")))]
467        warnings.push(
468            tokmd_analysis_grid::DisabledFeature::LicenseRadar
469                .warning()
470                .to_string(),
471        );
472    }
473
474    if plan.complexity {
475        #[cfg(all(feature = "content", feature = "walk"))]
476        {
477            if let Some(list) = files.as_deref() {
478                match build_complexity_report(
479                    &ctx.root,
480                    list,
481                    &ctx.export,
482                    &req.limits,
483                    req.detail_functions,
484                ) {
485                    Ok(report) => complexity = Some(report),
486                    Err(err) => warnings.push(format!("complexity scan failed: {}", err)),
487                }
488            }
489        }
490        #[cfg(not(all(feature = "content", feature = "walk")))]
491        warnings.push(
492            tokmd_analysis_grid::DisabledFeature::ComplexityAnalysis
493                .warning()
494                .to_string(),
495        );
496    }
497
498    if plan.api_surface {
499        #[cfg(all(feature = "content", feature = "walk"))]
500        {
501            if let Some(list) = files.as_deref() {
502                match build_api_surface_report(&ctx.root, list, &ctx.export, &req.limits) {
503                    Ok(report) => api_surface = Some(report),
504                    Err(err) => warnings.push(format!("api surface scan failed: {}", err)),
505                }
506            }
507        }
508        #[cfg(not(all(feature = "content", feature = "walk")))]
509        warnings.push(
510            tokmd_analysis_grid::DisabledFeature::ApiSurfaceAnalysis
511                .warning()
512                .to_string(),
513        );
514    }
515
516    // Halstead metrics (feature-gated)
517    #[cfg(all(feature = "halstead", feature = "content", feature = "walk"))]
518    if plan.halstead
519        && let Some(list) = files.as_deref()
520    {
521        match build_halstead_report(&ctx.root, list, &ctx.export, &req.limits) {
522            Ok(halstead_report) => {
523                // Wire Halstead into complexity report if available
524                if let Some(ref mut cx) = complexity {
525                    attach_halstead_metrics(cx, halstead_report);
526                }
527            }
528            Err(err) => warnings.push(format!("halstead scan failed: {}", err)),
529        }
530    }
531
532    if plan.fun {
533        #[cfg(feature = "fun")]
534        {
535            fun = Some(build_fun_report(&derived));
536        }
537        #[cfg(not(feature = "fun"))]
538        {
539            warnings.push(
540                tokmd_analysis_grid::DisabledFeature::Fun
541                    .warning()
542                    .to_string(),
543            );
544            fun = None;
545        }
546    } else {
547        fun = None;
548    }
549
550    #[cfg(feature = "effort")]
551    let effort = if let Some(effort_request) = &req.effort {
552        match build_effort_report(
553            &ctx.root,
554            &ctx.export,
555            &derived,
556            git.as_ref(),
557            complexity.as_ref(),
558            api_surface.as_ref(),
559            dup.as_ref(),
560            effort_request,
561        ) {
562            Ok(report) => Some(report),
563            Err(err) => {
564                warnings.push(format!("effort estimate failed: {}", err));
565                None
566            }
567        }
568    } else {
569        None
570    };
571    #[cfg(not(feature = "effort"))]
572    let effort: Option<tokmd_analysis_types::EffortEstimateReport> = None;
573
574    let status = if warnings.is_empty() {
575        ScanStatus::Complete
576    } else {
577        ScanStatus::Partial
578    };
579
580    let receipt = AnalysisReceipt {
581        schema_version: tokmd_analysis_types::ANALYSIS_SCHEMA_VERSION,
582        generated_at_ms: now_ms(),
583        tool: ToolInfo::current(),
584        mode: "analysis".to_string(),
585        status,
586        warnings,
587        source,
588        args: req.args,
589        archetype,
590        topics,
591        entropy,
592        predictive_churn: churn,
593        corporate_fingerprint: fingerprint,
594        license,
595        derived: Some(derived),
596        assets,
597        deps,
598        git,
599        imports,
600        dup,
601        complexity,
602        api_surface,
603        effort,
604        fun,
605    };
606
607    Ok(receipt)
608}