// webspec_index/analyze/orchestrate.rs

//! Filesystem orchestration for the `analyze` workflow.
//!
//! Collects source files, resolves spec sections from the local DB, and runs
//! per-file analysis. Returns structured results; callers (the CLI, the Python
//! bindings) decide how to render or persist them.

use std::collections::HashMap;
use std::path::{Path, PathBuf};

use anyhow::Context as _;
use anyhow::Result;

use super::file::{analyze_file, FileAnalysis, SpecResolver};
use super::scanner::SpecUrl;

/// Source file extensions to scan when analyzing directories.
///
/// Entries are lowercase and written without the leading dot;
/// [`is_source_file`] compares against them ASCII case-insensitively.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    "cpp", "cc", "cxx", "c", "h", "hpp", "hxx", "rs", "js", "mjs", "jsm", "py", "java",
];

/// Whether `path` has a recognized source-file extension.
///
/// The extension is compared against [`SOURCE_EXTENSIONS`] ignoring ASCII
/// case, so `foo.CPP` and `Widget.H` are recognized just like `foo.cpp` and
/// `widget.h`. Paths with no extension, or whose extension is not valid
/// UTF-8, are rejected.
pub fn is_source_file(path: &Path) -> bool {
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| {
            SOURCE_EXTENSIONS
                .iter()
                .any(|known| known.eq_ignore_ascii_case(ext))
        })
}

27/// Collect source files to analyze from a file or directory.
28pub fn collect_files(path: &Path, recursive: bool) -> Result<Vec<PathBuf>> {
29    if path.is_file() {
30        return Ok(vec![path.to_path_buf()]);
31    }
32    if !path.is_dir() {
33        anyhow::bail!("{} is not a file or directory", path.display());
34    }
35    let mut files = Vec::new();
36    let mut dirs = vec![path.to_path_buf()];
37    while let Some(dir) = dirs.pop() {
38        for entry in std::fs::read_dir(&dir)? {
39            let entry = entry?;
40            let ft = entry.file_type()?;
41            if ft.is_file() && is_source_file(&entry.path()) {
42                files.push(entry.path());
43            } else if ft.is_dir() && recursive {
44                dirs.push(entry.path());
45            }
46        }
47    }
48    files.sort();
49    Ok(files)
50}
51
52/// DB-backed spec resolver for the analyze workflow.
53///
54/// Uses `DashMap` for thread-safe caching (safe for future parallelization).
55pub struct DbResolver {
56    cache: dashmap::DashMap<String, Option<String>>,
57}
58
59impl DbResolver {
60    pub fn new() -> Self {
61        DbResolver {
62            cache: dashmap::DashMap::new(),
63        }
64    }
65
66    /// Return all successfully resolved sections as a map of
67    /// "SPEC_<spec>_<anchor>" -> content (the same symbol names used in
68    /// searchfox analysis records).
69    pub fn resolved_sections(&self) -> HashMap<String, String> {
70        self.cache
71            .iter()
72            .filter_map(|entry| {
73                let content = entry.value().as_ref()?;
74                let (spec, anchor) = entry.key().split_once('#')?;
75                let sym = format!("SPEC_{spec}_{anchor}");
76                Some((sym, content.clone()))
77            })
78            .collect()
79    }
80}
81
82impl Default for DbResolver {
83    fn default() -> Self {
84        Self::new()
85    }
86}
87
88impl SpecResolver for DbResolver {
89    fn resolve(&self, spec: &str, anchor: &str) -> Option<String> {
90        let key = format!("{spec}#{anchor}");
91        if let Some(cached) = self.cache.get(&key) {
92            return cached.clone();
93        }
94        let result = tokio::task::block_in_place(|| {
95            tokio::runtime::Handle::current()
96                .block_on(crate::query_section(&key, None))
97                .ok()
98        });
99        let content = result.and_then(|r| r.content).filter(|c| !c.is_empty());
100        self.cache.insert(key, content.clone());
101        content
102    }
103}
104
105/// A single analyzed file with its path and analysis result.
106pub struct AnalyzedFile {
107    pub path: PathBuf,
108    pub analysis: FileAnalysis,
109}
110
111/// Result of analyzing a path (file or directory).
112pub struct AnalysisRun {
113    /// Total number of source files scanned (before scope filtering).
114    pub total_files_scanned: usize,
115    /// Files that contained at least one spec scope.
116    pub files: Vec<AnalyzedFile>,
117    /// Files that could not be read, as (path, error message).
118    pub read_errors: Vec<(PathBuf, String)>,
119    /// Spec sections resolved during analysis (symbol -> content).
120    pub resolved_sections: HashMap<String, String>,
121}
122
123/// Analyze a file or directory for spec references and step-comment validation.
124///
125/// Scans each source file for spec URLs and validates step comments against the
126/// referenced spec algorithms (fetched/cached via the local DB). Only files with
127/// at least one spec scope are included in [`AnalysisRun::files`].
128///
129/// Must be called from within a multi-threaded Tokio runtime: spec resolution
130/// blocks the current worker thread via `block_in_place`.
131pub async fn analyze_paths(path: &Path, recursive: bool, threshold: f64) -> Result<AnalysisRun> {
132    let files = collect_files(path, recursive)?;
133    let total_files_scanned = files.len();
134
135    let spec_urls: Vec<SpecUrl> = crate::spec_urls()
136        .into_iter()
137        .map(|e| SpecUrl {
138            spec: e.spec,
139            base_url: e.base_url,
140        })
141        .collect();
142
143    let resolver = DbResolver::new();
144    let mut analyzed = Vec::new();
145    let mut read_errors = Vec::new();
146
147    for file_path in files {
148        let text = match std::fs::read_to_string(&file_path) {
149            Ok(t) => t,
150            Err(e) => {
151                read_errors.push((file_path, e.to_string()));
152                continue;
153            }
154        };
155
156        let analysis = analyze_file(&text, &spec_urls, &resolver, threshold);
157        if analysis.scopes.is_empty() {
158            continue;
159        }
160        analyzed.push(AnalyzedFile {
161            path: file_path,
162            analysis,
163        });
164    }
165
166    Ok(AnalysisRun {
167        total_files_scanned,
168        resolved_sections: resolver.resolved_sections(),
169        files: analyzed,
170        read_errors,
171    })
172}