webspec_index/analyze/
orchestrate.rs1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9
10use anyhow::Result;
11
12use super::file::{analyze_file, FileAnalysis, SpecResolver};
13use super::scanner::SpecUrl;
14
/// File extensions (without the leading dot) that are treated as source files.
///
/// Matching against this list is exact and case-sensitive.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    // C / C++
    "cpp", "cc", "cxx", "c", "h", "hpp", "hxx",
    // Rust
    "rs",
    // JavaScript
    "js", "mjs", "jsm",
    // Python
    "py",
    // Java
    "java",
];
19
20pub fn is_source_file(path: &Path) -> bool {
22 path.extension()
23 .and_then(|e| e.to_str())
24 .is_some_and(|ext| SOURCE_EXTENSIONS.contains(&ext))
25}
26
27pub fn collect_files(path: &Path, recursive: bool) -> Result<Vec<PathBuf>> {
29 if path.is_file() {
30 return Ok(vec![path.to_path_buf()]);
31 }
32 if !path.is_dir() {
33 anyhow::bail!("{} is not a file or directory", path.display());
34 }
35 let mut files = Vec::new();
36 let mut dirs = vec![path.to_path_buf()];
37 while let Some(dir) = dirs.pop() {
38 for entry in std::fs::read_dir(&dir)? {
39 let entry = entry?;
40 let ft = entry.file_type()?;
41 if ft.is_file() && is_source_file(&entry.path()) {
42 files.push(entry.path());
43 } else if ft.is_dir() && recursive {
44 dirs.push(entry.path());
45 }
46 }
47 }
48 files.sort();
49 Ok(files)
50}
51
52pub struct DbResolver {
56 cache: dashmap::DashMap<String, Option<String>>,
57}
58
59impl DbResolver {
60 pub fn new() -> Self {
61 DbResolver {
62 cache: dashmap::DashMap::new(),
63 }
64 }
65
66 pub fn resolved_sections(&self) -> HashMap<String, String> {
70 self.cache
71 .iter()
72 .filter_map(|entry| {
73 let content = entry.value().as_ref()?;
74 let (spec, anchor) = entry.key().split_once('#')?;
75 let sym = format!("SPEC_{spec}_{anchor}");
76 Some((sym, content.clone()))
77 })
78 .collect()
79 }
80}
81
82impl Default for DbResolver {
83 fn default() -> Self {
84 Self::new()
85 }
86}
87
88impl SpecResolver for DbResolver {
89 fn resolve(&self, spec: &str, anchor: &str) -> Option<String> {
90 let key = format!("{spec}#{anchor}");
91 if let Some(cached) = self.cache.get(&key) {
92 return cached.clone();
93 }
94 let result = tokio::task::block_in_place(|| {
95 tokio::runtime::Handle::current()
96 .block_on(crate::query_section(&key, None))
97 .ok()
98 });
99 let content = result.and_then(|r| r.content).filter(|c| !c.is_empty());
100 self.cache.insert(key, content.clone());
101 content
102 }
103}
104
105pub struct AnalyzedFile {
107 pub path: PathBuf,
108 pub analysis: FileAnalysis,
109}
110
111pub struct AnalysisRun {
113 pub total_files_scanned: usize,
115 pub files: Vec<AnalyzedFile>,
117 pub read_errors: Vec<(PathBuf, String)>,
119 pub resolved_sections: HashMap<String, String>,
121}
122
123pub async fn analyze_paths(path: &Path, recursive: bool, threshold: f64) -> Result<AnalysisRun> {
132 let files = collect_files(path, recursive)?;
133 let total_files_scanned = files.len();
134
135 let spec_urls: Vec<SpecUrl> = crate::spec_urls()
136 .into_iter()
137 .map(|e| SpecUrl {
138 spec: e.spec,
139 base_url: e.base_url,
140 })
141 .collect();
142
143 let resolver = DbResolver::new();
144 let mut analyzed = Vec::new();
145 let mut read_errors = Vec::new();
146
147 for file_path in files {
148 let text = match std::fs::read_to_string(&file_path) {
149 Ok(t) => t,
150 Err(e) => {
151 read_errors.push((file_path, e.to_string()));
152 continue;
153 }
154 };
155
156 let analysis = analyze_file(&text, &spec_urls, &resolver, threshold);
157 if analysis.scopes.is_empty() {
158 continue;
159 }
160 analyzed.push(AnalyzedFile {
161 path: file_path,
162 analysis,
163 });
164 }
165
166 Ok(AnalysisRun {
167 total_files_scanned,
168 resolved_sections: resolver.resolved_sections(),
169 files: analyzed,
170 read_errors,
171 })
172}