Skip to main content

lisette_semantics/
analyze.rs

1use rayon::prelude::*;
2use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
3use std::path::PathBuf;
4use std::sync::Arc;
5
6use diagnostics::{LocalSink, SemanticResult};
7use syntax::ast::Expression;
8use syntax::program::{File, ModuleInfo, MutationInfo, UnusedInfo};
9
10use deps::TypedefLocator;
11
12use crate::cache::{
13    CompiledModule, EmitStamp, compute_emit_artifact_hash, compute_module_hash,
14    get_dependency_module_hashes,
15    go_stdlib::{self, load_cached_go_module},
16    hash_module_sources, is_cache_disabled, prelude as prelude_cache, register_cached_module,
17    save_module_cache, try_load_cache,
18};
19use crate::checker::TaskState;
20use crate::diagnostics::emit_for_locator_result;
21use crate::facts::{BindingIdAllocator, Facts};
22use crate::loader::Loader;
23use crate::module_graph::build_module_graph;
24use crate::passes;
25use crate::prelude::parse_and_register_prelude;
26use crate::store::{ENTRY_MODULE_ID, Store};
27
/// Which stage of the compiler pipeline an `analyze` run is serving.
///
/// `analyze` compares `AnalyzeInput::compile_phase` against `Emit` to decide
/// whether cached modules must also match the expected emit-artifact hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CompilePhase {
    /// Analysis only; this is the default phase.
    #[default]
    Check,
    /// Analysis on behalf of an emit run; cached modules are additionally
    /// validated against the emit-artifact hash.
    Emit,
}
34
/// Switches that tune a single `analyze` run. All flags default to `false`.
#[derive(Debug, Clone, Default)]
pub struct SemanticConfig {
    /// Forwarded to `passes::run` as its lint switch.
    pub run_lints: bool,
    /// Forwarded to module-graph construction and to the diagnostics emitted
    /// when a Go typedef cannot be located.
    pub standalone_mode: bool,
    /// When true, `analyze` scans the entry module's folder through the
    /// loader and parses every other `.lis` file (skipping `.d.lis` files
    /// and the entry file itself) into the entry module.
    pub load_siblings: bool,
}
41
42pub struct AnalyzeInput<'a> {
43    pub config: SemanticConfig,
44    pub loader: &'a dyn Loader,
45    pub source: String,
46    /// Bare identity name of the entry file (e.g. `main.lis`).
47    pub filename: String,
48    /// Cwd-relative display path for the entry file (e.g. `src/main.lis`);
49    /// equals `filename` when there is no separate display path.
50    pub display_path: String,
51    pub ast: Vec<Expression>,
52    pub project_root: Option<PathBuf>,
53    pub compile_phase: CompilePhase,
54    pub locator: TypedefLocator,
55    /// Go module path (from `lisette.toml`); folded into the cache emit-artifact
56    /// hash so a project rename invalidates Go outputs.
57    pub go_module: String,
58    /// When true, `analyze` skips both cache load and save. Set by the CLI for
59    /// `--debug` Emit so cwd-decorated Go files are not reused across cwds.
60    pub disable_cache: bool,
61}
62
63/// Wraps `SemanticResult` plus per-module emit stamps the CLI uses to update
64/// the cache after a successful artifact write.
65pub struct AnalyzeOutput {
66    pub result: SemanticResult,
67    pub facts: Facts,
68    pub emit_stamps: Vec<EmitStamp>,
69}
70
71pub fn analyze(input: AnalyzeInput) -> AnalyzeOutput {
72    let mut store = Store::new();
73
74    store.init_entry_module();
75    store.store_entry_file(
76        &input.filename,
77        &input.display_path,
78        &input.source,
79        input.ast,
80    );
81
82    let sink = LocalSink::new();
83
84    if input.config.load_siblings {
85        for (filename, content) in input.loader.scan_folder(ENTRY_MODULE_ID) {
86            if filename == input.filename
87                || !filename.ends_with(".lis")
88                || filename.ends_with(".d.lis")
89            {
90                continue;
91            }
92            let file_id = store.new_file_id();
93            let result = syntax::build_ast(&content.source, file_id);
94            sink.extend_parse_errors(result.errors);
95            store.store_file(
96                ENTRY_MODULE_ID,
97                File::new(
98                    ENTRY_MODULE_ID,
99                    &filename,
100                    &content.display_path,
101                    &content.source,
102                    result.ast,
103                    file_id,
104                ),
105            );
106        }
107    }
108
109    let entry_module = store.entry_module_id().to_string();
110    let mut graph_result = build_module_graph(
111        &mut store,
112        Some(input.loader),
113        &entry_module,
114        &sink,
115        input.config.standalone_mode,
116        &input.locator,
117    );
118
119    for cycle in &graph_result.cycles {
120        sink.push(diagnostics::module_graph::import_cycle(cycle));
121    }
122
123    let has_pre_check_errors = sink.has_errors();
124
125    let cache_disabled = is_cache_disabled();
126
127    let prelude_cache_hit = if cache_disabled {
128        false
129    } else if let Some(cached) = prelude_cache::try_load_prelude_cache() {
130        prelude_cache::register_cached_prelude(&mut store, cached);
131        true
132    } else {
133        false
134    };
135
136    if !prelude_cache_hit {
137        parse_and_register_prelude(&mut store, &sink);
138    }
139
140    let cache_enabled = input.project_root.is_some() && !cache_disabled && !input.disable_cache;
141    let check_go_files = input.compile_phase == CompilePhase::Emit;
142
143    let binding_ids = Arc::new(BindingIdAllocator::new());
144
145    let (mut facts, cached_modules, compiled_modules, ufcs_methods) = {
146        let mut checker = TaskState::new(&sink, binding_ids.clone());
147        checker
148            .ufcs_methods
149            .extend(crate::prelude::compute_prelude_ufcs(&store));
150
151        let mut module_hashes: HashMap<String, u64> = HashMap::default();
152        let mut cached_modules: HashSet<String> = HashSet::default();
153        let mut compiled_modules: Vec<CompiledModule> = vec![];
154
155        let order = std::mem::take(&mut graph_result.order);
156        let edges = &graph_result.edges;
157
158        let go_cache = if cache_disabled {
159            None
160        } else {
161            go_stdlib::try_load_go_stdlib_cache(input.locator.target())
162        };
163
164        let mut to_infer: Vec<String> = Vec::new();
165
166        for module_id in order {
167            if let Some(go_pkg) = module_id.strip_prefix("go:") {
168                if graph_result.link_only_modules.contains(&module_id) {
169                    continue;
170                }
171
172                if deps::is_stdlib(go_pkg)
173                    && let Some(ref cache) = go_cache
174                {
175                    load_cached_go_module(&mut store, &module_id, cache, input.locator.target());
176                    if store.is_visited(&module_id) {
177                        continue;
178                    }
179                }
180
181                match input.locator.find_typedef_content(go_pkg) {
182                    deps::TypedefLocatorResult::Found { content, origin } => {
183                        checker.parse_and_register_go_module(
184                            &mut store,
185                            &module_id,
186                            content.as_ref(),
187                            origin.into_cache_path(),
188                            &input.locator,
189                        );
190                    }
191                    other => {
192                        emit_for_locator_result(
193                            &other,
194                            &module_id,
195                            go_pkg,
196                            None,
197                            input.locator.target(),
198                            input.config.standalone_mode,
199                            &sink,
200                        );
201                    }
202                }
203                continue;
204            }
205
206            if store.is_visited(&module_id) {
207                continue;
208            }
209
210            let files = graph_result.files.remove(&module_id).unwrap_or_default();
211            let source_hash = hash_module_sources(&files);
212
213            let dep_hashes = get_dependency_module_hashes(&module_id, edges, &module_hashes);
214            let module_hash = compute_module_hash(source_hash, &dep_hashes);
215            module_hashes.insert(module_id.clone(), module_hash);
216
217            let is_entry = module_id == ENTRY_MODULE_ID;
218
219            let expected_artifact_hash =
220                check_go_files.then(|| compute_emit_artifact_hash(source_hash, &input.go_module));
221
222            if cache_enabled
223                && !is_entry
224                && let Some(ref project_root) = input.project_root
225                && let Some(cached) = try_load_cache(
226                    &module_id,
227                    source_hash,
228                    &dep_hashes,
229                    expected_artifact_hash,
230                    project_root,
231                    check_go_files,
232                )
233            {
234                checker
235                    .ufcs_methods
236                    .extend(cached.ufcs_methods.iter().cloned());
237                register_cached_module(&mut store, &module_id, cached, project_root);
238                cached_modules.insert(module_id.clone());
239                continue;
240            }
241
242            store.store_module(&module_id, files);
243            checker.register_module(&mut store, &module_id);
244
245            if !is_entry {
246                compiled_modules.push(CompiledModule {
247                    module_id: module_id.clone(),
248                    source_hash,
249                    dep_hashes,
250                });
251            }
252
253            to_infer.push(module_id);
254        }
255
256        let module_files: Vec<(String, Vec<File>)> = to_infer
257            .iter()
258            .map(|module_id| {
259                let files = checker.take_module_files(&mut store, module_id);
260                (module_id.clone(), files)
261            })
262            .collect();
263
264        // Single-file or tiny multi-module projects stay serial to avoid rayon
265        // overhead. This threshold is a conservative starting point, not a
266        // measured inflection point. To be tuned in future.
267        const PARALLEL_THRESHOLD: usize = 4;
268
269        if module_files.len() < PARALLEL_THRESHOLD {
270            for (module_id, files) in module_files {
271                checker.infer_module(&store, &module_id, files);
272            }
273        } else {
274            let allocator = binding_ids.clone();
275            let ufcs_snapshot = checker.ufcs_methods.clone();
276            let store_ref: &Store = &store;
277
278            type WorkerOutput = (Vec<(String, File)>, Facts, LocalSink);
279            let outputs: Vec<WorkerOutput> = module_files
280                .into_par_iter()
281                .map(|(module_id, files)| {
282                    let local_sink = LocalSink::new();
283                    let mut worker = TaskState::new(&local_sink, allocator.clone());
284                    worker.ufcs_methods = ufcs_snapshot.clone();
285                    worker.infer_module(store_ref, &module_id, files);
286                    let typed_files = std::mem::take(&mut worker.typed_files);
287                    let facts = std::mem::replace(&mut worker.facts, Facts::new(allocator.clone()));
288                    (typed_files, facts, local_sink)
289                })
290                .collect();
291
292            let mut worker_sinks: Vec<LocalSink> = Vec::with_capacity(outputs.len());
293            for (typed_files, facts, sink_local) in outputs {
294                checker.typed_files.extend(typed_files);
295                checker.facts.merge(facts);
296                worker_sinks.push(sink_local);
297            }
298            sink.extend(LocalSink::merge(worker_sinks));
299        }
300
301        for (module_id, typed_file) in std::mem::take(&mut checker.typed_files) {
302            store.store_file(&module_id, typed_file);
303        }
304
305        // Save Go stdlib cache if store has Go modules not already in cache
306        if !cache_disabled {
307            let all_go_modules: Vec<String> = store
308                .modules
309                .keys()
310                .filter(|id| id.strip_prefix("go:").is_some_and(deps::is_stdlib))
311                .cloned()
312                .collect();
313            let needs_save = !all_go_modules.is_empty()
314                && go_cache.as_ref().is_none_or(|c| {
315                    all_go_modules.len() != c.modules.len()
316                        || all_go_modules.iter().any(|id| !c.modules.contains_key(id))
317                });
318            if needs_save {
319                go_stdlib::save_go_stdlib_cache(&store, &all_go_modules, input.locator.target());
320            }
321        }
322
323        if !cache_disabled && !prelude_cache_hit {
324            prelude_cache::save_prelude_cache(&store);
325        }
326
327        (
328            checker.facts,
329            cached_modules,
330            compiled_modules,
331            checker.ufcs_methods,
332        )
333    };
334
335    let analysis = crate::context::AnalysisContext::new(&store, &ufcs_methods);
336
337    let mut unused = UnusedInfo::default();
338    if !has_pre_check_errors {
339        passes::run(
340            &analysis,
341            &mut facts,
342            &sink,
343            &mut unused,
344            input.config.run_lints,
345        );
346    }
347
348    let mut mutations = MutationInfo::default();
349    for (&binding_id, b) in facts.bindings.iter() {
350        if b.mutated {
351            mutations.mark_binding_mutated(binding_id);
352        }
353    }
354
355    // Canonicalize diagnostic order so the output is stable regardless of
356    // phase ordering, FxHashMap iteration, or parallel inference scheduling.
357    let mut all_diagnostics = sink.take();
358    all_diagnostics.sort_by(diagnostics::LisetteDiagnostic::sort_key);
359    let (errors, lints): (Vec<_>, Vec<_>) = all_diagnostics.into_iter().partition(|d| d.is_error());
360
361    let emit_stamps: Vec<EmitStamp> = compiled_modules
362        .iter()
363        .map(|c| EmitStamp {
364            module_id: c.module_id.clone(),
365            artifact_hash: compute_emit_artifact_hash(c.source_hash, &input.go_module),
366        })
367        .collect();
368
369    if cache_enabled && let Some(ref project_root) = input.project_root {
370        let has_errors = errors.iter().any(|e| e.is_error());
371        if !has_errors {
372            for compiled in compiled_modules {
373                let file_ids: HashSet<u32> = store
374                    .get_module(&compiled.module_id)
375                    .map(|m| m.file_ids().collect())
376                    .unwrap_or_default();
377
378                let has_module_warnings = lints.iter().any(|lint| {
379                    lint.file_id()
380                        .map(|fid| file_ids.contains(&fid))
381                        .unwrap_or(true)
382                });
383                if !has_module_warnings
384                    && let Err(e) =
385                        save_module_cache(&compiled, &store, project_root, &ufcs_methods)
386                {
387                    eprintln!(
388                        "warning: failed to write cache for {}: {e}",
389                        compiled.module_id
390                    );
391                }
392            }
393        }
394    }
395
396    let mut files = HashMap::default();
397    let mut definitions = HashMap::default();
398    let mut modules = HashMap::default();
399
400    let go_module_ids: HashSet<String> = store
401        .modules
402        .keys()
403        .filter(|id| id.starts_with(syntax::types::GO_IMPORT_PREFIX))
404        .cloned()
405        .collect();
406
407    for (mod_id, module) in store.modules {
408        let is_internal = module.is_internal();
409
410        definitions.extend(module.definitions);
411
412        // Internal modules (prelude, **nominal, go:...) stay out of `modules`
413        // so emit and lints skip them; their typedef files still join `files`
414        // so the LSP can map typedef file IDs to URIs for go-to-definition.
415        if is_internal {
416            files.extend(module.typedefs);
417            continue;
418        }
419
420        modules.insert(
421            mod_id,
422            ModuleInfo {
423                file_ids: module.files.keys().copied().collect(),
424                typedef_ids: module.typedefs.keys().copied().collect(),
425                id: module.id.clone(),
426                path: module.id,
427            },
428        );
429
430        files.extend(module.files);
431        files.extend(module.typedefs);
432    }
433
434    let result = SemanticResult {
435        files,
436        definitions,
437        modules,
438        errors,
439        lints,
440        entry_module_id: ENTRY_MODULE_ID.to_string(),
441        unused,
442        mutations,
443        cached_modules,
444        ufcs_methods,
445        typedef_paths: store.typedef_paths,
446        go_package_names: store.go_package_names,
447        go_module_ids,
448    };
449
450    AnalyzeOutput {
451        result,
452        facts,
453        emit_stamps,
454    }
455}