Skip to main content

lisette_semantics/
analyze.rs

1use rayon::prelude::*;
2use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
3use std::path::PathBuf;
4use std::sync::Arc;
5
6use diagnostics::{LocalSink, SemanticResult};
7use syntax::ast::Expression;
8use syntax::program::{File, ModuleInfo, MutationInfo, UnusedInfo};
9
10use deps::TypedefLocator;
11
12use crate::cache::{
13    CompiledModule, EmitStamp, compute_emit_artifact_hash, compute_module_hash,
14    get_dependency_module_hashes,
15    go_stdlib::{self, load_cached_go_module},
16    hash_module_sources, is_cache_disabled, prelude as prelude_cache, register_cached_module,
17    save_module_cache, try_load_cache,
18};
19use crate::checker::TaskState;
20use crate::diagnostics::emit_for_locator_result;
21use crate::facts::{BindingIdAllocator, Facts};
22use crate::loader::Loader;
23use crate::module_graph::build_module_graph;
24use crate::passes;
25use crate::prelude::parse_and_register_prelude;
26use crate::store::{ENTRY_MODULE_ID, Store};
27
/// Which compiler phase an `analyze` run is serving.
///
/// `Check` is the default. When the phase is `Emit`, `analyze` additionally
/// asks the module cache to validate emitted Go artifacts before it reuses a
/// cached module.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CompilePhase {
    /// Analysis only; the default phase.
    #[default]
    Check,
    /// Analysis on behalf of code emission.
    Emit,
}
34
/// Switches that tune a single `analyze` run. Every flag defaults to `false`.
#[derive(Clone, Debug, Default)]
pub struct SemanticConfig {
    /// Forwarded to `passes::run` to decide whether lint passes execute.
    pub run_lints: bool,
    /// Forwarded to module-graph construction and to the diagnostics emitted
    /// when a Go typedef cannot be located.
    pub standalone_mode: bool,
    /// When set, `analyze` also parses the other `.lis` files (excluding
    /// `.d.lis`) that the loader finds next to the entry file and adds them
    /// to the entry module.
    pub load_siblings: bool,
}
41
/// Everything `analyze` needs for one run: the already-parsed entry file, the
/// services used to discover further modules, and cache-related settings.
pub struct AnalyzeInput<'a> {
    /// Flags controlling lints, standalone mode, and sibling-file loading.
    pub config: SemanticConfig,
    /// Source of sibling files (`scan_folder`) and of imported modules during
    /// module-graph construction.
    pub loader: &'a dyn Loader,
    /// Full source text of the entry file.
    pub source: String,
    /// Bare identity name of the entry file (e.g. `main.lis`).
    pub filename: String,
    /// Cwd-relative display path for the entry file (e.g. `src/main.lis`);
    /// equals `filename` when there is no separate display path.
    pub display_path: String,
    /// Parsed top-level expressions of the entry file.
    pub ast: Vec<Expression>,
    /// Project root used for the per-module cache. When `None`, the
    /// per-module cache is neither read nor written.
    pub project_root: Option<PathBuf>,
    /// Phase this run serves; `Emit` makes cache lookups also check emitted
    /// Go artifacts.
    pub compile_phase: CompilePhase,
    /// Resolves Go package typedefs and supplies the target used for the Go
    /// stdlib cache.
    pub locator: TypedefLocator,
    /// Go module path (from `lisette.toml`); folded into the cache emit-artifact
    /// hash so a project rename invalidates Go outputs.
    pub go_module: String,
    /// When true, `analyze` neither loads nor saves per-module cache entries.
    /// Set by the CLI for `--debug` Emit so cwd-decorated Go files are not
    /// reused across cwds. The prelude and Go stdlib caches are governed by
    /// `is_cache_disabled()` instead, not by this flag.
    pub disable_cache: bool,
}
62
/// Result of `analyze`: the `SemanticResult`, the collected `Facts`, and the
/// per-module emit stamps the CLI uses to update the cache after a successful
/// artifact write.
pub struct AnalyzeOutput {
    /// Typed files, definitions, module table, and sorted diagnostics.
    pub result: SemanticResult,
    /// Facts gathered during inference and then handed to the analysis passes.
    pub facts: Facts,
    /// One stamp per non-entry module that was compiled in this run rather
    /// than loaded from the module cache.
    pub emit_stamps: Vec<EmitStamp>,
}
70
71pub fn analyze(input: AnalyzeInput) -> AnalyzeOutput {
72    let mut store = Store::new();
73
74    store.init_entry_module();
75    store.store_entry_file(
76        &input.filename,
77        &input.display_path,
78        &input.source,
79        input.ast,
80    );
81
82    let sink = LocalSink::new();
83
84    if input.config.load_siblings {
85        for (filename, content) in input.loader.scan_folder(ENTRY_MODULE_ID) {
86            if filename == input.filename
87                || !filename.ends_with(".lis")
88                || filename.ends_with(".d.lis")
89            {
90                continue;
91            }
92            let file_id = store.new_file_id();
93            let result = syntax::build_ast(&content.source, file_id);
94            sink.extend_parse_errors(result.errors);
95            store.store_file(
96                ENTRY_MODULE_ID,
97                File::new(
98                    ENTRY_MODULE_ID,
99                    &filename,
100                    &content.display_path,
101                    &content.source,
102                    result.ast,
103                    file_id,
104                ),
105            );
106        }
107    }
108
109    let entry_module = store.entry_module_id().to_string();
110    let mut graph_result = build_module_graph(
111        &mut store,
112        Some(input.loader),
113        &entry_module,
114        &sink,
115        input.config.standalone_mode,
116        &input.locator,
117    );
118
119    for cycle in &graph_result.cycles {
120        sink.push(diagnostics::module_graph::import_cycle(cycle));
121    }
122
123    let has_pre_check_errors = sink.has_errors();
124
125    let cache_disabled = is_cache_disabled();
126
127    let prelude_cache_hit = if cache_disabled {
128        false
129    } else if let Some(cached) = prelude_cache::try_load_prelude_cache() {
130        prelude_cache::register_cached_prelude(&mut store, cached);
131        true
132    } else {
133        false
134    };
135
136    if !prelude_cache_hit {
137        parse_and_register_prelude(&mut store, &sink);
138    }
139
140    let cache_enabled = input.project_root.is_some() && !cache_disabled && !input.disable_cache;
141    let check_go_files = input.compile_phase == CompilePhase::Emit;
142
143    let binding_ids = Arc::new(BindingIdAllocator::new());
144
145    let (mut facts, cached_modules, compiled_modules, ufcs_methods) = {
146        let mut checker = TaskState::new(&sink, binding_ids.clone());
147        checker
148            .ufcs_methods
149            .extend(crate::prelude::compute_prelude_ufcs(&store));
150
151        let mut module_hashes: HashMap<String, u64> = HashMap::default();
152        let mut cached_modules: HashSet<String> = HashSet::default();
153        let mut compiled_modules: Vec<CompiledModule> = vec![];
154
155        let order = std::mem::take(&mut graph_result.order);
156        let edges = &graph_result.edges;
157
158        let go_cache = if cache_disabled {
159            None
160        } else {
161            go_stdlib::try_load_go_stdlib_cache(input.locator.target())
162        };
163
164        let mut to_infer: Vec<String> = Vec::new();
165
166        for module_id in order {
167            if let Some(go_pkg) = module_id.strip_prefix("go:") {
168                if graph_result.link_only_modules.contains(&module_id) {
169                    continue;
170                }
171
172                if deps::is_stdlib(go_pkg)
173                    && let Some(ref cache) = go_cache
174                {
175                    load_cached_go_module(&mut store, &module_id, cache, input.locator.target());
176                    if store.is_visited(&module_id) {
177                        continue;
178                    }
179                }
180
181                match input.locator.find_typedef_content(go_pkg) {
182                    deps::TypedefLocatorResult::Found { content, origin } => {
183                        checker.parse_and_register_go_module(
184                            &mut store,
185                            &module_id,
186                            content.as_ref(),
187                            origin.into_cache_path(),
188                            &input.locator,
189                        );
190                    }
191                    other => {
192                        emit_for_locator_result(
193                            &other,
194                            &module_id,
195                            go_pkg,
196                            None,
197                            input.locator.target(),
198                            input.config.standalone_mode,
199                            &sink,
200                        );
201                    }
202                }
203                continue;
204            }
205
206            if store.is_visited(&module_id) {
207                continue;
208            }
209
210            let files = graph_result.files.remove(&module_id).unwrap_or_default();
211            let source_hash = hash_module_sources(&files);
212
213            let dep_hashes = get_dependency_module_hashes(&module_id, edges, &module_hashes);
214            let module_hash = compute_module_hash(source_hash, &dep_hashes);
215            module_hashes.insert(module_id.clone(), module_hash);
216
217            let is_entry = module_id == ENTRY_MODULE_ID;
218
219            let expected_artifact_hash =
220                check_go_files.then(|| compute_emit_artifact_hash(source_hash, &input.go_module));
221
222            if cache_enabled
223                && !is_entry
224                && let Some(ref project_root) = input.project_root
225                && let Some(cached) = try_load_cache(
226                    &module_id,
227                    source_hash,
228                    &dep_hashes,
229                    expected_artifact_hash,
230                    project_root,
231                    check_go_files,
232                )
233            {
234                checker
235                    .ufcs_methods
236                    .extend(cached.ufcs_methods.iter().cloned());
237                register_cached_module(&mut store, &module_id, cached, project_root);
238                cached_modules.insert(module_id.clone());
239                continue;
240            }
241
242            store.store_module(&module_id, files);
243            checker.register_module(&mut store, &module_id);
244
245            if !is_entry {
246                compiled_modules.push(CompiledModule {
247                    module_id: module_id.clone(),
248                    source_hash,
249                    dep_hashes,
250                });
251            }
252
253            to_infer.push(module_id);
254        }
255
256        let module_files: Vec<(String, Vec<File>)> = to_infer
257            .iter()
258            .map(|module_id| {
259                let files = checker.take_module_files(&mut store, module_id);
260                (module_id.clone(), files)
261            })
262            .collect();
263
264        // Single-file or tiny multi-module projects stay serial to avoid rayon
265        // overhead. This threshold is a conservative starting point, not a
266        // measured inflection point. To be tuned in future.
267        const PARALLEL_THRESHOLD: usize = 4;
268
269        if module_files.len() < PARALLEL_THRESHOLD {
270            for (module_id, files) in module_files {
271                checker.infer_module(&store, &module_id, files);
272            }
273        } else {
274            let allocator = binding_ids.clone();
275            let ufcs_shared = Arc::new(std::mem::take(&mut checker.ufcs_methods));
276            let store_ref: &Store = &store;
277
278            type WorkerOutput = (Vec<(String, File)>, Facts, LocalSink);
279            let outputs: Vec<WorkerOutput> = module_files
280                .into_par_iter()
281                .map(|(module_id, files)| {
282                    let local_sink = LocalSink::new();
283                    let mut worker = TaskState::new(&local_sink, allocator.clone());
284                    worker.ufcs_shared = Some(ufcs_shared.clone());
285                    worker.infer_module(store_ref, &module_id, files);
286                    let typed_files = std::mem::take(&mut worker.typed_files);
287                    let facts = std::mem::replace(&mut worker.facts, Facts::new(allocator.clone()));
288                    (typed_files, facts, local_sink)
289                })
290                .collect();
291
292            checker.ufcs_methods =
293                Arc::try_unwrap(ufcs_shared).unwrap_or_else(|arc| (*arc).clone());
294
295            let mut worker_sinks: Vec<LocalSink> = Vec::with_capacity(outputs.len());
296            for (typed_files, facts, sink_local) in outputs {
297                checker.typed_files.extend(typed_files);
298                checker.facts.merge(facts);
299                worker_sinks.push(sink_local);
300            }
301            sink.extend(LocalSink::merge(worker_sinks));
302        }
303
304        for (module_id, typed_file) in std::mem::take(&mut checker.typed_files) {
305            store.store_file(&module_id, typed_file);
306        }
307
308        // Save Go stdlib cache if store has Go modules not already in cache
309        if !cache_disabled {
310            let all_go_modules: Vec<String> = store
311                .modules
312                .keys()
313                .filter(|id| id.strip_prefix("go:").is_some_and(deps::is_stdlib))
314                .cloned()
315                .collect();
316            let needs_save = !all_go_modules.is_empty()
317                && go_cache.as_ref().is_none_or(|c| {
318                    all_go_modules.len() != c.modules.len()
319                        || all_go_modules.iter().any(|id| !c.modules.contains_key(id))
320                });
321            if needs_save {
322                go_stdlib::save_go_stdlib_cache(&store, &all_go_modules, input.locator.target());
323            }
324        }
325
326        if !cache_disabled && !prelude_cache_hit {
327            prelude_cache::save_prelude_cache(&store);
328        }
329
330        (
331            checker.facts,
332            cached_modules,
333            compiled_modules,
334            checker.ufcs_methods,
335        )
336    };
337
338    let analysis = crate::context::AnalysisContext::new(&store, &ufcs_methods);
339
340    let mut unused = UnusedInfo::default();
341    if !has_pre_check_errors {
342        passes::run(
343            &analysis,
344            &mut facts,
345            &sink,
346            &mut unused,
347            input.config.run_lints,
348        );
349    }
350
351    let mut mutations = MutationInfo::default();
352    for (&binding_id, b) in facts.bindings.iter() {
353        if b.mutated {
354            mutations.mark_binding_mutated(binding_id);
355        }
356    }
357
358    // Canonicalize diagnostic order so the output is stable regardless of
359    // phase ordering, FxHashMap iteration, or parallel inference scheduling.
360    let mut all_diagnostics = sink.take();
361    all_diagnostics.sort_by(diagnostics::LisetteDiagnostic::sort_key);
362    let (errors, lints): (Vec<_>, Vec<_>) = all_diagnostics.into_iter().partition(|d| d.is_error());
363
364    let emit_stamps: Vec<EmitStamp> = compiled_modules
365        .iter()
366        .map(|c| EmitStamp {
367            module_id: c.module_id.clone(),
368            artifact_hash: compute_emit_artifact_hash(c.source_hash, &input.go_module),
369        })
370        .collect();
371
372    if cache_enabled && let Some(ref project_root) = input.project_root {
373        let has_errors = errors.iter().any(|e| e.is_error());
374        if !has_errors {
375            for compiled in compiled_modules {
376                let file_ids: HashSet<u32> = store
377                    .get_module(&compiled.module_id)
378                    .map(|m| m.file_ids().collect())
379                    .unwrap_or_default();
380
381                let has_module_warnings = lints.iter().any(|lint| {
382                    lint.file_id()
383                        .map(|fid| file_ids.contains(&fid))
384                        .unwrap_or(true)
385                });
386                if !has_module_warnings
387                    && let Err(e) =
388                        save_module_cache(&compiled, &store, project_root, &ufcs_methods)
389                {
390                    eprintln!(
391                        "warning: failed to write cache for {}: {e}",
392                        compiled.module_id
393                    );
394                }
395            }
396        }
397    }
398
399    let mut files = HashMap::default();
400    let mut definitions = HashMap::default();
401    let mut modules = HashMap::default();
402
403    let go_module_ids: HashSet<String> = store
404        .modules
405        .keys()
406        .filter(|id| id.starts_with(syntax::types::GO_IMPORT_PREFIX))
407        .cloned()
408        .collect();
409
410    for (mod_id, module) in store.modules {
411        let is_internal = module.is_internal();
412
413        definitions.extend(module.definitions);
414
415        // Internal modules (prelude, **nominal, go:...) stay out of `modules`
416        // so emit and lints skip them; their typedef files still join `files`
417        // so the LSP can map typedef file IDs to URIs for go-to-definition.
418        if is_internal {
419            files.extend(module.typedefs);
420            continue;
421        }
422
423        modules.insert(
424            mod_id,
425            ModuleInfo {
426                file_ids: module.files.keys().copied().collect(),
427                typedef_ids: module.typedefs.keys().copied().collect(),
428                id: module.id.clone(),
429                path: module.id,
430            },
431        );
432
433        files.extend(module.files);
434        files.extend(module.typedefs);
435    }
436
437    let result = SemanticResult {
438        files,
439        definitions,
440        modules,
441        errors,
442        lints,
443        entry_module_id: ENTRY_MODULE_ID.to_string(),
444        unused,
445        mutations,
446        cached_modules,
447        ufcs_methods,
448        typedef_paths: store.typedef_paths,
449        go_package_names: store.go_package_names,
450        go_module_ids,
451    };
452
453    AnalyzeOutput {
454        result,
455        facts,
456        emit_stamps,
457    }
458}