// lisette_semantics/analyze.rs

1use rayon::prelude::*;
2use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
3use std::path::PathBuf;
4use std::sync::Arc;
5
6use diagnostics::{LocalSink, SemanticResult};
7use syntax::ast::Expression;
8use syntax::program::{File, ModuleInfo, MutationInfo, UnusedInfo};
9
10use deps::TypedefLocator;
11
12use crate::cache::{
13    CompiledModule, EmitStamp, compute_emit_artifact_hash, compute_module_hash,
14    get_dependency_module_hashes,
15    go_stdlib::{self, load_cached_go_module},
16    hash_module_sources, is_cache_disabled, prelude as prelude_cache, register_cached_module,
17    save_module_cache, try_load_cache,
18};
19use crate::checker::TaskState;
20use crate::checker::infer::InferCtx;
21use crate::diagnostics::emit_for_locator_result;
22use crate::facts::{BindingIdAllocator, Facts};
23use crate::loader::Loader;
24use crate::module_graph::build_module_graph;
25use crate::passes;
26use crate::prelude::parse_and_register_prelude;
27use crate::store::{ENTRY_MODULE_ID, Store};
28
/// How far the compiler pipeline is being driven for this run.
///
/// `analyze` compares the requested phase against [`CompilePhase::Emit`] to
/// decide whether cached modules must also match an expected emit-artifact
/// hash before they are reused.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CompilePhase {
    /// Analysis only; this is the default phase.
    #[default]
    Check,
    /// Analysis performed on behalf of an emit run.
    Emit,
}
35
/// Switches that tune a single `analyze` run. All flags default to `false`.
#[derive(Debug, Clone, Default)]
pub struct SemanticConfig {
    /// Forwarded to `passes::run` as its lint toggle.
    pub run_lints: bool,
    /// Forwarded to module-graph construction and to the diagnostics emitted
    /// when a Go typedef cannot be located.
    pub standalone_mode: bool,
    /// When true, `analyze` also parses the other `.lis` files (excluding
    /// `.d.lis`) that the loader reports for the entry module's folder.
    pub load_siblings: bool,
}
42
43pub struct AnalyzeInput<'a> {
44    pub config: SemanticConfig,
45    pub loader: &'a dyn Loader,
46    pub source: String,
47    /// Bare identity name of the entry file (e.g. `main.lis`).
48    pub filename: String,
49    /// Cwd-relative display path for the entry file (e.g. `src/main.lis`);
50    /// equals `filename` when there is no separate display path.
51    pub display_path: String,
52    pub ast: Vec<Expression>,
53    pub project_root: Option<PathBuf>,
54    pub compile_phase: CompilePhase,
55    pub locator: TypedefLocator,
56    /// Go module path (from `lisette.toml`); folded into the cache emit-artifact
57    /// hash so a project rename invalidates Go outputs.
58    pub go_module: String,
59    /// When true, `analyze` skips both cache load and save. Set by the CLI for
60    /// `--debug` Emit so cwd-decorated Go files are not reused across cwds.
61    pub disable_cache: bool,
62}
63
64/// Wraps `SemanticResult` plus per-module emit stamps the CLI uses to update
65/// the cache after a successful artifact write.
66pub struct AnalyzeOutput {
67    pub result: SemanticResult,
68    pub facts: Facts,
69    pub emit_stamps: Vec<EmitStamp>,
70}
71
72pub fn analyze(input: AnalyzeInput) -> AnalyzeOutput {
73    let mut store = Store::new();
74
75    store.init_entry_module();
76    store.store_entry_file(
77        &input.filename,
78        &input.display_path,
79        &input.source,
80        input.ast,
81    );
82
83    let sink = LocalSink::new();
84
85    if input.config.load_siblings {
86        for (filename, content) in input.loader.scan_folder(ENTRY_MODULE_ID) {
87            if filename == input.filename
88                || !filename.ends_with(".lis")
89                || filename.ends_with(".d.lis")
90            {
91                continue;
92            }
93            let file_id = store.new_file_id();
94            let result = syntax::build_ast(&content.source, file_id);
95            sink.extend_parse_errors(result.errors);
96            store.store_file(
97                ENTRY_MODULE_ID,
98                File::new(
99                    ENTRY_MODULE_ID,
100                    &filename,
101                    &content.display_path,
102                    &content.source,
103                    result.ast,
104                    file_id,
105                ),
106            );
107        }
108    }
109
110    let entry_module = store.entry_module_id().to_string();
111    let mut graph_result = build_module_graph(
112        &mut store,
113        Some(input.loader),
114        &entry_module,
115        &sink,
116        input.config.standalone_mode,
117        &input.locator,
118    );
119
120    for cycle in &graph_result.cycles {
121        sink.push(diagnostics::module_graph::import_cycle(cycle));
122    }
123
124    let has_pre_check_errors = sink.has_errors();
125
126    let cache_disabled = is_cache_disabled();
127
128    let prelude_cache_hit = if cache_disabled {
129        false
130    } else if let Some(cached) = prelude_cache::try_load_prelude_cache() {
131        prelude_cache::register_cached_prelude(&mut store, cached);
132        true
133    } else {
134        false
135    };
136
137    if !prelude_cache_hit {
138        parse_and_register_prelude(&mut store, &sink);
139    }
140
141    let cache_enabled = input.project_root.is_some() && !cache_disabled && !input.disable_cache;
142    let check_go_files = input.compile_phase == CompilePhase::Emit;
143
144    let binding_ids = Arc::new(BindingIdAllocator::new());
145
146    let (mut facts, cached_modules, compiled_modules, ufcs_methods) = {
147        let mut checker = TaskState::new(&sink, binding_ids.clone());
148        checker
149            .ufcs_methods
150            .extend(crate::prelude::compute_prelude_ufcs(&store));
151
152        let mut module_hashes: HashMap<String, u64> = HashMap::default();
153        let mut cached_modules: HashSet<String> = HashSet::default();
154        let mut compiled_modules: Vec<CompiledModule> = vec![];
155
156        let order = std::mem::take(&mut graph_result.order);
157        let edges = &graph_result.edges;
158
159        let go_cache = if cache_disabled {
160            None
161        } else {
162            go_stdlib::try_load_go_stdlib_cache(input.locator.target())
163        };
164
165        let mut to_infer: Vec<String> = Vec::new();
166
167        for module_id in order {
168            if let Some(go_pkg) = module_id.strip_prefix("go:") {
169                if graph_result.link_only_modules.contains(&module_id) {
170                    continue;
171                }
172
173                if deps::is_stdlib(go_pkg)
174                    && let Some(ref cache) = go_cache
175                {
176                    load_cached_go_module(&mut store, &module_id, cache, input.locator.target());
177                    if store.is_visited(&module_id) {
178                        continue;
179                    }
180                }
181
182                match input.locator.find_typedef_content(go_pkg) {
183                    deps::TypedefLocatorResult::Found { content, origin } => {
184                        checker.parse_and_register_go_module(
185                            &mut store,
186                            &module_id,
187                            content.as_ref(),
188                            origin.into_cache_path(),
189                            &input.locator,
190                        );
191                    }
192                    other => {
193                        emit_for_locator_result(
194                            &other,
195                            &module_id,
196                            go_pkg,
197                            None,
198                            input.locator.target(),
199                            input.config.standalone_mode,
200                            &sink,
201                        );
202                    }
203                }
204                continue;
205            }
206
207            if store.is_visited(&module_id) {
208                continue;
209            }
210
211            let files = graph_result.files.remove(&module_id).unwrap_or_default();
212            let source_hash = hash_module_sources(&files);
213
214            let dep_hashes = get_dependency_module_hashes(&module_id, edges, &module_hashes);
215            let module_hash = compute_module_hash(source_hash, &dep_hashes);
216            module_hashes.insert(module_id.clone(), module_hash);
217
218            let is_entry = module_id == ENTRY_MODULE_ID;
219
220            let expected_artifact_hash =
221                check_go_files.then(|| compute_emit_artifact_hash(source_hash, &input.go_module));
222
223            if cache_enabled
224                && !is_entry
225                && let Some(ref project_root) = input.project_root
226                && let Some(cached) = try_load_cache(
227                    &module_id,
228                    source_hash,
229                    &dep_hashes,
230                    expected_artifact_hash,
231                    project_root,
232                    check_go_files,
233                )
234            {
235                checker
236                    .ufcs_methods
237                    .extend(cached.ufcs_methods.iter().cloned());
238                register_cached_module(&mut store, &module_id, cached, project_root);
239                cached_modules.insert(module_id.clone());
240                continue;
241            }
242
243            store.store_module(&module_id, files);
244            checker.register_module(&mut store, &module_id);
245
246            if !is_entry {
247                compiled_modules.push(CompiledModule {
248                    module_id: module_id.clone(),
249                    source_hash,
250                    dep_hashes,
251                });
252            }
253
254            to_infer.push(module_id);
255        }
256
257        let module_files: Vec<(String, Vec<File>)> = to_infer
258            .iter()
259            .map(|module_id| {
260                let files = checker.take_module_files(&mut store, module_id);
261                (module_id.clone(), files)
262            })
263            .collect();
264
265        // Single-file or tiny multi-module projects stay serial to avoid rayon
266        // overhead. This threshold is a conservative starting point, not a
267        // measured inflection point. To be tuned in future.
268        const PARALLEL_THRESHOLD: usize = 4;
269
270        if module_files.len() < PARALLEL_THRESHOLD {
271            for (module_id, files) in module_files {
272                InferCtx::new(&mut checker, &store).infer_module(&module_id, files);
273            }
274        } else {
275            let allocator = binding_ids.clone();
276            let ufcs_shared = Arc::new(std::mem::take(&mut checker.ufcs_methods));
277            // Share register-built projections so workers do not rebuild them.
278            let fields_shared = Arc::new(checker.module_fields_snapshot());
279            let store_ref: &Store = &store;
280
281            type WorkerOutput = (Vec<(String, File)>, Facts, LocalSink);
282            let outputs: Vec<WorkerOutput> = module_files
283                .into_par_iter()
284                .map(|(module_id, files)| {
285                    let local_sink = LocalSink::new();
286                    let mut worker = TaskState::new(&local_sink, allocator.clone());
287                    worker.ufcs_shared = Some(ufcs_shared.clone());
288                    worker.module_fields_shared = Some(fields_shared.clone());
289                    InferCtx::new(&mut worker, store_ref).infer_module(&module_id, files);
290                    let typed_files = std::mem::take(&mut worker.typed_files);
291                    let facts = std::mem::replace(&mut worker.facts, Facts::new(allocator.clone()));
292                    (typed_files, facts, local_sink)
293                })
294                .collect();
295
296            checker.ufcs_methods =
297                Arc::try_unwrap(ufcs_shared).unwrap_or_else(|arc| (*arc).clone());
298
299            let mut worker_sinks: Vec<LocalSink> = Vec::with_capacity(outputs.len());
300            for (typed_files, facts, sink_local) in outputs {
301                checker.typed_files.extend(typed_files);
302                checker.facts.merge(facts);
303                worker_sinks.push(sink_local);
304            }
305            sink.extend(LocalSink::merge(worker_sinks));
306        }
307
308        for (module_id, typed_file) in std::mem::take(&mut checker.typed_files) {
309            store.store_file(&module_id, typed_file);
310        }
311
312        // Save Go stdlib cache if store has Go modules not already in cache
313        if !cache_disabled {
314            let all_go_modules: Vec<String> = store
315                .modules
316                .keys()
317                .filter(|id| id.strip_prefix("go:").is_some_and(deps::is_stdlib))
318                .cloned()
319                .collect();
320            let needs_save = !all_go_modules.is_empty()
321                && go_cache.as_ref().is_none_or(|c| {
322                    all_go_modules.len() != c.modules.len()
323                        || all_go_modules.iter().any(|id| !c.modules.contains_key(id))
324                });
325            if needs_save {
326                go_stdlib::save_go_stdlib_cache(&store, &all_go_modules, input.locator.target());
327            }
328        }
329
330        if !cache_disabled && !prelude_cache_hit {
331            prelude_cache::save_prelude_cache(&store);
332        }
333
334        (
335            checker.facts,
336            cached_modules,
337            compiled_modules,
338            checker.ufcs_methods,
339        )
340    };
341
342    store.build_closed_domains();
343
344    let analysis = crate::context::AnalysisContext::new(&store, &ufcs_methods);
345
346    let mut unused = UnusedInfo::default();
347    if !has_pre_check_errors {
348        passes::run(
349            &analysis,
350            &mut facts,
351            &sink,
352            &mut unused,
353            input.config.run_lints,
354        );
355    }
356
357    let mut mutations = MutationInfo::default();
358    for (&binding_id, b) in facts.bindings.iter() {
359        if b.mutated {
360            mutations.mark_binding_mutated(binding_id);
361        }
362    }
363
364    // Canonicalize diagnostic order so the output is stable regardless of
365    // phase ordering, FxHashMap iteration, or parallel inference scheduling.
366    let mut all_diagnostics = sink.take();
367    all_diagnostics.sort_by(diagnostics::LisetteDiagnostic::sort_key);
368    let (errors, lints): (Vec<_>, Vec<_>) = all_diagnostics.into_iter().partition(|d| d.is_error());
369
370    let emit_stamps: Vec<EmitStamp> = compiled_modules
371        .iter()
372        .map(|c| EmitStamp {
373            module_id: c.module_id.clone(),
374            artifact_hash: compute_emit_artifact_hash(c.source_hash, &input.go_module),
375        })
376        .collect();
377
378    if cache_enabled && let Some(ref project_root) = input.project_root {
379        let has_errors = errors.iter().any(|e| e.is_error());
380        if !has_errors {
381            for compiled in compiled_modules {
382                let file_ids: HashSet<u32> = store
383                    .get_module(&compiled.module_id)
384                    .map(|m| m.file_ids().collect())
385                    .unwrap_or_default();
386
387                let has_module_lints = lints.iter().any(|lint| {
388                    lint.file_id()
389                        .map(|fid| file_ids.contains(&fid))
390                        .unwrap_or(true)
391                });
392                if !has_module_lints
393                    && let Err(e) =
394                        save_module_cache(&compiled, &store, project_root, &ufcs_methods)
395                {
396                    eprintln!(
397                        "warning: failed to write cache for {}: {e}",
398                        compiled.module_id
399                    );
400                }
401            }
402        }
403    }
404
405    let mut files = HashMap::default();
406    let mut definitions = HashMap::default();
407    let mut modules = HashMap::default();
408
409    let go_module_ids: HashSet<String> = store
410        .modules
411        .keys()
412        .filter(|id| id.starts_with(syntax::types::GO_IMPORT_PREFIX))
413        .cloned()
414        .collect();
415
416    for (mod_id, module) in store.modules {
417        let is_internal = module.is_internal();
418
419        definitions.extend(module.definitions);
420
421        // Internal modules (prelude, **nominal, go:...) stay out of `modules`
422        // so emit and lints skip them; their typedef files still join `files`
423        // so the LSP can map typedef file IDs to URIs for go-to-definition.
424        if is_internal {
425            files.extend(module.typedefs);
426            continue;
427        }
428
429        modules.insert(
430            mod_id,
431            ModuleInfo {
432                file_ids: module.files.keys().copied().collect(),
433                typedef_ids: module.typedefs.keys().copied().collect(),
434                id: module.id.clone(),
435                path: module.id,
436            },
437        );
438
439        files.extend(module.files);
440        files.extend(module.typedefs);
441    }
442
443    let result = SemanticResult {
444        files,
445        definitions,
446        modules,
447        errors,
448        lints,
449        entry_module_id: ENTRY_MODULE_ID.to_string(),
450        unused,
451        mutations,
452        cached_modules,
453        ufcs_methods,
454        typedef_paths: store.typedef_paths,
455        go_package_names: store.go_package_names,
456        go_module_ids,
457    };
458
459    AnalyzeOutput {
460        result,
461        facts,
462        emit_stamps,
463    }
464}