// lisette_semantics/analyze.rs

use rayon::prelude::*;
use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
use std::path::PathBuf;
use std::sync::Arc;

use diagnostics::{LocalSink, SemanticResult};
use ecow::EcoString;
use syntax::ast::{Expression, StructFieldDefinition};
use syntax::program::{File, Module, ModuleInfo, MutationInfo, UnusedInfo};

use deps::TypedefLocator;

use crate::cache::{
    CompiledModule, EmitStamp, compute_emit_artifact_hash, compute_module_hash,
    get_dependency_module_hashes,
    go_stdlib::{self, load_cached_go_module},
    hash_module_sources, is_cache_disabled, prelude as prelude_cache, register_cached_module,
    save_module_cache, try_load_cache,
};
use crate::checker::TaskState;
use crate::checker::infer::InferCtx;
use crate::diagnostics::emit_for_locator_result;
use crate::facts::{BindingIdAllocator, Facts};
use crate::loader::Loader;
use crate::module_graph::build_module_graph;
use crate::passes;
use crate::prelude::parse_and_register_prelude;
use crate::store::{ENTRY_MODULE_ID, Store};

/// Which compilation stage an `analyze` run is serving.
///
/// `analyze` compares `AnalyzeInput::compile_phase` against `Emit` to decide
/// whether cached modules must also be validated against their Go artifacts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CompilePhase {
    /// The default phase; `analyze` does not compute an expected
    /// emit-artifact hash for cache lookups in this phase.
    #[default]
    Check,
    /// `analyze` computes the expected emit-artifact hash and asks the module
    /// cache to check Go files.
    Emit,
}

/// Switches that tune a single `analyze` run. Every flag defaults to `false`.
#[derive(Debug, Clone, Default)]
pub struct SemanticConfig {
    /// Forwarded to `passes::run` as its lint switch.
    pub run_lints: bool,
    /// Forwarded to module-graph construction and to the diagnostics emitted
    /// when a Go typedef cannot be located.
    pub standalone_mode: bool,
    /// When set, `analyze` also parses the other `.lis` files (excluding
    /// `.d.lis`) that the loader reports for the entry module's folder.
    pub load_siblings: bool,
}

44pub struct AnalyzeInput<'a> {
45    pub config: SemanticConfig,
46    pub loader: &'a dyn Loader,
47    pub source: String,
48    /// Bare identity name of the entry file (e.g. `main.lis`).
49    pub filename: String,
50    /// Cwd-relative display path for the entry file (e.g. `src/main.lis`);
51    /// equals `filename` when there is no separate display path.
52    pub display_path: String,
53    pub ast: Vec<Expression>,
54    pub project_root: Option<PathBuf>,
55    pub compile_phase: CompilePhase,
56    pub locator: TypedefLocator,
57    /// Go module path (from `lisette.toml`); folded into the cache emit-artifact
58    /// hash so a project rename invalidates Go outputs.
59    pub go_module: String,
60    /// When true, `analyze` skips both cache load and save. Set by the CLI for
61    /// `--sourcemap` Emit so cwd-decorated Go files are not reused across cwds.
62    pub disable_cache: bool,
63}
64
65/// Wraps `SemanticResult` plus per-module emit stamps the CLI uses to update
66/// the cache after a successful artifact write.
67pub struct AnalyzeOutput {
68    pub result: SemanticResult,
69    pub facts: Facts,
70    pub emit_stamps: Vec<EmitStamp>,
71}
72
/// Groups topologically ordered modules into dependency waves, so a wave only
/// reads modules registered in earlier waves.
///
/// A module's wave is one more than the highest wave among its dependencies
/// that already have a wave, or `0` when none do. Dependencies listed in
/// `edges` but absent from `modules` are ignored, as is any dependency that
/// appears later in `modules` than its dependent; the grouping is therefore
/// only meaningful when `modules` is in topological order.
///
/// Within each wave, modules keep the relative order they had in `modules`.
fn registration_waves(
    modules: &[String],
    edges: &HashMap<String, HashSet<String>>,
) -> Vec<Vec<String>> {
    let mut wave_of: HashMap<&str, usize> = HashMap::default();
    let mut waves: Vec<Vec<String>> = Vec::new();
    for module_id in modules {
        let wave = edges
            .get(module_id)
            .into_iter()
            .flatten()
            .filter_map(|dep| wave_of.get(dep.as_str()))
            .map(|dep_wave| dep_wave + 1)
            .max()
            .unwrap_or(0);
        wave_of.insert(module_id.as_str(), wave);
        // `wave` is at most one past the highest existing wave, because every
        // counted dependency was itself pushed into `waves` earlier.
        if waves.len() == wave {
            waves.push(Vec::new());
        }
        waves[wave].push(module_id.clone());
    }
    waves
}

99pub fn analyze(input: AnalyzeInput) -> AnalyzeOutput {
100    let mut store = Store::new();
101
102    store.init_entry_module();
103    store.store_entry_file(
104        &input.filename,
105        &input.display_path,
106        &input.source,
107        input.ast,
108    );
109
110    let sink = LocalSink::new();
111
112    if input.config.load_siblings {
113        for (filename, content) in input.loader.scan_folder(ENTRY_MODULE_ID) {
114            if filename == input.filename
115                || !filename.ends_with(".lis")
116                || filename.ends_with(".d.lis")
117            {
118                continue;
119            }
120            let file_id = store.new_file_id();
121            let result = syntax::build_ast(&content.source, file_id);
122            sink.extend_parse_errors(result.errors);
123            store.store_file(
124                ENTRY_MODULE_ID,
125                File::new(
126                    ENTRY_MODULE_ID,
127                    &filename,
128                    &content.display_path,
129                    &content.source,
130                    result.ast,
131                    file_id,
132                ),
133            );
134        }
135    }
136
137    let entry_module = store.entry_module_id().to_string();
138    let mut graph_result = build_module_graph(
139        &mut store,
140        Some(input.loader),
141        &entry_module,
142        &sink,
143        input.config.standalone_mode,
144        &input.locator,
145    );
146
147    for cycle in &graph_result.cycles {
148        sink.push(diagnostics::module_graph::import_cycle(cycle));
149    }
150
151    let has_pre_check_errors = sink.has_errors();
152
153    let cache_disabled = is_cache_disabled();
154
155    let prelude_cache_hit = if cache_disabled {
156        false
157    } else if let Some(cached) = prelude_cache::try_load_prelude_cache() {
158        prelude_cache::register_cached_prelude(&mut store, cached);
159        true
160    } else {
161        false
162    };
163
164    if !prelude_cache_hit {
165        parse_and_register_prelude(&mut store, &sink);
166    }
167
168    let cache_enabled = input.project_root.is_some() && !cache_disabled && !input.disable_cache;
169    let check_go_files = input.compile_phase == CompilePhase::Emit;
170
171    let binding_ids = Arc::new(BindingIdAllocator::new());
172
173    let (mut facts, cached_modules, compiled_modules, ufcs_methods) = {
174        let mut checker = TaskState::new(&sink, binding_ids.clone());
175        checker
176            .ufcs_methods
177            .extend(crate::prelude::compute_prelude_ufcs(&store));
178
179        let mut module_hashes: HashMap<String, u64> = HashMap::default();
180        let mut cached_modules: HashSet<String> = HashSet::default();
181        let mut compiled_modules: Vec<CompiledModule> = vec![];
182
183        let order = std::mem::take(&mut graph_result.order);
184        let edges = &graph_result.edges;
185
186        // Outer `None` = not attempted: the deserialize costs milliseconds,
187        // which a project without stdlib imports should not pay.
188        let mut go_cache: Option<Option<go_stdlib::GoStdlibCache>> =
189            if cache_disabled { Some(None) } else { None };
190
191        let mut to_infer: Vec<String> = Vec::new();
192
193        for module_id in order {
194            if let Some(go_pkg) = module_id.strip_prefix("go:") {
195                if graph_result.link_only_modules.contains(&module_id) {
196                    continue;
197                }
198
199                if deps::is_stdlib(go_pkg)
200                    && let Some(ref cache) = *go_cache.get_or_insert_with(|| {
201                        go_stdlib::try_load_go_stdlib_cache(input.locator.target())
202                    })
203                {
204                    load_cached_go_module(&mut store, &module_id, cache, input.locator.target());
205                    if store.is_visited(&module_id) {
206                        continue;
207                    }
208                }
209
210                match input.locator.find_typedef_content(go_pkg) {
211                    deps::TypedefLocatorResult::Found { content, origin } => {
212                        checker.parse_and_register_go_module(
213                            &mut store,
214                            &module_id,
215                            content.as_ref(),
216                            origin.into_cache_path(),
217                            &input.locator,
218                        );
219                    }
220                    other => {
221                        emit_for_locator_result(
222                            &other,
223                            &module_id,
224                            go_pkg,
225                            None,
226                            input.locator.target(),
227                            input.config.standalone_mode,
228                            &sink,
229                        );
230                    }
231                }
232                continue;
233            }
234
235            if store.is_visited(&module_id) {
236                continue;
237            }
238
239            let files = graph_result.files.remove(&module_id).unwrap_or_default();
240            let source_hash = hash_module_sources(&files);
241
242            let dep_hashes = get_dependency_module_hashes(&module_id, edges, &module_hashes);
243            let module_hash = compute_module_hash(source_hash, &dep_hashes);
244            module_hashes.insert(module_id.clone(), module_hash);
245
246            let is_entry = module_id == ENTRY_MODULE_ID;
247
248            let expected_artifact_hash =
249                check_go_files.then(|| compute_emit_artifact_hash(source_hash, &input.go_module));
250
251            if cache_enabled
252                && !is_entry
253                && let Some(ref project_root) = input.project_root
254                && let Some(cached) = try_load_cache(
255                    &module_id,
256                    source_hash,
257                    &dep_hashes,
258                    expected_artifact_hash,
259                    project_root,
260                    check_go_files,
261                )
262            {
263                checker
264                    .ufcs_methods
265                    .extend(cached.ufcs_methods.iter().cloned());
266                register_cached_module(&mut store, &module_id, cached, project_root);
267                cached_modules.insert(module_id.clone());
268                continue;
269            }
270
271            store.store_module(&module_id, files);
272
273            if !is_entry {
274                compiled_modules.push(CompiledModule {
275                    module_id: module_id.clone(),
276                    source_hash,
277                    dep_hashes,
278                });
279            }
280
281            to_infer.push(module_id);
282        }
283
284        // Single-file or tiny multi-module projects stay serial to avoid rayon
285        // overhead. This threshold is a conservative starting point, not a
286        // measured inflection point. To be tuned in future.
287        const PARALLEL_THRESHOLD: usize = 4;
288
289        // Same-wave modules never read each other, so each worker mutates
290        // only its own detached module and reads the rest through a snapshot.
291        if to_infer.len() < PARALLEL_THRESHOLD {
292            for module_id in &to_infer {
293                checker.register_module(&mut store, module_id);
294            }
295        } else {
296            for wave in registration_waves(&to_infer, edges) {
297                if wave.len() == 1 {
298                    checker.register_module(&mut store, &wave[0]);
299                    continue;
300                }
301
302                let detached: Vec<(String, Arc<Module>)> = wave
303                    .into_iter()
304                    .map(|module_id| {
305                        let module = store
306                            .modules
307                            .remove(&module_id)
308                            .expect("fresh module must be stored before registration");
309                        (module_id, module)
310                    })
311                    .collect();
312
313                // One worker per thread-sized chunk: the store view and the
314                // `TaskState` caches are too expensive to rebuild per module.
315                let chunk_size = detached.len().div_ceil(rayon::current_num_threads()).max(1);
316                let mut chunks: Vec<Vec<(String, Arc<Module>)>> = Vec::new();
317                let mut remaining = detached.into_iter();
318                loop {
319                    let chunk: Vec<_> = remaining.by_ref().take(chunk_size).collect();
320                    if chunk.is_empty() {
321                        break;
322                    }
323                    chunks.push(chunk);
324                }
325
326                let allocator = binding_ids.clone();
327                let store_ref: &Store = &store;
328                let fields_shared = Arc::new(checker.module_fields_snapshot());
329
330                type RegisterOutput = (
331                    Vec<(String, Arc<Module>)>,
332                    HashSet<(String, String)>,
333                    HashMap<EcoString, Arc<[StructFieldDefinition]>>,
334                    Facts,
335                    LocalSink,
336                );
337                let outputs: Vec<RegisterOutput> = chunks
338                    .into_par_iter()
339                    .map(|chunk| {
340                        let local_sink = LocalSink::new();
341                        let mut worker = TaskState::new(&local_sink, allocator.clone());
342                        worker.module_fields_shared = Some(fields_shared.clone());
343                        let mut view = store_ref.registration_view();
344                        let mut registered = Vec::with_capacity(chunk.len());
345                        for (module_id, module) in chunk {
346                            view.modules.insert(module_id.clone(), module);
347                            worker.register_module(&mut view, &module_id);
348                            let module = view
349                                .modules
350                                .remove(&module_id)
351                                .expect("registered module must remain in view");
352                            registered.push((module_id, module));
353                        }
354                        let facts =
355                            std::mem::replace(&mut worker.facts, Facts::new(allocator.clone()));
356                        (
357                            registered,
358                            std::mem::take(&mut worker.ufcs_methods),
359                            worker.module_fields_snapshot(),
360                            facts,
361                            local_sink,
362                        )
363                    })
364                    .collect();
365
366                let mut worker_sinks: Vec<LocalSink> = Vec::with_capacity(outputs.len());
367                for (registered, ufcs_methods, module_fields, facts, sink_local) in outputs {
368                    for (module_id, module) in registered {
369                        store.modules.insert(module_id, module);
370                    }
371                    checker.ufcs_methods.extend(ufcs_methods);
372                    checker.merge_module_fields(module_fields);
373                    checker.facts.merge(facts);
374                    worker_sinks.push(sink_local);
375                }
376                sink.extend(LocalSink::merge(worker_sinks));
377            }
378        }
379
380        let module_files: Vec<(String, Vec<File>)> = to_infer
381            .iter()
382            .map(|module_id| {
383                let files = checker.take_module_files(&mut store, module_id);
384                (module_id.clone(), files)
385            })
386            .collect();
387
388        if module_files.len() < PARALLEL_THRESHOLD {
389            for (module_id, files) in module_files {
390                InferCtx::new(&mut checker, &store).infer_module(&module_id, files);
391            }
392        } else {
393            let allocator = binding_ids.clone();
394            let ufcs_shared = Arc::new(std::mem::take(&mut checker.ufcs_methods));
395            // Share register-built projections so workers do not rebuild them.
396            let fields_shared = Arc::new(checker.module_fields_snapshot());
397            let store_ref: &Store = &store;
398
399            type WorkerOutput = (Vec<(String, File)>, Facts, LocalSink);
400            let outputs: Vec<WorkerOutput> = module_files
401                .into_par_iter()
402                .map(|(module_id, files)| {
403                    let local_sink = LocalSink::new();
404                    let mut worker = TaskState::new(&local_sink, allocator.clone());
405                    worker.ufcs_shared = Some(ufcs_shared.clone());
406                    worker.module_fields_shared = Some(fields_shared.clone());
407                    InferCtx::new(&mut worker, store_ref).infer_module(&module_id, files);
408                    let typed_files = std::mem::take(&mut worker.typed_files);
409                    let facts = std::mem::replace(&mut worker.facts, Facts::new(allocator.clone()));
410                    (typed_files, facts, local_sink)
411                })
412                .collect();
413
414            checker.ufcs_methods =
415                Arc::try_unwrap(ufcs_shared).unwrap_or_else(|arc| (*arc).clone());
416
417            let mut worker_sinks: Vec<LocalSink> = Vec::with_capacity(outputs.len());
418            for (typed_files, facts, sink_local) in outputs {
419                checker.typed_files.extend(typed_files);
420                checker.facts.merge(facts);
421                worker_sinks.push(sink_local);
422            }
423            sink.extend(LocalSink::merge(worker_sinks));
424        }
425
426        for (module_id, typed_file) in std::mem::take(&mut checker.typed_files) {
427            store.store_file(&module_id, typed_file);
428        }
429
430        // Save Go stdlib cache if store has Go modules not already in cache
431        if !cache_disabled {
432            let all_go_modules: Vec<String> = store
433                .modules
434                .keys()
435                .filter(|id| id.strip_prefix("go:").is_some_and(deps::is_stdlib))
436                .cloned()
437                .collect();
438            // A non-empty list implies the lazy cache load was attempted.
439            let needs_save = !all_go_modules.is_empty()
440                && go_cache.as_ref().and_then(Option::as_ref).is_none_or(|c| {
441                    all_go_modules.len() != c.modules.len()
442                        || all_go_modules.iter().any(|id| !c.modules.contains_key(id))
443                });
444            if needs_save {
445                go_stdlib::save_go_stdlib_cache(&store, &all_go_modules, input.locator.target());
446            }
447        }
448
449        if !cache_disabled && !prelude_cache_hit {
450            prelude_cache::save_prelude_cache(&store);
451        }
452
453        (
454            checker.facts,
455            cached_modules,
456            compiled_modules,
457            checker.ufcs_methods,
458        )
459    };
460
461    store.build_closed_domains();
462
463    let analysis = crate::context::AnalysisContext::new(&store, &ufcs_methods);
464
465    let mut unused = UnusedInfo::default();
466    if !has_pre_check_errors {
467        passes::run(
468            &analysis,
469            &mut facts,
470            &sink,
471            &mut unused,
472            input.config.run_lints,
473        );
474    }
475
476    let mut mutations = MutationInfo::default();
477    for (&binding_id, b) in facts.bindings.iter() {
478        if b.mutated {
479            mutations.mark_binding_mutated(binding_id);
480        }
481    }
482
483    // Canonicalize diagnostic order so the output is stable regardless of
484    // phase ordering, FxHashMap iteration, or parallel inference scheduling.
485    let mut all_diagnostics = sink.take();
486    all_diagnostics.sort_by(diagnostics::LisetteDiagnostic::sort_key);
487    let (errors, lints): (Vec<_>, Vec<_>) = all_diagnostics.into_iter().partition(|d| d.is_error());
488
489    let emit_stamps: Vec<EmitStamp> = compiled_modules
490        .iter()
491        .map(|c| EmitStamp {
492            module_id: c.module_id.clone(),
493            artifact_hash: compute_emit_artifact_hash(c.source_hash, &input.go_module),
494        })
495        .collect();
496
497    if cache_enabled && let Some(ref project_root) = input.project_root {
498        let has_errors = errors.iter().any(|e| e.is_error());
499        if !has_errors {
500            for compiled in compiled_modules {
501                let file_ids: HashSet<u32> = store
502                    .get_module(&compiled.module_id)
503                    .map(|m| m.file_ids().collect())
504                    .unwrap_or_default();
505
506                let has_module_lints = lints.iter().any(|lint| {
507                    lint.file_id()
508                        .map(|fid| file_ids.contains(&fid))
509                        .unwrap_or(true)
510                });
511                if !has_module_lints
512                    && let Err(e) =
513                        save_module_cache(&compiled, &store, project_root, &ufcs_methods)
514                {
515                    eprintln!(
516                        "warning: failed to write cache for {}: {e}",
517                        compiled.module_id
518                    );
519                }
520            }
521        }
522    }
523
524    let mut files = HashMap::default();
525    let mut definitions = HashMap::default();
526    let mut modules = HashMap::default();
527
528    let go_module_ids: HashSet<String> = store
529        .modules
530        .keys()
531        .filter(|id| id.starts_with(syntax::types::GO_IMPORT_PREFIX))
532        .cloned()
533        .collect();
534
535    for (mod_id, module) in store.modules {
536        // Worker views are gone by now, so this unwraps without cloning.
537        let module = Arc::try_unwrap(module).unwrap_or_else(|shared| (*shared).clone());
538        let is_internal = module.is_internal();
539
540        definitions.extend(module.definitions);
541
542        // Internal modules (prelude, **nominal, go:...) stay out of `modules`
543        // so emit and lints skip them; their typedef files still join `files`
544        // so the LSP can map typedef file IDs to URIs for go-to-definition.
545        if is_internal {
546            files.extend(module.typedefs);
547            continue;
548        }
549
550        modules.insert(
551            mod_id,
552            ModuleInfo {
553                file_ids: module.files.keys().copied().collect(),
554                typedef_ids: module.typedefs.keys().copied().collect(),
555                id: module.id.clone(),
556                path: module.id,
557            },
558        );
559
560        files.extend(module.files);
561        files.extend(module.typedefs);
562    }
563
564    let result = SemanticResult {
565        files,
566        definitions,
567        modules,
568        errors,
569        lints,
570        entry_module_id: ENTRY_MODULE_ID.to_string(),
571        unused,
572        mutations,
573        cached_modules,
574        ufcs_methods,
575        typedef_paths: store.typedef_paths,
576        go_package_names: store.go_package_names,
577        go_module_ids,
578    };
579
580    AnalyzeOutput {
581        result,
582        facts,
583        emit_stamps,
584    }
585}