// llmcc_core/context.rs

1use rayon::prelude::*;
2use std::cmp::Ordering as CmpOrdering;
3use std::collections::{BTreeMap, HashMap};
4use std::ops::Deref;
5use std::sync::{
6    atomic::{AtomicU32, Ordering},
7    RwLock,
8};
9use std::time::Instant;
10use tree_sitter::Tree;
11
12use crate::block::{Arena as BlockArena, BasicBlock, BlockId, BlockKind};
13use crate::block_rel::BlockRelationMap;
14use crate::file::File;
15use crate::interner::{InternPool, InternedStr};
16use crate::ir::{Arena, HirId, HirNode};
17use crate::lang_def::LanguageTrait;
18use crate::symbol::{Scope, SymId, Symbol};
19
20#[derive(Debug, Copy, Clone)]
21pub struct CompileUnit<'tcx> {
22    pub cc: &'tcx CompileCtxt<'tcx>,
23    pub index: usize,
24}
25
26impl<'tcx> CompileUnit<'tcx> {
27    pub fn file(&self) -> &'tcx File {
28        &self.cc.files[self.index]
29    }
30
31    pub fn tree(&self) -> &'tcx Tree {
32        self.cc.trees[self.index].as_ref().unwrap()
33    }
34
35    /// Access the shared string interner.
36    pub fn interner(&self) -> &InternPool {
37        &self.cc.interner
38    }
39
40    /// Intern a string and return its symbol.
41    pub fn intern_str<S>(&self, value: S) -> InternedStr
42    where
43        S: AsRef<str>,
44    {
45        self.cc.interner.intern(value)
46    }
47
48    /// Resolve an interned symbol into an owned string.
49    pub fn resolve_interned_owned(&self, symbol: InternedStr) -> Option<String> {
50        self.cc.interner.resolve_owned(symbol)
51    }
52
53    pub fn reserve_hir_id(&self) -> HirId {
54        self.cc.reserve_hir_id()
55    }
56
57    pub fn reserve_block_id(&self) -> BlockId {
58        self.cc.reserve_block_id()
59    }
60
61    pub fn register_file_start(&self) -> HirId {
62        let start = self.cc.current_hir_id();
63        self.cc.set_file_start(self.index, start);
64        start
65    }
66
67    pub fn file_start_hir_id(&self) -> Option<HirId> {
68        self.cc.file_start(self.index)
69    }
70
71    pub fn file_path(&self) -> Option<&str> {
72        self.cc.file_path(self.index)
73    }
74
75    /// Get text from the file between start and end byte positions
76    pub fn get_text(&self, start: usize, end: usize) -> String {
77        self.file().get_text(start, end)
78    }
79
80    /// Get a HIR node by ID, returning None if not found
81    pub fn opt_hir_node(self, id: HirId) -> Option<HirNode<'tcx>> {
82        self.cc
83            .hir_map
84            .read()
85            .unwrap()
86            .get(&id)
87            .map(|parented| parented.node)
88    }
89
90    /// Get a HIR node by ID, panicking if not found
91    pub fn hir_node(self, id: HirId) -> HirNode<'tcx> {
92        self.opt_hir_node(id)
93            .unwrap_or_else(|| panic!("hir node not found {}", id))
94    }
95
96    /// Get a HIR node by ID, returning None if not found
97    pub fn opt_bb(self, id: BlockId) -> Option<BasicBlock<'tcx>> {
98        self.cc
99            .block_map
100            .read()
101            .unwrap()
102            .get(&id)
103            .map(|parented| parented.block.clone())
104    }
105
106    /// Get a HIR node by ID, panicking if not found
107    pub fn bb(self, id: BlockId) -> BasicBlock<'tcx> {
108        self.opt_bb(id)
109            .unwrap_or_else(|| panic!("basic block not found: {}", id))
110    }
111
112    /// Get the parent of a HIR node
113    pub fn parent_node(self, id: HirId) -> Option<HirId> {
114        self.cc
115            .hir_map
116            .read()
117            .unwrap()
118            .get(&id)
119            .and_then(|parented| parented.parent())
120    }
121
122    /// Get an existing scope or None if it doesn't exist
123    pub fn opt_get_scope(self, owner: HirId) -> Option<&'tcx Scope<'tcx>> {
124        self.cc.scope_map.read().unwrap().get(&owner).copied()
125    }
126
127    pub fn opt_get_symbol(self, owner: SymId) -> Option<&'tcx Symbol> {
128        self.cc.symbol_map.read().unwrap().get(&owner).copied()
129    }
130
131    /// Get an existing scope or None if it doesn't exist
132    pub fn get_scope(self, owner: HirId) -> &'tcx Scope<'tcx> {
133        self.cc
134            .scope_map
135            .read()
136            .unwrap()
137            .get(&owner)
138            .copied()
139            .unwrap()
140    }
141
142    /// Find an existing scope or create a new one
143    pub fn alloc_scope(self, owner: HirId) -> &'tcx Scope<'tcx> {
144        self.cc.alloc_scope(owner)
145    }
146
147    /// Add a HIR node to the map
148    pub fn insert_hir_node(self, id: HirId, node: HirNode<'tcx>) {
149        let parented = ParentedNode::new(node);
150        self.cc.hir_map.write().unwrap().insert(id, parented);
151    }
152
153    /// Get all child nodes of a given parent
154    pub fn children_of(self, parent: HirId) -> Vec<(HirId, HirNode<'tcx>)> {
155        let Some(parent_node) = self.opt_hir_node(parent) else {
156            return Vec::new();
157        };
158        parent_node
159            .children()
160            .iter()
161            .map(|&child_id| (child_id, self.hir_node(child_id)))
162            .collect()
163    }
164
165    /// Walk up the parent chain to find an ancestor of a specific type
166    pub fn find_ancestor<F>(self, mut current: HirId, predicate: F) -> Option<HirId>
167    where
168        F: Fn(&HirNode<'tcx>) -> bool,
169    {
170        while let Some(parent_id) = self.parent_node(current) {
171            if let Some(parent_node) = self.opt_hir_node(parent_id) {
172                if predicate(&parent_node) {
173                    return Some(parent_id);
174                }
175                current = parent_id;
176            } else {
177                break;
178            }
179        }
180        None
181    }
182
183    pub fn add_unresolved_symbol(&self, symbol: &'tcx Symbol) {
184        self.cc.unresolve_symbols.write().unwrap().push(symbol);
185    }
186
187    pub fn insert_block(&self, id: BlockId, block: BasicBlock<'tcx>, parent: BlockId) {
188        let parented = ParentedBlock::new(parent, block.clone());
189        self.cc.block_map.write().unwrap().insert(id, parented);
190
191        // Register the block in the index maps
192        let block_kind = block.kind();
193        let block_name = block
194            .base()
195            .and_then(|base| base.opt_get_name())
196            .map(|s| s.to_string());
197
198        self.cc
199            .block_indexes
200            .write()
201            .unwrap()
202            .insert_block(id, block_name, block_kind, self.index);
203    }
204}
205
206impl<'tcx> Deref for CompileUnit<'tcx> {
207    type Target = CompileCtxt<'tcx>;
208
209    #[inline(always)]
210    fn deref(&self) -> &Self::Target {
211        self.cc
212    }
213}
214
215#[derive(Debug, Clone)]
216pub struct ParentedNode<'tcx> {
217    pub node: HirNode<'tcx>,
218}
219
220impl<'tcx> ParentedNode<'tcx> {
221    pub fn new(node: HirNode<'tcx>) -> Self {
222        Self { node }
223    }
224
225    /// Get a reference to the wrapped node
226    pub fn node(&self) -> &HirNode<'tcx> {
227        &self.node
228    }
229
230    /// Get the parent ID
231    pub fn parent(&self) -> Option<HirId> {
232        self.node.parent()
233    }
234}
235
236#[derive(Debug, Clone)]
237pub struct ParentedBlock<'tcx> {
238    pub parent: BlockId,
239    pub block: BasicBlock<'tcx>,
240}
241
242impl<'tcx> ParentedBlock<'tcx> {
243    pub fn new(parent: BlockId, block: BasicBlock<'tcx>) -> Self {
244        Self { parent, block }
245    }
246
247    /// Get a reference to the wrapped node
248    pub fn block(&self) -> &BasicBlock<'tcx> {
249        &self.block
250    }
251
252    /// Get the parent ID
253    pub fn parent(&self) -> BlockId {
254        self.parent
255    }
256}
257
258/// BlockIndexMaps provides efficient lookup of blocks by various indices.
259///
260/// Best practices for usage:
261/// - block_name_index: Use when you want to find blocks by name (multiple blocks can share the same name)
262/// - unit_index_index: Use when you want all blocks in a specific unit
263/// - block_kind_index: Use when you want all blocks of a specific kind (e.g., all functions)
264/// - block_id_index: Use for O(1) lookup of block metadata by BlockId
265///
266/// Important: The "name" field is optional since Root blocks and some other blocks may not have names.
267///
268/// Rationale for data structure choices:
269/// - BTreeMap is used for name and unit indexes for better iteration and range queries
270/// - HashMap is used for kind index since BlockKind doesn't implement Ord
271/// - HashMap is used for block_id_index (direct lookup by BlockId) for O(1) access
272/// - Vec is used for values to handle multiple blocks with the same index (same name/kind/unit)
273#[derive(Debug, Default, Clone)]
274pub struct BlockIndexMaps {
275    /// block_name -> Vec<(unit_index, block_kind, block_id)>
276    /// Multiple blocks can share the same name across units or within the same unit
277    pub block_name_index: BTreeMap<String, Vec<(usize, BlockKind, BlockId)>>,
278
279    /// unit_index -> Vec<(block_name, block_kind, block_id)>
280    /// Allows retrieval of all blocks in a specific compilation unit
281    pub unit_index_map: BTreeMap<usize, Vec<(Option<String>, BlockKind, BlockId)>>,
282
283    /// block_kind -> Vec<(unit_index, block_name, block_id)>
284    /// Allows retrieval of all blocks of a specific kind across all units
285    pub block_kind_index: HashMap<BlockKind, Vec<(usize, Option<String>, BlockId)>>,
286
287    /// block_id -> (unit_index, block_name, block_kind)
288    /// Direct O(1) lookup of block metadata by ID
289    pub block_id_index: HashMap<BlockId, (usize, Option<String>, BlockKind)>,
290}
291
292impl BlockIndexMaps {
293    /// Create a new empty BlockIndexMaps
294    pub fn new() -> Self {
295        Self::default()
296    }
297
298    /// Register a new block in all indexes
299    ///
300    /// # Arguments
301    /// - `block_id`: The unique block identifier
302    /// - `block_name`: Optional name of the block (None for unnamed blocks)
303    /// - `block_kind`: The kind of block (Func, Class, Stmt, etc.)
304    /// - `unit_index`: The compilation unit index this block belongs to
305    pub fn insert_block(
306        &mut self,
307        block_id: BlockId,
308        block_name: Option<String>,
309        block_kind: BlockKind,
310        unit_index: usize,
311    ) {
312        // Insert into block_id_index for O(1) lookups
313        self.block_id_index
314            .insert(block_id, (unit_index, block_name.clone(), block_kind));
315
316        // Insert into block_name_index (if name exists)
317        if let Some(ref name) = block_name {
318            self.block_name_index
319                .entry(name.clone())
320                .or_default()
321                .push((unit_index, block_kind, block_id));
322        }
323
324        // Insert into unit_index_map
325        self.unit_index_map.entry(unit_index).or_default().push((
326            block_name.clone(),
327            block_kind,
328            block_id,
329        ));
330
331        // Insert into block_kind_index
332        self.block_kind_index
333            .entry(block_kind)
334            .or_default()
335            .push((unit_index, block_name, block_id));
336    }
337
338    /// Find all blocks with a given name (may return multiple blocks)
339    ///
340    /// Returns a vector of (unit_index, block_kind, block_id) tuples
341    pub fn find_by_name(&self, name: &str) -> Vec<(usize, BlockKind, BlockId)> {
342        self.block_name_index.get(name).cloned().unwrap_or_default()
343    }
344
345    /// Find all blocks in a specific unit
346    ///
347    /// Returns a vector of (block_name, block_kind, block_id) tuples
348    pub fn find_by_unit(&self, unit_index: usize) -> Vec<(Option<String>, BlockKind, BlockId)> {
349        self.unit_index_map
350            .get(&unit_index)
351            .cloned()
352            .unwrap_or_default()
353    }
354
355    /// Find all blocks of a specific kind across all units
356    ///
357    /// Returns a vector of (unit_index, block_name, block_id) tuples
358    pub fn find_by_kind(&self, block_kind: BlockKind) -> Vec<(usize, Option<String>, BlockId)> {
359        self.block_kind_index
360            .get(&block_kind)
361            .cloned()
362            .unwrap_or_default()
363    }
364
365    /// Find all blocks of a specific kind in a specific unit
366    ///
367    /// Returns a vector of block_ids
368    pub fn find_by_kind_and_unit(&self, block_kind: BlockKind, unit_index: usize) -> Vec<BlockId> {
369        let by_kind = self.find_by_kind(block_kind);
370        by_kind
371            .into_iter()
372            .filter(|(unit, _, _)| *unit == unit_index)
373            .map(|(_, _, block_id)| block_id)
374            .collect()
375    }
376
377    /// Look up block metadata by BlockId for O(1) access
378    ///
379    /// Returns (unit_index, block_name, block_kind) if found
380    pub fn get_block_info(&self, block_id: BlockId) -> Option<(usize, Option<String>, BlockKind)> {
381        self.block_id_index.get(&block_id).cloned()
382    }
383
384    /// Get total number of blocks indexed
385    pub fn block_count(&self) -> usize {
386        self.block_id_index.len()
387    }
388
389    /// Get the number of unique block names
390    pub fn unique_names_count(&self) -> usize {
391        self.block_name_index.len()
392    }
393
394    /// Check if a block with the given ID exists
395    pub fn contains_block(&self, block_id: BlockId) -> bool {
396        self.block_id_index.contains_key(&block_id)
397    }
398
399    /// Clear all indexes
400    pub fn clear(&mut self) {
401        self.block_name_index.clear();
402        self.unit_index_map.clear();
403        self.block_kind_index.clear();
404        self.block_id_index.clear();
405    }
406}
407
/// Parse timing for a single file.
#[derive(Debug, Clone, Default)]
pub struct FileParseMetric {
    /// Path of the parsed file (`"<memory>"` is used for files without a path).
    pub path: String,
    /// Time spent parsing the file, in seconds.
    pub seconds: f64,
}
413
414#[derive(Debug, Clone, Default)]
415pub struct BuildMetrics {
416    pub file_read_seconds: f64,
417    pub parse_wall_seconds: f64,
418    pub parse_cpu_seconds: f64,
419    pub parse_avg_seconds: f64,
420    pub parse_file_count: usize,
421    pub parse_slowest: Vec<FileParseMetric>,
422}
423
424#[derive(Debug, Default)]
425pub struct CompileCtxt<'tcx> {
426    pub arena: Arena<'tcx>,
427    pub interner: InternPool,
428    pub files: Vec<File>,
429    pub trees: Vec<Option<Tree>>,
430    pub hir_next_id: AtomicU32,
431    pub hir_start_ids: RwLock<Vec<Option<HirId>>>,
432
433    // HirId -> ParentedNode
434    pub hir_map: RwLock<HashMap<HirId, ParentedNode<'tcx>>>,
435    // HirId -> &Scope (scopes owned by this HIR node)
436    pub scope_map: RwLock<HashMap<HirId, &'tcx Scope<'tcx>>>,
437    // SymId -> &Symbol
438    pub symbol_map: RwLock<HashMap<SymId, &'tcx Symbol>>,
439
440    pub block_arena: BlockArena<'tcx>,
441    pub block_next_id: AtomicU32,
442    // BlockId -> ParentedBlock
443    pub block_map: RwLock<HashMap<BlockId, ParentedBlock<'tcx>>>,
444    pub unresolve_symbols: RwLock<Vec<&'tcx Symbol>>,
445    pub related_map: BlockRelationMap,
446
447    /// Index maps for efficient block lookups by name, kind, unit, and id
448    pub block_indexes: RwLock<BlockIndexMaps>,
449
450    /// Metrics collected while building the compilation context
451    pub build_metrics: BuildMetrics,
452}
453
454impl<'tcx> CompileCtxt<'tcx> {
455    /// Create a new CompileCtxt from source code
456    pub fn from_sources<L: LanguageTrait>(sources: &[Vec<u8>]) -> Self {
457        let files: Vec<File> = sources
458            .iter()
459            .map(|src| File::new_source(src.clone()))
460            .collect();
461        let (trees, mut metrics) = Self::parse_files_with_metrics::<L>(&files);
462        metrics.file_read_seconds = 0.0;
463        let count = files.len();
464        Self {
465            arena: Arena::default(),
466            interner: InternPool::default(),
467            files,
468            trees,
469            hir_next_id: AtomicU32::new(0),
470            hir_start_ids: RwLock::new(vec![None; count]),
471            hir_map: RwLock::new(HashMap::new()),
472            scope_map: RwLock::new(HashMap::new()),
473            symbol_map: RwLock::new(HashMap::new()),
474            block_arena: BlockArena::default(),
475            block_next_id: AtomicU32::new(0),
476            block_map: RwLock::new(HashMap::new()),
477            unresolve_symbols: RwLock::new(Vec::new()),
478            related_map: BlockRelationMap::default(),
479            block_indexes: RwLock::new(BlockIndexMaps::new()),
480            build_metrics: metrics,
481        }
482    }
483
484    /// Create a new CompileCtxt from files
485    pub fn from_files<L: LanguageTrait>(paths: &[String]) -> std::io::Result<Self> {
486        let read_start = Instant::now();
487
488        let mut files_with_index: Vec<(usize, File)> = paths
489            .par_iter()
490            .enumerate()
491            .map(|(index, path)| -> std::io::Result<(usize, File)> {
492                let file = File::new_file(path.clone())?;
493                Ok((index, file))
494            })
495            .collect::<std::io::Result<Vec<_>>>()?;
496
497        files_with_index.sort_by_key(|(index, _)| *index);
498        let files: Vec<File> = files_with_index.into_iter().map(|(_, file)| file).collect();
499
500        let file_read_seconds = read_start.elapsed().as_secs_f64();
501
502        let (trees, mut metrics) = Self::parse_files_with_metrics::<L>(&files);
503        metrics.file_read_seconds = file_read_seconds;
504
505        let count = files.len();
506        Ok(Self {
507            arena: Arena::default(),
508            interner: InternPool::default(),
509            files,
510            trees,
511            hir_next_id: AtomicU32::new(0),
512            hir_start_ids: RwLock::new(vec![None; count]),
513            hir_map: RwLock::new(HashMap::new()),
514            scope_map: RwLock::new(HashMap::new()),
515            symbol_map: RwLock::new(HashMap::new()),
516            block_arena: BlockArena::default(),
517            block_next_id: AtomicU32::new(0),
518            block_map: RwLock::new(HashMap::new()),
519            unresolve_symbols: RwLock::new(Vec::new()),
520            related_map: BlockRelationMap::default(),
521            block_indexes: RwLock::new(BlockIndexMaps::new()),
522            build_metrics: metrics,
523        })
524    }
525
526    fn parse_files_with_metrics<L: LanguageTrait>(
527        files: &[File],
528    ) -> (Vec<Option<Tree>>, BuildMetrics) {
529        struct ParseRecord {
530            tree: Option<Tree>,
531            elapsed: f64,
532            path: Option<String>,
533        }
534
535        let parse_wall_start = Instant::now();
536        let records: Vec<ParseRecord> = files
537            .par_iter()
538            .map(|file| {
539                let path = file.path().map(|p| p.to_string());
540                let per_file_start = Instant::now();
541                let tree = L::parse(file.content());
542                let elapsed = per_file_start.elapsed().as_secs_f64();
543                ParseRecord {
544                    tree,
545                    elapsed,
546                    path,
547                }
548            })
549            .collect();
550        let parse_wall_seconds = parse_wall_start.elapsed().as_secs_f64();
551
552        let mut trees = Vec::with_capacity(records.len());
553        let parse_file_count = records.len();
554        let mut parse_cpu_seconds = 0.0;
555        let mut slowest = Vec::with_capacity(records.len());
556
557        for record in records {
558            parse_cpu_seconds += record.elapsed;
559            trees.push(record.tree);
560            let path = record.path.unwrap_or_else(|| "<memory>".to_string());
561            slowest.push(FileParseMetric {
562                path,
563                seconds: record.elapsed,
564            });
565        }
566
567        slowest.sort_by(|a, b| {
568            b.seconds
569                .partial_cmp(&a.seconds)
570                .unwrap_or(CmpOrdering::Equal)
571        });
572        slowest.truncate(5);
573
574        let metrics = BuildMetrics {
575            file_read_seconds: 0.0,
576            parse_wall_seconds,
577            parse_cpu_seconds,
578            parse_avg_seconds: if parse_file_count == 0 {
579                0.0
580            } else {
581                parse_cpu_seconds / parse_file_count as f64
582            },
583            parse_file_count,
584            parse_slowest: slowest,
585        };
586
587        (trees, metrics)
588    }
589
590    /// Create a context that references this CompileCtxt for a specific file index
591    pub fn compile_unit(&'tcx self, index: usize) -> CompileUnit<'tcx> {
592        CompileUnit { cc: self, index }
593    }
594
595    pub fn create_globals(&'tcx self) -> &'tcx Scope<'tcx> {
596        self.alloc_scope(HirId(0))
597    }
598
599    pub fn get_scope(&'tcx self, owner: HirId) -> &'tcx Scope<'tcx> {
600        self.scope_map.read().unwrap().get(&owner).copied().unwrap()
601    }
602
603    pub fn opt_get_symbol(&'tcx self, owner: SymId) -> Option<&'tcx Symbol> {
604        self.symbol_map.read().unwrap().get(&owner).cloned()
605    }
606
607    /// Find the primary symbol associated with a block ID
608    pub fn find_symbol_by_block_id(&'tcx self, block_id: BlockId) -> Option<&'tcx Symbol> {
609        self.symbol_map
610            .read()
611            .unwrap()
612            .values()
613            .find(|symbol| symbol.block_id() == Some(block_id))
614            .copied()
615    }
616
617    pub fn alloc_scope(&'tcx self, owner: HirId) -> &'tcx Scope<'tcx> {
618        if let Some(existing) = self.scope_map.read().unwrap().get(&owner) {
619            return existing;
620        }
621
622        let scope = self.arena.alloc(Scope::new(owner));
623        self.scope_map.write().unwrap().insert(owner, scope);
624        scope
625    }
626
627    pub fn reserve_hir_id(&self) -> HirId {
628        let id = self.hir_next_id.fetch_add(1, Ordering::Relaxed);
629        HirId(id)
630    }
631
632    pub fn reserve_block_id(&self) -> BlockId {
633        let id = self.block_next_id.fetch_add(1, Ordering::Relaxed);
634        BlockId::new(id)
635    }
636
637    pub fn current_hir_id(&self) -> HirId {
638        HirId(self.hir_next_id.load(Ordering::Relaxed))
639    }
640
641    pub fn set_file_start(&self, index: usize, start: HirId) {
642        let mut starts = self.hir_start_ids.write().unwrap();
643        if index < starts.len() && starts[index].is_none() {
644            starts[index] = Some(start);
645        }
646    }
647
648    pub fn file_start(&self, index: usize) -> Option<HirId> {
649        self.hir_start_ids
650            .read()
651            .unwrap()
652            .get(index)
653            .and_then(|opt| *opt)
654    }
655
656    pub fn file_path(&self, index: usize) -> Option<&str> {
657        self.files.get(index).and_then(|file| file.path())
658    }
659
660    /// Get all file paths from the compilation context
661    pub fn get_files(&self) -> Vec<String> {
662        self.files
663            .iter()
664            .filter_map(|f| f.path().map(|p| p.to_string()))
665            .collect()
666    }
667
668    /// Clear all maps (useful for testing)
669    #[cfg(test)]
670    pub fn clear(&self) {
671        self.hir_map.write().unwrap().clear();
672        self.scope_map.write().unwrap().clear();
673    }
674}