llmcc_core/
ir.rs

1use parking_lot::RwLock;
2use smallvec::SmallVec;
3use std::sync::atomic::AtomicUsize;
4use std::sync::atomic::{AtomicPtr, Ordering};
5use strum_macros::{Display, EnumIter, EnumString, FromRepr};
6
7use crate::context::CompileUnit;
8use crate::declare_arena;
9use crate::scope::Scope;
10use crate::symbol::Symbol;
11
12// Declare the arena with all HIR types, plus `Scope` and `Symbol`.
13// NOTE(review): described as a DashMap-based arena for concurrent O(1) lookup; the macro body is not visible here, so confirm against `declare_arena!`.
14declare_arena!(Arena {
15    hir_node: HirNode<'a>,
16    hir_file: HirFile,
17    hir_text: HirText<'a>,
18    hir_internal: HirInternal,
19    hir_scope: HirScope<'a>,
20    hir_ident: HirIdent<'a>,
21    scope: Scope<'a>,
22    symbol: Symbol,
23});
24
25#[derive(
26    Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter, EnumString, FromRepr, Display, Default,
27)]
28#[strum(serialize_all = "snake_case")]
29pub enum HirKind {
30    #[default]
31    Undefined,
32    Error,
33    File,
34    Scope,
35    Text,
36    Internal,
37    Comment,
38    Identifier,
39}
40
41#[derive(Debug, Clone, Copy, Default)]
42pub enum HirNode<'hir> {
43    #[default]
44    Undefined,
45    Root(&'hir HirRoot),
46    Text(&'hir HirText<'hir>),
47    Internal(&'hir HirInternal),
48    Scope(&'hir HirScope<'hir>),
49    File(&'hir HirFile),
50    Ident(&'hir HirIdent<'hir>),
51}
52
53impl<'hir> HirNode<'hir> {
54    pub fn format(&self, _unit: CompileUnit<'hir>) -> String {
55        let id = self.id();
56        let kind = self.kind();
57        format!("{kind}:{id}")
58    }
59
60    /// Get the base information for any HIR node
61    pub fn base(&self) -> Option<&HirBase> {
62        match self {
63            HirNode::Undefined => None,
64            HirNode::Root(node) => Some(&node.base),
65            HirNode::Text(node) => Some(&node.base),
66            HirNode::Internal(node) => Some(&node.base),
67            HirNode::Scope(node) => Some(&node.base),
68            HirNode::File(node) => Some(&node.base),
69            HirNode::Ident(node) => Some(&node.base),
70        }
71    }
72
73    /// Get the kind of this HIR node
74    pub fn kind(&self) -> HirKind {
75        self.base().map_or(HirKind::Undefined, |base| base.kind)
76    }
77
78    /// Check if this node is of a specific kind
79    pub fn is_kind(&self, kind: HirKind) -> bool {
80        self.kind() == kind
81    }
82
83    /// Get the field ID of this node (used in structured tree navigation)
84    ///
85    /// For example, in a function declaration, the name field might have field_id=1
86    /// and the body field_id=2. Panics on Undefined node.
87    pub fn field_id(&self) -> u16 {
88        self.base().unwrap().field_id
89    }
90
91    /// Get child IDs of this node
92    pub fn child_ids(&self) -> &[HirId] {
93        self.base().map_or(&[], |base| &base.children)
94    }
95
96    /// Get children nodes of this node - uses SmallVec to avoid heap allocation for small child counts
97    pub fn children(&self, unit: &CompileUnit<'hir>) -> SmallVec<[HirNode<'hir>; 8]> {
98        self.base().map_or(SmallVec::new(), |base| {
99            base.children.iter().map(|id| unit.hir_node(*id)).collect()
100        })
101    }
102
103    /// Get tree-sitter kind ID for this node (distinct from HirKind)
104    pub fn kind_id(&self) -> u16 {
105        self.base().unwrap().kind_id
106    }
107
108    /// Get unique HirId for this node within its compilation unit. Panics on Undefined.
109    pub fn id(&self) -> HirId {
110        self.base().unwrap().id
111    }
112
113    /// Get byte offset where this node starts in source. Panics on Undefined.
114    pub fn start_byte(&self) -> usize {
115        self.base().unwrap().start_byte
116    }
117
118    /// Get byte offset where this node ends (exclusive). Panics on Undefined.
119    pub fn end_byte(&self) -> usize {
120        self.base().unwrap().end_byte
121    }
122
123    /// Get count of direct children
124    pub fn child_count(&self) -> usize {
125        self.child_ids().len()
126    }
127
128    /// Get parent HirId if it exists
129    pub fn parent(&self) -> Option<HirId> {
130        self.base().and_then(|base| base.parent)
131    }
132
133    /// Find optional child with matching field ID
134    pub fn child_by_field(&self, unit: &CompileUnit<'hir>, field_id: u16) -> Option<HirNode<'hir>> {
135        self.base().unwrap().child_by_field(unit, field_id)
136    }
137
138    pub fn child_by_kind(&self, unit: &CompileUnit<'hir>, kind_id: u16) -> Option<HirNode<'hir>> {
139        self.children(unit)
140            .into_iter()
141            .find(|&child| child.kind_id() == kind_id)
142    }
143
144    /// Returns the symbol referenced by the identifier within a specific child field.
145    pub fn ident_symbol_by_field(
146        &self,
147        unit: &CompileUnit<'hir>,
148        field_id: u16,
149    ) -> Option<&'hir Symbol> {
150        let child = self.child_by_field(unit, field_id)?;
151        let ident = child.find_ident(unit)?;
152        ident.opt_symbol()
153    }
154
155    /// Returns the ident symbol if any.
156    /// Prefers finding an identifier that has a symbol set (useful for scoped paths
157    /// where the target identifier has the resolved symbol).
158    pub fn ident_symbol(&self, unit: &CompileUnit<'hir>) -> Option<&'hir Symbol> {
159        // First try to find an identifier that already has a symbol set
160        if let Some(ident) = self.find_symboled_ident(unit) {
161            return ident.opt_symbol();
162        }
163        // Fall back to finding any identifier
164        let ident = self.find_ident(unit)?;
165        ident.opt_symbol()
166    }
167
168    /// Recursively search down the tree for a child with matching field ID.
169    /// Keeps going deeper until it finds a match or reaches a leaf node.
170    pub fn child_by_field_recursive(
171        &self,
172        unit: &CompileUnit<'hir>,
173        field_id: u16,
174    ) -> Option<HirNode<'hir>> {
175        // First check immediate children
176        if let Some(direct_child) = self.child_by_field(unit, field_id) {
177            return Some(direct_child);
178        }
179
180        // If no direct child with this field, recurse into all children
181        for child in self.children(unit) {
182            if let Some(recursive_match) = child.child_by_field_recursive(unit, field_id) {
183                return Some(recursive_match);
184            }
185        }
186
187        None
188    }
189
190    /// Find the identifier for the first child node that is an identifier or interior node.
191    /// Recursively searches for identifiers within interior nodes.
192    pub fn find_ident(&self, unit: &CompileUnit<'hir>) -> Option<&'hir HirIdent<'hir>> {
193        if self.is_kind(HirKind::Identifier) {
194            return self.as_ident();
195        }
196        for child in self.children(unit) {
197            if child.is_kind(HirKind::Identifier) {
198                return child.as_ident();
199            }
200            if child.is_kind(HirKind::Internal)
201                && let Some(id) = child.find_ident(unit)
202            {
203                return Some(id);
204            }
205        }
206        None
207    }
208
209    /// Find the deepest/rightmost identifier that has a symbol set.
210    /// This is useful for call expressions where we want the resolved callee,
211    /// not just the first identifier in a scoped path like `crate::module::func`.
212    pub fn find_symboled_ident(&self, unit: &CompileUnit<'hir>) -> Option<&'hir HirIdent<'hir>> {
213        let mut result: Option<&'hir HirIdent<'hir>> = None;
214        self.find_symboled_ident_recursive(unit, &mut result);
215        result
216    }
217
218    fn find_symboled_ident_recursive(
219        &self,
220        unit: &CompileUnit<'hir>,
221        result: &mut Option<&'hir HirIdent<'hir>>,
222    ) {
223        if self.is_kind(HirKind::Identifier) {
224            if let Some(ident) = self.as_ident()
225                && ident.opt_symbol().is_some()
226            {
227                *result = Some(ident);
228            }
229            return;
230        }
231        for child in self.children(unit) {
232            if child.is_kind(HirKind::Identifier) {
233                if let Some(ident) = child.as_ident()
234                    && ident.opt_symbol().is_some()
235                {
236                    *result = Some(ident);
237                }
238            } else if child.is_kind(HirKind::Internal) {
239                child.find_symboled_ident_recursive(unit, result);
240            }
241        }
242    }
243
244    /// Find the first text node's content in children (for keywords like "self").
245    pub fn find_text(&self, unit: &CompileUnit<'hir>) -> Option<&str> {
246        for child in self.children(unit) {
247            if child.is_kind(HirKind::Text)
248                && let Some(text) = child.as_text()
249            {
250                return Some(text.text());
251            }
252        }
253        None
254    }
255
256    /// Find identifier for the first child with a matching field ID.
257    /// For scoped types like `crate::module::Type`, returns `Type` (the direct type_identifier child).
258    /// For generic types like `Repository<User>`, recurses into the type child to get `Repository`.
259    pub fn ident_by_field(
260        &self,
261        unit: &CompileUnit<'hir>,
262        field_id: u16,
263    ) -> Option<&'hir HirIdent<'hir>> {
264        debug_assert!(!self.is_kind(HirKind::Identifier));
265        for child in self.children(unit) {
266            if child.field_id() == field_id {
267                return Self::find_type_ident(&child, unit);
268            }
269        }
270        None
271    }
272
273    /// Find the type identifier from a node, handling scoped and generic types correctly.
274    /// Looks for direct identifier children first, then recurses into the first internal child.
275    fn find_type_ident(
276        node: &HirNode<'hir>,
277        unit: &CompileUnit<'hir>,
278    ) -> Option<&'hir HirIdent<'hir>> {
279        if node.is_kind(HirKind::Identifier) {
280            return node.as_ident();
281        }
282        // First pass: look for direct identifier children
283        for child in node.children(unit) {
284            if child.is_kind(HirKind::Identifier) {
285                return child.as_ident();
286            }
287        }
288        // Second pass: recurse into the FIRST internal child only (e.g., generic_type → type child)
289        // This avoids recursing into type_arguments which would give wrong results
290        for child in node.children(unit) {
291            if child.is_kind(HirKind::Internal) {
292                return Self::find_type_ident(&child, unit);
293            }
294        }
295        None
296    }
297
298    #[inline]
299    pub fn as_root(&self) -> Option<&'hir HirRoot> {
300        match self {
301            HirNode::Root(r) => Some(r),
302            _ => None,
303        }
304    }
305
306    #[inline]
307    pub fn as_text(&self) -> Option<&'hir HirText<'hir>> {
308        match self {
309            HirNode::Text(r) => Some(r),
310            _ => None,
311        }
312    }
313
314    #[inline]
315    pub fn as_scope(&self) -> Option<&'hir HirScope<'hir>> {
316        match self {
317            HirNode::Scope(r) => Some(r),
318            _ => None,
319        }
320    }
321
322    /// Get scope and child identifier by field - convenience method combining as_scope() and ident_by_field()
323    #[inline]
324    pub fn scope_and_ident_by_field(
325        &self,
326        unit: &CompileUnit<'hir>,
327        field_id: u16,
328    ) -> Option<(&'hir HirScope<'hir>, &'hir HirIdent<'hir>)> {
329        let scope = self.as_scope()?;
330        let ident = self.ident_by_field(unit, field_id)?;
331        Some((scope, ident))
332    }
333
334    /// Collect identifiers by field kind matching a specific field ID
335    pub fn collect_by_field_kind(
336        &self,
337        unit: &CompileUnit<'hir>,
338        field_id: u16,
339    ) -> Vec<&'hir HirIdent<'hir>> {
340        let mut idents = Vec::new();
341        self.collect_by_field_kind_impl(unit, field_id, &mut idents);
342        idents
343    }
344
345    /// Helper for recursively collecting identifiers by field kind
346    fn collect_by_field_kind_impl(
347        &self,
348        unit: &CompileUnit<'hir>,
349        field_id: u16,
350        idents: &mut Vec<&'hir HirIdent<'hir>>,
351    ) {
352        // If this node has matching field ID and is an identifier, collect it
353        if self.field_id() == field_id
354            && let Some(ident) = self.as_ident()
355        {
356            idents.push(ident);
357        }
358
359        // Recursively collect from all children
360        for child in self.children(unit) {
361            child.collect_by_field_kind_impl(unit, field_id, idents);
362        }
363    }
364
365    #[inline]
366    pub fn as_file(&self) -> Option<&'hir HirFile> {
367        match self {
368            HirNode::File(r) => Some(r),
369            _ => None,
370        }
371    }
372
373    #[inline]
374    pub fn as_internal(&self) -> Option<&'hir HirInternal> {
375        match self {
376            HirNode::Internal(r) => Some(r),
377            _ => None,
378        }
379    }
380
381    #[inline]
382    pub fn as_ident(&self) -> Option<&'hir HirIdent<'hir>> {
383        match self {
384            HirNode::Ident(r) => Some(r),
385            _ => None,
386        }
387    }
388
389    /// Recursively collect all identifier nodes under this node
390    pub fn collect_idents(&self, unit: &CompileUnit<'hir>) -> Vec<&'hir HirIdent<'hir>> {
391        let mut idents = Vec::new();
392        self.collect_idents_impl(unit, &mut idents);
393        idents
394    }
395
396    /// Helper function for recursively collecting identifier nodes
397    fn collect_idents_impl(
398        &self,
399        unit: &CompileUnit<'hir>,
400        idents: &mut Vec<&'hir HirIdent<'hir>>,
401    ) {
402        // If this node is an identifier, collect it
403        if let Some(ident) = self.as_ident() {
404            idents.push(ident);
405        }
406
407        // Recursively collect from all children
408        for child in self.children(unit) {
409            child.collect_idents_impl(unit, idents);
410        }
411    }
412
413    /// Check if node is trivia (whitespace, comment, etc.)
414    pub fn is_trivia(&self) -> bool {
415        matches!(self.kind(), HirKind::Text | HirKind::Comment)
416    }
417
418    /// Set the block ID on the symbol associated with this node.
419    /// Works for both HirScope (gets symbol from scope) and HirIdent (has direct symbol).
420    /// Does nothing if no symbol is associated or if the symbol is a primitive (shared globally).
421    pub fn set_block_id(&self, block_id: crate::block::BlockId) {
422        use crate::symbol::SymKind;
423        // Try HirScope first
424        if let Some(scope) = self.as_scope() {
425            // First try scope's symbol
426            if let Some(symbol) = scope.opt_symbol() {
427                // Don't set block_id on primitives - they are shared globally
428                if symbol.kind() != SymKind::Primitive {
429                    symbol.set_block_id(block_id);
430                }
431                return;
432            }
433            // If no scope symbol, try the scope's ident (for type aliases, etc.)
434            if let Some(ident) = scope.opt_ident()
435                && let Some(symbol) = ident.opt_symbol()
436            {
437                if symbol.kind() != SymKind::Primitive {
438                    symbol.set_block_id(block_id);
439                }
440                return;
441            }
442        }
443        // Try HirIdent
444        if let Some(ident) = self.as_ident()
445            && let Some(symbol) = ident.opt_symbol()
446        {
447            // Don't set block_id on primitives - they are shared globally
448            if symbol.kind() != SymKind::Primitive {
449                symbol.set_block_id(block_id);
450            }
451        }
452    }
453
454    /// Get the symbol associated with this node if any.
455    /// Works for both HirScope and HirIdent nodes.
456    pub fn opt_symbol(&self) -> Option<&'hir Symbol> {
457        if let Some(scope) = self.as_scope() {
458            return scope.opt_symbol();
459        }
460        if let Some(ident) = self.as_ident() {
461            return ident.opt_symbol();
462        }
463        None
464    }
465}
466
/// Identifier of a HIR node, used for parent/child links between nodes.
///
/// Fresh IDs come from [`HirId::new`], which hands out consecutive values from a single
/// global counter. The derived default is `HirId(0)`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct HirId(pub usize);

/// Global counter backing [`HirId::new`]; holds the value the next allocation will receive.
static HIR_ID_COUNTER: AtomicUsize = AtomicUsize::new(0);

impl HirId {
    /// Allocate a fresh ID by atomically taking the current counter value and bumping it.
    pub fn new() -> Self {
        Self(HIR_ID_COUNTER.fetch_add(1, Ordering::Relaxed))
    }

    /// Peek at the ID the next call to [`HirId::new`] would return, without allocating it
    /// (useful for diagnostics).
    pub fn next() -> Self {
        let upcoming = HIR_ID_COUNTER.load(Ordering::Relaxed);
        Self(upcoming)
    }
}

impl std::fmt::Display for HirId {
    /// Writes the raw numeric value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        f.write_fmt(format_args!("{raw}"))
    }
}
493
494/// Common metadata shared by all HIR node types. Provides identity, parent link,
495/// tree-sitter connection, and child references for tree structure.
496#[derive(Debug, Clone, Default)]
497pub struct HirBase {
498    pub id: HirId,
499    pub parent: Option<HirId>,
500    pub kind_id: u16,
501    pub start_byte: usize,
502    pub end_byte: usize,
503    pub kind: HirKind,
504    pub field_id: u16,
505    pub children: SmallVec<[HirId; 4]>,
506}
507
508impl HirBase {
509    /// Find child with matching field ID (linear search, O(n))
510    pub fn child_by_field<'hir>(
511        &self,
512        unit: &CompileUnit<'hir>,
513        field_id: u16,
514    ) -> Option<HirNode<'hir>> {
515        self.children
516            .iter()
517            .map(|id| unit.hir_node(*id))
518            .find(|child| child.field_id() == field_id)
519    }
520}
521
522#[derive(Debug, Clone)]
523/// Root node as topmost parent for all nodes in compilation unit's HIR.
524pub struct HirRoot {
525    pub base: HirBase,
526    pub file_name: Option<String>,
527}
528
529impl HirRoot {
530    /// Create new root node with optional file name
531    pub fn new(base: HirBase, file_name: Option<String>) -> Self {
532        Self { base, file_name }
533    }
534}
535
536#[derive(Debug, Clone)]
537/// Leaf node containing textual content (strings, comments, etc.)
538pub struct HirText<'hir> {
539    pub base: HirBase,
540    pub text: &'hir str,
541}
542
543impl<'hir> HirText<'hir> {
544    /// Create new text node with given content
545    pub fn new(base: HirBase, text: &'hir str) -> Self {
546        Self { base, text }
547    }
548
549    pub fn text(&self) -> &str {
550        self.text
551    }
552}
553
554#[derive(Debug, Clone)]
555/// Synthetic node created during parsing/transformation, not directly from source.
556pub struct HirInternal {
557    pub base: HirBase,
558}
559
560impl HirInternal {
561    /// Create new internal node
562    pub fn new(base: HirBase) -> Self {
563        Self { base }
564    }
565}
566
567#[derive(Debug)]
568/// Node representing a named scope (functions, classes, modules, blocks, etc.).
569/// Scopes are critical for symbol resolution - collected symbols are associated with scope lifetime.
570pub struct HirScope<'hir> {
571    pub base: HirBase,
572    pub ident: RwLock<Option<&'hir HirIdent<'hir>>>,
573    pub scope: RwLock<Option<&'hir Scope<'hir>>>,
574}
575
576impl<'hir> HirScope<'hir> {
577    /// Create new scope node with optional identifier
578    pub fn new(base: HirBase, ident: Option<&'hir HirIdent<'hir>>) -> Self {
579        Self {
580            base,
581            ident: RwLock::new(ident),
582            scope: RwLock::new(None),
583        }
584    }
585
586    /// Get human-readable name (identifier name or "unamed_scope")
587    pub fn owner_name(&self) -> String {
588        if let Some(id) = *self.ident.read() {
589            id.name.to_string()
590        } else {
591            "unamed_scope".to_string()
592        }
593    }
594
595    /// Set the scope reference for this scope node
596    pub fn set_scope(&self, scope: &'hir Scope<'hir>) {
597        *self.scope.write() = Some(scope);
598    }
599
600    /// Get the scope reference if it has been set
601    pub fn scope(&self) -> &'hir Scope<'hir> {
602        self.scope
603            .read()
604            .unwrap_or_else(|| panic!("scope must be set for HirScope {}", self.base.id))
605    }
606
607    pub fn opt_scope(&self) -> Option<&'hir Scope<'hir>> {
608        *self.scope.read()
609    }
610
611    pub fn set_ident(&self, ident: &'hir HirIdent<'hir>) {
612        *self.ident.write() = Some(ident);
613    }
614
615    pub fn opt_ident(&self) -> Option<&'hir HirIdent<'hir>> {
616        *self.ident.read()
617    }
618
619    pub fn ident(&self) -> &'hir HirIdent<'hir> {
620        self.ident.read().expect("ident must be set")
621    }
622
623    pub fn opt_symbol(&self) -> Option<&'hir Symbol> {
624        self.opt_scope().and_then(|scope| scope.opt_symbol())
625    }
626}
627
628impl<'hir> Clone for HirScope<'hir> {
629    fn clone(&self) -> Self {
630        Self {
631            base: self.base.clone(),
632            ident: RwLock::new(*self.ident.read()),
633            scope: RwLock::new(*self.scope.read()),
634        }
635    }
636}
637
638#[derive(Debug)]
639/// Node representing a named identifier/reference (variables, functions, types, etc.).
640/// Identifiers are primary targets for symbol collection and resolution.
641pub struct HirIdent<'hir> {
642    pub base: HirBase,
643    pub name: &'hir str,
644    pub symbol: AtomicPtr<Symbol>,
645    _phantom: std::marker::PhantomData<&'hir ()>,
646}
647
648impl<'hir> HirIdent<'hir> {
649    /// Create new identifier node with name
650    pub fn new(base: HirBase, name: &'hir str) -> Self {
651        Self {
652            base,
653            name,
654            symbol: AtomicPtr::new(std::ptr::null_mut()),
655            _phantom: std::marker::PhantomData,
656        }
657    }
658
659    pub fn id(&self) -> HirId {
660        self.base.id
661    }
662
663    pub fn set_symbol(&self, symbol: &'hir Symbol) {
664        self.symbol
665            .store(symbol as *const _ as *mut _, Ordering::Release);
666    }
667
668    #[inline]
669    pub fn opt_symbol(&self) -> Option<&'hir Symbol> {
670        let ptr = self.symbol.load(Ordering::Acquire);
671        if ptr.is_null() {
672            None
673        } else {
674            unsafe { Some(&*ptr) }
675        }
676    }
677}
678
679#[derive(Debug, Clone)]
680/// Node representing a source file. Provides entry point for language-specific analysis.
681pub struct HirFile {
682    pub base: HirBase,
683    pub file_path: String,
684}
685
686impl HirFile {
687    /// Create new file node with path
688    pub fn new(base: HirBase, file_path: String) -> Self {
689        Self { base, file_path }
690    }
691}