llmcc_core/
ir_builder.rs

1use std::collections::HashMap;
2use std::marker::PhantomData;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use tree_sitter::Node;
6
7use crate::context::{CompileCtxt, ParentedNode};
8use crate::ir::{
9    Arena, HirBase, HirFile, HirId, HirIdent, HirInternal, HirKind, HirNode, HirScope, HirText,
10};
11use crate::lang_def::LanguageTrait;
12
/// Source of fresh HIR ids.
///
/// A single static shared by every builder, so each `fetch_add` hands out a
/// value no other caller has received (until the `u32` wraps around).
static HIR_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
15
16/// Builder that directly assigns HIR nodes to compile context
17struct HirBuilder<'a, Language> {
18    arena: &'a Arena<'a>,
19    hir_map: HashMap<HirId, ParentedNode<'a>>,
20    file_path: Option<String>,
21    file_content: String,
22    _language: PhantomData<Language>,
23}
24
25impl<'a, Language: LanguageTrait> HirBuilder<'a, Language> {
26    /// Create a new builder that directly assigns to context
27    fn new(arena: &'a Arena<'a>, file_path: Option<String>, file_content: String) -> Self {
28        Self {
29            arena,
30            hir_map: HashMap::new(),
31            file_path,
32            file_content,
33            _language: PhantomData,
34        }
35    }
36
37    /// Reserve a new HIR ID
38    fn reserve_hir_id(&self) -> HirId {
39        let id = HIR_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
40        HirId(id)
41    }
42
43    fn build(mut self, root: Node<'a>) -> (HirId, HashMap<HirId, ParentedNode<'a>>) {
44        let file_start_id = self.build_node(root, None);
45        (file_start_id, self.hir_map)
46    }
47
48    fn build_node(&mut self, node: Node<'a>, parent: Option<HirId>) -> HirId {
49        let hir_id = self.reserve_hir_id();
50        let child_ids = self.collect_children(node, hir_id);
51
52        let kind = Language::hir_kind(node.kind_id());
53        let base = self.make_base(hir_id, parent, node, kind, child_ids);
54
55        let hir_node = match kind {
56            HirKind::File => {
57                let path = self.file_path.clone().unwrap_or_default();
58                let file_node = HirFile::new(base, path);
59                HirNode::File(self.arena.alloc(file_node))
60            }
61            HirKind::Text => {
62                let text = self.extract_text(&base);
63                let text_node = HirText::new(base, text);
64                HirNode::Text(self.arena.alloc(text_node))
65            }
66            HirKind::Internal => {
67                let internal = HirInternal::new(base);
68                HirNode::Internal(self.arena.alloc(internal))
69            }
70            HirKind::Scope => {
71                // Try to extract the name identifier from the scope node
72                let ident = self.extract_scope_ident(&base, node);
73                let scope = HirScope::new(base, ident);
74                HirNode::Scope(self.arena.alloc(scope))
75            }
76            HirKind::Identifier => {
77                let text = self.extract_text(&base);
78                let ident = HirIdent::new(base, text);
79                HirNode::Ident(self.arena.alloc(ident))
80            }
81            other => panic!("unsupported HIR kind for node {:?}", (other, node)),
82        };
83
84        self.hir_map.insert(hir_id, ParentedNode::new(hir_node));
85        hir_id
86    }
87
88    fn collect_children(&mut self, node: Node<'a>, _parent: HirId) -> Vec<HirId> {
89        let mut cursor = node.walk();
90        node.children(&mut cursor)
91            .map(|child| self.build_node(child, None))
92            .collect()
93    }
94
95    fn make_base(
96        &self,
97        hir_id: HirId,
98        parent: Option<HirId>,
99        node: Node<'a>,
100        kind: HirKind,
101        children: Vec<HirId>,
102    ) -> HirBase<'a> {
103        let field_id = Self::field_id_of(node).unwrap_or(u16::MAX);
104        HirBase {
105            hir_id,
106            parent,
107            node,
108            kind,
109            field_id,
110            children,
111        }
112    }
113
114    fn extract_text(&self, base: &HirBase<'a>) -> String {
115        let start = base.node.start_byte();
116        let end = base.node.end_byte();
117        if end > start && end <= self.file_content.len() {
118            self.file_content[start..end].to_string()
119        } else {
120            String::new()
121        }
122    }
123
124    fn extract_scope_ident(&self, base: &HirBase<'a>, node: Node<'a>) -> Option<&'a HirIdent<'a>> {
125        // Try to get the name field from the tree-sitter node
126        // For Rust, the name field is typically "name"
127        let name_node = node.child_by_field_name("name")?;
128
129        // Create an identifier for the name node
130        let hir_id = self.reserve_hir_id();
131        let ident_base = HirBase {
132            hir_id,
133            parent: Some(base.hir_id),
134            node: name_node,
135            kind: HirKind::Identifier,
136            field_id: u16::MAX,
137            children: Vec::new(),
138        };
139
140        let text = self.extract_text(&ident_base);
141        let ident = HirIdent::new(ident_base, text);
142        Some(self.arena.alloc(ident))
143    }
144
145    fn field_id_of(node: Node<'_>) -> Option<u16> {
146        let parent = node.parent()?;
147        let mut cursor = parent.walk();
148
149        if !cursor.goto_first_child() {
150            return None;
151        }
152
153        loop {
154            if cursor.node().id() == node.id() {
155                return cursor.field_id().map(|id| id.get());
156            }
157            if !cursor.goto_next_sibling() {
158                break;
159            }
160        }
161
162        None
163    }
164}
165
166pub fn build_llmcc_ir_inner<'a, L: LanguageTrait>(
167    arena: &'a Arena<'a>,
168    file_path: Option<String>,
169    file_content: String,
170    tree: &'a tree_sitter::Tree,
171) -> Result<(HirId, HashMap<HirId, ParentedNode<'a>>), Box<dyn std::error::Error>> {
172    let builder = HirBuilder::<L>::new(arena, file_path, file_content);
173    let root = tree.root_node();
174    let result = builder.build(root);
175    Ok(result)
176}
177
178/// Build IR for all units in the context
179/// TODO: make this run in parallel
180pub fn build_llmcc_ir<'a, L: LanguageTrait>(
181    cc: &'a CompileCtxt<'a>,
182) -> Result<(), Box<dyn std::error::Error>> {
183    for index in 0..cc.files.len() {
184        let unit = cc.compile_unit(index);
185        let file_path = unit.file_path().map(|p| p.to_string());
186        let file_content = String::from_utf8_lossy(&unit.file().content()).to_string();
187        let tree = unit.tree();
188
189        let (_file_start_id, hir_map) =
190            build_llmcc_ir_inner::<L>(&cc.arena, file_path, file_content, tree)?;
191
192        // Insert all nodes into the compile context
193        for (hir_id, parented_node) in hir_map {
194            cc.hir_map.borrow_mut().insert(hir_id, parented_node);
195        }
196        cc.set_file_start(index, _file_start_id);
197    }
198    Ok(())
199}