llmcc_core/
ir_builder.rs

1use std::collections::HashMap;
2use std::marker::PhantomData;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use tree_sitter::Node;
6
7use crate::context::{CompileCtxt, ParentedNode};
8use crate::ir::{
9    Arena, HirBase, HirFile, HirId, HirIdent, HirInternal, HirKind, HirNode, HirScope, HirText,
10};
11use crate::lang_def::LanguageTrait;
12
/// Process-wide source of fresh [`HirId`] values.
///
/// Every call to `HirBuilder::reserve_hir_id` bumps this counter with
/// `fetch_add`, so all builders share a single ID space that starts at `0`.
/// `AtomicU32::fetch_add` wraps around on overflow.
static HIR_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
15
/// Options controlling how the HIR is built from a tree-sitter tree.
///
/// The [`Default`] configuration has `compact` turned off.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct IrBuildConfig {
    /// When `true`, the builder produces a smaller tree: children of `Text`
    /// nodes are not visited, and error nodes as well as missing anonymous
    /// nodes are dropped.
    pub compact: bool,
}

impl IrBuildConfig {
    /// Returns a configuration with `compact` mode enabled.
    pub fn compact() -> Self {
        Self { compact: true }
    }
}
32
33/// Builder that directly assigns HIR nodes to compile context
34struct HirBuilder<'a, Language> {
35    arena: &'a Arena<'a>,
36    hir_map: HashMap<HirId, ParentedNode<'a>>,
37    file_path: Option<String>,
38    file_content: String,
39    config: IrBuildConfig,
40    _language: PhantomData<Language>,
41}
42
43impl<'a, Language: LanguageTrait> HirBuilder<'a, Language> {
44    /// Create a new builder that directly assigns to context
45    fn new(
46        arena: &'a Arena<'a>,
47        file_path: Option<String>,
48        file_content: String,
49        config: IrBuildConfig,
50    ) -> Self {
51        Self {
52            arena,
53            hir_map: HashMap::new(),
54            file_path,
55            file_content,
56            config,
57            _language: PhantomData,
58        }
59    }
60
61    /// Reserve a new HIR ID
62    fn reserve_hir_id(&self) -> HirId {
63        let id = HIR_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
64        HirId(id)
65    }
66
67    fn build(mut self, root: Node<'a>) -> (HirId, HashMap<HirId, ParentedNode<'a>>) {
68        let file_start_id = self.build_node(root, None);
69        (file_start_id, self.hir_map)
70    }
71
72    fn build_node(&mut self, node: Node<'a>, parent: Option<HirId>) -> HirId {
73        let hir_id = self.reserve_hir_id();
74        let child_ids = self.collect_children(node, hir_id);
75
76        let kind = Language::hir_kind(node.kind_id());
77        let base = self.make_base(hir_id, parent, node, kind, child_ids);
78
79        let hir_node = match kind {
80            HirKind::File => {
81                let path = self.file_path.clone().unwrap_or_default();
82                let file_node = HirFile::new(base, path);
83                HirNode::File(self.arena.alloc(file_node))
84            }
85            HirKind::Text => {
86                let text = self.extract_text(&base);
87                let text_node = HirText::new(base, text);
88                HirNode::Text(self.arena.alloc(text_node))
89            }
90            HirKind::Internal => {
91                let internal = HirInternal::new(base);
92                HirNode::Internal(self.arena.alloc(internal))
93            }
94            HirKind::Scope => {
95                // Try to extract the name identifier from the scope node
96                let ident = self.extract_scope_ident(&base, node);
97                let scope = HirScope::new(base, ident);
98                HirNode::Scope(self.arena.alloc(scope))
99            }
100            HirKind::Identifier => {
101                let text = self.extract_text(&base);
102                let ident = HirIdent::new(base, text);
103                HirNode::Ident(self.arena.alloc(ident))
104            }
105            other => panic!("unsupported HIR kind for node {:?}", (other, node)),
106        };
107
108        self.hir_map.insert(hir_id, ParentedNode::new(hir_node));
109        hir_id
110    }
111
112    fn collect_children(&mut self, node: Node<'a>, _parent: HirId) -> Vec<HirId> {
113        let mut cursor = node.walk();
114
115        // In compact mode, skip children for Text nodes to reduce tree size
116        if self.config.compact {
117            let kind = Language::hir_kind(node.kind_id());
118            if kind == HirKind::Text {
119                return Vec::new();
120            }
121        }
122
123        node.children(&mut cursor)
124            .filter_map(|child| {
125                // In compact mode, skip certain node types to reduce tree construction overhead
126                if self.config.compact {
127                    // Skip error nodes and unnamed children in compact mode
128                    if child.is_error() || (child.is_missing() && !child.is_named()) {
129                        return None;
130                    }
131                }
132                Some(self.build_node(child, None))
133            })
134            .collect()
135    }
136
137    fn make_base(
138        &self,
139        hir_id: HirId,
140        parent: Option<HirId>,
141        node: Node<'a>,
142        kind: HirKind,
143        children: Vec<HirId>,
144    ) -> HirBase<'a> {
145        let field_id = Self::field_id_of(node).unwrap_or(u16::MAX);
146        HirBase {
147            hir_id,
148            parent,
149            node,
150            kind,
151            field_id,
152            children,
153        }
154    }
155
156    fn extract_text(&self, base: &HirBase<'a>) -> String {
157        let start = base.node.start_byte();
158        let end = base.node.end_byte();
159        if end > start && end <= self.file_content.len() {
160            self.file_content[start..end].to_string()
161        } else {
162            String::new()
163        }
164    }
165
166    fn extract_scope_ident(&self, base: &HirBase<'a>, node: Node<'a>) -> Option<&'a HirIdent<'a>> {
167        // Try to get the name field from the tree-sitter node
168        // For Rust, the name field is typically "name"
169        let name_node = node.child_by_field_name("name")?;
170
171        // Create an identifier for the name node
172        let hir_id = self.reserve_hir_id();
173        let ident_base = HirBase {
174            hir_id,
175            parent: Some(base.hir_id),
176            node: name_node,
177            kind: HirKind::Identifier,
178            field_id: u16::MAX,
179            children: Vec::new(),
180        };
181
182        let text = self.extract_text(&ident_base);
183        let ident = HirIdent::new(ident_base, text);
184        Some(self.arena.alloc(ident))
185    }
186
187    fn field_id_of(node: Node<'_>) -> Option<u16> {
188        let parent = node.parent()?;
189        let mut cursor = parent.walk();
190
191        if !cursor.goto_first_child() {
192            return None;
193        }
194
195        loop {
196            if cursor.node().id() == node.id() {
197                return cursor.field_id().map(|id| id.get());
198            }
199            if !cursor.goto_next_sibling() {
200                break;
201            }
202        }
203
204        None
205    }
206}
207
208pub fn build_llmcc_ir_inner<'a, L: LanguageTrait>(
209    arena: &'a Arena<'a>,
210    file_path: Option<String>,
211    file_content: String,
212    tree: &'a tree_sitter::Tree,
213    config: IrBuildConfig,
214) -> Result<(HirId, HashMap<HirId, ParentedNode<'a>>), Box<dyn std::error::Error>> {
215    let builder = HirBuilder::<L>::new(arena, file_path, file_content, config);
216    let root = tree.root_node();
217    let result = builder.build(root);
218    Ok(result)
219}
220
221/// Build IR for all units in the context
222/// TODO: make this run in parallel
223pub fn build_llmcc_ir<'a, L: LanguageTrait>(
224    cc: &'a CompileCtxt<'a>,
225) -> Result<(), Box<dyn std::error::Error>> {
226    build_llmcc_ir_with_config::<L>(cc, IrBuildConfig::default())
227}
228
229/// Build IR for all units in the context with custom config
230pub fn build_llmcc_ir_with_config<'a, L: LanguageTrait>(
231    cc: &'a CompileCtxt<'a>,
232    config: IrBuildConfig,
233) -> Result<(), Box<dyn std::error::Error>> {
234    for index in 0..cc.files.len() {
235        let unit = cc.compile_unit(index);
236        let file_path = unit.file_path().map(|p| p.to_string());
237        let file_content = String::from_utf8_lossy(&unit.file().content()).to_string();
238        let tree = unit.tree();
239
240        let (_file_start_id, hir_map) =
241            build_llmcc_ir_inner::<L>(&cc.arena, file_path, file_content, tree, config)?;
242
243        // Insert all nodes into the compile context
244        for (hir_id, parented_node) in hir_map {
245            cc.hir_map.borrow_mut().insert(hir_id, parented_node);
246        }
247        cc.set_file_start(index, _file_start_id);
248    }
249    Ok(())
250}