llmcc_core/
ir_builder.rs

1use std::collections::HashMap;
2use std::marker::PhantomData;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use tree_sitter::Node;
6
7use crate::block::BlockKind;
8use crate::context::{CompileCtxt, ParentedNode};
9use crate::ir::{
10    Arena, HirBase, HirFile, HirId, HirIdent, HirInternal, HirKind, HirNode, HirScope, HirText,
11};
12use crate::lang_def::LanguageTrait;
13
/// Process-wide counter from which fresh HIR ids are drawn.
///
/// It starts at zero and is advanced atomically, so every id handed out in this process is
/// distinct until the `u32` range wraps around.
static HIR_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
16
/// Options that control how the IR builder lowers a syntax tree.
///
/// The default configuration has `compact` disabled; use [`IrBuildConfig::compact`] to enable it.
#[derive(Debug, Clone, Copy, Default)]
pub struct IrBuildConfig {
    /// When `true`, the builder prunes the tree while building: it only descends into selected
    /// node kinds and drops selected children instead of lowering every syntax node.
    pub compact: bool,
}

impl IrBuildConfig {
    /// Returns a configuration with compact mode enabled.
    pub fn compact() -> Self {
        Self { compact: true }
    }
}
33
34/// Builder that directly assigns HIR nodes to compile context
35struct HirBuilder<'a, Language> {
36    arena: &'a Arena<'a>,
37    hir_map: HashMap<HirId, ParentedNode<'a>>,
38    file_path: Option<String>,
39    file_content: String,
40    config: IrBuildConfig,
41    _language: PhantomData<Language>,
42}
43
44impl<'a, Language: LanguageTrait> HirBuilder<'a, Language> {
45    /// Create a new builder that directly assigns to context
46    fn new(
47        arena: &'a Arena<'a>,
48        file_path: Option<String>,
49        file_content: String,
50        config: IrBuildConfig,
51    ) -> Self {
52        Self {
53            arena,
54            hir_map: HashMap::new(),
55            file_path,
56            file_content,
57            config,
58            _language: PhantomData,
59        }
60    }
61
62    /// Reserve a new HIR ID
63    fn reserve_hir_id(&self) -> HirId {
64        let id = HIR_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
65        HirId(id)
66    }
67
68    fn build(mut self, root: Node<'a>) -> (HirId, HashMap<HirId, ParentedNode<'a>>) {
69        let file_start_id = self.build_node(root, None);
70        (file_start_id, self.hir_map)
71    }
72
73    fn build_node(&mut self, node: Node<'a>, parent: Option<HirId>) -> HirId {
74        let hir_id = self.reserve_hir_id();
75        let kind_id = node.kind_id();
76        let kind = Language::hir_kind(kind_id);
77        let block_kind = Language::block_kind(kind_id);
78        let child_ids = if self.should_collect_children(kind, block_kind) {
79            self.collect_children(node, hir_id)
80        } else {
81            Vec::new()
82        };
83        let base = self.make_base(hir_id, parent, node, kind, child_ids);
84
85        let hir_node = match kind {
86            HirKind::File => {
87                let path = self.file_path.clone().unwrap_or_default();
88                let file_node = HirFile::new(base, path);
89                HirNode::File(self.arena.alloc(file_node))
90            }
91            HirKind::Text => {
92                let text = self.extract_text(&base);
93                let text_node = HirText::new(base, text);
94                HirNode::Text(self.arena.alloc(text_node))
95            }
96            HirKind::Internal => {
97                let internal = HirInternal::new(base);
98                HirNode::Internal(self.arena.alloc(internal))
99            }
100            HirKind::Scope => {
101                // Try to extract the name identifier from the scope node
102                let ident = self.extract_scope_ident(&base, node);
103                let scope = HirScope::new(base, ident);
104                HirNode::Scope(self.arena.alloc(scope))
105            }
106            HirKind::Identifier => {
107                let text = self.extract_text(&base);
108                let ident = HirIdent::new(base, text);
109                HirNode::Ident(self.arena.alloc(ident))
110            }
111            other => panic!("unsupported HIR kind for node {:?}", (other, node)),
112        };
113
114        self.hir_map.insert(hir_id, ParentedNode::new(hir_node));
115        hir_id
116    }
117
118    fn collect_children(&mut self, node: Node<'a>, parent_id: HirId) -> Vec<HirId> {
119        let mut cursor = node.walk();
120
121        // In compact mode, skip children for Text nodes to reduce tree size
122        if self.config.compact {
123            let kind = Language::hir_kind(node.kind_id());
124            if kind == HirKind::Text {
125                return Vec::new();
126            }
127        }
128
129        node.children(&mut cursor)
130            .filter_map(|child| {
131                if self.config.compact {
132                    if child.is_error()
133                        || child.is_extra()
134                        || child.is_missing()
135                        || !child.is_named()
136                    {
137                        return None;
138                    }
139                    let child_kind = Language::hir_kind(child.kind_id());
140                    if child_kind == HirKind::Text {
141                        return None;
142                    }
143                    let child_block_kind = Language::block_kind(child.kind_id());
144                    if matches!(child_block_kind, BlockKind::Stmt | BlockKind::Call) {
145                        return None;
146                    }
147                    // Keep Python/Rust block nodes so nested declarations remain visible in compact mode.
148                }
149                Some(self.build_node(child, Some(parent_id)))
150            })
151            .collect()
152    }
153
154    fn should_collect_children(&self, kind: HirKind, block_kind: BlockKind) -> bool {
155        if !self.config.compact {
156            return true;
157        }
158
159        match kind {
160            HirKind::File => true,
161            HirKind::Scope => matches!(
162                block_kind,
163                BlockKind::Root
164                    | BlockKind::Scope
165                    | BlockKind::Class
166                    | BlockKind::Enum
167                    | BlockKind::Impl
168                    | BlockKind::Func
169                    | BlockKind::Const
170            ),
171            HirKind::Internal => matches!(
172                block_kind,
173                BlockKind::Scope | BlockKind::Field | BlockKind::Const | BlockKind::Undefined
174            ),
175            _ => false,
176        }
177    }
178
179    fn make_base(
180        &self,
181        hir_id: HirId,
182        parent: Option<HirId>,
183        node: Node<'a>,
184        kind: HirKind,
185        children: Vec<HirId>,
186    ) -> HirBase<'a> {
187        let field_id = Self::field_id_of(node).unwrap_or(u16::MAX);
188        HirBase {
189            hir_id,
190            parent,
191            node,
192            kind,
193            field_id,
194            children,
195        }
196    }
197
198    fn extract_text(&self, base: &HirBase<'a>) -> String {
199        let start = base.node.start_byte();
200        let end = base.node.end_byte();
201        if end > start && end <= self.file_content.len() {
202            self.file_content[start..end].to_string()
203        } else {
204            String::new()
205        }
206    }
207
208    fn extract_scope_ident(&self, base: &HirBase<'a>, node: Node<'a>) -> Option<&'a HirIdent<'a>> {
209        // Try to get the name field from the tree-sitter node
210        // For Rust, the name field is typically "name"
211        let name_node = node.child_by_field_name("name")?;
212
213        // Create an identifier for the name node
214        let hir_id = self.reserve_hir_id();
215        let ident_base = HirBase {
216            hir_id,
217            parent: Some(base.hir_id),
218            node: name_node,
219            kind: HirKind::Identifier,
220            field_id: u16::MAX,
221            children: Vec::new(),
222        };
223
224        let text = self.extract_text(&ident_base);
225        let ident = HirIdent::new(ident_base, text);
226        Some(self.arena.alloc(ident))
227    }
228
229    fn field_id_of(node: Node<'_>) -> Option<u16> {
230        let parent = node.parent()?;
231        let mut cursor = parent.walk();
232
233        if !cursor.goto_first_child() {
234            return None;
235        }
236
237        loop {
238            if cursor.node().id() == node.id() {
239                return cursor.field_id().map(|id| id.get());
240            }
241            if !cursor.goto_next_sibling() {
242                break;
243            }
244        }
245
246        None
247    }
248}
249
250pub fn build_llmcc_ir_inner<'a, L: LanguageTrait>(
251    arena: &'a Arena<'a>,
252    file_path: Option<String>,
253    file_content: String,
254    tree: &'a tree_sitter::Tree,
255    config: IrBuildConfig,
256) -> Result<(HirId, HashMap<HirId, ParentedNode<'a>>), Box<dyn std::error::Error>> {
257    let builder = HirBuilder::<L>::new(arena, file_path, file_content, config);
258    let root = tree.root_node();
259    let result = builder.build(root);
260    Ok(result)
261}
262
263/// Build IR for all units in the context
264/// TODO: make this run in parallel
265pub fn build_llmcc_ir<'a, L: LanguageTrait>(
266    cc: &'a CompileCtxt<'a>,
267) -> Result<(), Box<dyn std::error::Error>> {
268    build_llmcc_ir_with_config::<L>(cc, IrBuildConfig::default())
269}
270
271/// Build IR for all units in the context with custom config
272pub fn build_llmcc_ir_with_config<'a, L: LanguageTrait>(
273    cc: &'a CompileCtxt<'a>,
274    config: IrBuildConfig,
275) -> Result<(), Box<dyn std::error::Error>> {
276    for index in 0..cc.files.len() {
277        let unit = cc.compile_unit(index);
278        let file_path = unit.file_path().map(|p| p.to_string());
279        let file_content = String::from_utf8_lossy(&unit.file().content()).to_string();
280        let tree = unit.tree();
281
282        let (_file_start_id, hir_map) =
283            build_llmcc_ir_inner::<L>(&cc.arena, file_path, file_content, tree, config)?;
284
285        // Insert all nodes into the compile context
286        for (hir_id, parented_node) in hir_map {
287            cc.hir_map.borrow_mut().insert(hir_id, parented_node);
288        }
289        cc.set_file_start(index, _file_start_id);
290    }
291    Ok(())
292}