llmcc_core/
ir_builder.rs

1use std::collections::HashMap;
2use std::marker::PhantomData;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use tree_sitter::Node;
6
7use crate::block::BlockKind;
8use crate::context::{CompileCtxt, ParentedNode};
9use crate::ir::{
10    Arena, HirBase, HirFile, HirId, HirIdent, HirInternal, HirKind, HirNode, HirScope, HirText,
11};
12use crate::lang_def::LanguageTrait;
13
/// Process-wide source of HIR ids.
///
/// Every builder draws from this single counter, so ids stay distinct across
/// all files lowered in the same process. The counter is a `u32` and wraps
/// around on overflow.
static HIR_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
16
/// Options controlling how the tree-sitter parse tree is lowered to HIR.
///
/// The default configuration (`compact: false`) lowers every child of every
/// node.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct IrBuildConfig {
    /// When `true`, the builder prunes the tree while lowering: error, extra,
    /// missing and anonymous nodes, text nodes, and statement/call blocks are
    /// skipped, and only selected scope-like nodes have their children
    /// visited.
    pub compact: bool,
}
21
22impl IrBuildConfig {
23    pub fn compact() -> Self {
24        Self { compact: true }
25    }
26}
27
28/// Builder that directly assigns HIR nodes to compile context
29struct HirBuilder<'a, Language> {
30    arena: &'a Arena<'a>,
31    hir_map: HashMap<HirId, ParentedNode<'a>>,
32    file_path: Option<String>,
33    file_content: String,
34    config: IrBuildConfig,
35    _language: PhantomData<Language>,
36}
37
38impl<'a, Language: LanguageTrait> HirBuilder<'a, Language> {
39    /// Create a new builder that directly assigns to context
40    fn new(
41        arena: &'a Arena<'a>,
42        file_path: Option<String>,
43        file_content: String,
44        config: IrBuildConfig,
45    ) -> Self {
46        Self {
47            arena,
48            hir_map: HashMap::new(),
49            file_path,
50            file_content,
51            config,
52            _language: PhantomData,
53        }
54    }
55
56    /// Reserve a new HIR ID
57    fn reserve_hir_id(&self) -> HirId {
58        let id = HIR_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
59        HirId(id)
60    }
61
62    fn build(mut self, root: Node<'a>) -> (HirId, HashMap<HirId, ParentedNode<'a>>) {
63        let file_start_id = self.build_node(root, None);
64        (file_start_id, self.hir_map)
65    }
66
67    fn build_node(&mut self, node: Node<'a>, parent: Option<HirId>) -> HirId {
68        let hir_id = self.reserve_hir_id();
69        let kind_id = node.kind_id();
70        let kind = Language::hir_kind(kind_id);
71        let block_kind = Language::block_kind(kind_id);
72        let child_ids = if self.should_collect_children(kind, block_kind) {
73            self.collect_children(node, hir_id)
74        } else {
75            Vec::new()
76        };
77        let base = self.make_base(hir_id, parent, node, kind, child_ids);
78
79        let hir_node = match kind {
80            HirKind::File => {
81                let path = self.file_path.clone().unwrap_or_default();
82                let file_node = HirFile::new(base, path);
83                HirNode::File(self.arena.alloc(file_node))
84            }
85            HirKind::Text => {
86                let text = self.extract_text(&base);
87                let text_node = HirText::new(base, text);
88                HirNode::Text(self.arena.alloc(text_node))
89            }
90            HirKind::Internal => {
91                let internal = HirInternal::new(base);
92                HirNode::Internal(self.arena.alloc(internal))
93            }
94            HirKind::Scope => {
95                // Try to extract the name identifier from the scope node
96                let ident = self.extract_scope_ident(&base, node);
97                let scope = HirScope::new(base, ident);
98                HirNode::Scope(self.arena.alloc(scope))
99            }
100            HirKind::Identifier => {
101                let text = self.extract_text(&base);
102                let ident = HirIdent::new(base, text);
103                HirNode::Ident(self.arena.alloc(ident))
104            }
105            other => panic!("unsupported HIR kind for node {:?}", (other, node)),
106        };
107
108        self.hir_map.insert(hir_id, ParentedNode::new(hir_node));
109        hir_id
110    }
111
112    fn collect_children(&mut self, node: Node<'a>, parent_id: HirId) -> Vec<HirId> {
113        let mut cursor = node.walk();
114
115        // In compact mode, skip children for Text nodes to reduce tree size
116        if self.config.compact {
117            let kind = Language::hir_kind(node.kind_id());
118            if kind == HirKind::Text {
119                return Vec::new();
120            }
121        }
122
123        node.children(&mut cursor)
124            .filter_map(|child| {
125                if self.config.compact {
126                    if child.is_error()
127                        || child.is_extra()
128                        || child.is_missing()
129                        || !child.is_named()
130                    {
131                        return None;
132                    }
133                    let child_kind = Language::hir_kind(child.kind_id());
134                    if child_kind == HirKind::Text {
135                        return None;
136                    }
137                    let child_block_kind = Language::block_kind(child.kind_id());
138                    if matches!(child_block_kind, BlockKind::Stmt | BlockKind::Call) {
139                        return None;
140                    }
141                    // Keep Python/Rust block nodes so nested declarations remain visible in compact mode.
142                }
143                Some(self.build_node(child, Some(parent_id)))
144            })
145            .collect()
146    }
147
148    fn should_collect_children(&self, kind: HirKind, block_kind: BlockKind) -> bool {
149        if !self.config.compact {
150            return true;
151        }
152
153        match kind {
154            HirKind::File => true,
155            HirKind::Scope => matches!(
156                block_kind,
157                BlockKind::Root
158                    | BlockKind::Scope
159                    | BlockKind::Class
160                    | BlockKind::Enum
161                    | BlockKind::Impl
162                    | BlockKind::Func
163                    | BlockKind::Const
164            ),
165            HirKind::Internal => matches!(
166                block_kind,
167                BlockKind::Scope | BlockKind::Field | BlockKind::Const | BlockKind::Undefined
168            ),
169            _ => false,
170        }
171    }
172
173    fn make_base(
174        &self,
175        hir_id: HirId,
176        parent: Option<HirId>,
177        node: Node<'a>,
178        kind: HirKind,
179        children: Vec<HirId>,
180    ) -> HirBase<'a> {
181        let field_id = Self::field_id_of(node).unwrap_or(u16::MAX);
182        HirBase {
183            hir_id,
184            parent,
185            node,
186            kind,
187            field_id,
188            children,
189        }
190    }
191
192    fn extract_text(&self, base: &HirBase<'a>) -> String {
193        let start = base.node.start_byte();
194        let end = base.node.end_byte();
195        if end > start && end <= self.file_content.len() {
196            self.file_content[start..end].to_string()
197        } else {
198            String::new()
199        }
200    }
201
202    fn extract_scope_ident(&self, base: &HirBase<'a>, node: Node<'a>) -> Option<&'a HirIdent<'a>> {
203        // Try to get the name field from the tree-sitter node
204        // For Rust, the name field is typically "name"
205        let name_node = node.child_by_field_name("name")?;
206
207        // Create an identifier for the name node
208        let hir_id = self.reserve_hir_id();
209        let ident_base = HirBase {
210            hir_id,
211            parent: Some(base.hir_id),
212            node: name_node,
213            kind: HirKind::Identifier,
214            field_id: u16::MAX,
215            children: Vec::new(),
216        };
217
218        let text = self.extract_text(&ident_base);
219        let ident = HirIdent::new(ident_base, text);
220        Some(self.arena.alloc(ident))
221    }
222
223    fn field_id_of(node: Node<'_>) -> Option<u16> {
224        let parent = node.parent()?;
225        let mut cursor = parent.walk();
226
227        if !cursor.goto_first_child() {
228            return None;
229        }
230
231        loop {
232            if cursor.node().id() == node.id() {
233                return cursor.field_id().map(|id| id.get());
234            }
235            if !cursor.goto_next_sibling() {
236                break;
237            }
238        }
239
240        None
241    }
242}
243
244pub fn build_llmcc_ir_inner<'a, L: LanguageTrait>(
245    arena: &'a Arena<'a>,
246    file_path: Option<String>,
247    file_content: String,
248    tree: &'a tree_sitter::Tree,
249    config: IrBuildConfig,
250) -> Result<(HirId, HashMap<HirId, ParentedNode<'a>>), Box<dyn std::error::Error>> {
251    let builder = HirBuilder::<L>::new(arena, file_path, file_content, config);
252    let root = tree.root_node();
253    let result = builder.build(root);
254    Ok(result)
255}
256
257/// Build IR for all units in the context
258/// TODO: make this run in parallel
259pub fn build_llmcc_ir<'a, L: LanguageTrait>(
260    cc: &'a CompileCtxt<'a>,
261) -> Result<(), Box<dyn std::error::Error>> {
262    build_llmcc_ir_with_config::<L>(cc, IrBuildConfig::default())
263}
264
265/// Build IR for all units in the context with custom config
266pub fn build_llmcc_ir_with_config<'a, L: LanguageTrait>(
267    cc: &'a CompileCtxt<'a>,
268    config: IrBuildConfig,
269) -> Result<(), Box<dyn std::error::Error>> {
270    for index in 0..cc.files.len() {
271        let unit = cc.compile_unit(index);
272        let file_path = unit.file_path().map(|p| p.to_string());
273        let file_content = String::from_utf8_lossy(&unit.file().content()).to_string();
274        let tree = unit.tree();
275
276        let (_file_start_id, hir_map) =
277            build_llmcc_ir_inner::<L>(&cc.arena, file_path, file_content, tree, config)?;
278
279        // Insert all nodes into the compile context
280        for (hir_id, parented_node) in hir_map {
281            cc.hir_map.borrow_mut().insert(hir_id, parented_node);
282        }
283        cc.set_file_start(index, _file_start_id);
284    }
285    Ok(())
286}