llmcc_core/
ir_builder.rs

1use std::collections::HashMap;
2use std::marker::PhantomData;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use rayon::prelude::*;
6use tree_sitter::Node;
7
8use crate::block::BlockKind;
9use crate::context::{CompileCtxt, ParentedNode};
10use crate::ir::{
11    Arena, HirBase, HirFile, HirId, HirIdent, HirInternal, HirKind, HirNode, HirScope, HirText,
12};
13use crate::lang_def::LanguageTrait;
14use crate::DynError;
15
/// Process-wide source of HIR ids.
///
/// Each reservation performs an atomic `fetch_add(1)` on this counter, so the
/// first id handed out is `0` and every later reservation observes a distinct
/// value (until the `u32` wraps).
static HIR_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
18
/// Options controlling how the IR is built from a syntax tree.
///
/// The `Default` value has `compact` switched off.
#[derive(Debug, Clone, Copy, Default)]
pub struct IrBuildConfig {
    /// When `true`, the builder prunes children while walking the tree
    /// instead of recording every syntax node.
    pub compact: bool,
}

impl IrBuildConfig {
    /// Returns a configuration with compact mode switched on.
    pub fn compact() -> Self {
        IrBuildConfig { compact: true }
    }
}
29
30#[derive(Clone)]
31struct HirNodeSpec<'hir> {
32    base: HirBase<'hir>,
33    variant: HirNodeVariantSpec<'hir>,
34}
35
36#[derive(Clone)]
37enum HirNodeVariantSpec<'hir> {
38    File {
39        file_path: String,
40    },
41    Text {
42        text: String,
43    },
44    Internal,
45    Scope {
46        ident: Option<HirScopeIdentSpec<'hir>>,
47    },
48    Ident {
49        name: String,
50    },
51}
52
53#[derive(Clone)]
54struct HirScopeIdentSpec<'hir> {
55    base: HirBase<'hir>,
56    name: String,
57}
58
59/// Builder that directly assigns HIR nodes to compile context
60struct HirBuilder<'a, Language> {
61    node_specs: HashMap<HirId, HirNodeSpec<'a>>,
62    file_path: Option<String>,
63    file_bytes: &'a [u8],
64    config: IrBuildConfig,
65    _language: PhantomData<Language>,
66}
67
68impl<'a, Language: LanguageTrait> HirBuilder<'a, Language> {
69    /// Create a new builder that directly assigns to context
70    fn new(file_path: Option<String>, file_bytes: &'a [u8], config: IrBuildConfig) -> Self {
71        Self {
72            node_specs: HashMap::new(),
73            file_path,
74            file_bytes,
75            config,
76            _language: PhantomData,
77        }
78    }
79
80    /// Reserve a new HIR ID
81    fn reserve_hir_id(&self) -> HirId {
82        let id = HIR_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
83        HirId(id)
84    }
85
86    fn build(mut self, root: Node<'a>) -> (HirId, HashMap<HirId, HirNodeSpec<'a>>) {
87        let file_start_id = self.build_node(root, None);
88        (file_start_id, self.node_specs)
89    }
90
91    fn build_node(&mut self, node: Node<'a>, parent: Option<HirId>) -> HirId {
92        let hir_id = self.reserve_hir_id();
93        let kind_id = node.kind_id();
94        let kind = Language::hir_kind(kind_id);
95        let block_kind = Language::block_kind(kind_id);
96        let child_ids = if self.should_collect_children(kind, block_kind) {
97            self.collect_children(node, hir_id)
98        } else {
99            Vec::new()
100        };
101        let base = self.make_base(hir_id, parent, node, kind, child_ids);
102
103        let variant = match kind {
104            HirKind::File => {
105                let path = self.file_path.clone().unwrap_or_default();
106                HirNodeVariantSpec::File { file_path: path }
107            }
108            HirKind::Text => {
109                let text = self.extract_text(&base);
110                HirNodeVariantSpec::Text { text }
111            }
112            HirKind::Internal => HirNodeVariantSpec::Internal,
113            HirKind::Scope => {
114                // Try to extract the name identifier from the scope node
115                let ident = self.extract_scope_ident(&base, node);
116                HirNodeVariantSpec::Scope { ident }
117            }
118            HirKind::Identifier => {
119                let text = self.extract_text(&base);
120                HirNodeVariantSpec::Ident { name: text }
121            }
122            other => panic!("unsupported HIR kind for node {:?}", (other, node)),
123        };
124
125        self.node_specs
126            .insert(hir_id, HirNodeSpec { base, variant });
127        hir_id
128    }
129
130    fn collect_children(&mut self, node: Node<'a>, parent_id: HirId) -> Vec<HirId> {
131        let mut cursor = node.walk();
132
133        // In compact mode, skip children for Text nodes to reduce tree size
134        if self.config.compact {
135            let kind = Language::hir_kind(node.kind_id());
136            if kind == HirKind::Text {
137                return Vec::new();
138            }
139        }
140
141        node.children(&mut cursor)
142            .filter_map(|child| {
143                if self.config.compact {
144                    if child.is_error()
145                        || child.is_extra()
146                        || child.is_missing()
147                        || !child.is_named()
148                    {
149                        return None;
150                    }
151                    let child_kind = Language::hir_kind(child.kind_id());
152                    if child_kind == HirKind::Text {
153                        return None;
154                    }
155                    let child_block_kind = Language::block_kind(child.kind_id());
156                    if matches!(child_block_kind, BlockKind::Stmt | BlockKind::Call) {
157                        return None;
158                    }
159                    // Keep Python/Rust block nodes so nested declarations remain visible in compact mode.
160                }
161                Some(self.build_node(child, Some(parent_id)))
162            })
163            .collect()
164    }
165
166    fn should_collect_children(&self, kind: HirKind, block_kind: BlockKind) -> bool {
167        if !self.config.compact {
168            return true;
169        }
170
171        match kind {
172            HirKind::File => true,
173            HirKind::Scope => matches!(
174                block_kind,
175                BlockKind::Root
176                    | BlockKind::Scope
177                    | BlockKind::Class
178                    | BlockKind::Enum
179                    | BlockKind::Impl
180                    | BlockKind::Func
181                    | BlockKind::Const
182            ),
183            HirKind::Internal => matches!(
184                block_kind,
185                BlockKind::Scope | BlockKind::Field | BlockKind::Const | BlockKind::Undefined
186            ),
187            _ => false,
188        }
189    }
190
191    fn make_base(
192        &self,
193        hir_id: HirId,
194        parent: Option<HirId>,
195        node: Node<'a>,
196        kind: HirKind,
197        children: Vec<HirId>,
198    ) -> HirBase<'a> {
199        let field_id = Self::field_id_of(node).unwrap_or(u16::MAX);
200        HirBase {
201            hir_id,
202            parent,
203            node,
204            kind,
205            field_id,
206            children,
207        }
208    }
209
210    fn extract_text(&self, base: &HirBase<'a>) -> String {
211        let start = base.node.start_byte();
212        let end = base.node.end_byte();
213        if end > start && end <= self.file_bytes.len() {
214            match std::str::from_utf8(&self.file_bytes[start..end]) {
215                Ok(text) => text.to_owned(),
216                Err(_) => String::from_utf8_lossy(&self.file_bytes[start..end]).into_owned(),
217            }
218        } else {
219            String::new()
220        }
221    }
222
223    fn extract_scope_ident(
224        &self,
225        base: &HirBase<'a>,
226        node: Node<'a>,
227    ) -> Option<HirScopeIdentSpec<'a>> {
228        // Try to get the name field from the tree-sitter node
229        // For Rust, the name field is typically "name"
230        let name_node = node.child_by_field_name("name")?;
231
232        // Create an identifier for the name node
233        let hir_id = self.reserve_hir_id();
234        let ident_base = HirBase {
235            hir_id,
236            parent: Some(base.hir_id),
237            node: name_node,
238            kind: HirKind::Identifier,
239            field_id: u16::MAX,
240            children: Vec::new(),
241        };
242
243        let text = self.extract_text(&ident_base);
244        Some(HirScopeIdentSpec {
245            base: ident_base,
246            name: text,
247        })
248    }
249
250    fn field_id_of(node: Node<'_>) -> Option<u16> {
251        let parent = node.parent()?;
252        let mut cursor = parent.walk();
253
254        if !cursor.goto_first_child() {
255            return None;
256        }
257
258        loop {
259            if cursor.node().id() == node.id() {
260                return cursor.field_id().map(|id| id.get());
261            }
262            if !cursor.goto_next_sibling() {
263                break;
264            }
265        }
266
267        None
268    }
269}
270
271impl<'hir> HirNodeSpec<'hir> {
272    fn into_parented_node(self, arena: &'hir Arena<'hir>) -> ParentedNode<'hir> {
273        let HirNodeSpec { base, variant } = self;
274
275        let hir_node = match variant {
276            HirNodeVariantSpec::File { file_path } => {
277                let node = HirFile::new(base, file_path);
278                HirNode::File(arena.alloc(node))
279            }
280            HirNodeVariantSpec::Text { text } => {
281                let node = HirText::new(base, text);
282                HirNode::Text(arena.alloc(node))
283            }
284            HirNodeVariantSpec::Internal => {
285                let node = HirInternal::new(base);
286                HirNode::Internal(arena.alloc(node))
287            }
288            HirNodeVariantSpec::Scope { ident } => {
289                let ident_ref = ident.map(|spec| {
290                    let HirScopeIdentSpec { base, name } = spec;
291                    let ident_node = HirIdent::new(base, name);
292                    arena.alloc(ident_node)
293                });
294                let node = HirScope::new(base, ident_ref);
295                HirNode::Scope(arena.alloc(node))
296            }
297            HirNodeVariantSpec::Ident { name } => {
298                let node = HirIdent::new(base, name);
299                HirNode::Ident(arena.alloc(node))
300            }
301        };
302
303        ParentedNode::new(hir_node)
304    }
305}
306
307fn build_llmcc_ir_inner<'a, L: LanguageTrait>(
308    file_path: Option<String>,
309    file_bytes: &'a [u8],
310    tree: &'a tree_sitter::Tree,
311    config: IrBuildConfig,
312) -> Result<(HirId, HashMap<HirId, HirNodeSpec<'a>>), DynError> {
313    let builder = HirBuilder::<L>::new(file_path, file_bytes, config);
314    let root = tree.root_node();
315    let result = builder.build(root);
316    Ok(result)
317}
318
319/// Build IR for all units in the context
320pub fn build_llmcc_ir<'a, L: LanguageTrait>(cc: &'a CompileCtxt<'a>) -> Result<(), DynError> {
321    build_llmcc_ir_with_config::<L>(cc, IrBuildConfig::default())
322}
323
324struct FileIrBuildResult<'hir> {
325    index: usize,
326    file_start_id: HirId,
327    node_specs: HashMap<HirId, HirNodeSpec<'hir>>,
328}
329
330/// Build IR for all units in the context with custom config
331pub fn build_llmcc_ir_with_config<'a, L: LanguageTrait>(
332    cc: &'a CompileCtxt<'a>,
333    config: IrBuildConfig,
334) -> Result<(), DynError> {
335    let results: Vec<Result<FileIrBuildResult<'a>, DynError>> = (0..cc.files.len())
336        .into_par_iter()
337        .map(|index| {
338            let unit = cc.compile_unit(index);
339            let file_path = unit.file_path().map(|p| p.to_string());
340            let file_bytes = unit.file().content();
341            let tree = unit.tree();
342
343            build_llmcc_ir_inner::<L>(file_path, file_bytes, tree, config).map(
344                |(file_start_id, node_specs)| FileIrBuildResult {
345                    index,
346                    file_start_id,
347                    node_specs,
348                },
349            )
350        })
351        .collect();
352
353    let mut results: Vec<FileIrBuildResult<'a>> =
354        results.into_iter().collect::<Result<Vec<_>, _>>()?;
355
356    results.sort_by_key(|result| result.index);
357
358    for result in results {
359        let FileIrBuildResult {
360            index,
361            file_start_id,
362            node_specs,
363        } = result;
364
365        {
366            let mut hir_map = cc.hir_map.write().unwrap();
367            for (hir_id, spec) in node_specs {
368                let parented_node = spec.into_parented_node(&cc.arena);
369                hir_map.insert(hir_id, parented_node);
370            }
371        }
372
373        cc.set_file_start(index, file_start_id);
374    }
375
376    Ok(())
377}