llmcc_core/
ir_builder.rs

//! IR Builder: transforms parse trees into the High-level Intermediate Representation (HIR).
//!
//! Uses per-unit arenas for parallel building, then merges results into the global context.
//! This avoids locks during parallel builds. HIR IDs are drawn from a global atomic counter,
//! so ID assignment is only reproducible when files are built sequentially
//! (see `IrBuildOption::sequential`).
5use smallvec::SmallVec;
6use std::marker::PhantomData;
7use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
8use std::time::Instant;
9
10use rayon::prelude::*;
11
12use crate::DynError;
13use crate::context::CompileCtxt;
14use crate::ir::{
15    Arena, HirBase, HirFile, HirId, HirIdent, HirInternal, HirKind, HirNode, HirScope, HirText,
16};
17use crate::lang_def::{LanguageTrait, ParseNode, ParseTree};
18
/// Process-wide source of HIR IDs.
///
/// Starts at 0 and is bumped atomically, so builders running on different threads never
/// receive the same ID.
static HIR_ID_COUNTER: AtomicUsize = AtomicUsize::new(0);
21
/// Accumulated time spent inside per-file IR builds, in nanoseconds.
///
/// Each file build adds its own elapsed time, so with parallel builds the total can exceed
/// the wall-clock duration of the whole pass.
static IR_BUILD_CPU_TIME_NS: AtomicU64 = AtomicU64::new(0);

/// Zero the accumulated IR build time.
pub fn reset_ir_build_counters() {
    IR_BUILD_CPU_TIME_NS.store(0, Ordering::Relaxed);
}

/// Return the accumulated IR build time, converted from nanoseconds to milliseconds.
pub fn get_ir_build_cpu_time_ms() -> f64 {
    const NANOS_PER_MILLI: f64 = 1_000_000.0;
    let total_nanos = IR_BUILD_CPU_TIME_NS.load(Ordering::Relaxed);
    total_nanos as f64 / NANOS_PER_MILLI
}
32
33/// Reserve a new globally-unique HIR ID.
34#[inline]
35pub fn next_hir_id() -> HirId {
36    let id = HIR_ID_COUNTER.fetch_add(1, Ordering::Relaxed);
37    HirId(id)
38}
39
40/// Reset the global HIR ID counter to 0 (for testing isolation)
41pub fn reset_hir_id_counter() {
42    HIR_ID_COUNTER.store(0, Ordering::Relaxed);
43}
44
/// Options controlling how the IR builder walks the set of input files.
///
/// The default configuration builds files in parallel.
#[derive(Debug, Clone, Copy, Default)]
pub struct IrBuildOption {
    /// `true`: build files one at a time so that ordering is deterministic.
    /// `false` (the default): build files in parallel for better throughput.
    pub sequential: bool,
}

impl IrBuildOption {
    /// Return the default options (parallel processing).
    pub fn new() -> Self {
        Default::default()
    }

    /// Builder-style setter: return a copy of these options with `sequential` replaced.
    pub fn with_sequential(self, sequential: bool) -> Self {
        let mut options = self;
        options.sequential = sequential;
        options
    }
}
68
69/// IR builder that transforms parse trees into HIR nodes using a per-unit arena.
70struct HirBuilder<'unit, Language> {
71    /// Per-unit arena for allocating all HIR nodes during this build
72    arena: &'unit Arena<'unit>,
73    /// Optional file path for the File node
74    file_path: Option<String>,
75    /// Source file content bytes for text extraction
76    file_bytes: &'unit [u8],
77    /// Language-specific handler (used via PhantomData for compile-time only)
78    _language: PhantomData<Language>,
79}
80
81impl<'unit, Language: LanguageTrait> HirBuilder<'unit, Language> {
82    /// Create a new HIR builder for a single file using a per-unit arena.
83    fn new(
84        arena: &'unit Arena<'unit>,
85        file_path: Option<String>,
86        file_bytes: &'unit [u8],
87        _config: IrBuildOption,
88    ) -> Self {
89        Self {
90            arena,
91            file_path,
92            file_bytes,
93            _language: PhantomData,
94        }
95    }
96
97    /// Build HIR nodes from a parse tree root.
98    fn build(self, root: &dyn ParseNode) -> HirNode<'unit> {
99        // Root node has no field_id (no parent)
100        self.build_node(root, None, u16::MAX)
101    }
102
103    /// Recursively build a single HIR node and all descendants, allocating directly into arena.
104    /// `field_id` is passed from the parent's child collection (avoids O(n) lookup per node).
105    fn build_node(
106        &self,
107        node: &dyn ParseNode,
108        parent: Option<HirId>,
109        field_id: u16,
110    ) -> HirNode<'unit> {
111        let id = next_hir_id();
112        let kind_id = node.kind_id();
113        let kind = Language::hir_kind(kind_id);
114
115        // Skip collecting children for leaf node types (Text, Identifier)
116        // This provides a massive performance improvement for large array/object literals
117        let children = if matches!(kind, HirKind::Text | HirKind::Identifier) {
118            SmallVec::new()
119        } else {
120            self.collect_children(node, id)
121        };
122        let child_ids: SmallVec<[HirId; 4]> = children.iter().map(|n| n.id()).collect();
123        let base = self.make_base(id, parent, node, kind, child_ids, field_id);
124
125        let hir_node = match kind {
126            HirKind::File => {
127                let path = self.file_path.clone().unwrap_or_default();
128                let hir_file = HirFile::new(base, path);
129                let allocated = self.arena.alloc(hir_file);
130                HirNode::File(allocated)
131            }
132            HirKind::Text => {
133                let text = self.get_text(&base);
134                let hir_text = HirText::new(base, text);
135                let allocated = self.arena.alloc(hir_text);
136                HirNode::Text(allocated)
137            }
138            HirKind::Internal => {
139                let hir_internal = HirInternal::new(base);
140                let allocated = self.arena.alloc(hir_internal);
141                HirNode::Internal(allocated)
142            }
143            HirKind::Scope => {
144                // Find the first identifier child
145                let ident = children
146                    .iter()
147                    .map(|child| {
148                        if let HirNode::Ident(ident_node) = child {
149                            *ident_node
150                        } else {
151                            let text = self.get_text(&base);
152                            tracing::trace!("scope crate non-identifier ident '{}'", text);
153                            let hir_ident = HirIdent::new(base.clone(), text);
154                            self.arena.alloc(hir_ident)
155                        }
156                    })
157                    .next();
158                let hir_scope = HirScope::new(base, ident);
159                let allocated = self.arena.alloc(hir_scope);
160                HirNode::Scope(allocated)
161            }
162            HirKind::Identifier => {
163                let text = self.get_text(&base);
164                let hir_ident = HirIdent::new(base, text);
165                let allocated = self.arena.alloc(hir_ident);
166                HirNode::Ident(allocated)
167            }
168            _other => panic!("unsupported HIR kind for node {}", node.debug_info()),
169        };
170
171        // Allocate the HirNode wrapper with its ID for O(1) lookup
172        *self.arena.alloc_with_id(id.0, hir_node)
173    }
174
175    /// Collect all valid child nodes from a parse node.
176    /// Filters out test code (items with #[test] or #[cfg(test)] attributes).
177    /// Uses cursor-based iteration to get field_id during traversal (O(n) total instead of O(n²)).
178    fn collect_children(
179        &self,
180        node: &dyn ParseNode,
181        parent_id: HirId,
182    ) -> SmallVec<[HirNode<'unit>; 8]> {
183        let mut child_nodes = SmallVec::new();
184        let mut skip_next = false;
185
186        // Use efficient cursor-based collection that provides field_id during iteration
187        let children_with_fields = node.collect_children_with_field_ids();
188
189        for child_with_field in children_with_fields {
190            let child = child_with_field.node;
191            let field_id = child_with_field.field_id;
192
193            // Check if this is a test attribute that should cause the next item to be skipped
194            if Language::is_test_attribute(child.as_ref(), self.file_bytes) {
195                skip_next = true;
196                // Skip the attribute for cleaner HIR
197                continue;
198            }
199
200            // Skip items that follow test attributes
201            if skip_next {
202                skip_next = false;
203                continue;
204            }
205
206            let child_node = self.build_node(child.as_ref(), Some(parent_id), field_id);
207            child_nodes.push(child_node);
208        }
209        child_nodes
210    }
211
212    /// Construct the base metadata for a HIR node.
213    /// `field_id` is passed from parent's child collection (already looked up via cursor).
214    fn make_base(
215        &self,
216        id: HirId,
217        parent: Option<HirId>,
218        node: &dyn ParseNode,
219        kind: HirKind,
220        children: SmallVec<[HirId; 4]>,
221        field_id: u16,
222    ) -> HirBase {
223        let kind_id = node.kind_id();
224        let start_byte = node.start_byte();
225        let end_byte = node.end_byte();
226
227        HirBase {
228            id,
229            parent,
230            kind_id,
231            start_byte,
232            end_byte,
233            kind,
234            field_id,
235            children,
236        }
237    }
238
239    /// Extract text content from source for a text-type node.
240    /// Allocates the string in the arena to avoid heap allocation.
241    fn get_text(&self, base: &HirBase) -> &'unit str {
242        let start = base.start_byte;
243        let end = base.end_byte;
244        if end > start && end <= self.file_bytes.len() {
245            match std::str::from_utf8(&self.file_bytes[start..end]) {
246                Ok(text) => self.arena.alloc_str(text),
247                Err(_) => {
248                    let lossy = String::from_utf8_lossy(&self.file_bytes[start..end]);
249                    self.arena.alloc_str(&lossy)
250                }
251            }
252        } else {
253            ""
254        }
255    }
256}
257/// Build IR for a single file with language-specific handling.
258/// Build IR for a single file (inner implementation).
259/// This is public for use by fused build+collect in the resolver.
260pub fn build_llmcc_ir_inner<'unit, L: LanguageTrait>(
261    file_path: Option<String>,
262    file_bytes: &'unit [u8],
263    parse_tree: &'unit dyn ParseTree,
264    unit_arena: &'unit Arena<'unit>,
265    config: IrBuildOption,
266) -> Result<HirId, DynError> {
267    let root = parse_tree
268        .root_node()
269        .ok_or_else(|| "ParseTree does not provide a root node".to_string())?;
270
271    let builder = HirBuilder::<L>::new(unit_arena, file_path, file_bytes, config);
272    let root = builder.build(root.as_ref());
273    Ok(root.id())
274}
275
276struct BuildResult {
277    /// Index of this file in the compile context
278    index: usize,
279    /// HirId of the file's root node
280    file_root_id: HirId,
281}
282
283/// Build IR for all files in the compile context.
284pub fn build_llmcc_ir<'tcx, L: LanguageTrait>(
285    cc: &'tcx CompileCtxt<'tcx>,
286    config: IrBuildOption,
287) -> Result<(), DynError> {
288    let total_start = Instant::now();
289    reset_ir_build_counters();
290
291    let build_one = |index: usize| -> Result<BuildResult, DynError> {
292        let build_start = Instant::now();
293
294        let file_path = cc.file_path(index).map(|p| p.to_string());
295        let file_bytes = cc.files[index].content();
296
297        let parse_tree = cc
298            .get_parse_tree(index)
299            .ok_or_else(|| format!("No parse tree for unit {index}"))?;
300
301        let file_root_id =
302            build_llmcc_ir_inner::<L>(file_path, file_bytes, parse_tree, &cc.arena, config)?;
303
304        IR_BUILD_CPU_TIME_NS.fetch_add(build_start.elapsed().as_nanos() as u64, Ordering::Relaxed);
305
306        Ok(BuildResult {
307            index,
308            file_root_id,
309        })
310    };
311
312    let parallel_start = Instant::now();
313    let results: Vec<Result<BuildResult, DynError>> = if config.sequential {
314        (0..cc.files.len()).map(build_one).collect()
315    } else {
316        (0..cc.files.len()).into_par_iter().map(build_one).collect()
317    };
318    let parallel_time = parallel_start.elapsed();
319
320    let collect_start = Instant::now();
321    // Collect results (no sorting needed - DashMap provides O(1) lookup by ID)
322    let results: Vec<BuildResult> = results.into_iter().collect::<Result<Vec<_>, _>>()?;
323
324    // Register all file start IDs
325    for BuildResult {
326        index,
327        file_root_id,
328    } in results
329    {
330        cc.set_file_root_id(index, file_root_id);
331    }
332
333    // No sort needed: DashMap already provides O(1) lookup by ID
334    let collect_time = collect_start.elapsed();
335
336    let total_time = total_start.elapsed();
337    let build_cpu_ms = get_ir_build_cpu_time_ms();
338
339    tracing::info!(
340        "ir_build breakdown: parallel={:.2}ms (build_cpu={:.2}ms), collect={:.2}ms, total={:.2}ms",
341        parallel_time.as_secs_f64() * 1000.0,
342        build_cpu_ms,
343        collect_time.as_secs_f64() * 1000.0,
344        total_time.as_secs_f64() * 1000.0,
345    );
346
347    Ok(())
348}