1use std::collections::HashMap;
2use std::marker::PhantomData;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use rayon::prelude::*;
6use tree_sitter::Node;
7
8use crate::block::BlockKind;
9use crate::context::{CompileCtxt, ParentedNode};
10use crate::ir::{
11 Arena, HirBase, HirFile, HirId, HirIdent, HirInternal, HirKind, HirNode, HirScope, HirText,
12};
13use crate::lang_def::LanguageTrait;
14use crate::DynError;
15
/// Source of fresh `HirId` values.
///
/// Being a `static`, the counter is shared by every builder in the process;
/// it starts at zero and is only ever advanced with `fetch_add`.
static HIR_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
18
/// Options that control how much of the syntax tree is turned into IR.
///
/// The `Default` value has `compact` set to `false`.
#[derive(Clone, Copy, Debug, Default)]
pub struct IrBuildConfig {
    /// When `true`, the builder descends only into selected node kinds and
    /// drops error, extra, missing, unnamed, text, statement and call
    /// children while walking the tree.
    pub compact: bool,
}
23
24impl IrBuildConfig {
25 pub fn compact() -> Self {
26 Self { compact: true }
27 }
28}
29
30#[derive(Clone)]
31struct HirNodeSpec<'hir> {
32 base: HirBase<'hir>,
33 variant: HirNodeVariantSpec<'hir>,
34}
35
36#[derive(Clone)]
37enum HirNodeVariantSpec<'hir> {
38 File {
39 file_path: String,
40 },
41 Text {
42 text: String,
43 },
44 Internal,
45 Scope {
46 ident: Option<HirScopeIdentSpec<'hir>>,
47 },
48 Ident {
49 name: String,
50 },
51}
52
53#[derive(Clone)]
54struct HirScopeIdentSpec<'hir> {
55 base: HirBase<'hir>,
56 name: String,
57}
58
59struct HirBuilder<'a, Language> {
61 node_specs: HashMap<HirId, HirNodeSpec<'a>>,
62 file_path: Option<String>,
63 file_bytes: &'a [u8],
64 config: IrBuildConfig,
65 _language: PhantomData<Language>,
66}
67
68impl<'a, Language: LanguageTrait> HirBuilder<'a, Language> {
69 fn new(file_path: Option<String>, file_bytes: &'a [u8], config: IrBuildConfig) -> Self {
71 Self {
72 node_specs: HashMap::new(),
73 file_path,
74 file_bytes,
75 config,
76 _language: PhantomData,
77 }
78 }
79
80 fn reserve_hir_id(&self) -> HirId {
82 let id = HIR_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
83 HirId(id)
84 }
85
86 fn build(mut self, root: Node<'a>) -> (HirId, HashMap<HirId, HirNodeSpec<'a>>) {
87 let file_start_id = self.build_node(root, None);
88 (file_start_id, self.node_specs)
89 }
90
91 fn build_node(&mut self, node: Node<'a>, parent: Option<HirId>) -> HirId {
92 let hir_id = self.reserve_hir_id();
93 let kind_id = node.kind_id();
94 let kind = Language::hir_kind(kind_id);
95 let block_kind = Language::block_kind(kind_id);
96 let child_ids = if self.should_collect_children(kind, block_kind) {
97 self.collect_children(node, hir_id)
98 } else {
99 Vec::new()
100 };
101 let base = self.make_base(hir_id, parent, node, kind, child_ids);
102
103 let variant = match kind {
104 HirKind::File => {
105 let path = self.file_path.clone().unwrap_or_default();
106 HirNodeVariantSpec::File { file_path: path }
107 }
108 HirKind::Text => {
109 let text = self.extract_text(&base);
110 HirNodeVariantSpec::Text { text }
111 }
112 HirKind::Internal => HirNodeVariantSpec::Internal,
113 HirKind::Scope => {
114 let ident = self.extract_scope_ident(&base, node);
116 HirNodeVariantSpec::Scope { ident }
117 }
118 HirKind::Identifier => {
119 let text = self.extract_text(&base);
120 HirNodeVariantSpec::Ident { name: text }
121 }
122 other => panic!("unsupported HIR kind for node {:?}", (other, node)),
123 };
124
125 self.node_specs
126 .insert(hir_id, HirNodeSpec { base, variant });
127 hir_id
128 }
129
130 fn collect_children(&mut self, node: Node<'a>, parent_id: HirId) -> Vec<HirId> {
131 let mut cursor = node.walk();
132
133 if self.config.compact {
135 let kind = Language::hir_kind(node.kind_id());
136 if kind == HirKind::Text {
137 return Vec::new();
138 }
139 }
140
141 node.children(&mut cursor)
142 .filter_map(|child| {
143 if self.config.compact {
144 if child.is_error()
145 || child.is_extra()
146 || child.is_missing()
147 || !child.is_named()
148 {
149 return None;
150 }
151 let child_kind = Language::hir_kind(child.kind_id());
152 if child_kind == HirKind::Text {
153 return None;
154 }
155 let child_block_kind = Language::block_kind(child.kind_id());
156 if matches!(child_block_kind, BlockKind::Stmt | BlockKind::Call) {
157 return None;
158 }
159 }
161 Some(self.build_node(child, Some(parent_id)))
162 })
163 .collect()
164 }
165
166 fn should_collect_children(&self, kind: HirKind, block_kind: BlockKind) -> bool {
167 if !self.config.compact {
168 return true;
169 }
170
171 match kind {
172 HirKind::File => true,
173 HirKind::Scope => matches!(
174 block_kind,
175 BlockKind::Root
176 | BlockKind::Scope
177 | BlockKind::Class
178 | BlockKind::Enum
179 | BlockKind::Impl
180 | BlockKind::Func
181 | BlockKind::Const
182 ),
183 HirKind::Internal => matches!(
184 block_kind,
185 BlockKind::Scope | BlockKind::Field | BlockKind::Const | BlockKind::Undefined
186 ),
187 _ => false,
188 }
189 }
190
191 fn make_base(
192 &self,
193 hir_id: HirId,
194 parent: Option<HirId>,
195 node: Node<'a>,
196 kind: HirKind,
197 children: Vec<HirId>,
198 ) -> HirBase<'a> {
199 let field_id = Self::field_id_of(node).unwrap_or(u16::MAX);
200 HirBase {
201 hir_id,
202 parent,
203 node,
204 kind,
205 field_id,
206 children,
207 }
208 }
209
210 fn extract_text(&self, base: &HirBase<'a>) -> String {
211 let start = base.node.start_byte();
212 let end = base.node.end_byte();
213 if end > start && end <= self.file_bytes.len() {
214 match std::str::from_utf8(&self.file_bytes[start..end]) {
215 Ok(text) => text.to_owned(),
216 Err(_) => String::from_utf8_lossy(&self.file_bytes[start..end]).into_owned(),
217 }
218 } else {
219 String::new()
220 }
221 }
222
223 fn extract_scope_ident(
224 &self,
225 base: &HirBase<'a>,
226 node: Node<'a>,
227 ) -> Option<HirScopeIdentSpec<'a>> {
228 let name_node = node.child_by_field_name("name")?;
231
232 let hir_id = self.reserve_hir_id();
234 let ident_base = HirBase {
235 hir_id,
236 parent: Some(base.hir_id),
237 node: name_node,
238 kind: HirKind::Identifier,
239 field_id: u16::MAX,
240 children: Vec::new(),
241 };
242
243 let text = self.extract_text(&ident_base);
244 Some(HirScopeIdentSpec {
245 base: ident_base,
246 name: text,
247 })
248 }
249
250 fn field_id_of(node: Node<'_>) -> Option<u16> {
251 let parent = node.parent()?;
252 let mut cursor = parent.walk();
253
254 if !cursor.goto_first_child() {
255 return None;
256 }
257
258 loop {
259 if cursor.node().id() == node.id() {
260 return cursor.field_id().map(|id| id.get());
261 }
262 if !cursor.goto_next_sibling() {
263 break;
264 }
265 }
266
267 None
268 }
269}
270
271impl<'hir> HirNodeSpec<'hir> {
272 fn into_parented_node(self, arena: &'hir Arena<'hir>) -> ParentedNode<'hir> {
273 let HirNodeSpec { base, variant } = self;
274
275 let hir_node = match variant {
276 HirNodeVariantSpec::File { file_path } => {
277 let node = HirFile::new(base, file_path);
278 HirNode::File(arena.alloc(node))
279 }
280 HirNodeVariantSpec::Text { text } => {
281 let node = HirText::new(base, text);
282 HirNode::Text(arena.alloc(node))
283 }
284 HirNodeVariantSpec::Internal => {
285 let node = HirInternal::new(base);
286 HirNode::Internal(arena.alloc(node))
287 }
288 HirNodeVariantSpec::Scope { ident } => {
289 let ident_ref = ident.map(|spec| {
290 let HirScopeIdentSpec { base, name } = spec;
291 let ident_node = HirIdent::new(base, name);
292 arena.alloc(ident_node)
293 });
294 let node = HirScope::new(base, ident_ref);
295 HirNode::Scope(arena.alloc(node))
296 }
297 HirNodeVariantSpec::Ident { name } => {
298 let node = HirIdent::new(base, name);
299 HirNode::Ident(arena.alloc(node))
300 }
301 };
302
303 ParentedNode::new(hir_node)
304 }
305}
306
307fn build_llmcc_ir_inner<'a, L: LanguageTrait>(
308 file_path: Option<String>,
309 file_bytes: &'a [u8],
310 tree: &'a tree_sitter::Tree,
311 config: IrBuildConfig,
312) -> Result<(HirId, HashMap<HirId, HirNodeSpec<'a>>), DynError> {
313 let builder = HirBuilder::<L>::new(file_path, file_bytes, config);
314 let root = tree.root_node();
315 let result = builder.build(root);
316 Ok(result)
317}
318
319pub fn build_llmcc_ir<'a, L: LanguageTrait>(cc: &'a CompileCtxt<'a>) -> Result<(), DynError> {
321 build_llmcc_ir_with_config::<L>(cc, IrBuildConfig::default())
322}
323
324struct FileIrBuildResult<'hir> {
325 index: usize,
326 file_start_id: HirId,
327 node_specs: HashMap<HirId, HirNodeSpec<'hir>>,
328}
329
330pub fn build_llmcc_ir_with_config<'a, L: LanguageTrait>(
332 cc: &'a CompileCtxt<'a>,
333 config: IrBuildConfig,
334) -> Result<(), DynError> {
335 let results: Vec<Result<FileIrBuildResult<'a>, DynError>> = (0..cc.files.len())
336 .into_par_iter()
337 .map(|index| {
338 let unit = cc.compile_unit(index);
339 let file_path = unit.file_path().map(|p| p.to_string());
340 let file_bytes = unit.file().content();
341 let tree = unit.tree();
342
343 build_llmcc_ir_inner::<L>(file_path, file_bytes, tree, config).map(
344 |(file_start_id, node_specs)| FileIrBuildResult {
345 index,
346 file_start_id,
347 node_specs,
348 },
349 )
350 })
351 .collect();
352
353 let mut results: Vec<FileIrBuildResult<'a>> =
354 results.into_iter().collect::<Result<Vec<_>, _>>()?;
355
356 results.sort_by_key(|result| result.index);
357
358 for result in results {
359 let FileIrBuildResult {
360 index,
361 file_start_id,
362 node_specs,
363 } = result;
364
365 {
366 let mut hir_map = cc.hir_map.write().unwrap();
367 for (hir_id, spec) in node_specs {
368 let parented_node = spec.into_parented_node(&cc.arena);
369 hir_map.insert(hir_id, parented_node);
370 }
371 }
372
373 cc.set_file_start(index, file_start_id);
374 }
375
376 Ok(())
377}