1use super::string_interner::{StringId, StringInterner};
4use super::value_ref::ValueRef;
5use formualizer_parse::parser::{ExternalRefKind, TableSpecifier};
6use rustc_hash::FxHashMap;
7use std::collections::hash_map::DefaultHasher;
8use std::fmt;
9use std::hash::{Hash, Hasher};
10
11#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
13pub struct AstNodeId(u32);
14
15impl AstNodeId {
16 pub fn as_u32(self) -> u32 {
17 self.0
18 }
19
20 pub(crate) const fn from_u32(raw: u32) -> Self {
21 Self(raw)
22 }
23}
24
25impl fmt::Display for AstNodeId {
26 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
27 write!(f, "AstNode({})", self.0)
28 }
29}
30
31#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
32pub struct TableSpecId(u32);
33
34impl TableSpecId {
35 pub fn as_u32(self) -> u32 {
36 self.0
37 }
38}
39
40#[derive(Debug, Clone, PartialEq, Eq, Hash)]
42pub enum AstNodeData {
43 Literal(ValueRef),
45
46 Reference {
48 original_id: StringId, ref_type: CompactRefType, },
51
52 UnaryOp { op_id: StringId, expr_id: AstNodeId },
54
55 BinaryOp {
57 op_id: StringId,
58 left_id: AstNodeId,
59 right_id: AstNodeId,
60 },
61
62 Function {
64 name_id: StringId,
65 args_offset: u32, args_count: u16, },
68
69 Array {
71 rows: u16,
72 cols: u16,
73 elements_offset: u32, },
75}
76
77#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
79pub enum SheetKey {
80 Id(u16),
81 Name(StringId),
82}
83
84#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
86pub enum CompactRefType {
87 Cell {
88 sheet: Option<SheetKey>,
89 row: u32,
90 col: u32,
91 row_abs: bool,
92 col_abs: bool,
93 },
94 Range {
95 sheet: Option<SheetKey>,
96 start_row: u32,
97 start_col: u32,
98 end_row: u32,
99 end_col: u32,
100 start_row_abs: bool,
101 start_col_abs: bool,
102 end_row_abs: bool,
103 end_col_abs: bool,
104 },
105 External {
106 raw_id: StringId,
107 book_id: StringId,
108 sheet_id: StringId,
109 kind: ExternalRefKind,
110 },
111 NamedRange(StringId),
112 Table {
113 name_id: StringId,
114 specifier_id: Option<TableSpecId>,
115 },
116 Cell3D {
118 sheet_first: StringId,
119 sheet_last: StringId,
120 row: u32,
121 col: u32,
122 row_abs: bool,
123 col_abs: bool,
124 },
125 Range3D {
127 sheet_first: StringId,
128 sheet_last: StringId,
129 start_row: u32,
130 start_col: u32,
131 end_row: u32,
132 end_col: u32,
133 start_row_abs: bool,
134 start_col_abs: bool,
135 end_row_abs: bool,
136 end_col_abs: bool,
137 },
138}
139
140#[derive(Debug, Clone, PartialEq, Eq, Hash)]
146pub(crate) struct AstNodeEntry {
147 pub(crate) data: AstNodeData,
148 pub(crate) meta: AstNodeMetadata,
149}
150
151#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
153pub(crate) struct AstNodeMetadata {
154 pub(crate) canonical_hash: u64,
155 pub(crate) labels: CanonicalLabels,
156}
157
158#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
160pub(crate) struct CanonicalLabels {
161 pub(crate) flags: u64,
162 pub(crate) rejects: u64,
163}
164
165#[allow(dead_code)]
166impl CanonicalLabels {
167 pub(crate) const FLAG_RELATIVE_ONLY: u64 = 1 << 0;
168 pub(crate) const FLAG_ABSOLUTE_ONLY: u64 = 1 << 1;
169 pub(crate) const FLAG_MIXED_ANCHORS: u64 = 1 << 2;
170 pub(crate) const FLAG_VOLATILE: u64 = 1 << 3;
171 pub(crate) const FLAG_DYNAMIC: u64 = 1 << 4;
172 pub(crate) const FLAG_CONTAINS_STRUCTURED_REF: u64 = 1 << 5;
173 pub(crate) const FLAG_NEEDS_PLACEMENT_REWRITE: u64 = 1 << 6;
174 pub(crate) const FLAG_CONTAINS_NAME: u64 = 1 << 7;
175 pub(crate) const FLAG_CONTAINS_TABLE: u64 = 1 << 8;
176 pub(crate) const FLAG_CONTAINS_RANGE: u64 = 1 << 9;
177 pub(crate) const FLAG_CONTAINS_ARRAY: u64 = 1 << 10;
178 pub(crate) const FLAG_CONTAINS_LET_LAMBDA: u64 = 1 << 11;
179 pub(crate) const FLAG_CONTAINS_FUNCTION: u64 = 1 << 12;
180 pub(crate) const FLAG_EXPLICIT_SHEET: u64 = 1 << 13;
181 pub(crate) const FLAG_CURRENT_SHEET: u64 = 1 << 14;
182
183 pub(crate) const REJECT_INVALID_PLACEMENT_ANCHOR: u64 = 1 << 0;
186 pub(crate) const REJECT_DYNAMIC_REFERENCE: u64 = 1 << 1;
187 pub(crate) const REJECT_UNKNOWN_OR_CUSTOM_FUNCTION: u64 = 1 << 2;
188 pub(crate) const REJECT_LOCAL_ENVIRONMENT: u64 = 1 << 3;
189 pub(crate) const REJECT_PARSER_VOLATILE_FLAG: u64 = 1 << 4;
190 pub(crate) const REJECT_VOLATILE_FUNCTION: u64 = 1 << 5;
191 pub(crate) const REJECT_REFERENCE_RETURNING_FUNCTION: u64 = 1 << 6;
192 pub(crate) const REJECT_ARRAY_OR_SPILL_FUNCTION: u64 = 1 << 7;
193 pub(crate) const REJECT_ARRAY_LITERAL: u64 = 1 << 8;
194 pub(crate) const REJECT_SPILL_REFERENCE: u64 = 1 << 9;
195 pub(crate) const REJECT_SPILL_RESULT_REGION_OPERATOR: u64 = 1 << 10;
196 pub(crate) const REJECT_IMPLICIT_INTERSECTION_OPERATOR: u64 = 1 << 11;
197 pub(crate) const REJECT_CALL_EXPRESSION: u64 = 1 << 12;
198 pub(crate) const REJECT_STRUCTURED_REFERENCE: u64 = 1 << 14;
201 pub(crate) const REJECT_STRUCTURED_REFERENCE_CURRENT_ROW: u64 = 1 << 15;
202 pub(crate) const REJECT_THREE_D_REFERENCE: u64 = 1 << 16;
203 pub(crate) const REJECT_EXTERNAL_REFERENCE: u64 = 1 << 17;
204 pub(crate) const REJECT_OPEN_RANGE_REFERENCE: u64 = 1 << 18;
205 pub(crate) const REJECT_WHOLE_AXIS_REFERENCE: u64 = 1 << 19;
206 pub(crate) const REJECT_UNSUPPORTED_REFERENCE: u64 = 1 << 20;
207
208 pub(crate) fn has_flag(self, flag: u64) -> bool {
209 self.flags & flag != 0
210 }
211
212 pub(crate) fn has_reject(self, reject: u64) -> bool {
213 self.rejects & reject != 0
214 }
215}
216
217pub struct AstArena {
219 nodes: Vec<AstNodeEntry>,
221
222 dedup_map: FxHashMap<u64, AstNodeId>,
224
225 function_args: Vec<AstNodeId>,
227
228 array_elements: Vec<AstNodeId>,
230
231 strings: StringInterner,
233
234 table_specs: Vec<TableSpecifier>,
236 table_spec_dedup: FxHashMap<u64, TableSpecId>,
237
238 dedup_hits: usize,
240}
241
242impl AstArena {
243 pub fn new() -> Self {
244 Self {
245 nodes: Vec::new(),
246 dedup_map: FxHashMap::default(),
247 function_args: Vec::new(),
248 array_elements: Vec::new(),
249 strings: StringInterner::new(),
250 table_specs: Vec::new(),
251 table_spec_dedup: FxHashMap::default(),
252 dedup_hits: 0,
253 }
254 }
255
256 pub fn with_capacity(node_cap: usize) -> Self {
257 Self {
258 nodes: Vec::with_capacity(node_cap),
259 dedup_map: FxHashMap::with_capacity_and_hasher(node_cap, Default::default()),
260 function_args: Vec::with_capacity(node_cap * 2), array_elements: Vec::with_capacity(node_cap),
262 strings: StringInterner::with_capacity(node_cap / 10),
263 table_specs: Vec::new(),
264 table_spec_dedup: FxHashMap::default(),
265 dedup_hits: 0,
266 }
267 }
268
269 pub fn insert(&mut self, node: AstNodeData) -> AstNodeId {
274 self.insert_entry(node, AstNodeMetadata::default())
275 }
276
277 #[allow(dead_code)]
284 pub(crate) fn insert_with_meta(
285 &mut self,
286 node: AstNodeData,
287 meta: AstNodeMetadata,
288 ) -> AstNodeId {
289 self.insert_entry(node, meta)
290 }
291
292 fn insert_entry(&mut self, node: AstNodeData, meta: AstNodeMetadata) -> AstNodeId {
293 let hash = self.hash_node(&node);
295
296 if let Some(&id) = self.dedup_map.get(&hash) {
298 if self.nodes[id.0 as usize].data == node {
300 self.dedup_hits += 1;
301 return id;
302 }
303 }
304
305 let id = AstNodeId(self.nodes.len() as u32);
307 self.nodes.push(AstNodeEntry { data: node, meta });
308 self.dedup_map.insert(hash, id);
309 id
310 }
311
312 pub fn insert_literal(&mut self, value: ValueRef) -> AstNodeId {
314 self.insert(AstNodeData::Literal(value))
315 }
316
317 pub fn insert_reference(&mut self, original: &str, ref_type: CompactRefType) -> AstNodeId {
319 let original_id = self.strings.intern(original);
320 self.insert(AstNodeData::Reference {
321 original_id,
322 ref_type,
323 })
324 }
325
326 pub fn insert_unary_op(&mut self, op: &str, expr: AstNodeId) -> AstNodeId {
328 let op_id = self.strings.intern(op);
329 self.insert(AstNodeData::UnaryOp {
330 op_id,
331 expr_id: expr,
332 })
333 }
334
335 pub fn insert_binary_op(&mut self, op: &str, left: AstNodeId, right: AstNodeId) -> AstNodeId {
337 let op_id = self.strings.intern(op);
338 self.insert(AstNodeData::BinaryOp {
339 op_id,
340 left_id: left,
341 right_id: right,
342 })
343 }
344
345 pub fn insert_function(&mut self, name: &str, args: Vec<AstNodeId>) -> AstNodeId {
347 let name_id = self.strings.intern(name);
348 let args_offset = self.function_args.len() as u32;
349 let args_count = args.len() as u16;
350
351 self.function_args.extend(args);
352
353 self.insert(AstNodeData::Function {
354 name_id,
355 args_offset,
356 args_count,
357 })
358 }
359
360 pub fn insert_array(&mut self, rows: u16, cols: u16, elements: Vec<AstNodeId>) -> AstNodeId {
362 assert_eq!(
363 elements.len(),
364 (rows * cols) as usize,
365 "Array dimensions don't match element count"
366 );
367
368 let elements_offset = self.array_elements.len() as u32;
369 self.array_elements.extend(elements);
370
371 self.insert(AstNodeData::Array {
372 rows,
373 cols,
374 elements_offset,
375 })
376 }
377
378 pub fn get(&self, id: AstNodeId) -> Option<&AstNodeData> {
380 self.nodes.get(id.0 as usize).map(|entry| &entry.data)
381 }
382
383 #[allow(dead_code)]
385 pub(crate) fn entry(&self, id: AstNodeId) -> Option<&AstNodeEntry> {
386 self.nodes.get(id.0 as usize)
387 }
388
389 #[allow(dead_code)]
391 pub(crate) fn metadata(&self, id: AstNodeId) -> Option<AstNodeMetadata> {
392 self.entry(id).map(|entry| entry.meta)
393 }
394
395 pub fn get_function_args(&self, id: AstNodeId) -> Option<&[AstNodeId]> {
397 match self.get(id)? {
398 AstNodeData::Function {
399 args_offset,
400 args_count,
401 ..
402 } => {
403 let start = *args_offset as usize;
404 let end = start + *args_count as usize;
405 Some(&self.function_args[start..end])
406 }
407 _ => None,
408 }
409 }
410
411 pub fn get_array_elements(&self, id: AstNodeId) -> Option<&[AstNodeId]> {
413 match self.get(id)? {
414 AstNodeData::Array {
415 rows,
416 cols,
417 elements_offset,
418 } => {
419 let start = *elements_offset as usize;
420 let count = (*rows * *cols) as usize;
421 let end = start + count;
422 Some(&self.array_elements[start..end])
423 }
424 _ => None,
425 }
426 }
427
428 pub fn get_array_elements_info(&self, id: AstNodeId) -> Option<(u16, u16, &[AstNodeId])> {
429 match self.get(id)? {
430 AstNodeData::Array { rows, cols, .. } => {
431 let elements = self.get_array_elements(id)?;
432 Some((*rows, *cols, elements))
433 }
434 _ => None,
435 }
436 }
437
438 pub fn resolve_string(&self, id: StringId) -> &str {
440 self.strings.resolve(id)
441 }
442
443 pub fn strings(&self) -> &StringInterner {
445 &self.strings
446 }
447
448 pub fn strings_mut(&mut self) -> &mut StringInterner {
450 &mut self.strings
451 }
452
453 pub fn intern_table_specifier(&mut self, specifier: &TableSpecifier) -> TableSpecId {
454 let hash = {
455 let mut hasher = DefaultHasher::new();
456 specifier.hash(&mut hasher);
457 hasher.finish()
458 };
459
460 if let Some(&id) = self.table_spec_dedup.get(&hash)
461 && self
462 .table_specs
463 .get(id.0 as usize)
464 .is_some_and(|existing| existing == specifier)
465 {
466 return id;
467 }
468
469 let id = TableSpecId(self.table_specs.len() as u32);
470 self.table_specs.push(specifier.clone());
471 self.table_spec_dedup.insert(hash, id);
472 id
473 }
474
475 pub fn resolve_table_specifier(&self, id: TableSpecId) -> Option<&TableSpecifier> {
476 self.table_specs.get(id.0 as usize)
477 }
478
479 fn hash_node(&self, node: &AstNodeData) -> u64 {
481 let mut hasher = DefaultHasher::new();
482 node.hash(&mut hasher);
483 hasher.finish()
484 }
485
486 pub fn stats(&self) -> AstArenaStats {
488 AstArenaStats {
489 node_count: self.nodes.len(),
490 dedup_hits: self.dedup_hits,
491 string_count: self.strings.len(),
492 table_spec_count: self.table_specs.len(),
493 total_args: self.function_args.len(),
494 total_array_elements: self.array_elements.len(),
495 }
496 }
497
498 pub fn memory_usage(&self) -> usize {
500 self.nodes.capacity() * std::mem::size_of::<AstNodeEntry>()
501 + self.dedup_map.capacity() * (8 + 4) + self.function_args.capacity() * 4
503 + self.array_elements.capacity() * 4
504 + self.strings.memory_usage()
505 + self.table_specs.capacity() * std::mem::size_of::<TableSpecifier>()
506 + self.table_spec_dedup.capacity() * (8 + 4)
507 }
508
509 pub fn clear(&mut self) {
511 self.nodes.clear();
512 self.dedup_map.clear();
513 self.function_args.clear();
514 self.array_elements.clear();
515 self.strings.clear();
516 self.table_specs.clear();
517 self.table_spec_dedup.clear();
518 self.dedup_hits = 0;
519 }
520}
521
522impl Default for AstArena {
523 fn default() -> Self {
524 Self::new()
525 }
526}
527
528impl fmt::Debug for AstArena {
529 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
530 f.debug_struct("AstArena")
531 .field("nodes", &self.nodes.len())
532 .field("dedup_hits", &self.dedup_hits)
533 .field("strings", &self.strings.len())
534 .finish()
535 }
536}
537
538#[derive(Debug, Clone)]
540pub struct AstArenaStats {
541 pub node_count: usize,
542 pub dedup_hits: usize,
543 pub string_count: usize,
544 pub table_spec_count: usize,
545 pub total_args: usize,
546 pub total_array_elements: usize,
547}
548
549#[cfg(test)]
550mod tests {
551 use super::*;
552
553 #[test]
554 fn test_ast_arena_literal() {
555 let mut arena = AstArena::new();
556
557 let lit1 = arena.insert_literal(ValueRef::small_int(42).unwrap());
558 let lit2 = arena.insert_literal(ValueRef::boolean(true));
559
560 assert_ne!(lit1, lit2);
561
562 match arena.get(lit1) {
563 Some(AstNodeData::Literal(v)) => {
564 assert_eq!(v.as_small_int(), Some(42));
565 }
566 _ => panic!("Expected literal node"),
567 }
568 }
569
570 #[test]
571 fn test_ast_arena_deduplication() {
572 let mut arena = AstArena::new();
573
574 let lit1 = arena.insert_literal(ValueRef::small_int(42).unwrap());
576 let lit2 = arena.insert_literal(ValueRef::small_int(42).unwrap());
577
578 assert_eq!(lit1, lit2); assert_eq!(arena.stats().dedup_hits, 1);
580 }
581
582 #[test]
583 fn test_ast_arena_binary_op() {
584 let mut arena = AstArena::new();
585
586 let left = arena.insert_literal(ValueRef::small_int(1).unwrap());
587 let right = arena.insert_literal(ValueRef::small_int(2).unwrap());
588 let add = arena.insert_binary_op("+", left, right);
589
590 match arena.get(add) {
591 Some(AstNodeData::BinaryOp {
592 op_id,
593 left_id,
594 right_id,
595 }) => {
596 assert_eq!(arena.resolve_string(*op_id), "+");
597 assert_eq!(*left_id, left);
598 assert_eq!(*right_id, right);
599 }
600 _ => panic!("Expected binary op node"),
601 }
602 }
603
604 #[test]
605 fn test_ast_arena_function() {
606 let mut arena = AstArena::new();
607
608 let arg1 = arena.insert_literal(ValueRef::small_int(10).unwrap());
609 let arg2 = arena.insert_literal(ValueRef::small_int(20).unwrap());
610 let arg3 = arena.insert_literal(ValueRef::small_int(30).unwrap());
611
612 let func = arena.insert_function("SUM", vec![arg1, arg2, arg3]);
613
614 match arena.get(func) {
615 Some(AstNodeData::Function {
616 name_id,
617 args_count,
618 ..
619 }) => {
620 assert_eq!(arena.resolve_string(*name_id), "SUM");
621 assert_eq!(*args_count, 3);
622 }
623 _ => panic!("Expected function node"),
624 }
625
626 let args = arena.get_function_args(func).unwrap();
627 assert_eq!(args, &[arg1, arg2, arg3]);
628 }
629
630 #[test]
631 fn test_ast_arena_structural_sharing() {
632 let mut arena = AstArena::new();
633
634 let a1_ref = arena.insert_reference(
636 "A1",
637 CompactRefType::Cell {
638 sheet: None,
639 row: 1,
640 col: 1,
641 row_abs: false,
642 col_abs: false,
643 },
644 );
645
646 let one = arena.insert_literal(ValueRef::small_int(1).unwrap());
648 let expr1 = arena.insert_binary_op("+", a1_ref, one);
649
650 let two = arena.insert_literal(ValueRef::small_int(2).unwrap());
652 let expr2 = arena.insert_binary_op("*", a1_ref, two);
653
654 assert_eq!(arena.stats().node_count, 5); let a1_ref2 = arena.insert_reference(
659 "A1",
660 CompactRefType::Cell {
661 sheet: None,
662 row: 1,
663 col: 1,
664 row_abs: false,
665 col_abs: false,
666 },
667 );
668 assert_eq!(a1_ref, a1_ref2);
669 }
670
671 #[test]
672 fn test_ast_arena_array() {
673 let mut arena = AstArena::new();
674
675 let elements = vec![
676 arena.insert_literal(ValueRef::small_int(1).unwrap()),
677 arena.insert_literal(ValueRef::small_int(2).unwrap()),
678 arena.insert_literal(ValueRef::small_int(3).unwrap()),
679 arena.insert_literal(ValueRef::small_int(4).unwrap()),
680 ];
681
682 let array = arena.insert_array(2, 2, elements.clone());
683
684 match arena.get(array) {
685 Some(AstNodeData::Array { rows, cols, .. }) => {
686 assert_eq!(*rows, 2);
687 assert_eq!(*cols, 2);
688 }
689 _ => panic!("Expected array node"),
690 }
691
692 let stored_elements = arena.get_array_elements(array).unwrap();
693 assert_eq!(stored_elements, &elements[..]);
694 }
695
696 #[test]
697 fn test_ast_arena_complex_expression() {
698 let mut arena = AstArena::new();
699
700 let range = arena.insert_reference(
704 "A1:A10",
705 CompactRefType::Range {
706 sheet: None,
707 start_row: 1,
708 start_col: 1,
709 end_row: 10,
710 end_col: 1,
711 start_row_abs: false,
712 start_col_abs: false,
713 end_row_abs: false,
714 end_col_abs: false,
715 },
716 );
717
718 let sum = arena.insert_function("SUM", vec![range]);
720
721 let b1 = arena.insert_reference(
723 "B1",
724 CompactRefType::Cell {
725 sheet: None,
726 row: 1,
727 col: 2,
728 row_abs: false,
729 col_abs: false,
730 },
731 );
732
733 let zero = arena.insert_literal(ValueRef::small_int(0).unwrap());
735
736 let condition = arena.insert_binary_op(">", b1, zero);
738
739 let c1 = arena.insert_reference(
741 "C1",
742 CompactRefType::Cell {
743 sheet: None,
744 row: 1,
745 col: 3,
746 row_abs: false,
747 col_abs: false,
748 },
749 );
750 let d1 = arena.insert_reference(
751 "D1",
752 CompactRefType::Cell {
753 sheet: None,
754 row: 1,
755 col: 4,
756 row_abs: false,
757 col_abs: false,
758 },
759 );
760
761 let if_expr = arena.insert_function("IF", vec![condition, c1, d1]);
763
764 let final_expr = arena.insert_binary_op("+", sum, if_expr);
766
767 assert!(arena.get(final_expr).is_some());
769 assert_eq!(arena.stats().node_count, 9); }
774
775 #[test]
776 fn test_ast_arena_string_deduplication() {
777 let mut arena = AstArena::new();
778
779 let one = arena.insert_literal(ValueRef::small_int(1).unwrap());
781 let two = arena.insert_literal(ValueRef::small_int(2).unwrap());
782 let three = arena.insert_literal(ValueRef::small_int(3).unwrap());
783
784 let add1 = arena.insert_binary_op("+", one, two);
785 let add2 = arena.insert_binary_op("+", two, three);
786 let add3 = arena.insert_binary_op("+", one, three);
787
788 assert_eq!(arena.strings().len(), 1);
790 }
791
792 #[test]
793 fn test_ast_arena_clear() {
794 let mut arena = AstArena::new();
795
796 arena.insert_literal(ValueRef::small_int(1).unwrap());
797 arena.insert_literal(ValueRef::small_int(2).unwrap());
798 let left = arena.insert_literal(ValueRef::small_int(3).unwrap());
799 let right = arena.insert_literal(ValueRef::small_int(4).unwrap());
800 arena.insert_binary_op("+", left, right);
801
802 assert_eq!(arena.stats().node_count, 5);
803
804 arena.clear();
805
806 assert_eq!(arena.stats().node_count, 0);
807 assert_eq!(arena.strings().len(), 0);
808 }
809}