1use crate::error::{Error, Result};
10use crate::parser::parse_entity;
11use crate::schema_gen::{AttributeValue, DecodedEntity};
12use rustc_hash::FxHashMap;
13use std::sync::Arc;
14
/// Maps an entity id to the `(start, end)` byte offsets of its definition in
/// the source text. `start` is the offset of the leading `#`; `end` is
/// exclusive, so `&content[start..end]` is the whole statement.
pub type EntityIndex = FxHashMap<u32, (usize, usize)>;
17
18#[inline]
21pub fn build_entity_index(content: &str) -> EntityIndex {
22 let bytes = content.as_bytes();
23 let len = bytes.len();
24
25 let estimated_entities = len / 50;
27 let mut index = FxHashMap::with_capacity_and_hasher(estimated_entities, Default::default());
28
29 let mut pos = 0;
30
31 while pos < len {
32 let remaining = &bytes[pos..];
34 let hash_offset = match memchr::memchr(b'#', remaining) {
35 Some(offset) => offset,
36 None => break,
37 };
38
39 let start = pos + hash_offset;
40 pos = start + 1;
41
42 let id_start = pos;
44 while pos < len && bytes[pos].is_ascii_digit() {
45 pos += 1;
46 }
47 let id_end = pos;
48
49 while pos < len && bytes[pos].is_ascii_whitespace() {
51 pos += 1;
52 }
53
54 if id_end > id_start && pos < len && bytes[pos] == b'=' {
55 let id = parse_u32_inline(bytes, id_start, id_end);
57
58 let entity_content = &bytes[pos..];
60 if let Some(semicolon_offset) = memchr::memchr(b';', entity_content) {
61 pos += semicolon_offset + 1; index.insert(id, (start, pos));
63 } else {
64 break; }
66 }
67 }
68
69 index
70}
71
/// Interprets `bytes[start..end]` as ASCII decimal digits and returns the value.
///
/// No validation is done: each byte contributes `byte - b'0'` (wrapping), and
/// the accumulator wraps on `u32` overflow. An empty range yields `0`.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range within `bytes`.
#[inline]
fn parse_u32_inline(bytes: &[u8], start: usize, end: usize) -> u32 {
    bytes[start..end].iter().fold(0u32, |value, &byte| {
        value
            .wrapping_mul(10)
            .wrapping_add(u32::from(byte.wrapping_sub(b'0')))
    })
}
82
/// Decodes entities out of borrowed source text, caching what it has decoded.
pub struct EntityDecoder<'a> {
    /// Source text that all entity byte spans index into.
    content: &'a str,
    /// Decoded entities keyed by entity id; populated by `decode_at`.
    cache: FxHashMap<u32, Arc<DecodedEntity>>,
    /// Entity id -> byte span index. `None` until supplied by a constructor
    /// or built on the first lookup that needs it.
    entity_index: Option<Arc<EntityIndex>>,
    /// Parsed point coordinates keyed by entity id; populated by
    /// `get_polyloop_coords_cached`.
    point_cache: FxHashMap<u32, (f64, f64, f64)>,
}
97
98impl<'a> EntityDecoder<'a> {
99 pub fn new(content: &'a str) -> Self {
101 Self {
102 content,
103 cache: FxHashMap::default(),
104 entity_index: None,
105 point_cache: FxHashMap::default(),
106 }
107 }
108
109 pub fn with_index(content: &'a str, index: EntityIndex) -> Self {
111 Self {
112 content,
113 cache: FxHashMap::default(),
114 entity_index: Some(Arc::new(index)),
115 point_cache: FxHashMap::default(),
116 }
117 }
118
119 pub fn with_arc_index(content: &'a str, index: Arc<EntityIndex>) -> Self {
121 Self {
122 content,
123 cache: FxHashMap::default(),
124 entity_index: Some(index),
125 point_cache: FxHashMap::default(),
126 }
127 }
128
129 fn build_index(&mut self) {
132 if self.entity_index.is_some() {
133 return; }
135 self.entity_index = Some(Arc::new(build_entity_index(self.content)));
136 }
137
138 #[inline]
141 pub fn decode_at(&mut self, start: usize, end: usize) -> Result<DecodedEntity> {
142 let line = &self.content[start..end];
143 let (id, ifc_type, tokens) = parse_entity(line).map_err(|e| {
144 Error::parse(
146 0,
147 format!(
148 "Failed to parse entity: {:?}, input: {:?}",
149 e,
150 &line[..line.len().min(100)]
151 ),
152 )
153 })?;
154
155 if let Some(entity_arc) = self.cache.get(&id) {
157 return Ok(entity_arc.as_ref().clone());
158 }
159
160 let attributes = tokens
162 .iter()
163 .map(|token| AttributeValue::from_token(token))
164 .collect();
165
166 let entity = DecodedEntity::new(id, ifc_type, attributes);
167 self.cache.insert(id, Arc::new(entity.clone()));
168 Ok(entity)
169 }
170
171 #[inline]
174 pub fn decode_at_with_id(&mut self, id: u32, start: usize, end: usize) -> Result<DecodedEntity> {
175 if let Some(entity_arc) = self.cache.get(&id) {
177 return Ok(entity_arc.as_ref().clone());
178 }
179
180 self.decode_at(start, end)
182 }
183
184 #[inline]
186 pub fn decode_by_id(&mut self, entity_id: u32) -> Result<DecodedEntity> {
187 if let Some(entity_arc) = self.cache.get(&entity_id) {
189 return Ok(entity_arc.as_ref().clone());
190 }
191
192 self.build_index();
194
195 let (start, end) = self
197 .entity_index
198 .as_ref()
199 .and_then(|idx| idx.get(&entity_id).copied())
200 .ok_or_else(|| Error::parse(0, format!("Entity #{} not found", entity_id)))?;
201
202 self.decode_at(start, end)
203 }
204
205 #[inline]
208 pub fn resolve_ref(&mut self, attr: &AttributeValue) -> Result<Option<DecodedEntity>> {
209 match attr.as_entity_ref() {
210 Some(id) => Ok(Some(self.decode_by_id(id)?)),
211 None => Ok(None),
212 }
213 }
214
215 pub fn resolve_ref_list(&mut self, attr: &AttributeValue) -> Result<Vec<DecodedEntity>> {
217 let list = attr
218 .as_list()
219 .ok_or_else(|| Error::parse(0, "Expected list".to_string()))?;
220
221 let mut entities = Vec::with_capacity(list.len());
222 for item in list {
223 if let Some(id) = item.as_entity_ref() {
224 entities.push(self.decode_by_id(id)?);
225 }
226 }
227 Ok(entities)
228 }
229
230 pub fn get_cached(&self, entity_id: u32) -> Option<DecodedEntity> {
232 self.cache.get(&entity_id).map(|arc| arc.as_ref().clone())
233 }
234
    /// Reserves room in the entity cache for at least `additional` more
    /// entries.
    pub fn reserve_cache(&mut self, additional: usize) {
        self.cache.reserve(additional);
    }
243
    /// Empties both the decoded-entity cache and the point cache. The entity
    /// index is left untouched.
    pub fn clear_cache(&mut self) {
        self.cache.clear();
        self.point_cache.clear();
    }
249
    /// Empties only the point cache; decoded entities stay cached.
    pub fn clear_point_cache(&mut self) {
        self.point_cache.clear();
    }
255
    /// Returns the number of decoded entities currently cached (the point
    /// cache is not counted).
    pub fn cache_size(&self) -> usize {
        self.cache.len()
    }
260
261 #[inline]
264 pub fn get_raw_bytes(&mut self, entity_id: u32) -> Option<&'a [u8]> {
265 self.build_index();
266 let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
267 Some(&self.content.as_bytes()[start..end])
268 }
269
270 #[inline]
272 pub fn get_raw_content(&mut self, entity_id: u32) -> Option<&'a str> {
273 self.build_index();
274 let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
275 Some(&self.content[start..end])
276 }
277
278 #[inline]
282 pub fn get_first_entity_ref_fast(&mut self, entity_id: u32) -> Option<u32> {
283 let bytes = self.get_raw_bytes(entity_id)?;
284 let len = bytes.len();
285 let mut i = 0;
286
287 while i < len && bytes[i] != b'(' {
289 i += 1;
290 }
291 if i >= len {
292 return None;
293 }
294 i += 1; while i < len {
298 while i < len
300 && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r')
301 {
302 i += 1;
303 }
304
305 if i >= len {
306 return None;
307 }
308
309 if bytes[i] == b'#' {
310 i += 1;
311 let start = i;
312 while i < len && bytes[i].is_ascii_digit() {
313 i += 1;
314 }
315 if i > start {
316 let mut id = 0u32;
317 for &b in &bytes[start..i] {
318 id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
319 }
320 return Some(id);
321 }
322 }
323 i += 1;
324 }
325
326 None
327 }
328
329 #[inline]
333 pub fn get_entity_ref_list_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
334 let bytes = self.get_raw_bytes(entity_id)?;
335
336 let mut i = 0;
338 let len = bytes.len();
339
340 while i < len && bytes[i] != b'(' {
342 i += 1;
343 }
344 if i >= len {
345 return None;
346 }
347 i += 1; while i < len && bytes[i] != b'(' {
351 i += 1;
352 }
353 if i >= len {
354 return None;
355 }
356 i += 1; let mut ids = Vec::with_capacity(32);
360
361 while i < len {
362 while i < len
364 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
365 {
366 i += 1;
367 }
368
369 if i >= len || bytes[i] == b')' {
370 break;
371 }
372
373 if bytes[i] == b'#' {
375 i += 1;
376 let start = i;
377 while i < len && bytes[i].is_ascii_digit() {
378 i += 1;
379 }
380 if i > start {
381 let mut id = 0u32;
383 for &b in &bytes[start..i] {
384 id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
385 }
386 ids.push(id);
387 }
388 } else {
389 i += 1; }
391 }
392
393 if ids.is_empty() {
394 None
395 } else {
396 Some(ids)
397 }
398 }
399
400 #[inline]
404 pub fn get_polyloop_point_ids_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
405 let bytes = self.get_raw_bytes(entity_id)?;
406
407 let mut i = 0;
409 let len = bytes.len();
410
411 while i < len && bytes[i] != b'(' {
413 i += 1;
414 }
415 if i >= len {
416 return None;
417 }
418 i += 1; while i < len && bytes[i] != b'(' {
422 i += 1;
423 }
424 if i >= len {
425 return None;
426 }
427 i += 1; let mut point_ids = Vec::with_capacity(8); while i < len {
433 while i < len
435 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
436 {
437 i += 1;
438 }
439
440 if i >= len || bytes[i] == b')' {
441 break;
442 }
443
444 if bytes[i] == b'#' {
446 i += 1;
447 let start = i;
448 while i < len && bytes[i].is_ascii_digit() {
449 i += 1;
450 }
451 if i > start {
452 let mut id = 0u32;
454 for &b in &bytes[start..i] {
455 id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
456 }
457 point_ids.push(id);
458 }
459 } else {
460 i += 1; }
462 }
463
464 if point_ids.is_empty() {
465 None
466 } else {
467 Some(point_ids)
468 }
469 }
470
471 #[inline]
475 pub fn get_cartesian_point_fast(&mut self, entity_id: u32) -> Option<(f64, f64, f64)> {
476 let bytes = self.get_raw_bytes(entity_id)?;
477
478 let mut i = 0;
480 let len = bytes.len();
481
482 while i < len && bytes[i] != b'(' {
484 i += 1;
485 }
486 if i >= len {
487 return None;
488 }
489 i += 1; while i < len && bytes[i] != b'(' {
493 i += 1;
494 }
495 if i >= len {
496 return None;
497 }
498 i += 1; let x = parse_next_float(&bytes[i..], &mut i)?;
502
503 let y = parse_next_float(&bytes[i..], &mut i)?;
505
506 let z = parse_next_float(&bytes[i..], &mut i).unwrap_or(0.0);
508
509 Some((x, y, z))
510 }
511
512 #[inline]
516 pub fn get_face_bound_fast(&mut self, entity_id: u32) -> Option<(u32, bool, bool)> {
517 let bytes = self.get_raw_bytes(entity_id)?;
518 let len = bytes.len();
519
520 let mut eq_pos = 0;
522 while eq_pos < len && bytes[eq_pos] != b'=' {
523 eq_pos += 1;
524 }
525 if eq_pos >= len {
526 return None;
527 }
528
529 let mut is_outer = false;
533 let mut i = eq_pos + 1;
534 while i + 4 < len && bytes[i] != b'(' {
536 if bytes[i] == b'O'
537 && bytes[i + 1] == b'U'
538 && bytes[i + 2] == b'T'
539 && bytes[i + 3] == b'E'
540 && bytes[i + 4] == b'R'
541 {
542 is_outer = true;
543 break;
544 }
545 i += 1;
546 }
547 while i < len && bytes[i] != b'(' {
549 i += 1;
550 }
551 if i >= len {
552 return None;
553 }
554
555 i += 1; while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
559 i += 1;
560 }
561
562 if i >= len || bytes[i] != b'#' {
564 return None;
565 }
566 i += 1;
567
568 let start = i;
570 while i < len && bytes[i].is_ascii_digit() {
571 i += 1;
572 }
573 if i <= start {
574 return None;
575 }
576 let mut loop_id = 0u32;
577 for &b in &bytes[start..i] {
578 loop_id = loop_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
579 }
580
581 while i < len && bytes[i] != b',' {
584 i += 1;
585 }
586 i += 1; while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
590 i += 1;
591 }
592
593 let orientation = if i + 2 < len && bytes[i] == b'.' && bytes[i + 2] == b'.' {
595 bytes[i + 1] != b'F'
596 } else {
597 true };
599
600 Some((loop_id, orientation, is_outer))
601 }
602
603 #[inline]
608 pub fn get_polyloop_coords_fast(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
609 self.build_index();
611 let index = self.entity_index.as_ref()?;
612 let bytes_full = self.content.as_bytes();
613
614 let (start, end) = index.get(&entity_id).copied()?;
616 let bytes = &bytes_full[start..end];
617
618 let mut i = 0;
620 let len = bytes.len();
621
622 while i < len && bytes[i] != b'(' {
624 i += 1;
625 }
626 if i >= len {
627 return None;
628 }
629 i += 1; while i < len && bytes[i] != b'(' {
633 i += 1;
634 }
635 if i >= len {
636 return None;
637 }
638 i += 1; let mut coords = Vec::with_capacity(8); while i < len {
644 while i < len
646 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
647 {
648 i += 1;
649 }
650
651 if i >= len || bytes[i] == b')' {
652 break;
653 }
654
655 if bytes[i] == b'#' {
657 i += 1;
658 let id_start = i;
659 while i < len && bytes[i].is_ascii_digit() {
660 i += 1;
661 }
662 if i > id_start {
663 let mut point_id = 0u32;
665 for &b in &bytes[id_start..i] {
666 point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
667 }
668
669 if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
672 if let Some(coord) =
673 parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
674 {
675 coords.push(coord);
676 }
677 }
678 }
679 } else {
680 i += 1; }
682 }
683
684 if coords.len() >= 3 {
685 Some(coords)
686 } else {
687 None
688 }
689 }
690
691 #[inline]
695 pub fn get_polyloop_coords_cached(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
696 self.build_index();
698 let index = self.entity_index.as_ref()?;
699 let bytes_full = self.content.as_bytes();
700
701 let (start, end) = index.get(&entity_id).copied()?;
703 let bytes = &bytes_full[start..end];
704
705 let mut i = 0;
707 let len = bytes.len();
708
709 while i < len && bytes[i] != b'(' {
711 i += 1;
712 }
713 if i >= len {
714 return None;
715 }
716 i += 1; while i < len && bytes[i] != b'(' {
720 i += 1;
721 }
722 if i >= len {
723 return None;
724 }
725 i += 1; let mut coords = Vec::with_capacity(8);
730 let mut expected_count = 0u32;
731
732 while i < len {
733 while i < len
735 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
736 {
737 i += 1;
738 }
739
740 if i >= len || bytes[i] == b')' {
741 break;
742 }
743
744 if bytes[i] == b'#' {
746 i += 1;
747 let id_start = i;
748 while i < len && bytes[i].is_ascii_digit() {
749 i += 1;
750 }
751 if i > id_start {
752 expected_count += 1; let mut point_id = 0u32;
756 for &b in &bytes[id_start..i] {
757 point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
758 }
759
760 if let Some(&coord) = self.point_cache.get(&point_id) {
762 coords.push(coord);
763 } else {
764 if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
766 if let Some(coord) =
767 parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
768 {
769 self.point_cache.insert(point_id, coord);
770 coords.push(coord);
771 }
772 }
773 }
774 }
775 } else {
776 i += 1; }
778 }
779
780 if coords.len() >= 3 && coords.len() == expected_count as usize {
783 Some(coords)
784 } else {
785 None
786 }
787 }
788}
789
790#[inline]
793fn parse_cartesian_point_inline(bytes: &[u8]) -> Option<(f64, f64, f64)> {
794 let len = bytes.len();
795 let mut i = 0;
796
797 while i < len && bytes[i] != b'(' {
799 i += 1;
800 }
801 if i >= len {
802 return None;
803 }
804 i += 1; while i < len && bytes[i] != b'(' {
808 i += 1;
809 }
810 if i >= len {
811 return None;
812 }
813 i += 1; let x = parse_float_inline(&bytes[i..], &mut i)?;
817
818 let y = parse_float_inline(&bytes[i..], &mut i)?;
820
821 let z = parse_float_inline(&bytes[i..], &mut i).unwrap_or(0.0);
823
824 Some((x, y, z))
825}
826
827#[inline]
829fn parse_float_inline(bytes: &[u8], offset: &mut usize) -> Option<f64> {
830 let len = bytes.len();
831 let mut i = 0;
832
833 while i < len
835 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
836 {
837 i += 1;
838 }
839
840 if i >= len || bytes[i] == b')' {
841 return None;
842 }
843
844 match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
846 Ok((value, consumed)) if consumed > 0 => {
847 *offset += i + consumed;
848 Some(value)
849 }
850 _ => None,
851 }
852}
853
854#[inline]
856fn parse_next_float(bytes: &[u8], offset: &mut usize) -> Option<f64> {
857 let len = bytes.len();
858 let mut i = 0;
859
860 while i < len
862 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
863 {
864 i += 1;
865 }
866
867 if i >= len || bytes[i] == b')' {
868 return None;
869 }
870
871 match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
873 Ok((value, consumed)) if consumed > 0 => {
874 *offset += i + consumed;
875 Some(value)
876 }
877 _ => None,
878 }
879}
880
#[cfg(test)]
mod tests {
    use super::*;
    use crate::IfcType;

    /// Decoding an explicit byte span yields the id, type and attributes of
    /// the entity in that span.
    #[test]
    fn test_decode_entity() {
        let content = r#"
#1=IFCPROJECT('2vqT3bvqj9RBFjLlXpN8n9',$,$,$,$,$,$,$,$);
#2=IFCWALL('3a4T3bvqj9RBFjLlXpN8n0',$,$,$,'Wall-001',$,#3,#4);
#3=IFCLOCALPLACEMENT($,#4);
#4=IFCAXIS2PLACEMENT3D(#5,$,$);
#5=IFCCARTESIANPOINT((0.,0.,0.));
"#;

        let mut decoder = EntityDecoder::new(content);

        // Span of entity #2: from its '#' through the terminating ';'.
        let start = content.find("#2=").unwrap();
        let end = content[start..].find(';').unwrap() + start + 1;

        let entity = decoder.decode_at(start, end).unwrap();
        assert_eq!(entity.id, 2);
        assert_eq!(entity.ifc_type, IfcType::IfcWall);
        assert_eq!(entity.attributes.len(), 8);
        assert_eq!(entity.get_string(4), Some("Wall-001"));
        assert_eq!(entity.get_ref(6), Some(3));
        assert_eq!(entity.get_ref(7), Some(4));
    }

    /// Lookup by id builds the index on demand and caches the decoded entity.
    #[test]
    fn test_decode_by_id() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#5=IFCWALL('guid2',$,$,$,'Wall-001',$,$,$);
#10=IFCDOOR('guid3',$,$,$,'Door-001',$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let entity = decoder.decode_by_id(5).unwrap();
        assert_eq!(entity.id, 5);
        assert_eq!(entity.ifc_type, IfcType::IfcWall);
        assert_eq!(entity.get_string(4), Some("Wall-001"));

        // Only the requested entity was decoded and cached.
        assert_eq!(decoder.cache_size(), 1);
        let cached = decoder.get_cached(5).unwrap();
        assert_eq!(cached.id, 5);
    }

    /// A reference attribute resolves to the entity it points at.
    #[test]
    fn test_resolve_ref() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,#1,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let wall = decoder.decode_by_id(2).unwrap();
        let placement_attr = wall.get(6).unwrap();

        let referenced = decoder.resolve_ref(placement_attr).unwrap().unwrap();
        assert_eq!(referenced.id, 1);
        assert_eq!(referenced.ifc_type, IfcType::IfcProject);
    }

    /// A list attribute of references resolves to the entities, in order.
    #[test]
    fn test_resolve_ref_list() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid1',$,$,$,$,$,$,$);
#3=IFCDOOR('guid2',$,$,$,$,$,$,$);
#4=IFCRELCONTAINEDINSPATIALSTRUCTURE('guid3',$,$,$,(#2,#3),$,#1);
"#;

        let mut decoder = EntityDecoder::new(content);

        let rel = decoder.decode_by_id(4).unwrap();
        let elements_attr = rel.get(4).unwrap();

        let elements = decoder.resolve_ref_list(elements_attr).unwrap();
        assert_eq!(elements.len(), 2);
        assert_eq!(elements[0].id, 2);
        assert_eq!(elements[0].ifc_type, IfcType::IfcWall);
        assert_eq!(elements[1].id, 3);
        assert_eq!(elements[1].ifc_type, IfcType::IfcDoor);
    }

    /// The cache grows once per distinct entity and is emptied by
    /// `clear_cache`.
    #[test]
    fn test_cache() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        assert_eq!(decoder.cache_size(), 0);

        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 1);

        decoder.decode_by_id(2).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        // Decoding an already cached entity does not add a second entry.
        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        decoder.clear_cache();
        assert_eq!(decoder.cache_size(), 0);
    }
}