1use crate::error::{Error, Result};
10use crate::parser::parse_entity;
11use crate::schema_gen::{AttributeValue, DecodedEntity};
12use rustc_hash::FxHashMap;
13use std::sync::Arc;
14
15pub type EntityIndex = FxHashMap<u32, (usize, usize)>;
17
18#[inline]
21pub fn build_entity_index(content: &str) -> EntityIndex {
22 let bytes = content.as_bytes();
23 let len = bytes.len();
24
25 let estimated_entities = len / 50;
27 let mut index = FxHashMap::with_capacity_and_hasher(estimated_entities, Default::default());
28
29 let mut pos = 0;
30
31 while pos < len {
32 let remaining = &bytes[pos..];
34 let hash_offset = match memchr::memchr(b'#', remaining) {
35 Some(offset) => offset,
36 None => break,
37 };
38
39 let start = pos + hash_offset;
40 pos = start + 1;
41
42 let id_start = pos;
44 while pos < len && bytes[pos].is_ascii_digit() {
45 pos += 1;
46 }
47 let id_end = pos;
48
49 while pos < len && bytes[pos].is_ascii_whitespace() {
51 pos += 1;
52 }
53
54 if id_end > id_start && pos < len && bytes[pos] == b'=' {
55 let id = parse_u32_inline(bytes, id_start, id_end);
57
58 let entity_content = &bytes[pos..];
60 if let Some(semicolon_offset) = memchr::memchr(b';', entity_content) {
61 pos += semicolon_offset + 1; index.insert(id, (start, pos));
63 } else {
64 break; }
66 }
67 }
68
69 index
70}
71
/// Folds the bytes in `bytes[start..end]` into a `u32`, treating each byte as
/// a decimal digit.
///
/// No validation is performed: arithmetic wraps on overflow and a non-digit
/// byte contributes `byte - b'0'` (wrapping). An empty range yields `0`.
#[inline]
fn parse_u32_inline(bytes: &[u8], start: usize, end: usize) -> u32 {
    bytes[start..end].iter().fold(0u32, |acc, &byte| {
        let digit = u32::from(byte.wrapping_sub(b'0'));
        acc.wrapping_mul(10).wrapping_add(digit)
    })
}
82
83pub struct EntityDecoder<'a> {
85 content: &'a str,
86 cache: FxHashMap<u32, Arc<DecodedEntity>>,
89 entity_index: Option<Arc<EntityIndex>>,
93 point_cache: FxHashMap<u32, (f64, f64, f64)>,
96}
97
98impl<'a> EntityDecoder<'a> {
99 pub fn new(content: &'a str) -> Self {
101 Self {
102 content,
103 cache: FxHashMap::default(),
104 entity_index: None,
105 point_cache: FxHashMap::default(),
106 }
107 }
108
109 pub fn with_index(content: &'a str, index: EntityIndex) -> Self {
111 Self {
112 content,
113 cache: FxHashMap::default(),
114 entity_index: Some(Arc::new(index)),
115 point_cache: FxHashMap::default(),
116 }
117 }
118
119 pub fn with_arc_index(content: &'a str, index: Arc<EntityIndex>) -> Self {
121 Self {
122 content,
123 cache: FxHashMap::default(),
124 entity_index: Some(index),
125 point_cache: FxHashMap::default(),
126 }
127 }
128
129 fn build_index(&mut self) {
132 if self.entity_index.is_some() {
133 return; }
135 self.entity_index = Some(Arc::new(build_entity_index(self.content)));
136 }
137
138 #[inline]
141 pub fn decode_at(&mut self, start: usize, end: usize) -> Result<DecodedEntity> {
142 let line = &self.content[start..end];
143 let (id, ifc_type, tokens) = parse_entity(line).map_err(|e| {
144 Error::parse(
146 0,
147 format!(
148 "Failed to parse entity: {:?}, input: {:?}",
149 e,
150 &line[..line.len().min(100)]
151 ),
152 )
153 })?;
154
155 if let Some(entity_arc) = self.cache.get(&id) {
157 return Ok(entity_arc.as_ref().clone());
158 }
159
160 let attributes = tokens
162 .iter()
163 .map(|token| AttributeValue::from_token(token))
164 .collect();
165
166 let entity = DecodedEntity::new(id, ifc_type, attributes);
167 self.cache.insert(id, Arc::new(entity.clone()));
168 Ok(entity)
169 }
170
171 #[inline]
174 pub fn decode_at_with_id(&mut self, id: u32, start: usize, end: usize) -> Result<DecodedEntity> {
175 if let Some(entity_arc) = self.cache.get(&id) {
177 return Ok(entity_arc.as_ref().clone());
178 }
179
180 self.decode_at(start, end)
182 }
183
184 #[inline]
186 pub fn decode_by_id(&mut self, entity_id: u32) -> Result<DecodedEntity> {
187 if let Some(entity_arc) = self.cache.get(&entity_id) {
189 return Ok(entity_arc.as_ref().clone());
190 }
191
192 self.build_index();
194
195 let (start, end) = self
197 .entity_index
198 .as_ref()
199 .and_then(|idx| idx.get(&entity_id).copied())
200 .ok_or_else(|| Error::parse(0, format!("Entity #{} not found", entity_id)))?;
201
202 self.decode_at(start, end)
203 }
204
205 #[inline]
208 pub fn resolve_ref(&mut self, attr: &AttributeValue) -> Result<Option<DecodedEntity>> {
209 match attr.as_entity_ref() {
210 Some(id) => Ok(Some(self.decode_by_id(id)?)),
211 None => Ok(None),
212 }
213 }
214
215 pub fn resolve_ref_list(&mut self, attr: &AttributeValue) -> Result<Vec<DecodedEntity>> {
217 let list = attr
218 .as_list()
219 .ok_or_else(|| Error::parse(0, "Expected list".to_string()))?;
220
221 let mut entities = Vec::with_capacity(list.len());
222 for item in list {
223 if let Some(id) = item.as_entity_ref() {
224 entities.push(self.decode_by_id(id)?);
225 }
226 }
227 Ok(entities)
228 }
229
230 pub fn get_cached(&self, entity_id: u32) -> Option<DecodedEntity> {
232 self.cache.get(&entity_id).map(|arc| arc.as_ref().clone())
233 }
234
235 pub fn clear_cache(&mut self) {
237 self.cache.clear();
238 self.point_cache.clear();
239 }
240
241 pub fn cache_size(&self) -> usize {
243 self.cache.len()
244 }
245
246 #[inline]
249 pub fn get_raw_bytes(&mut self, entity_id: u32) -> Option<&'a [u8]> {
250 self.build_index();
251 let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
252 Some(&self.content.as_bytes()[start..end])
253 }
254
255 #[inline]
257 pub fn get_raw_content(&mut self, entity_id: u32) -> Option<&'a str> {
258 self.build_index();
259 let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
260 Some(&self.content[start..end])
261 }
262
263 #[inline]
267 pub fn get_first_entity_ref_fast(&mut self, entity_id: u32) -> Option<u32> {
268 let bytes = self.get_raw_bytes(entity_id)?;
269 let len = bytes.len();
270 let mut i = 0;
271
272 while i < len && bytes[i] != b'(' {
274 i += 1;
275 }
276 if i >= len {
277 return None;
278 }
279 i += 1; while i < len {
283 while i < len
285 && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r')
286 {
287 i += 1;
288 }
289
290 if i >= len {
291 return None;
292 }
293
294 if bytes[i] == b'#' {
295 i += 1;
296 let start = i;
297 while i < len && bytes[i].is_ascii_digit() {
298 i += 1;
299 }
300 if i > start {
301 let mut id = 0u32;
302 for &b in &bytes[start..i] {
303 id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
304 }
305 return Some(id);
306 }
307 }
308 i += 1;
309 }
310
311 None
312 }
313
314 #[inline]
318 pub fn get_entity_ref_list_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
319 let bytes = self.get_raw_bytes(entity_id)?;
320
321 let mut i = 0;
323 let len = bytes.len();
324
325 while i < len && bytes[i] != b'(' {
327 i += 1;
328 }
329 if i >= len {
330 return None;
331 }
332 i += 1; while i < len && bytes[i] != b'(' {
336 i += 1;
337 }
338 if i >= len {
339 return None;
340 }
341 i += 1; let mut ids = Vec::with_capacity(32);
345
346 while i < len {
347 while i < len
349 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
350 {
351 i += 1;
352 }
353
354 if i >= len || bytes[i] == b')' {
355 break;
356 }
357
358 if bytes[i] == b'#' {
360 i += 1;
361 let start = i;
362 while i < len && bytes[i].is_ascii_digit() {
363 i += 1;
364 }
365 if i > start {
366 let mut id = 0u32;
368 for &b in &bytes[start..i] {
369 id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
370 }
371 ids.push(id);
372 }
373 } else {
374 i += 1; }
376 }
377
378 if ids.is_empty() {
379 None
380 } else {
381 Some(ids)
382 }
383 }
384
385 #[inline]
389 pub fn get_polyloop_point_ids_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
390 let bytes = self.get_raw_bytes(entity_id)?;
391
392 let mut i = 0;
394 let len = bytes.len();
395
396 while i < len && bytes[i] != b'(' {
398 i += 1;
399 }
400 if i >= len {
401 return None;
402 }
403 i += 1; while i < len && bytes[i] != b'(' {
407 i += 1;
408 }
409 if i >= len {
410 return None;
411 }
412 i += 1; let mut point_ids = Vec::with_capacity(8); while i < len {
418 while i < len
420 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
421 {
422 i += 1;
423 }
424
425 if i >= len || bytes[i] == b')' {
426 break;
427 }
428
429 if bytes[i] == b'#' {
431 i += 1;
432 let start = i;
433 while i < len && bytes[i].is_ascii_digit() {
434 i += 1;
435 }
436 if i > start {
437 let mut id = 0u32;
439 for &b in &bytes[start..i] {
440 id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
441 }
442 point_ids.push(id);
443 }
444 } else {
445 i += 1; }
447 }
448
449 if point_ids.is_empty() {
450 None
451 } else {
452 Some(point_ids)
453 }
454 }
455
456 #[inline]
460 pub fn get_cartesian_point_fast(&mut self, entity_id: u32) -> Option<(f64, f64, f64)> {
461 let bytes = self.get_raw_bytes(entity_id)?;
462
463 let mut i = 0;
465 let len = bytes.len();
466
467 while i < len && bytes[i] != b'(' {
469 i += 1;
470 }
471 if i >= len {
472 return None;
473 }
474 i += 1; while i < len && bytes[i] != b'(' {
478 i += 1;
479 }
480 if i >= len {
481 return None;
482 }
483 i += 1; let x = parse_next_float(&bytes[i..], &mut i)?;
487
488 let y = parse_next_float(&bytes[i..], &mut i)?;
490
491 let z = parse_next_float(&bytes[i..], &mut i).unwrap_or(0.0);
493
494 Some((x, y, z))
495 }
496
497 #[inline]
501 pub fn get_face_bound_fast(&mut self, entity_id: u32) -> Option<(u32, bool, bool)> {
502 let bytes = self.get_raw_bytes(entity_id)?;
503 let len = bytes.len();
504
505 let mut eq_pos = 0;
507 while eq_pos < len && bytes[eq_pos] != b'=' {
508 eq_pos += 1;
509 }
510 if eq_pos >= len {
511 return None;
512 }
513
514 let mut is_outer = false;
518 let mut i = eq_pos + 1;
519 while i + 4 < len && bytes[i] != b'(' {
521 if bytes[i] == b'O'
522 && bytes[i + 1] == b'U'
523 && bytes[i + 2] == b'T'
524 && bytes[i + 3] == b'E'
525 && bytes[i + 4] == b'R'
526 {
527 is_outer = true;
528 break;
529 }
530 i += 1;
531 }
532 while i < len && bytes[i] != b'(' {
534 i += 1;
535 }
536 if i >= len {
537 return None;
538 }
539
540 i += 1; while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
544 i += 1;
545 }
546
547 if i >= len || bytes[i] != b'#' {
549 return None;
550 }
551 i += 1;
552
553 let start = i;
555 while i < len && bytes[i].is_ascii_digit() {
556 i += 1;
557 }
558 if i <= start {
559 return None;
560 }
561 let mut loop_id = 0u32;
562 for &b in &bytes[start..i] {
563 loop_id = loop_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
564 }
565
566 while i < len && bytes[i] != b',' {
569 i += 1;
570 }
571 i += 1; while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
575 i += 1;
576 }
577
578 let orientation = if i + 2 < len && bytes[i] == b'.' && bytes[i + 2] == b'.' {
580 bytes[i + 1] != b'F'
581 } else {
582 true };
584
585 Some((loop_id, orientation, is_outer))
586 }
587
588 #[inline]
593 pub fn get_polyloop_coords_fast(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
594 self.build_index();
596 let index = self.entity_index.as_ref()?;
597 let bytes_full = self.content.as_bytes();
598
599 let (start, end) = index.get(&entity_id).copied()?;
601 let bytes = &bytes_full[start..end];
602
603 let mut i = 0;
605 let len = bytes.len();
606
607 while i < len && bytes[i] != b'(' {
609 i += 1;
610 }
611 if i >= len {
612 return None;
613 }
614 i += 1; while i < len && bytes[i] != b'(' {
618 i += 1;
619 }
620 if i >= len {
621 return None;
622 }
623 i += 1; let mut coords = Vec::with_capacity(8); while i < len {
629 while i < len
631 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
632 {
633 i += 1;
634 }
635
636 if i >= len || bytes[i] == b')' {
637 break;
638 }
639
640 if bytes[i] == b'#' {
642 i += 1;
643 let id_start = i;
644 while i < len && bytes[i].is_ascii_digit() {
645 i += 1;
646 }
647 if i > id_start {
648 let mut point_id = 0u32;
650 for &b in &bytes[id_start..i] {
651 point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
652 }
653
654 if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
657 if let Some(coord) =
658 parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
659 {
660 coords.push(coord);
661 }
662 }
663 }
664 } else {
665 i += 1; }
667 }
668
669 if coords.len() >= 3 {
670 Some(coords)
671 } else {
672 None
673 }
674 }
675
676 #[inline]
680 pub fn get_polyloop_coords_cached(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
681 self.build_index();
683 let index = self.entity_index.as_ref()?;
684 let bytes_full = self.content.as_bytes();
685
686 let (start, end) = index.get(&entity_id).copied()?;
688 let bytes = &bytes_full[start..end];
689
690 let mut i = 0;
692 let len = bytes.len();
693
694 while i < len && bytes[i] != b'(' {
696 i += 1;
697 }
698 if i >= len {
699 return None;
700 }
701 i += 1; while i < len && bytes[i] != b'(' {
705 i += 1;
706 }
707 if i >= len {
708 return None;
709 }
710 i += 1; let mut coords = Vec::with_capacity(8);
715 let mut expected_count = 0u32;
716
717 while i < len {
718 while i < len
720 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
721 {
722 i += 1;
723 }
724
725 if i >= len || bytes[i] == b')' {
726 break;
727 }
728
729 if bytes[i] == b'#' {
731 i += 1;
732 let id_start = i;
733 while i < len && bytes[i].is_ascii_digit() {
734 i += 1;
735 }
736 if i > id_start {
737 expected_count += 1; let mut point_id = 0u32;
741 for &b in &bytes[id_start..i] {
742 point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
743 }
744
745 if let Some(&coord) = self.point_cache.get(&point_id) {
747 coords.push(coord);
748 } else {
749 if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
751 if let Some(coord) =
752 parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
753 {
754 self.point_cache.insert(point_id, coord);
755 coords.push(coord);
756 }
757 }
758 }
759 }
760 } else {
761 i += 1; }
763 }
764
765 if coords.len() >= 3 && coords.len() == expected_count as usize {
768 Some(coords)
769 } else {
770 None
771 }
772 }
773}
774
775#[inline]
778fn parse_cartesian_point_inline(bytes: &[u8]) -> Option<(f64, f64, f64)> {
779 let len = bytes.len();
780 let mut i = 0;
781
782 while i < len && bytes[i] != b'(' {
784 i += 1;
785 }
786 if i >= len {
787 return None;
788 }
789 i += 1; while i < len && bytes[i] != b'(' {
793 i += 1;
794 }
795 if i >= len {
796 return None;
797 }
798 i += 1; let x = parse_float_inline(&bytes[i..], &mut i)?;
802
803 let y = parse_float_inline(&bytes[i..], &mut i)?;
805
806 let z = parse_float_inline(&bytes[i..], &mut i).unwrap_or(0.0);
808
809 Some((x, y, z))
810}
811
812#[inline]
814fn parse_float_inline(bytes: &[u8], offset: &mut usize) -> Option<f64> {
815 let len = bytes.len();
816 let mut i = 0;
817
818 while i < len
820 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
821 {
822 i += 1;
823 }
824
825 if i >= len || bytes[i] == b')' {
826 return None;
827 }
828
829 match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
831 Ok((value, consumed)) if consumed > 0 => {
832 *offset += i + consumed;
833 Some(value)
834 }
835 _ => None,
836 }
837}
838
839#[inline]
841fn parse_next_float(bytes: &[u8], offset: &mut usize) -> Option<f64> {
842 let len = bytes.len();
843 let mut i = 0;
844
845 while i < len
847 && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
848 {
849 i += 1;
850 }
851
852 if i >= len || bytes[i] == b')' {
853 return None;
854 }
855
856 match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
858 Ok((value, consumed)) if consumed > 0 => {
859 *offset += i + consumed;
860 Some(value)
861 }
862 _ => None,
863 }
864}
865
#[cfg(test)]
mod tests {
    use super::*;
    use crate::IfcType;

    /// Decoding an explicit byte range yields the id, type and attributes.
    #[test]
    fn test_decode_entity() {
        let content = r#"
#1=IFCPROJECT('2vqT3bvqj9RBFjLlXpN8n9',$,$,$,$,$,$,$,$);
#2=IFCWALL('3a4T3bvqj9RBFjLlXpN8n0',$,$,$,'Wall-001',$,#3,#4);
#3=IFCLOCALPLACEMENT($,#4);
#4=IFCAXIS2PLACEMENT3D(#5,$,$);
#5=IFCCARTESIANPOINT((0.,0.,0.));
"#;

        let mut decoder = EntityDecoder::new(content);

        // Byte range of the `#2` record, terminating `;` included.
        let start = content.find("#2=").unwrap();
        let end = start + content[start..].find(';').unwrap() + 1;

        let wall = decoder.decode_at(start, end).unwrap();
        assert_eq!(wall.id, 2);
        assert_eq!(wall.ifc_type, IfcType::IfcWall);
        assert_eq!(wall.attributes.len(), 8);
        assert_eq!(wall.get_string(4), Some("Wall-001"));
        assert_eq!(wall.get_ref(6), Some(3));
        assert_eq!(wall.get_ref(7), Some(4));
    }

    /// Lookup by id decodes the right record and stores it in the cache.
    #[test]
    fn test_decode_by_id() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#5=IFCWALL('guid2',$,$,$,'Wall-001',$,$,$);
#10=IFCDOOR('guid3',$,$,$,'Door-001',$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let wall = decoder.decode_by_id(5).unwrap();
        assert_eq!(wall.id, 5);
        assert_eq!(wall.ifc_type, IfcType::IfcWall);
        assert_eq!(wall.get_string(4), Some("Wall-001"));

        assert_eq!(decoder.cache_size(), 1);
        let from_cache = decoder.get_cached(5).unwrap();
        assert_eq!(from_cache.id, 5);
    }

    /// A reference attribute resolves to the entity it points at.
    #[test]
    fn test_resolve_ref() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,#1,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let wall = decoder.decode_by_id(2).unwrap();
        let ref_attr = wall.get(6).unwrap();

        let target = decoder.resolve_ref(ref_attr).unwrap().unwrap();
        assert_eq!(target.id, 1);
        assert_eq!(target.ifc_type, IfcType::IfcProject);
    }

    /// A list attribute resolves to its referenced entities, in order.
    #[test]
    fn test_resolve_ref_list() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid1',$,$,$,$,$,$,$);
#3=IFCDOOR('guid2',$,$,$,$,$,$,$);
#4=IFCRELCONTAINEDINSPATIALSTRUCTURE('guid3',$,$,$,(#2,#3),$,#1);
"#;

        let mut decoder = EntityDecoder::new(content);

        let relation = decoder.decode_by_id(4).unwrap();
        let list_attr = relation.get(4).unwrap();

        let members = decoder.resolve_ref_list(list_attr).unwrap();
        assert_eq!(members.len(), 2);
        assert_eq!(members[0].id, 2);
        assert_eq!(members[0].ifc_type, IfcType::IfcWall);
        assert_eq!(members[1].id, 3);
        assert_eq!(members[1].ifc_type, IfcType::IfcDoor);
    }

    /// The cache grows per distinct entity, ignores repeats and can be cleared.
    #[test]
    fn test_cache() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);
        assert_eq!(decoder.cache_size(), 0);

        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 1);

        decoder.decode_by_id(2).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        // Decoding an already cached entity must not add an entry.
        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        decoder.clear_cache();
        assert_eq!(decoder.cache_size(), 0);
    }
}