Skip to main content

ifc_lite_core/
decoder.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! Entity Decoder - On-demand entity parsing
6//!
7//! Lazily decode IFC entities from byte offsets without loading entire file into memory.
8
9use crate::error::{Error, Result};
10use crate::parser::parse_entity;
11use crate::schema_gen::{AttributeValue, DecodedEntity};
12use rustc_hash::FxHashMap;
13use std::sync::Arc;
14
/// Pre-built entity index type.
///
/// Maps an entity ID (the number following `#`) to the `(start, end)` byte
/// offsets of that entity's record in the source text. As produced by
/// `build_entity_index`, `start` is the offset of the leading `#` and `end`
/// is one past the terminating `;`.
pub type EntityIndex = FxHashMap<u32, (usize, usize)>;
17
18/// Build entity index from content - O(n) scan using SIMD-accelerated search
19/// Returns index mapping entity IDs to byte offsets
20#[inline]
21pub fn build_entity_index(content: &str) -> EntityIndex {
22    let bytes = content.as_bytes();
23    let len = bytes.len();
24
25    // Pre-allocate with estimated capacity (roughly 1 entity per 50 bytes)
26    let estimated_entities = len / 50;
27    let mut index = FxHashMap::with_capacity_and_hasher(estimated_entities, Default::default());
28
29    let mut pos = 0;
30
31    while pos < len {
32        // Find next '#' using SIMD-accelerated search
33        let remaining = &bytes[pos..];
34        let hash_offset = match memchr::memchr(b'#', remaining) {
35            Some(offset) => offset,
36            None => break,
37        };
38
39        let start = pos + hash_offset;
40        pos = start + 1;
41
42        // Parse entity ID (inline for speed)
43        let id_start = pos;
44        while pos < len && bytes[pos].is_ascii_digit() {
45            pos += 1;
46        }
47        let id_end = pos;
48
49        // Skip whitespace before '=' (handles both `#45=` and `#45 = ` formats)
50        while pos < len && bytes[pos].is_ascii_whitespace() {
51            pos += 1;
52        }
53
54        if id_end > id_start && pos < len && bytes[pos] == b'=' {
55            // Fast integer parsing without allocation
56            let id = parse_u32_inline(bytes, id_start, id_end);
57
58            // Find end of entity (;) using SIMD
59            let entity_content = &bytes[pos..];
60            if let Some(semicolon_offset) = memchr::memchr(b';', entity_content) {
61                pos += semicolon_offset + 1; // Include semicolon
62                index.insert(id, (start, pos));
63            } else {
64                break; // No semicolon found, malformed
65            }
66        }
67    }
68
69    index
70}
71
/// Fold the ASCII decimal digits in `bytes[start..end]` into a `u32` without
/// allocating.
///
/// Arithmetic wraps on overflow, and bytes are not validated: callers are
/// expected to pass a range that contains only `b'0'..=b'9'`. An empty range
/// yields `0`.
#[inline]
fn parse_u32_inline(bytes: &[u8], start: usize, end: usize) -> u32 {
    bytes[start..end].iter().fold(0u32, |acc, &ch| {
        let digit = u32::from(ch.wrapping_sub(b'0'));
        acc.wrapping_mul(10).wrapping_add(digit)
    })
}
82
/// Entity decoder for lazy parsing.
///
/// Borrows the source text and decodes individual entity records on demand,
/// keeping decoded entities and (optionally) point coordinates in caches.
pub struct EntityDecoder<'a> {
    /// Borrowed source text; all byte offsets in the index refer to this string
    content: &'a str,
    /// Cache of decoded entities (entity_id -> `Arc<DecodedEntity>`)
    /// NOTE(review): the accessors in this impl return a clone of the inner
    /// `DecodedEntity` on a cache hit, so the `Arc` does not by itself avoid
    /// the clone for callers of those methods
    cache: FxHashMap<u32, Arc<DecodedEntity>>,
    /// Index of entity offsets (entity_id -> (start, end))
    /// Can be pre-built or built lazily
    /// Using Arc to allow sharing across threads without cloning the HashMap
    entity_index: Option<Arc<EntityIndex>>,
    /// Cache of cartesian point coordinates for FacetedBrep optimization
    /// Only populated when using get_polyloop_coords_cached
    point_cache: FxHashMap<u32, (f64, f64, f64)>,
}
97
98impl<'a> EntityDecoder<'a> {
99    /// Create new decoder
100    pub fn new(content: &'a str) -> Self {
101        Self {
102            content,
103            cache: FxHashMap::default(),
104            entity_index: None,
105            point_cache: FxHashMap::default(),
106        }
107    }
108
109    /// Create decoder with pre-built index (faster for repeated lookups)
110    pub fn with_index(content: &'a str, index: EntityIndex) -> Self {
111        Self {
112            content,
113            cache: FxHashMap::default(),
114            entity_index: Some(Arc::new(index)),
115            point_cache: FxHashMap::default(),
116        }
117    }
118
119    /// Create decoder with shared Arc index (for parallel processing)
120    pub fn with_arc_index(content: &'a str, index: Arc<EntityIndex>) -> Self {
121        Self {
122            content,
123            cache: FxHashMap::default(),
124            entity_index: Some(index),
125            point_cache: FxHashMap::default(),
126        }
127    }
128
129    /// Build entity index for O(1) lookups
130    /// This scans the file once and maps entity IDs to byte offsets
131    fn build_index(&mut self) {
132        if self.entity_index.is_some() {
133            return; // Already built
134        }
135        self.entity_index = Some(Arc::new(build_entity_index(self.content)));
136    }
137
138    /// Decode entity at byte offset
139    /// Returns cached entity if already decoded
140    #[inline]
141    pub fn decode_at(&mut self, start: usize, end: usize) -> Result<DecodedEntity> {
142        let line = &self.content[start..end];
143        let (id, ifc_type, tokens) = parse_entity(line).map_err(|e| {
144            // Add debug info about what failed to parse
145            Error::parse(
146                0,
147                format!(
148                    "Failed to parse entity: {:?}, input: {:?}",
149                    e,
150                    &line[..line.len().min(100)]
151                ),
152            )
153        })?;
154
155        // Check cache first - return clone of inner DecodedEntity
156        if let Some(entity_arc) = self.cache.get(&id) {
157            return Ok(entity_arc.as_ref().clone());
158        }
159
160        // Convert tokens to AttributeValues
161        let attributes = tokens
162            .iter()
163            .map(|token| AttributeValue::from_token(token))
164            .collect();
165
166        let entity = DecodedEntity::new(id, ifc_type, attributes);
167        self.cache.insert(id, Arc::new(entity.clone()));
168        Ok(entity)
169    }
170
171    /// Decode entity at byte offset with known ID (faster - checks cache before parsing)
172    /// Use this when the scanner provides the entity ID to avoid re-parsing cached entities
173    #[inline]
174    pub fn decode_at_with_id(
175        &mut self,
176        id: u32,
177        start: usize,
178        end: usize,
179    ) -> Result<DecodedEntity> {
180        // Check cache first - avoid parsing if already decoded
181        if let Some(entity_arc) = self.cache.get(&id) {
182            return Ok(entity_arc.as_ref().clone());
183        }
184
185        // Not in cache, parse and cache
186        self.decode_at(start, end)
187    }
188
189    /// Decode entity by ID - O(1) lookup using entity index
190    #[inline]
191    pub fn decode_by_id(&mut self, entity_id: u32) -> Result<DecodedEntity> {
192        // Check cache first - return clone of inner DecodedEntity
193        if let Some(entity_arc) = self.cache.get(&entity_id) {
194            return Ok(entity_arc.as_ref().clone());
195        }
196
197        // Build index if not already built
198        self.build_index();
199
200        // O(1) lookup in index
201        let (start, end) = self
202            .entity_index
203            .as_ref()
204            .and_then(|idx| idx.get(&entity_id).copied())
205            .ok_or_else(|| Error::parse(0, format!("Entity #{} not found", entity_id)))?;
206
207        self.decode_at(start, end)
208    }
209
210    /// Resolve entity reference (follow #ID)
211    /// Returns None for null/derived values
212    #[inline]
213    pub fn resolve_ref(&mut self, attr: &AttributeValue) -> Result<Option<DecodedEntity>> {
214        match attr.as_entity_ref() {
215            Some(id) => Ok(Some(self.decode_by_id(id)?)),
216            None => Ok(None),
217        }
218    }
219
220    /// Resolve list of entity references
221    pub fn resolve_ref_list(&mut self, attr: &AttributeValue) -> Result<Vec<DecodedEntity>> {
222        let list = attr
223            .as_list()
224            .ok_or_else(|| Error::parse(0, "Expected list".to_string()))?;
225
226        let mut entities = Vec::with_capacity(list.len());
227        for item in list {
228            if let Some(id) = item.as_entity_ref() {
229                entities.push(self.decode_by_id(id)?);
230            }
231        }
232        Ok(entities)
233    }
234
235    /// Get cached entity (without decoding)
236    pub fn get_cached(&self, entity_id: u32) -> Option<DecodedEntity> {
237        self.cache.get(&entity_id).map(|arc| arc.as_ref().clone())
238    }
239
    /// Reserve room for at least `additional` more entries in the entity cache,
    /// to avoid HashMap resizing during processing.
    /// For a 487 MB file with 208 K building elements, the cache can grow to
    /// 300 K+ entries (elements + representation chains + placements).
    /// Pre-allocating avoids ~6 resize-and-rehash operations that each copy
    /// all entries, reducing both peak memory spikes and timing variance.
    /// Only the entity cache is affected; the point cache is left as is.
    pub fn reserve_cache(&mut self, additional: usize) {
        self.cache.reserve(additional);
    }
248
249    /// Clear all caches to free memory
250    pub fn clear_cache(&mut self) {
251        self.cache.clear();
252        self.point_cache.clear();
253    }
254
    /// Clear only the point coordinate cache (used after BREP preprocessing).
    /// The entity cache is preserved for subsequent geometry processing;
    /// the entity index is not touched either.
    pub fn clear_point_cache(&mut self) {
        self.point_cache.clear();
    }
260
    /// Number of decoded entities currently held in the entity cache.
    /// The point coordinate cache is not counted.
    pub fn cache_size(&self) -> usize {
        self.cache.len()
    }
265
266    /// Get raw bytes for an entity (for direct/fast parsing)
267    /// Returns the full entity line including type and attributes
268    #[inline]
269    pub fn get_raw_bytes(&mut self, entity_id: u32) -> Option<&'a [u8]> {
270        self.build_index();
271        let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
272        Some(&self.content.as_bytes()[start..end])
273    }
274
275    /// Get raw content string for an entity
276    #[inline]
277    pub fn get_raw_content(&mut self, entity_id: u32) -> Option<&'a str> {
278        self.build_index();
279        let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
280        Some(&self.content[start..end])
281    }
282
283    /// Fast extraction of first entity ref from raw bytes
284    /// Useful for BREP -> shell ID, Face -> FaceBound, etc.
285    /// Returns the first entity reference ID found in the entity
286    #[inline]
287    pub fn get_first_entity_ref_fast(&mut self, entity_id: u32) -> Option<u32> {
288        let bytes = self.get_raw_bytes(entity_id)?;
289        let len = bytes.len();
290        let mut i = 0;
291
292        // Skip to first '(' after '='
293        while i < len && bytes[i] != b'(' {
294            i += 1;
295        }
296        if i >= len {
297            return None;
298        }
299        i += 1; // Skip first '('
300
301        // Find first '#' which is the entity ref
302        while i < len {
303            // Skip whitespace
304            while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
305                i += 1;
306            }
307
308            if i >= len {
309                return None;
310            }
311
312            if bytes[i] == b'#' {
313                i += 1;
314                let start = i;
315                while i < len && bytes[i].is_ascii_digit() {
316                    i += 1;
317                }
318                if i > start {
319                    let mut id = 0u32;
320                    for &b in &bytes[start..i] {
321                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
322                    }
323                    return Some(id);
324                }
325            }
326            i += 1;
327        }
328
329        None
330    }
331
332    /// Fast extraction of entity reference IDs from a list attribute in raw bytes
333    /// Useful for getting face list from ClosedShell, bounds from Face, etc.
334    /// Returns list of entity IDs
335    #[inline]
336    pub fn get_entity_ref_list_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
337        let bytes = self.get_raw_bytes(entity_id)?;
338
339        // Pattern: IFCTYPE((#id1,#id2,...)); or IFCTYPE((#id1,#id2,...),other);
340        let mut i = 0;
341        let len = bytes.len();
342
343        // Skip to first '(' after '='
344        while i < len && bytes[i] != b'(' {
345            i += 1;
346        }
347        if i >= len {
348            return None;
349        }
350        i += 1; // Skip first '('
351
352        // Skip to second '(' for the list
353        while i < len && bytes[i] != b'(' {
354            i += 1;
355        }
356        if i >= len {
357            return None;
358        }
359        i += 1; // Skip second '('
360
361        // Parse entity IDs
362        let mut ids = Vec::with_capacity(32);
363
364        while i < len {
365            // Skip whitespace and commas
366            while i < len
367                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
368            {
369                i += 1;
370            }
371
372            if i >= len || bytes[i] == b')' {
373                break;
374            }
375
376            // Expect '#' followed by number
377            if bytes[i] == b'#' {
378                i += 1;
379                let start = i;
380                while i < len && bytes[i].is_ascii_digit() {
381                    i += 1;
382                }
383                if i > start {
384                    // Fast integer parsing directly from ASCII digits
385                    let mut id = 0u32;
386                    for &b in &bytes[start..i] {
387                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
388                    }
389                    ids.push(id);
390                }
391            } else {
392                i += 1; // Skip unknown character
393            }
394        }
395
396        if ids.is_empty() {
397            None
398        } else {
399            Some(ids)
400        }
401    }
402
403    /// Fast extraction of PolyLoop point IDs directly from raw bytes
404    /// Bypasses full entity decoding for BREP optimization
405    /// Returns list of entity IDs for CartesianPoints
406    #[inline]
407    pub fn get_polyloop_point_ids_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
408        let bytes = self.get_raw_bytes(entity_id)?;
409
410        // IFCPOLYLOOP((#id1,#id2,#id3,...));
411        let mut i = 0;
412        let len = bytes.len();
413
414        // Skip to first '(' after '='
415        while i < len && bytes[i] != b'(' {
416            i += 1;
417        }
418        if i >= len {
419            return None;
420        }
421        i += 1; // Skip first '('
422
423        // Skip to second '(' for the point list
424        while i < len && bytes[i] != b'(' {
425            i += 1;
426        }
427        if i >= len {
428            return None;
429        }
430        i += 1; // Skip second '('
431
432        // Parse point IDs
433        let mut point_ids = Vec::with_capacity(8); // Most faces have 3-8 vertices
434
435        while i < len {
436            // Skip whitespace and commas
437            while i < len
438                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
439            {
440                i += 1;
441            }
442
443            if i >= len || bytes[i] == b')' {
444                break;
445            }
446
447            // Expect '#' followed by number
448            if bytes[i] == b'#' {
449                i += 1;
450                let start = i;
451                while i < len && bytes[i].is_ascii_digit() {
452                    i += 1;
453                }
454                if i > start {
455                    // Fast integer parsing directly from ASCII digits
456                    let mut id = 0u32;
457                    for &b in &bytes[start..i] {
458                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
459                    }
460                    point_ids.push(id);
461                }
462            } else {
463                i += 1; // Skip unknown character
464            }
465        }
466
467        if point_ids.is_empty() {
468            None
469        } else {
470            Some(point_ids)
471        }
472    }
473
474    /// Fast extraction of CartesianPoint coordinates directly from raw bytes
475    /// Bypasses full entity decoding for ~3x speedup on BREP-heavy files
476    /// Returns (x, y, z) as f64 tuple
477    #[inline]
478    pub fn get_cartesian_point_fast(&mut self, entity_id: u32) -> Option<(f64, f64, f64)> {
479        let bytes = self.get_raw_bytes(entity_id)?;
480
481        // Find opening paren for coordinates: IFCCARTESIANPOINT((x,y,z));
482        let mut i = 0;
483        let len = bytes.len();
484
485        // Skip to first '(' after '='
486        while i < len && bytes[i] != b'(' {
487            i += 1;
488        }
489        if i >= len {
490            return None;
491        }
492        i += 1; // Skip first '('
493
494        // Skip to second '(' for the coordinate list
495        while i < len && bytes[i] != b'(' {
496            i += 1;
497        }
498        if i >= len {
499            return None;
500        }
501        i += 1; // Skip second '('
502
503        // Parse x coordinate
504        let x = parse_next_float(&bytes[i..], &mut i)?;
505
506        // Parse y coordinate
507        let y = parse_next_float(&bytes[i..], &mut i)?;
508
509        // Parse z coordinate (optional for 2D points, default to 0)
510        let z = parse_next_float(&bytes[i..], &mut i).unwrap_or(0.0);
511
512        Some((x, y, z))
513    }
514
515    /// Fast extraction of FaceBound info directly from raw bytes
516    /// Returns (loop_id, orientation, is_outer_bound)
517    /// Bypasses full entity decoding for BREP optimization
518    #[inline]
519    pub fn get_face_bound_fast(&mut self, entity_id: u32) -> Option<(u32, bool, bool)> {
520        let bytes = self.get_raw_bytes(entity_id)?;
521        let len = bytes.len();
522
523        // Find '=' to locate start of type name, and '(' for end
524        let mut eq_pos = 0;
525        while eq_pos < len && bytes[eq_pos] != b'=' {
526            eq_pos += 1;
527        }
528        if eq_pos >= len {
529            return None;
530        }
531
532        // Check if this is an outer bound by looking for "OUTER" in the type name
533        // IFCFACEOUTERBOUND vs IFCFACEBOUND
534        // The type name is between '=' and '('
535        let mut is_outer = false;
536        let mut i = eq_pos + 1;
537        // Look for "OUTER" pattern (must check for the full word, not just 'O')
538        while i + 4 < len && bytes[i] != b'(' {
539            if bytes[i] == b'O'
540                && bytes[i + 1] == b'U'
541                && bytes[i + 2] == b'T'
542                && bytes[i + 3] == b'E'
543                && bytes[i + 4] == b'R'
544            {
545                is_outer = true;
546                break;
547            }
548            i += 1;
549        }
550        // Continue to find the '(' if we haven't already
551        while i < len && bytes[i] != b'(' {
552            i += 1;
553        }
554        if i >= len {
555            return None;
556        }
557
558        i += 1; // Skip first '('
559
560        // Skip whitespace
561        while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
562            i += 1;
563        }
564
565        // Expect '#' for loop entity ref
566        if i >= len || bytes[i] != b'#' {
567            return None;
568        }
569        i += 1;
570
571        // Parse loop ID
572        let start = i;
573        while i < len && bytes[i].is_ascii_digit() {
574            i += 1;
575        }
576        if i <= start {
577            return None;
578        }
579        let mut loop_id = 0u32;
580        for &b in &bytes[start..i] {
581            loop_id = loop_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
582        }
583
584        // Find orientation after comma - default to true (.T.)
585        // Skip to comma
586        while i < len && bytes[i] != b',' {
587            i += 1;
588        }
589        i += 1; // Skip comma
590
591        // Skip whitespace
592        while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
593            i += 1;
594        }
595
596        // Check for .F. (false) or .T. (true)
597        let orientation = if i + 2 < len && bytes[i] == b'.' && bytes[i + 2] == b'.' {
598            bytes[i + 1] != b'F'
599        } else {
600            true // Default to true
601        };
602
603        Some((loop_id, orientation, is_outer))
604    }
605
606    /// Fast extraction of PolyLoop COORDINATES directly from raw bytes
607    /// This is the ultimate fast path - extracts all coordinates in one go
608    /// Avoids N+1 HashMap lookups by batching point extraction
609    /// Returns Vec of (x, y, z) coordinate tuples
610    #[inline]
611    pub fn get_polyloop_coords_fast(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
612        // Ensure index is built once
613        self.build_index();
614        let index = self.entity_index.as_ref()?;
615        let bytes_full = self.content.as_bytes();
616
617        // Get polyloop raw bytes
618        let (start, end) = index.get(&entity_id).copied()?;
619        let bytes = &bytes_full[start..end];
620
621        // IFCPOLYLOOP((#id1,#id2,#id3,...));
622        let mut i = 0;
623        let len = bytes.len();
624
625        // Skip to first '(' after '='
626        while i < len && bytes[i] != b'(' {
627            i += 1;
628        }
629        if i >= len {
630            return None;
631        }
632        i += 1; // Skip first '('
633
634        // Skip to second '(' for the point list
635        while i < len && bytes[i] != b'(' {
636            i += 1;
637        }
638        if i >= len {
639            return None;
640        }
641        i += 1; // Skip second '('
642
643        // Parse point IDs and immediately fetch coordinates
644        let mut coords = Vec::with_capacity(8); // Most faces have 3-8 vertices
645
646        while i < len {
647            // Skip whitespace and commas
648            while i < len
649                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
650            {
651                i += 1;
652            }
653
654            if i >= len || bytes[i] == b')' {
655                break;
656            }
657
658            // Expect '#' followed by number
659            if bytes[i] == b'#' {
660                i += 1;
661                let id_start = i;
662                while i < len && bytes[i].is_ascii_digit() {
663                    i += 1;
664                }
665                if i > id_start {
666                    // Fast integer parsing directly from ASCII digits
667                    let mut point_id = 0u32;
668                    for &b in &bytes[id_start..i] {
669                        point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
670                    }
671
672                    // INLINE: Get cartesian point coordinates directly
673                    // This avoids the overhead of calling get_cartesian_point_fast for each point
674                    if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
675                        if let Some(coord) =
676                            parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
677                        {
678                            coords.push(coord);
679                        }
680                    }
681                }
682            } else {
683                i += 1; // Skip unknown character
684            }
685        }
686
687        if coords.len() >= 3 {
688            Some(coords)
689        } else {
690            None
691        }
692    }
693
694    /// Fast extraction of PolyLoop COORDINATES with point caching
695    /// Uses a cache to avoid re-parsing the same cartesian points
696    /// For files with many faces sharing points, this can be 2-3x faster
697    #[inline]
698    pub fn get_polyloop_coords_cached(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
699        // Ensure index is built once
700        self.build_index();
701        let index = self.entity_index.as_ref()?;
702        let bytes_full = self.content.as_bytes();
703
704        // Get polyloop raw bytes
705        let (start, end) = index.get(&entity_id).copied()?;
706        let bytes = &bytes_full[start..end];
707
708        // IFCPOLYLOOP((#id1,#id2,#id3,...));
709        let mut i = 0;
710        let len = bytes.len();
711
712        // Skip to first '(' after '='
713        while i < len && bytes[i] != b'(' {
714            i += 1;
715        }
716        if i >= len {
717            return None;
718        }
719        i += 1; // Skip first '('
720
721        // Skip to second '(' for the point list
722        while i < len && bytes[i] != b'(' {
723            i += 1;
724        }
725        if i >= len {
726            return None;
727        }
728        i += 1; // Skip second '('
729
730        // Parse point IDs and fetch coordinates (with caching)
731        // CRITICAL: Track expected count to ensure all points are resolved
732        let mut coords = Vec::with_capacity(8);
733        let mut expected_count = 0u32;
734
735        while i < len {
736            // Skip whitespace and commas
737            while i < len
738                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
739            {
740                i += 1;
741            }
742
743            if i >= len || bytes[i] == b')' {
744                break;
745            }
746
747            // Expect '#' followed by number
748            if bytes[i] == b'#' {
749                i += 1;
750                let id_start = i;
751                while i < len && bytes[i].is_ascii_digit() {
752                    i += 1;
753                }
754                if i > id_start {
755                    expected_count += 1; // Count every point ID we encounter
756
757                    // Fast integer parsing directly from ASCII digits
758                    let mut point_id = 0u32;
759                    for &b in &bytes[id_start..i] {
760                        point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
761                    }
762
763                    // Check cache first
764                    if let Some(&coord) = self.point_cache.get(&point_id) {
765                        coords.push(coord);
766                    } else {
767                        // Not in cache - parse and cache
768                        if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
769                            if let Some(coord) =
770                                parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
771                            {
772                                self.point_cache.insert(point_id, coord);
773                                coords.push(coord);
774                            }
775                        }
776                    }
777                }
778            } else {
779                i += 1; // Skip unknown character
780            }
781        }
782
783        // CRITICAL: Return None if ANY point failed to resolve
784        // This matches the old behavior where missing points invalidated the whole polygon
785        if coords.len() >= 3 && coords.len() == expected_count as usize {
786            Some(coords)
787        } else {
788            None
789        }
790    }
791}
792
793/// Parse cartesian point coordinates inline from raw bytes
794/// Used by get_polyloop_coords_fast for maximum performance
795#[inline]
796fn parse_cartesian_point_inline(bytes: &[u8]) -> Option<(f64, f64, f64)> {
797    let len = bytes.len();
798    let mut i = 0;
799
800    // Skip to first '(' after '='
801    while i < len && bytes[i] != b'(' {
802        i += 1;
803    }
804    if i >= len {
805        return None;
806    }
807    i += 1; // Skip first '('
808
809    // Skip to second '(' for the coordinate list
810    while i < len && bytes[i] != b'(' {
811        i += 1;
812    }
813    if i >= len {
814        return None;
815    }
816    i += 1; // Skip second '('
817
818    // Parse x coordinate
819    let x = parse_float_inline(&bytes[i..], &mut i)?;
820
821    // Parse y coordinate
822    let y = parse_float_inline(&bytes[i..], &mut i)?;
823
824    // Parse z coordinate (optional for 2D points, default to 0)
825    let z = parse_float_inline(&bytes[i..], &mut i).unwrap_or(0.0);
826
827    Some((x, y, z))
828}
829
830/// Parse float inline - simpler version for batch coordinate extraction
831#[inline]
832fn parse_float_inline(bytes: &[u8], offset: &mut usize) -> Option<f64> {
833    let len = bytes.len();
834    let mut i = 0;
835
836    // Skip whitespace and commas
837    while i < len
838        && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
839    {
840        i += 1;
841    }
842
843    if i >= len || bytes[i] == b')' {
844        return None;
845    }
846
847    // Parse float using fast_float
848    match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
849        Ok((value, consumed)) if consumed > 0 => {
850            *offset += i + consumed;
851            Some(value)
852        }
853        _ => None,
854    }
855}
856
857/// Parse next float from bytes, advancing position past it
858#[inline]
859fn parse_next_float(bytes: &[u8], offset: &mut usize) -> Option<f64> {
860    let len = bytes.len();
861    let mut i = 0;
862
863    // Skip whitespace and commas
864    while i < len
865        && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
866    {
867        i += 1;
868    }
869
870    if i >= len || bytes[i] == b')' {
871        return None;
872    }
873
874    // Parse float using fast_float
875    match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
876        Ok((value, consumed)) if consumed > 0 => {
877            *offset += i + consumed;
878            Some(value)
879        }
880        _ => None,
881    }
882}
883
/// Unit tests for `EntityDecoder`, each run against a small inline IFC snippet.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::IfcType;

    /// `decode_at` on an explicit byte range yields the ID, type and attributes.
    #[test]
    fn test_decode_entity() {
        let content = r#"
#1=IFCPROJECT('2vqT3bvqj9RBFjLlXpN8n9',$,$,$,$,$,$,$,$);
#2=IFCWALL('3a4T3bvqj9RBFjLlXpN8n0',$,$,$,'Wall-001',$,#3,#4);
#3=IFCLOCALPLACEMENT($,#4);
#4=IFCAXIS2PLACEMENT3D(#5,$,$);
#5=IFCCARTESIANPOINT((0.,0.,0.));
"#;

        let mut decoder = EntityDecoder::new(content);

        // Find entity #2: from its '#' up to and including the ';'
        let start = content.find("#2=").unwrap();
        let end = content[start..].find(';').unwrap() + start + 1;

        let entity = decoder.decode_at(start, end).unwrap();
        assert_eq!(entity.id, 2);
        assert_eq!(entity.ifc_type, IfcType::IfcWall);
        assert_eq!(entity.attributes.len(), 8);
        assert_eq!(entity.get_string(4), Some("Wall-001"));
        assert_eq!(entity.get_ref(6), Some(3));
        assert_eq!(entity.get_ref(7), Some(4));
    }

    /// `decode_by_id` builds the index lazily and caches the decoded entity.
    #[test]
    fn test_decode_by_id() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#5=IFCWALL('guid2',$,$,$,'Wall-001',$,$,$);
#10=IFCDOOR('guid3',$,$,$,'Door-001',$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let entity = decoder.decode_by_id(5).unwrap();
        assert_eq!(entity.id, 5);
        assert_eq!(entity.ifc_type, IfcType::IfcWall);
        assert_eq!(entity.get_string(4), Some("Wall-001"));

        // Should be cached now
        assert_eq!(decoder.cache_size(), 1);
        let cached = decoder.get_cached(5).unwrap();
        assert_eq!(cached.id, 5);
    }

    /// `resolve_ref` follows a single `#ID` attribute to the referenced entity.
    #[test]
    fn test_resolve_ref() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,#1,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let wall = decoder.decode_by_id(2).unwrap();
        // Attribute 6 of the wall holds the reference `#1`
        let placement_attr = wall.get(6).unwrap();

        let referenced = decoder.resolve_ref(placement_attr).unwrap().unwrap();
        assert_eq!(referenced.id, 1);
        assert_eq!(referenced.ifc_type, IfcType::IfcProject);
    }

    /// `resolve_ref_list` decodes every reference of a list attribute, in order.
    #[test]
    fn test_resolve_ref_list() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid1',$,$,$,$,$,$,$);
#3=IFCDOOR('guid2',$,$,$,$,$,$,$);
#4=IFCRELCONTAINEDINSPATIALSTRUCTURE('guid3',$,$,$,(#2,#3),$,#1);
"#;

        let mut decoder = EntityDecoder::new(content);

        let rel = decoder.decode_by_id(4).unwrap();
        // Attribute 4 of the relation is the list `(#2,#3)`
        let elements_attr = rel.get(4).unwrap();

        let elements = decoder.resolve_ref_list(elements_attr).unwrap();
        assert_eq!(elements.len(), 2);
        assert_eq!(elements[0].id, 2);
        assert_eq!(elements[0].ifc_type, IfcType::IfcWall);
        assert_eq!(elements[1].id, 3);
        assert_eq!(elements[1].ifc_type, IfcType::IfcDoor);
    }

    /// The entity cache grows once per distinct entity and is emptied by `clear_cache`.
    #[test]
    fn test_cache() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        assert_eq!(decoder.cache_size(), 0);

        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 1);

        decoder.decode_by_id(2).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        // Decode same entity - should use cache
        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        decoder.clear_cache();
        assert_eq!(decoder.cache_size(), 0);
    }
}