Skip to main content

ifc_lite_core/
decoder.rs

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Entity Decoder - On-demand entity parsing
//!
//! Lazily decode IFC entities from byte offsets without loading entire file into memory.

9use crate::error::{Error, Result};
10use crate::parser::parse_entity;
11use crate::schema_gen::{AttributeValue, DecodedEntity};
12use rustc_hash::FxHashMap;
13use std::sync::Arc;
14
/// Pre-built entity index type.
///
/// Maps an entity ID to a `(start, end)` pair of byte offsets into the file
/// content. As produced by `build_entity_index`, `start` is the position of
/// the entity's leading `#` and `end` is one past its terminating `;`.
pub type EntityIndex = FxHashMap<u32, (usize, usize)>;
17
18/// Build entity index from content - O(n) scan using SIMD-accelerated search
19/// Returns index mapping entity IDs to byte offsets
20#[inline]
21pub fn build_entity_index(content: &str) -> EntityIndex {
22    let bytes = content.as_bytes();
23    let len = bytes.len();
24
25    // Pre-allocate with estimated capacity (roughly 1 entity per 50 bytes)
26    let estimated_entities = len / 50;
27    let mut index = FxHashMap::with_capacity_and_hasher(estimated_entities, Default::default());
28
29    let mut pos = 0;
30
31    while pos < len {
32        // Find next '#' using SIMD-accelerated search
33        let remaining = &bytes[pos..];
34        let hash_offset = match memchr::memchr(b'#', remaining) {
35            Some(offset) => offset,
36            None => break,
37        };
38
39        let start = pos + hash_offset;
40        pos = start + 1;
41
42        // Parse entity ID (inline for speed)
43        let id_start = pos;
44        while pos < len && bytes[pos].is_ascii_digit() {
45            pos += 1;
46        }
47        let id_end = pos;
48
49        // Skip whitespace before '=' (handles both `#45=` and `#45 = ` formats)
50        while pos < len && bytes[pos].is_ascii_whitespace() {
51            pos += 1;
52        }
53
54        if id_end > id_start && pos < len && bytes[pos] == b'=' {
55            // Fast integer parsing without allocation
56            let id = parse_u32_inline(bytes, id_start, id_end);
57
58            // Find end of entity (;) using SIMD
59            let entity_content = &bytes[pos..];
60            if let Some(semicolon_offset) = memchr::memchr(b';', entity_content) {
61                pos += semicolon_offset + 1; // Include semicolon
62                index.insert(id, (start, pos));
63            } else {
64                break; // No semicolon found, malformed
65            }
66        }
67    }
68
69    index
70}
71
/// Convert the ASCII decimal digits in `bytes[start..end]` to a `u32`
/// without going through a string.
///
/// No validation is performed: arithmetic wraps on overflow and non-digit
/// bytes are folded in as `byte - b'0'` (also wrapping). An empty range
/// yields `0`.
#[inline]
fn parse_u32_inline(bytes: &[u8], start: usize, end: usize) -> u32 {
    bytes[start..end].iter().fold(0u32, |acc, &ch| {
        acc.wrapping_mul(10)
            .wrapping_add(u32::from(ch.wrapping_sub(b'0')))
    })
}
82
/// Entity decoder for lazy parsing - uses Arc for efficient cache sharing
///
/// Borrows the file content for `'a` and decodes individual entities from it
/// on request, keeping decoded results in per-decoder caches.
pub struct EntityDecoder<'a> {
    /// Full file content; all byte offsets used by the decoder index into it
    content: &'a str,
    /// Cache of decoded entities (entity_id -> `Arc<DecodedEntity>`)
    /// Using Arc avoids expensive clones on cache hits
    cache: FxHashMap<u32, Arc<DecodedEntity>>,
    /// Index of entity offsets (entity_id -> (start, end))
    /// Can be pre-built or built lazily (`None` until it is first needed)
    /// Using Arc to allow sharing across threads without cloning the HashMap
    entity_index: Option<Arc<EntityIndex>>,
    /// Cache of cartesian point coordinates for FacetedBrep optimization
    /// Only populated when using get_polyloop_coords_cached
    point_cache: FxHashMap<u32, (f64, f64, f64)>,
}
97
98impl<'a> EntityDecoder<'a> {
99    /// Create new decoder
100    pub fn new(content: &'a str) -> Self {
101        Self {
102            content,
103            cache: FxHashMap::default(),
104            entity_index: None,
105            point_cache: FxHashMap::default(),
106        }
107    }
108
109    /// Create decoder with pre-built index (faster for repeated lookups)
110    pub fn with_index(content: &'a str, index: EntityIndex) -> Self {
111        Self {
112            content,
113            cache: FxHashMap::default(),
114            entity_index: Some(Arc::new(index)),
115            point_cache: FxHashMap::default(),
116        }
117    }
118
119    /// Create decoder with shared Arc index (for parallel processing)
120    pub fn with_arc_index(content: &'a str, index: Arc<EntityIndex>) -> Self {
121        Self {
122            content,
123            cache: FxHashMap::default(),
124            entity_index: Some(index),
125            point_cache: FxHashMap::default(),
126        }
127    }
128
129    /// Build entity index for O(1) lookups
130    /// This scans the file once and maps entity IDs to byte offsets
131    fn build_index(&mut self) {
132        if self.entity_index.is_some() {
133            return; // Already built
134        }
135        self.entity_index = Some(Arc::new(build_entity_index(self.content)));
136    }
137
138    /// Decode entity at byte offset
139    /// Returns cached entity if already decoded
140    #[inline]
141    pub fn decode_at(&mut self, start: usize, end: usize) -> Result<DecodedEntity> {
142        let line = &self.content[start..end];
143        let (id, ifc_type, tokens) = parse_entity(line).map_err(|e| {
144            // Add debug info about what failed to parse
145            Error::parse(
146                0,
147                format!(
148                    "Failed to parse entity: {:?}, input: {:?}",
149                    e,
150                    &line[..line.len().min(100)]
151                ),
152            )
153        })?;
154
155        // Check cache first - return clone of inner DecodedEntity
156        if let Some(entity_arc) = self.cache.get(&id) {
157            return Ok(entity_arc.as_ref().clone());
158        }
159
160        // Convert tokens to AttributeValues
161        let attributes = tokens
162            .iter()
163            .map(|token| AttributeValue::from_token(token))
164            .collect();
165
166        let entity = DecodedEntity::new(id, ifc_type, attributes);
167        self.cache.insert(id, Arc::new(entity.clone()));
168        Ok(entity)
169    }
170
171    /// Decode entity at byte offset with known ID (faster - checks cache before parsing)
172    /// Use this when the scanner provides the entity ID to avoid re-parsing cached entities
173    #[inline]
174    pub fn decode_at_with_id(&mut self, id: u32, start: usize, end: usize) -> Result<DecodedEntity> {
175        // Check cache first - avoid parsing if already decoded
176        if let Some(entity_arc) = self.cache.get(&id) {
177            return Ok(entity_arc.as_ref().clone());
178        }
179
180        // Not in cache, parse and cache
181        self.decode_at(start, end)
182    }
183
184    /// Decode entity by ID - O(1) lookup using entity index
185    #[inline]
186    pub fn decode_by_id(&mut self, entity_id: u32) -> Result<DecodedEntity> {
187        // Check cache first - return clone of inner DecodedEntity
188        if let Some(entity_arc) = self.cache.get(&entity_id) {
189            return Ok(entity_arc.as_ref().clone());
190        }
191
192        // Build index if not already built
193        self.build_index();
194
195        // O(1) lookup in index
196        let (start, end) = self
197            .entity_index
198            .as_ref()
199            .and_then(|idx| idx.get(&entity_id).copied())
200            .ok_or_else(|| Error::parse(0, format!("Entity #{} not found", entity_id)))?;
201
202        self.decode_at(start, end)
203    }
204
205    /// Resolve entity reference (follow #ID)
206    /// Returns None for null/derived values
207    #[inline]
208    pub fn resolve_ref(&mut self, attr: &AttributeValue) -> Result<Option<DecodedEntity>> {
209        match attr.as_entity_ref() {
210            Some(id) => Ok(Some(self.decode_by_id(id)?)),
211            None => Ok(None),
212        }
213    }
214
215    /// Resolve list of entity references
216    pub fn resolve_ref_list(&mut self, attr: &AttributeValue) -> Result<Vec<DecodedEntity>> {
217        let list = attr
218            .as_list()
219            .ok_or_else(|| Error::parse(0, "Expected list".to_string()))?;
220
221        let mut entities = Vec::with_capacity(list.len());
222        for item in list {
223            if let Some(id) = item.as_entity_ref() {
224                entities.push(self.decode_by_id(id)?);
225            }
226        }
227        Ok(entities)
228    }
229
230    /// Get cached entity (without decoding)
231    pub fn get_cached(&self, entity_id: u32) -> Option<DecodedEntity> {
232        self.cache.get(&entity_id).map(|arc| arc.as_ref().clone())
233    }
234
235    /// Reserve cache capacity to avoid HashMap resizing during processing.
236    /// For a 487 MB file with 208 K building elements, the cache can grow to
237    /// 300 K+ entries (elements + representation chains + placements).
238    /// Pre-allocating avoids ~6 resize-and-rehash operations that each copy
239    /// all entries, reducing both peak memory spikes and timing variance.
240    pub fn reserve_cache(&mut self, additional: usize) {
241        self.cache.reserve(additional);
242    }
243
244    /// Clear all caches to free memory
245    pub fn clear_cache(&mut self) {
246        self.cache.clear();
247        self.point_cache.clear();
248    }
249
250    /// Clear only the point coordinate cache (used after BREP preprocessing).
251    /// The entity cache is preserved for subsequent geometry processing.
252    pub fn clear_point_cache(&mut self) {
253        self.point_cache.clear();
254    }
255
256    /// Get cache size
257    pub fn cache_size(&self) -> usize {
258        self.cache.len()
259    }
260
261    /// Get raw bytes for an entity (for direct/fast parsing)
262    /// Returns the full entity line including type and attributes
263    #[inline]
264    pub fn get_raw_bytes(&mut self, entity_id: u32) -> Option<&'a [u8]> {
265        self.build_index();
266        let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
267        Some(&self.content.as_bytes()[start..end])
268    }
269
270    /// Get raw content string for an entity
271    #[inline]
272    pub fn get_raw_content(&mut self, entity_id: u32) -> Option<&'a str> {
273        self.build_index();
274        let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
275        Some(&self.content[start..end])
276    }
277
278    /// Fast extraction of first entity ref from raw bytes
279    /// Useful for BREP -> shell ID, Face -> FaceBound, etc.
280    /// Returns the first entity reference ID found in the entity
281    #[inline]
282    pub fn get_first_entity_ref_fast(&mut self, entity_id: u32) -> Option<u32> {
283        let bytes = self.get_raw_bytes(entity_id)?;
284        let len = bytes.len();
285        let mut i = 0;
286
287        // Skip to first '(' after '='
288        while i < len && bytes[i] != b'(' {
289            i += 1;
290        }
291        if i >= len {
292            return None;
293        }
294        i += 1; // Skip first '('
295
296        // Find first '#' which is the entity ref
297        while i < len {
298            // Skip whitespace
299            while i < len
300                && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r')
301            {
302                i += 1;
303            }
304
305            if i >= len {
306                return None;
307            }
308
309            if bytes[i] == b'#' {
310                i += 1;
311                let start = i;
312                while i < len && bytes[i].is_ascii_digit() {
313                    i += 1;
314                }
315                if i > start {
316                    let mut id = 0u32;
317                    for &b in &bytes[start..i] {
318                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
319                    }
320                    return Some(id);
321                }
322            }
323            i += 1;
324        }
325
326        None
327    }
328
329    /// Fast extraction of entity reference IDs from a list attribute in raw bytes
330    /// Useful for getting face list from ClosedShell, bounds from Face, etc.
331    /// Returns list of entity IDs
332    #[inline]
333    pub fn get_entity_ref_list_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
334        let bytes = self.get_raw_bytes(entity_id)?;
335
336        // Pattern: IFCTYPE((#id1,#id2,...)); or IFCTYPE((#id1,#id2,...),other);
337        let mut i = 0;
338        let len = bytes.len();
339
340        // Skip to first '(' after '='
341        while i < len && bytes[i] != b'(' {
342            i += 1;
343        }
344        if i >= len {
345            return None;
346        }
347        i += 1; // Skip first '('
348
349        // Skip to second '(' for the list
350        while i < len && bytes[i] != b'(' {
351            i += 1;
352        }
353        if i >= len {
354            return None;
355        }
356        i += 1; // Skip second '('
357
358        // Parse entity IDs
359        let mut ids = Vec::with_capacity(32);
360
361        while i < len {
362            // Skip whitespace and commas
363            while i < len
364                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
365            {
366                i += 1;
367            }
368
369            if i >= len || bytes[i] == b')' {
370                break;
371            }
372
373            // Expect '#' followed by number
374            if bytes[i] == b'#' {
375                i += 1;
376                let start = i;
377                while i < len && bytes[i].is_ascii_digit() {
378                    i += 1;
379                }
380                if i > start {
381                    // Fast integer parsing directly from ASCII digits
382                    let mut id = 0u32;
383                    for &b in &bytes[start..i] {
384                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
385                    }
386                    ids.push(id);
387                }
388            } else {
389                i += 1; // Skip unknown character
390            }
391        }
392
393        if ids.is_empty() {
394            None
395        } else {
396            Some(ids)
397        }
398    }
399
400    /// Fast extraction of PolyLoop point IDs directly from raw bytes
401    /// Bypasses full entity decoding for BREP optimization
402    /// Returns list of entity IDs for CartesianPoints
403    #[inline]
404    pub fn get_polyloop_point_ids_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
405        let bytes = self.get_raw_bytes(entity_id)?;
406
407        // IFCPOLYLOOP((#id1,#id2,#id3,...));
408        let mut i = 0;
409        let len = bytes.len();
410
411        // Skip to first '(' after '='
412        while i < len && bytes[i] != b'(' {
413            i += 1;
414        }
415        if i >= len {
416            return None;
417        }
418        i += 1; // Skip first '('
419
420        // Skip to second '(' for the point list
421        while i < len && bytes[i] != b'(' {
422            i += 1;
423        }
424        if i >= len {
425            return None;
426        }
427        i += 1; // Skip second '('
428
429        // Parse point IDs
430        let mut point_ids = Vec::with_capacity(8); // Most faces have 3-8 vertices
431
432        while i < len {
433            // Skip whitespace and commas
434            while i < len
435                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
436            {
437                i += 1;
438            }
439
440            if i >= len || bytes[i] == b')' {
441                break;
442            }
443
444            // Expect '#' followed by number
445            if bytes[i] == b'#' {
446                i += 1;
447                let start = i;
448                while i < len && bytes[i].is_ascii_digit() {
449                    i += 1;
450                }
451                if i > start {
452                    // Fast integer parsing directly from ASCII digits
453                    let mut id = 0u32;
454                    for &b in &bytes[start..i] {
455                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
456                    }
457                    point_ids.push(id);
458                }
459            } else {
460                i += 1; // Skip unknown character
461            }
462        }
463
464        if point_ids.is_empty() {
465            None
466        } else {
467            Some(point_ids)
468        }
469    }
470
471    /// Fast extraction of CartesianPoint coordinates directly from raw bytes
472    /// Bypasses full entity decoding for ~3x speedup on BREP-heavy files
473    /// Returns (x, y, z) as f64 tuple
474    #[inline]
475    pub fn get_cartesian_point_fast(&mut self, entity_id: u32) -> Option<(f64, f64, f64)> {
476        let bytes = self.get_raw_bytes(entity_id)?;
477
478        // Find opening paren for coordinates: IFCCARTESIANPOINT((x,y,z));
479        let mut i = 0;
480        let len = bytes.len();
481
482        // Skip to first '(' after '='
483        while i < len && bytes[i] != b'(' {
484            i += 1;
485        }
486        if i >= len {
487            return None;
488        }
489        i += 1; // Skip first '('
490
491        // Skip to second '(' for the coordinate list
492        while i < len && bytes[i] != b'(' {
493            i += 1;
494        }
495        if i >= len {
496            return None;
497        }
498        i += 1; // Skip second '('
499
500        // Parse x coordinate
501        let x = parse_next_float(&bytes[i..], &mut i)?;
502
503        // Parse y coordinate
504        let y = parse_next_float(&bytes[i..], &mut i)?;
505
506        // Parse z coordinate (optional for 2D points, default to 0)
507        let z = parse_next_float(&bytes[i..], &mut i).unwrap_or(0.0);
508
509        Some((x, y, z))
510    }
511
512    /// Fast extraction of FaceBound info directly from raw bytes
513    /// Returns (loop_id, orientation, is_outer_bound)
514    /// Bypasses full entity decoding for BREP optimization
515    #[inline]
516    pub fn get_face_bound_fast(&mut self, entity_id: u32) -> Option<(u32, bool, bool)> {
517        let bytes = self.get_raw_bytes(entity_id)?;
518        let len = bytes.len();
519
520        // Find '=' to locate start of type name, and '(' for end
521        let mut eq_pos = 0;
522        while eq_pos < len && bytes[eq_pos] != b'=' {
523            eq_pos += 1;
524        }
525        if eq_pos >= len {
526            return None;
527        }
528
529        // Check if this is an outer bound by looking for "OUTER" in the type name
530        // IFCFACEOUTERBOUND vs IFCFACEBOUND
531        // The type name is between '=' and '('
532        let mut is_outer = false;
533        let mut i = eq_pos + 1;
534        // Look for "OUTER" pattern (must check for the full word, not just 'O')
535        while i + 4 < len && bytes[i] != b'(' {
536            if bytes[i] == b'O'
537                && bytes[i + 1] == b'U'
538                && bytes[i + 2] == b'T'
539                && bytes[i + 3] == b'E'
540                && bytes[i + 4] == b'R'
541            {
542                is_outer = true;
543                break;
544            }
545            i += 1;
546        }
547        // Continue to find the '(' if we haven't already
548        while i < len && bytes[i] != b'(' {
549            i += 1;
550        }
551        if i >= len {
552            return None;
553        }
554
555        i += 1; // Skip first '('
556
557        // Skip whitespace
558        while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
559            i += 1;
560        }
561
562        // Expect '#' for loop entity ref
563        if i >= len || bytes[i] != b'#' {
564            return None;
565        }
566        i += 1;
567
568        // Parse loop ID
569        let start = i;
570        while i < len && bytes[i].is_ascii_digit() {
571            i += 1;
572        }
573        if i <= start {
574            return None;
575        }
576        let mut loop_id = 0u32;
577        for &b in &bytes[start..i] {
578            loop_id = loop_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
579        }
580
581        // Find orientation after comma - default to true (.T.)
582        // Skip to comma
583        while i < len && bytes[i] != b',' {
584            i += 1;
585        }
586        i += 1; // Skip comma
587
588        // Skip whitespace
589        while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
590            i += 1;
591        }
592
593        // Check for .F. (false) or .T. (true)
594        let orientation = if i + 2 < len && bytes[i] == b'.' && bytes[i + 2] == b'.' {
595            bytes[i + 1] != b'F'
596        } else {
597            true // Default to true
598        };
599
600        Some((loop_id, orientation, is_outer))
601    }
602
603    /// Fast extraction of PolyLoop COORDINATES directly from raw bytes
604    /// This is the ultimate fast path - extracts all coordinates in one go
605    /// Avoids N+1 HashMap lookups by batching point extraction
606    /// Returns Vec of (x, y, z) coordinate tuples
607    #[inline]
608    pub fn get_polyloop_coords_fast(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
609        // Ensure index is built once
610        self.build_index();
611        let index = self.entity_index.as_ref()?;
612        let bytes_full = self.content.as_bytes();
613
614        // Get polyloop raw bytes
615        let (start, end) = index.get(&entity_id).copied()?;
616        let bytes = &bytes_full[start..end];
617
618        // IFCPOLYLOOP((#id1,#id2,#id3,...));
619        let mut i = 0;
620        let len = bytes.len();
621
622        // Skip to first '(' after '='
623        while i < len && bytes[i] != b'(' {
624            i += 1;
625        }
626        if i >= len {
627            return None;
628        }
629        i += 1; // Skip first '('
630
631        // Skip to second '(' for the point list
632        while i < len && bytes[i] != b'(' {
633            i += 1;
634        }
635        if i >= len {
636            return None;
637        }
638        i += 1; // Skip second '('
639
640        // Parse point IDs and immediately fetch coordinates
641        let mut coords = Vec::with_capacity(8); // Most faces have 3-8 vertices
642
643        while i < len {
644            // Skip whitespace and commas
645            while i < len
646                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
647            {
648                i += 1;
649            }
650
651            if i >= len || bytes[i] == b')' {
652                break;
653            }
654
655            // Expect '#' followed by number
656            if bytes[i] == b'#' {
657                i += 1;
658                let id_start = i;
659                while i < len && bytes[i].is_ascii_digit() {
660                    i += 1;
661                }
662                if i > id_start {
663                    // Fast integer parsing directly from ASCII digits
664                    let mut point_id = 0u32;
665                    for &b in &bytes[id_start..i] {
666                        point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
667                    }
668
669                    // INLINE: Get cartesian point coordinates directly
670                    // This avoids the overhead of calling get_cartesian_point_fast for each point
671                    if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
672                        if let Some(coord) =
673                            parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
674                        {
675                            coords.push(coord);
676                        }
677                    }
678                }
679            } else {
680                i += 1; // Skip unknown character
681            }
682        }
683
684        if coords.len() >= 3 {
685            Some(coords)
686        } else {
687            None
688        }
689    }
690
691    /// Fast extraction of PolyLoop COORDINATES with point caching
692    /// Uses a cache to avoid re-parsing the same cartesian points
693    /// For files with many faces sharing points, this can be 2-3x faster
694    #[inline]
695    pub fn get_polyloop_coords_cached(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
696        // Ensure index is built once
697        self.build_index();
698        let index = self.entity_index.as_ref()?;
699        let bytes_full = self.content.as_bytes();
700
701        // Get polyloop raw bytes
702        let (start, end) = index.get(&entity_id).copied()?;
703        let bytes = &bytes_full[start..end];
704
705        // IFCPOLYLOOP((#id1,#id2,#id3,...));
706        let mut i = 0;
707        let len = bytes.len();
708
709        // Skip to first '(' after '='
710        while i < len && bytes[i] != b'(' {
711            i += 1;
712        }
713        if i >= len {
714            return None;
715        }
716        i += 1; // Skip first '('
717
718        // Skip to second '(' for the point list
719        while i < len && bytes[i] != b'(' {
720            i += 1;
721        }
722        if i >= len {
723            return None;
724        }
725        i += 1; // Skip second '('
726
727        // Parse point IDs and fetch coordinates (with caching)
728        // CRITICAL: Track expected count to ensure all points are resolved
729        let mut coords = Vec::with_capacity(8);
730        let mut expected_count = 0u32;
731
732        while i < len {
733            // Skip whitespace and commas
734            while i < len
735                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
736            {
737                i += 1;
738            }
739
740            if i >= len || bytes[i] == b')' {
741                break;
742            }
743
744            // Expect '#' followed by number
745            if bytes[i] == b'#' {
746                i += 1;
747                let id_start = i;
748                while i < len && bytes[i].is_ascii_digit() {
749                    i += 1;
750                }
751                if i > id_start {
752                    expected_count += 1; // Count every point ID we encounter
753
754                    // Fast integer parsing directly from ASCII digits
755                    let mut point_id = 0u32;
756                    for &b in &bytes[id_start..i] {
757                        point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
758                    }
759
760                    // Check cache first
761                    if let Some(&coord) = self.point_cache.get(&point_id) {
762                        coords.push(coord);
763                    } else {
764                        // Not in cache - parse and cache
765                        if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
766                            if let Some(coord) =
767                                parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
768                            {
769                                self.point_cache.insert(point_id, coord);
770                                coords.push(coord);
771                            }
772                        }
773                    }
774                }
775            } else {
776                i += 1; // Skip unknown character
777            }
778        }
779
780        // CRITICAL: Return None if ANY point failed to resolve
781        // This matches the old behavior where missing points invalidated the whole polygon
782        if coords.len() >= 3 && coords.len() == expected_count as usize {
783            Some(coords)
784        } else {
785            None
786        }
787    }
788}
789
790/// Parse cartesian point coordinates inline from raw bytes
791/// Used by get_polyloop_coords_fast for maximum performance
792#[inline]
793fn parse_cartesian_point_inline(bytes: &[u8]) -> Option<(f64, f64, f64)> {
794    let len = bytes.len();
795    let mut i = 0;
796
797    // Skip to first '(' after '='
798    while i < len && bytes[i] != b'(' {
799        i += 1;
800    }
801    if i >= len {
802        return None;
803    }
804    i += 1; // Skip first '('
805
806    // Skip to second '(' for the coordinate list
807    while i < len && bytes[i] != b'(' {
808        i += 1;
809    }
810    if i >= len {
811        return None;
812    }
813    i += 1; // Skip second '('
814
815    // Parse x coordinate
816    let x = parse_float_inline(&bytes[i..], &mut i)?;
817
818    // Parse y coordinate
819    let y = parse_float_inline(&bytes[i..], &mut i)?;
820
821    // Parse z coordinate (optional for 2D points, default to 0)
822    let z = parse_float_inline(&bytes[i..], &mut i).unwrap_or(0.0);
823
824    Some((x, y, z))
825}
826
827/// Parse float inline - simpler version for batch coordinate extraction
828#[inline]
829fn parse_float_inline(bytes: &[u8], offset: &mut usize) -> Option<f64> {
830    let len = bytes.len();
831    let mut i = 0;
832
833    // Skip whitespace and commas
834    while i < len
835        && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
836    {
837        i += 1;
838    }
839
840    if i >= len || bytes[i] == b')' {
841        return None;
842    }
843
844    // Parse float using fast_float
845    match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
846        Ok((value, consumed)) if consumed > 0 => {
847            *offset += i + consumed;
848            Some(value)
849        }
850        _ => None,
851    }
852}
853
854/// Parse next float from bytes, advancing position past it
855#[inline]
856fn parse_next_float(bytes: &[u8], offset: &mut usize) -> Option<f64> {
857    let len = bytes.len();
858    let mut i = 0;
859
860    // Skip whitespace and commas
861    while i < len
862        && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
863    {
864        i += 1;
865    }
866
867    if i >= len || bytes[i] == b')' {
868        return None;
869    }
870
871    // Parse float using fast_float
872    match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
873        Ok((value, consumed)) if consumed > 0 => {
874            *offset += i + consumed;
875            Some(value)
876        }
877        _ => None,
878    }
879}
880
// Unit tests for `EntityDecoder`, run against small inline IFC snippets.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::IfcType;

    // Decoding by explicit byte range yields the ID, type and attributes.
    #[test]
    fn test_decode_entity() {
        let content = r#"
#1=IFCPROJECT('2vqT3bvqj9RBFjLlXpN8n9',$,$,$,$,$,$,$,$);
#2=IFCWALL('3a4T3bvqj9RBFjLlXpN8n0',$,$,$,'Wall-001',$,#3,#4);
#3=IFCLOCALPLACEMENT($,#4);
#4=IFCAXIS2PLACEMENT3D(#5,$,$);
#5=IFCCARTESIANPOINT((0.,0.,0.));
"#;

        let mut decoder = EntityDecoder::new(content);

        // Find entity #2 (range ends one past its ';')
        let start = content.find("#2=").unwrap();
        let end = content[start..].find(';').unwrap() + start + 1;

        let entity = decoder.decode_at(start, end).unwrap();
        assert_eq!(entity.id, 2);
        assert_eq!(entity.ifc_type, IfcType::IfcWall);
        assert_eq!(entity.attributes.len(), 8);
        assert_eq!(entity.get_string(4), Some("Wall-001"));
        assert_eq!(entity.get_ref(6), Some(3));
        assert_eq!(entity.get_ref(7), Some(4));
    }

    // Decoding by ID builds the index on demand and caches the result.
    #[test]
    fn test_decode_by_id() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#5=IFCWALL('guid2',$,$,$,'Wall-001',$,$,$);
#10=IFCDOOR('guid3',$,$,$,'Door-001',$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let entity = decoder.decode_by_id(5).unwrap();
        assert_eq!(entity.id, 5);
        assert_eq!(entity.ifc_type, IfcType::IfcWall);
        assert_eq!(entity.get_string(4), Some("Wall-001"));

        // Should be cached now
        assert_eq!(decoder.cache_size(), 1);
        let cached = decoder.get_cached(5).unwrap();
        assert_eq!(cached.id, 5);
    }

    // A `#ID` attribute resolves to the entity it points at.
    #[test]
    fn test_resolve_ref() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,#1,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let wall = decoder.decode_by_id(2).unwrap();
        let placement_attr = wall.get(6).unwrap();

        let referenced = decoder.resolve_ref(placement_attr).unwrap().unwrap();
        assert_eq!(referenced.id, 1);
        assert_eq!(referenced.ifc_type, IfcType::IfcProject);
    }

    // A list attribute of references resolves to entities in list order.
    #[test]
    fn test_resolve_ref_list() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid1',$,$,$,$,$,$,$);
#3=IFCDOOR('guid2',$,$,$,$,$,$,$);
#4=IFCRELCONTAINEDINSPATIALSTRUCTURE('guid3',$,$,$,(#2,#3),$,#1);
"#;

        let mut decoder = EntityDecoder::new(content);

        let rel = decoder.decode_by_id(4).unwrap();
        let elements_attr = rel.get(4).unwrap();

        let elements = decoder.resolve_ref_list(elements_attr).unwrap();
        assert_eq!(elements.len(), 2);
        assert_eq!(elements[0].id, 2);
        assert_eq!(elements[0].ifc_type, IfcType::IfcWall);
        assert_eq!(elements[1].id, 3);
        assert_eq!(elements[1].ifc_type, IfcType::IfcDoor);
    }

    // The cache grows once per distinct entity and can be cleared.
    #[test]
    fn test_cache() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        assert_eq!(decoder.cache_size(), 0);

        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 1);

        decoder.decode_by_id(2).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        // Decode same entity - should use cache
        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        decoder.clear_cache();
        assert_eq!(decoder.cache_size(), 0);
    }
}