Skip to main content

ifc_lite_core/
decoder.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! Entity Decoder - On-demand entity parsing
6//!
7//! Lazily decode IFC entities from byte offsets without loading entire file into memory.
8
9use crate::error::{Error, Result};
10use crate::parser::{parse_entity, EntityScanner};
11use crate::schema_gen::{AttributeValue, DecodedEntity};
12use rustc_hash::FxHashMap;
13use std::sync::Arc;
14
/// Pre-built entity index: maps an entity ID to the `(start, end)` byte span
/// recorded for that entity in the source content.
pub type EntityIndex = FxHashMap<u32, (usize, usize)>;
17
18/// Build an entity index from content.
19///
20/// This intentionally shares `EntityScanner`'s HEADER skipping and quoted-string
21/// semantics so scan iteration and decoder lookup cannot disagree on malformed
22/// headers or semicolons embedded inside STEP strings.
23#[inline]
24pub fn build_entity_index<T>(content: &T) -> EntityIndex
25where
26    T: AsRef<[u8]> + ?Sized,
27{
28    let content = content.as_ref();
29    let estimated_entities = content.len() / 50;
30    let mut index = FxHashMap::with_capacity_and_hasher(estimated_entities, Default::default());
31    let mut scanner = EntityScanner::new(content);
32    while let Some((id, _type_name, start, end)) = scanner.next_entity() {
33        index.insert(id, (start, end));
34    }
35
36    index
37}
38
/// Entity decoder for lazy parsing from raw IFC bytes.
///
/// String attributes are decoded lossily when tokens become `AttributeValue`s;
/// structural scanning and byte offsets always use the original source bytes.
pub struct EntityDecoder<'a> {
    /// Borrowed source bytes; every `(start, end)` span indexes into this slice.
    content: &'a [u8],
    /// Cache of decoded entities (entity_id -> `Arc<DecodedEntity>`).
    /// Storing `Arc`s lets the cache be shared or drained without deep copies.
    cache: FxHashMap<u32, Arc<DecodedEntity>>,
    /// Index of entity offsets (entity_id -> (start, end)).
    /// `None` until it is either supplied by a constructor or built lazily.
    /// Held in an `Arc` so one index can be shared across threads without
    /// cloning the underlying map.
    entity_index: Option<Arc<EntityIndex>>,
    /// Cache of cartesian point coordinates for the FacetedBrep optimization.
    /// Only populated when using `get_polyloop_coords_cached`.
    point_cache: FxHashMap<u32, (f64, f64, f64)>,
    /// Lazy-cached multiplier converting file plane-angle units to radians.
    /// Populated on first call to [`Self::plane_angle_to_radians`]. Spec
    /// default (and Renga-style files) is 1.0 (RADIAN); degree-unit files
    /// resolve to π/180.
    plane_angle_to_radians_cache: Option<f64>,
    /// Lazy-cached multiplier converting file length units to metres.
    /// Populated on first call to [`Self::length_unit_scale`]. 1.0 for metre
    /// files, 0.001 for millimetre files, etc. Used to express absolute
    /// tolerances (e.g. curve-tessellation chord deviation) in file units.
    length_unit_scale_cache: Option<f64>,
}
66
67impl<'a> EntityDecoder<'a> {
68    /// Create new decoder
69    pub fn new<T>(content: &'a T) -> Self
70    where
71        T: AsRef<[u8]> + ?Sized,
72    {
73        let content = content.as_ref();
74        Self {
75            content,
76            cache: FxHashMap::default(),
77            entity_index: None,
78            point_cache: FxHashMap::default(),
79            plane_angle_to_radians_cache: None,
80            length_unit_scale_cache: None,
81        }
82    }
83
84    /// Create decoder with pre-built index (faster for repeated lookups)
85    pub fn with_index<T>(content: &'a T, index: EntityIndex) -> Self
86    where
87        T: AsRef<[u8]> + ?Sized,
88    {
89        let content = content.as_ref();
90        Self {
91            content,
92            cache: FxHashMap::default(),
93            entity_index: Some(Arc::new(index)),
94            point_cache: FxHashMap::default(),
95            plane_angle_to_radians_cache: None,
96            length_unit_scale_cache: None,
97        }
98    }
99
100    /// Create decoder with shared Arc index (for parallel processing)
101    pub fn with_arc_index<T>(content: &'a T, index: Arc<EntityIndex>) -> Self
102    where
103        T: AsRef<[u8]> + ?Sized,
104    {
105        let content = content.as_ref();
106        Self {
107            content,
108            cache: FxHashMap::default(),
109            entity_index: Some(index),
110            point_cache: FxHashMap::default(),
111            plane_angle_to_radians_cache: None,
112            length_unit_scale_cache: None,
113        }
114    }
115
116    /// Build entity index for O(1) lookups
117    /// This scans the file once and maps entity IDs to byte offsets
118    fn build_index(&mut self) {
119        if self.entity_index.is_some() {
120            return; // Already built
121        }
122        self.entity_index = Some(Arc::new(build_entity_index(self.content)));
123    }
124
125    /// Decode entity at byte offset
126    /// Returns cached entity if already decoded
127    ///
128    /// Validates the `(start, end)` span against `self.content.len()` before
129    /// slicing. Out-of-range or inverted spans return `Error::parse` instead
130    /// of panicking — callers (e.g. `decode_and_cache`, `decode_at_with_id`,
131    /// the streaming pre-pass shard mergers) hand us spans derived from
132    /// untrusted/streamed entity-index data, and a malformed span must not
133    /// take down the whole worker.
134    #[inline]
135    pub fn decode_at(&mut self, start: usize, end: usize) -> Result<DecodedEntity> {
136        let content_len = self.content.len();
137        if start > end || end > content_len {
138            return Err(Error::parse(
139                0,
140                format!(
141                    "decode_at: invalid byte span ({}, {}) for content length {}",
142                    start, end, content_len,
143                ),
144            ));
145        }
146        let line = &self.content[start..end];
147        let (id, ifc_type, tokens) = parse_entity(line).map_err(|e| {
148            // Add bounded, lossy debug info without requiring the source to be UTF-8.
149            let cut = line.len().min(100);
150            Error::parse(
151                0,
152                format!(
153                    "Failed to parse entity: {:?}, input: {:?}",
154                    e,
155                    String::from_utf8_lossy(&line[..cut])
156                ),
157            )
158        })?;
159
160        // Check cache first - return clone of inner DecodedEntity
161        if let Some(entity_arc) = self.cache.get(&id) {
162            return Ok(entity_arc.as_ref().clone());
163        }
164
165        // Convert tokens to AttributeValues
166        let attributes = tokens
167            .iter()
168            .map(|token| AttributeValue::from_token(token))
169            .collect();
170
171        let entity = DecodedEntity::new(id, ifc_type, attributes);
172        self.cache.insert(id, Arc::new(entity.clone()));
173        Ok(entity)
174    }
175
176    /// Decode entity at byte offset with known ID (faster - checks cache before parsing)
177    /// Use this when the scanner provides the entity ID to avoid re-parsing cached entities
178    #[inline]
179    pub fn decode_at_with_id(
180        &mut self,
181        id: u32,
182        start: usize,
183        end: usize,
184    ) -> Result<DecodedEntity> {
185        // Check cache first - avoid parsing if already decoded
186        if let Some(entity_arc) = self.cache.get(&id) {
187            return Ok(entity_arc.as_ref().clone());
188        }
189
190        // Not in cache, parse and cache
191        self.decode_at(start, end)
192    }
193
194    /// Decode entity by ID - O(1) lookup using entity index
195    #[inline]
196    pub fn decode_by_id(&mut self, entity_id: u32) -> Result<DecodedEntity> {
197        // Check cache first - return clone of inner DecodedEntity
198        if let Some(entity_arc) = self.cache.get(&entity_id) {
199            return Ok(entity_arc.as_ref().clone());
200        }
201
202        // Build index if not already built
203        self.build_index();
204
205        // O(1) lookup in index
206        let (start, end) = self
207            .entity_index
208            .as_ref()
209            .and_then(|idx| idx.get(&entity_id).copied())
210            .ok_or_else(|| Error::parse(0, format!("Entity #{} not found", entity_id)))?;
211
212        self.decode_at(start, end)
213    }
214
215    /// Multiplier that converts file plane-angle units to radians.
216    ///
217    /// Lazy-resolved on first call by scanning for IFCPROJECT and reading
218    /// its IFCUNITASSIGNMENT. Cached for subsequent calls. Returns `1.0`
219    /// when no plane-angle unit is declared (IFC spec default = RADIAN).
220    ///
221    /// Use this at curve-sampling time wherever an `IfcParameterValue` is
222    /// interpreted as an angle (IfcCircle / IfcEllipse trim parameters).
223    /// Without it, `value.to_radians()` is correct only for DEGREE files
224    /// and silently shrinks arcs on RADIAN files (issue #820).
225    pub fn plane_angle_to_radians(&mut self) -> f64 {
226        if let Some(cached) = self.plane_angle_to_radians_cache {
227            return cached;
228        }
229
230        let mut scanner = crate::parser::EntityScanner::new(self.content);
231        let mut project_id: Option<u32> = None;
232        while let Some((id, type_name, _, _)) = scanner.next_entity() {
233            if type_name == "IFCPROJECT" {
234                project_id = Some(id);
235                break;
236            }
237        }
238
239        let scale = match project_id {
240            Some(pid) => crate::units::extract_plane_angle_to_radians(self, pid).unwrap_or(1.0),
241            None => 1.0,
242        };
243        self.plane_angle_to_radians_cache = Some(scale);
244        scale
245    }
246
247    /// Multiplier that converts file length units to metres (1.0 for metre
248    /// files, 0.001 for millimetre files, …). Lazy-resolved on first call by
249    /// scanning for IFCPROJECT and reading its IFCUNITASSIGNMENT, then cached.
250    /// Returns `1.0` when no length unit is declared.
251    ///
252    /// Use this to express an *absolute* metric tolerance in file units —
253    /// e.g. a curve-tessellation chord-deviation budget that stays constant in
254    /// millimetres whether the file is authored in mm or m.
255    pub fn length_unit_scale(&mut self) -> f64 {
256        if let Some(cached) = self.length_unit_scale_cache {
257            return cached;
258        }
259
260        let mut scanner = crate::parser::EntityScanner::new(self.content);
261        let mut project_id: Option<u32> = None;
262        while let Some((id, type_name, _, _)) = scanner.next_entity() {
263            if type_name == "IFCPROJECT" {
264                project_id = Some(id);
265                break;
266            }
267        }
268
269        let scale = match project_id {
270            Some(pid) => crate::units::try_extract_length_unit_scale(self, pid).unwrap_or(1.0),
271            None => 1.0,
272        };
273        self.length_unit_scale_cache = Some(scale);
274        scale
275    }
276
277    /// Pre-seed the unit-scale caches so [`Self::length_unit_scale`] and
278    /// [`Self::plane_angle_to_radians`] return immediately without the full-file
279    /// `IFCPROJECT` scan.
280    ///
281    /// Both lazy resolvers walk the whole DATA section to locate the (singleton)
282    /// `IFCPROJECT`. That scan is `O(file size)` and `IFCPROJECT` legally sits
283    /// anywhere — IfcOpenShell emits it near the *end*, so on a large model the
284    /// scan touches tens of MB. The cache is per-decoder, and the parallel
285    /// geometry pipeline builds a fresh decoder per element, so without seeding
286    /// every arc-bearing element re-pays the scan (≈135 ms each on a 75 MB
287    /// file). The orchestrator resolves both scales once on a warm shared
288    /// decoder and seeds each worker decoder here.
289    pub fn seed_unit_scales(&mut self, length_unit_scale: f64, plane_angle_to_radians: f64) {
290        self.length_unit_scale_cache = Some(length_unit_scale);
291        self.plane_angle_to_radians_cache = Some(plane_angle_to_radians);
292    }
293
294    /// Resolve entity reference (follow #ID)
295    /// Returns None for null/derived values
296    #[inline]
297    pub fn resolve_ref(&mut self, attr: &AttributeValue) -> Result<Option<DecodedEntity>> {
298        match attr.as_entity_ref() {
299            Some(id) => Ok(Some(self.decode_by_id(id)?)),
300            None => Ok(None),
301        }
302    }
303
304    /// Resolve list of entity references
305    pub fn resolve_ref_list(&mut self, attr: &AttributeValue) -> Result<Vec<DecodedEntity>> {
306        let list = attr
307            .as_list()
308            .ok_or_else(|| Error::parse(0, "Expected list".to_string()))?;
309
310        let mut entities = Vec::with_capacity(list.len());
311        for item in list {
312            if let Some(id) = item.as_entity_ref() {
313                entities.push(self.decode_by_id(id)?);
314            }
315        }
316        Ok(entities)
317    }
318
319    /// Get cached entity (without decoding)
320    pub fn get_cached(&self, entity_id: u32) -> Option<DecodedEntity> {
321        self.cache.get(&entity_id).map(|arc| arc.as_ref().clone())
322    }
323
324    /// Reserve cache capacity to avoid HashMap resizing during processing.
325    /// For a 487 MB file with 208 K building elements, the cache can grow to
326    /// 300 K+ entries (elements + representation chains + placements).
327    /// Pre-allocating avoids ~6 resize-and-rehash operations that each copy
328    /// all entries, reducing both peak memory spikes and timing variance.
329    pub fn reserve_cache(&mut self, additional: usize) {
330        self.cache.reserve(additional);
331    }
332
333    /// Inject a pre-warmed Arc-shared cache into this decoder's local cache.
334    ///
335    /// Used by the de-normalized parallel path: a serial pre-pass builds a
336    /// shared `Arc<FxHashMap<u32, Arc<DecodedEntity>>>` containing all
337    /// entities reachable from the jobs. Each rayon task then injects
338    /// that shared cache into its own decoder via this method, so the
339    /// per-task hot path hits in-WASM-heap Arc handles instead of
340    /// SAB-imported atomic memory.
341    ///
342    /// Cost: one Arc::clone per cached entry (atomic refcount bump).
343    /// For a typical 100K-entry cache × 9 rayon tasks = 900K atomics
344    /// total, ~90 ms wall (incurred ONCE at task setup; the parallel
345    /// hot path then runs lock-free against the populated cache).
346    pub fn inject_shared_cache(&mut self, shared: &FxHashMap<u32, Arc<DecodedEntity>>) {
347        self.cache.reserve(shared.len());
348        for (&id, entity) in shared.iter() {
349            self.cache.insert(id, Arc::clone(entity));
350        }
351    }
352
353    /// Decode + cache without returning. Used by the pre-warm pass to
354    /// populate a shared cache. Returns the cached Arc so the caller
355    /// can chase references without re-decoding.
356    pub fn decode_and_cache(
357        &mut self,
358        id: u32,
359        start: usize,
360        end: usize,
361    ) -> Result<Arc<DecodedEntity>> {
362        if let Some(arc) = self.cache.get(&id) {
363            return Ok(Arc::clone(arc));
364        }
365        let _ = self.decode_at(start, end)?;
366        Ok(Arc::clone(self.cache.get(&id).ok_or_else(|| {
367            Error::parse(0, "decode_at didn't populate cache".to_string())
368        })?))
369    }
370
371    /// Drain the populated cache out of this decoder for sharing across
372    /// rayon tasks. After calling this, the decoder is empty (cache
373    /// moved out); callers typically then drop the decoder.
374    pub fn drain_cache(&mut self) -> FxHashMap<u32, Arc<DecodedEntity>> {
375        std::mem::take(&mut self.cache)
376    }
377
378    /// Clear all caches to free memory
379    pub fn clear_cache(&mut self) {
380        self.cache.clear();
381        self.point_cache.clear();
382    }
383
384    /// Clear only the point coordinate cache (used after BREP preprocessing).
385    /// The entity cache is preserved for subsequent geometry processing.
386    pub fn clear_point_cache(&mut self) {
387        self.point_cache.clear();
388    }
389
390    /// Get cache size
391    pub fn cache_size(&self) -> usize {
392        self.cache.len()
393    }
394
395    /// Get raw bytes for an entity (for direct/fast parsing)
396    /// Returns the full entity line including type and attributes
397    #[inline]
398    pub fn get_raw_bytes(&mut self, entity_id: u32) -> Option<&'a [u8]> {
399        self.build_index();
400        let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
401        Some(&self.content[start..end])
402    }
403
404    /// Fast extraction of first entity ref from raw bytes
405    /// Useful for BREP -> shell ID, Face -> FaceBound, etc.
406    /// Returns the first entity reference ID found in the entity
407    #[inline]
408    pub fn get_first_entity_ref_fast(&mut self, entity_id: u32) -> Option<u32> {
409        let bytes = self.get_raw_bytes(entity_id)?;
410        let len = bytes.len();
411        let mut i = 0;
412
413        // Skip to first '(' after '='
414        while i < len && bytes[i] != b'(' {
415            i += 1;
416        }
417        if i >= len {
418            return None;
419        }
420        i += 1; // Skip first '('
421
422        // Find first '#' which is the entity ref
423        while i < len {
424            // Skip whitespace
425            while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
426                i += 1;
427            }
428
429            if i >= len {
430                return None;
431            }
432
433            if bytes[i] == b'#' {
434                i += 1;
435                let start = i;
436                while i < len && bytes[i].is_ascii_digit() {
437                    i += 1;
438                }
439                if i > start {
440                    let mut id = 0u32;
441                    for &b in &bytes[start..i] {
442                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
443                    }
444                    return Some(id);
445                }
446            }
447            i += 1;
448        }
449
450        None
451    }
452
453    /// Fast extraction of entity reference IDs from a list attribute in raw bytes
454    /// Useful for getting face list from ClosedShell, bounds from Face, etc.
455    /// Returns list of entity IDs
456    #[inline]
457    pub fn get_entity_ref_list_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
458        let bytes = self.get_raw_bytes(entity_id)?;
459
460        // Pattern: IFCTYPE((#id1,#id2,...)); or IFCTYPE((#id1,#id2,...),other);
461        let mut i = 0;
462        let len = bytes.len();
463
464        // Skip to first '(' after '='
465        while i < len && bytes[i] != b'(' {
466            i += 1;
467        }
468        if i >= len {
469            return None;
470        }
471        i += 1; // Skip first '('
472
473        // Skip to second '(' for the list
474        while i < len && bytes[i] != b'(' {
475            i += 1;
476        }
477        if i >= len {
478            return None;
479        }
480        i += 1; // Skip second '('
481
482        // Parse entity IDs
483        let mut ids = Vec::with_capacity(32);
484
485        while i < len {
486            // Skip whitespace and commas
487            while i < len
488                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
489            {
490                i += 1;
491            }
492
493            if i >= len || bytes[i] == b')' {
494                break;
495            }
496
497            // Expect '#' followed by number
498            if bytes[i] == b'#' {
499                i += 1;
500                let start = i;
501                while i < len && bytes[i].is_ascii_digit() {
502                    i += 1;
503                }
504                if i > start {
505                    // Fast integer parsing directly from ASCII digits
506                    let mut id = 0u32;
507                    for &b in &bytes[start..i] {
508                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
509                    }
510                    ids.push(id);
511                }
512            } else {
513                i += 1; // Skip unknown character
514            }
515        }
516
517        if ids.is_empty() {
518            None
519        } else {
520            Some(ids)
521        }
522    }
523
524    /// Fast extraction of PolyLoop point IDs directly from raw bytes
525    /// Bypasses full entity decoding for BREP optimization
526    /// Returns list of entity IDs for CartesianPoints
527    #[inline]
528    pub fn get_polyloop_point_ids_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
529        let bytes = self.get_raw_bytes(entity_id)?;
530
531        // IFCPOLYLOOP((#id1,#id2,#id3,...));
532        let mut i = 0;
533        let len = bytes.len();
534
535        // Skip to first '(' after '='
536        while i < len && bytes[i] != b'(' {
537            i += 1;
538        }
539        if i >= len {
540            return None;
541        }
542        i += 1; // Skip first '('
543
544        // Skip to second '(' for the point list
545        while i < len && bytes[i] != b'(' {
546            i += 1;
547        }
548        if i >= len {
549            return None;
550        }
551        i += 1; // Skip second '('
552
553        // Parse point IDs
554        let mut point_ids = Vec::with_capacity(8); // Most faces have 3-8 vertices
555
556        while i < len {
557            // Skip whitespace and commas
558            while i < len
559                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
560            {
561                i += 1;
562            }
563
564            if i >= len || bytes[i] == b')' {
565                break;
566            }
567
568            // Expect '#' followed by number
569            if bytes[i] == b'#' {
570                i += 1;
571                let start = i;
572                while i < len && bytes[i].is_ascii_digit() {
573                    i += 1;
574                }
575                if i > start {
576                    // Fast integer parsing directly from ASCII digits
577                    let mut id = 0u32;
578                    for &b in &bytes[start..i] {
579                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
580                    }
581                    point_ids.push(id);
582                }
583            } else {
584                i += 1; // Skip unknown character
585            }
586        }
587
588        if point_ids.is_empty() {
589            None
590        } else {
591            Some(point_ids)
592        }
593    }
594
595    /// Fast extraction of CartesianPoint coordinates directly from raw bytes
596    /// Bypasses full entity decoding for ~3x speedup on BREP-heavy files
597    /// Returns (x, y, z) as f64 tuple
598    #[inline]
599    pub fn get_cartesian_point_fast(&mut self, entity_id: u32) -> Option<(f64, f64, f64)> {
600        let bytes = self.get_raw_bytes(entity_id)?;
601
602        // Find opening paren for coordinates: IFCCARTESIANPOINT((x,y,z));
603        let mut i = 0;
604        let len = bytes.len();
605
606        // Skip to first '(' after '='
607        while i < len && bytes[i] != b'(' {
608            i += 1;
609        }
610        if i >= len {
611            return None;
612        }
613        i += 1; // Skip first '('
614
615        // Skip to second '(' for the coordinate list
616        while i < len && bytes[i] != b'(' {
617            i += 1;
618        }
619        if i >= len {
620            return None;
621        }
622        i += 1; // Skip second '('
623
624        // Parse x coordinate
625        let x = parse_next_float(&bytes[i..], &mut i)?;
626
627        // Parse y coordinate
628        let y = parse_next_float(&bytes[i..], &mut i)?;
629
630        // Parse z coordinate (optional for 2D points, default to 0)
631        let z = parse_next_float(&bytes[i..], &mut i).unwrap_or(0.0);
632
633        Some((x, y, z))
634    }
635
636    /// Fast extraction of FaceBound info directly from raw bytes
637    /// Returns (loop_id, orientation, is_outer_bound)
638    /// Bypasses full entity decoding for BREP optimization
639    #[inline]
640    pub fn get_face_bound_fast(&mut self, entity_id: u32) -> Option<(u32, bool, bool)> {
641        let bytes = self.get_raw_bytes(entity_id)?;
642        let len = bytes.len();
643
644        // Find '=' to locate start of type name, and '(' for end
645        let mut eq_pos = 0;
646        while eq_pos < len && bytes[eq_pos] != b'=' {
647            eq_pos += 1;
648        }
649        if eq_pos >= len {
650            return None;
651        }
652
653        // Check if this is an outer bound by looking for "OUTER" in the type name
654        // IFCFACEOUTERBOUND vs IFCFACEBOUND
655        // The type name is between '=' and '('
656        let mut is_outer = false;
657        let mut i = eq_pos + 1;
658        // Look for "OUTER" pattern (must check for the full word, not just 'O')
659        while i + 4 < len && bytes[i] != b'(' {
660            if bytes[i] == b'O'
661                && bytes[i + 1] == b'U'
662                && bytes[i + 2] == b'T'
663                && bytes[i + 3] == b'E'
664                && bytes[i + 4] == b'R'
665            {
666                is_outer = true;
667                break;
668            }
669            i += 1;
670        }
671        // Continue to find the '(' if we haven't already
672        while i < len && bytes[i] != b'(' {
673            i += 1;
674        }
675        if i >= len {
676            return None;
677        }
678
679        i += 1; // Skip first '('
680
681        // Skip whitespace
682        while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
683            i += 1;
684        }
685
686        // Expect '#' for loop entity ref
687        if i >= len || bytes[i] != b'#' {
688            return None;
689        }
690        i += 1;
691
692        // Parse loop ID
693        let start = i;
694        while i < len && bytes[i].is_ascii_digit() {
695            i += 1;
696        }
697        if i <= start {
698            return None;
699        }
700        let mut loop_id = 0u32;
701        for &b in &bytes[start..i] {
702            loop_id = loop_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
703        }
704
705        // Find orientation after comma - default to true (.T.)
706        // Skip to comma
707        while i < len && bytes[i] != b',' {
708            i += 1;
709        }
710        i += 1; // Skip comma
711
712        // Skip whitespace
713        while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
714            i += 1;
715        }
716
717        // Check for .F. (false) or .T. (true)
718        let orientation = if i + 2 < len && bytes[i] == b'.' && bytes[i + 2] == b'.' {
719            bytes[i + 1] != b'F'
720        } else {
721            true // Default to true
722        };
723
724        Some((loop_id, orientation, is_outer))
725    }
726
727    /// Fast extraction of PolyLoop COORDINATES directly from raw bytes
728    /// This is the ultimate fast path - extracts all coordinates in one go
729    /// Avoids N+1 HashMap lookups by batching point extraction
730    /// Returns Vec of (x, y, z) coordinate tuples
731    #[inline]
732    pub fn get_polyloop_coords_fast(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
733        // Ensure index is built once
734        self.build_index();
735        let index = self.entity_index.as_ref()?;
736        let bytes_full = self.content;
737
738        // Get polyloop raw bytes
739        let (start, end) = index.get(&entity_id).copied()?;
740        let bytes = &bytes_full[start..end];
741
742        // IFCPOLYLOOP((#id1,#id2,#id3,...));
743        let mut i = 0;
744        let len = bytes.len();
745
746        // Skip to first '(' after '='
747        while i < len && bytes[i] != b'(' {
748            i += 1;
749        }
750        if i >= len {
751            return None;
752        }
753        i += 1; // Skip first '('
754
755        // Skip to second '(' for the point list
756        while i < len && bytes[i] != b'(' {
757            i += 1;
758        }
759        if i >= len {
760            return None;
761        }
762        i += 1; // Skip second '('
763
764        // Parse point IDs and immediately fetch coordinates
765        let mut coords = Vec::with_capacity(8); // Most faces have 3-8 vertices
766
767        while i < len {
768            // Skip whitespace and commas
769            while i < len
770                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
771            {
772                i += 1;
773            }
774
775            if i >= len || bytes[i] == b')' {
776                break;
777            }
778
779            // Expect '#' followed by number
780            if bytes[i] == b'#' {
781                i += 1;
782                let id_start = i;
783                while i < len && bytes[i].is_ascii_digit() {
784                    i += 1;
785                }
786                if i > id_start {
787                    // Fast integer parsing directly from ASCII digits
788                    let mut point_id = 0u32;
789                    for &b in &bytes[id_start..i] {
790                        point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
791                    }
792
793                    // INLINE: Get cartesian point coordinates directly
794                    // This avoids the overhead of calling get_cartesian_point_fast for each point
795                    if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
796                        if let Some(coord) =
797                            parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
798                        {
799                            coords.push(coord);
800                        }
801                    }
802                }
803            } else {
804                i += 1; // Skip unknown character
805            }
806        }
807
808        if coords.len() >= 3 {
809            Some(coords)
810        } else {
811            None
812        }
813    }
814
815    /// Fast extraction of PolyLoop COORDINATES with point caching
816    /// Uses a cache to avoid re-parsing the same cartesian points
817    /// For files with many faces sharing points, this can be 2-3x faster
818    #[inline]
819    pub fn get_polyloop_coords_cached(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
820        // Ensure index is built once
821        self.build_index();
822        let index = self.entity_index.as_ref()?;
823        let bytes_full = self.content;
824
825        // Get polyloop raw bytes
826        let (start, end) = index.get(&entity_id).copied()?;
827        let bytes = &bytes_full[start..end];
828
829        // IFCPOLYLOOP((#id1,#id2,#id3,...));
830        let mut i = 0;
831        let len = bytes.len();
832
833        // Skip to first '(' after '='
834        while i < len && bytes[i] != b'(' {
835            i += 1;
836        }
837        if i >= len {
838            return None;
839        }
840        i += 1; // Skip first '('
841
842        // Skip to second '(' for the point list
843        while i < len && bytes[i] != b'(' {
844            i += 1;
845        }
846        if i >= len {
847            return None;
848        }
849        i += 1; // Skip second '('
850
851        // Parse point IDs and fetch coordinates (with caching)
852        // CRITICAL: Track expected count to ensure all points are resolved
853        let mut coords = Vec::with_capacity(8);
854        let mut expected_count = 0u32;
855
856        while i < len {
857            // Skip whitespace and commas
858            while i < len
859                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
860            {
861                i += 1;
862            }
863
864            if i >= len || bytes[i] == b')' {
865                break;
866            }
867
868            // Expect '#' followed by number
869            if bytes[i] == b'#' {
870                i += 1;
871                let id_start = i;
872                while i < len && bytes[i].is_ascii_digit() {
873                    i += 1;
874                }
875                if i > id_start {
876                    expected_count += 1; // Count every point ID we encounter
877
878                    // Fast integer parsing directly from ASCII digits
879                    let mut point_id = 0u32;
880                    for &b in &bytes[id_start..i] {
881                        point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
882                    }
883
884                    // Check cache first
885                    if let Some(&coord) = self.point_cache.get(&point_id) {
886                        coords.push(coord);
887                    } else {
888                        // Not in cache - parse and cache
889                        if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
890                            if let Some(coord) =
891                                parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
892                            {
893                                self.point_cache.insert(point_id, coord);
894                                coords.push(coord);
895                            }
896                        }
897                    }
898                }
899            } else {
900                i += 1; // Skip unknown character
901            }
902        }
903
904        // CRITICAL: Return None if ANY point failed to resolve
905        // This matches the old behavior where missing points invalidated the whole polygon
906        if coords.len() >= 3 && coords.len() == expected_count as usize {
907            Some(coords)
908        } else {
909            None
910        }
911    }
912}
913
914/// Parse cartesian point coordinates inline from raw bytes
915/// Used by get_polyloop_coords_fast for maximum performance
916#[inline]
917fn parse_cartesian_point_inline(bytes: &[u8]) -> Option<(f64, f64, f64)> {
918    let len = bytes.len();
919    let mut i = 0;
920
921    // Skip to first '(' after '='
922    while i < len && bytes[i] != b'(' {
923        i += 1;
924    }
925    if i >= len {
926        return None;
927    }
928    i += 1; // Skip first '('
929
930    // Skip to second '(' for the coordinate list
931    while i < len && bytes[i] != b'(' {
932        i += 1;
933    }
934    if i >= len {
935        return None;
936    }
937    i += 1; // Skip second '('
938
939    // Parse x coordinate
940    let x = parse_float_inline(&bytes[i..], &mut i)?;
941
942    // Parse y coordinate
943    let y = parse_float_inline(&bytes[i..], &mut i)?;
944
945    // Parse z coordinate (optional for 2D points, default to 0)
946    let z = parse_float_inline(&bytes[i..], &mut i).unwrap_or(0.0);
947
948    Some((x, y, z))
949}
950
951/// Parse float inline - simpler version for batch coordinate extraction
952#[inline]
953fn parse_float_inline(bytes: &[u8], offset: &mut usize) -> Option<f64> {
954    let len = bytes.len();
955    let mut i = 0;
956
957    // Skip whitespace and commas
958    while i < len
959        && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
960    {
961        i += 1;
962    }
963
964    if i >= len || bytes[i] == b')' {
965        return None;
966    }
967
968    // Parse float using fast_float
969    match fast_float2::parse_partial::<f64, _>(&bytes[i..]) {
970        Ok((value, consumed)) if consumed > 0 => {
971            *offset += i + consumed;
972            Some(value)
973        }
974        _ => None,
975    }
976}
977
978/// Parse next float from bytes, advancing position past it
979#[inline]
980fn parse_next_float(bytes: &[u8], offset: &mut usize) -> Option<f64> {
981    let len = bytes.len();
982    let mut i = 0;
983
984    // Skip whitespace and commas
985    while i < len
986        && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
987    {
988        i += 1;
989    }
990
991    if i >= len || bytes[i] == b')' {
992        return None;
993    }
994
995    // Parse float using fast_float
996    match fast_float2::parse_partial::<f64, _>(&bytes[i..]) {
997        Ok((value, consumed)) if consumed > 0 => {
998            *offset += i + consumed;
999            Some(value)
1000        }
1001        _ => None,
1002    }
1003}
1004
#[cfg(test)]
mod tests {
    use super::*;
    use crate::IfcType;

    /// Decoding an explicit byte range yields the id, type and attributes of that entity.
    #[test]
    fn test_decode_entity() {
        let ifc = r#"
#1=IFCPROJECT('2vqT3bvqj9RBFjLlXpN8n9',$,$,$,$,$,$,$,$);
#2=IFCWALL('3a4T3bvqj9RBFjLlXpN8n0',$,$,$,'Wall-001',$,#3,#4);
#3=IFCLOCALPLACEMENT($,#4);
#4=IFCAXIS2PLACEMENT3D(#5,$,$);
#5=IFCCARTESIANPOINT((0.,0.,0.));
"#;

        let mut decoder = EntityDecoder::new(ifc);

        // Byte range of entity #2, terminating ';' included.
        let wall_start = ifc.find("#2=").unwrap();
        let wall_end = wall_start + ifc[wall_start..].find(';').unwrap() + 1;

        let wall = decoder.decode_at(wall_start, wall_end).unwrap();
        assert_eq!(wall.id, 2);
        assert_eq!(wall.ifc_type, IfcType::IfcWall);
        assert_eq!(wall.attributes.len(), 8);
        assert_eq!(wall.get_string(4), Some("Wall-001"));
        assert_eq!(wall.get_ref(6), Some(3));
        assert_eq!(wall.get_ref(7), Some(4));
    }

    /// Lookup by id decodes the right entity and leaves it in the cache.
    #[test]
    fn test_decode_by_id() {
        let ifc = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#5=IFCWALL('guid2',$,$,$,'Wall-001',$,$,$);
#10=IFCDOOR('guid3',$,$,$,'Door-001',$,$,$);
"#;

        let mut decoder = EntityDecoder::new(ifc);

        let wall = decoder.decode_by_id(5).unwrap();
        assert_eq!(wall.id, 5);
        assert_eq!(wall.ifc_type, IfcType::IfcWall);
        assert_eq!(wall.get_string(4), Some("Wall-001"));

        // Exactly the decoded entity is cached afterwards.
        assert_eq!(decoder.cache_size(), 1);
        let from_cache = decoder.get_cached(5).unwrap();
        assert_eq!(from_cache.id, 5);
    }

    /// The index skips the HEADER section and keeps quoted semicolons inside an entity.
    #[test]
    fn test_build_entity_index_matches_scanner_header_semantics() {
        let ifc = "ISO-10303-21;\nHEADER;\n\
FILE_DESCRIPTION(('ViewDefinition [ReferenceView]'),'2;1');\n\
FILE_NAME('26-IFC\\X2\\00B1\\X0\\2#.ifc','2026-04-29T18:21:27',$,$,'CATIA','CATIA',$);\n\
FILE_SCHEMA(('IFC4'));\nENDSEC;\n\
DATA;\n\
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);\n\
#2=IFCWALL('guid2',$,$,$,'Wall; with semicolon',$,$,$);\n\
ENDSEC;\nEND-ISO-10303-21;\n";

        let entity_index = build_entity_index(ifc);

        assert_eq!(entity_index.len(), 2);
        // The "26-IFC...#.ifc" file name in the header must not be indexed as an entity.
        assert!(!entity_index.contains_key(&26));
        let (wall_start, wall_end) = entity_index.get(&2).copied().unwrap();
        assert_eq!(
            &ifc[wall_start..wall_end],
            "#2=IFCWALL('guid2',$,$,$,'Wall; with semicolon',$,$,$);"
        );
    }

    /// A ';' inside a quoted string does not cut the entity short when decoding by id.
    #[test]
    fn test_decode_by_id_handles_quoted_semicolon_from_shared_index() {
        let ifc = "#1=IFCWALL('guid',$,$,$,'Wall; with semicolon',$,$,$);\n";
        let mut decoder = EntityDecoder::new(ifc);

        let decoded = decoder.decode_by_id(1).unwrap();

        assert_eq!(decoded.id, 1);
        assert_eq!(decoded.ifc_type, IfcType::IfcWall);
        assert_eq!(decoded.get_string(4), Some("Wall; with semicolon"));
    }

    /// A reference attribute resolves to the entity it points at.
    #[test]
    fn test_resolve_ref() {
        let ifc = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,#1,$);
"#;

        let mut decoder = EntityDecoder::new(ifc);

        let wall = decoder.decode_by_id(2).unwrap();
        let ref_attr = wall.get(6).unwrap();

        let target = decoder.resolve_ref(ref_attr).unwrap().unwrap();
        assert_eq!(target.id, 1);
        assert_eq!(target.ifc_type, IfcType::IfcProject);
    }

    /// A list of references resolves to the referenced entities in list order.
    #[test]
    fn test_resolve_ref_list() {
        let ifc = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid1',$,$,$,$,$,$,$);
#3=IFCDOOR('guid2',$,$,$,$,$,$,$);
#4=IFCRELCONTAINEDINSPATIALSTRUCTURE('guid3',$,$,$,(#2,#3),$,#1);
"#;

        let mut decoder = EntityDecoder::new(ifc);

        let relation = decoder.decode_by_id(4).unwrap();
        let related_attr = relation.get(4).unwrap();

        let related = decoder.resolve_ref_list(related_attr).unwrap();
        assert_eq!(related.len(), 2);
        assert_eq!(related[0].id, 2);
        assert_eq!(related[0].ifc_type, IfcType::IfcWall);
        assert_eq!(related[1].id, 3);
        assert_eq!(related[1].ifc_type, IfcType::IfcDoor);
    }

    /// The cache grows once per distinct entity and can be emptied again.
    #[test]
    fn test_cache() {
        let ifc = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
"#;

        let mut decoder = EntityDecoder::new(ifc);

        // Nothing is cached before the first decode.
        assert_eq!(decoder.cache_size(), 0);

        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 1);

        decoder.decode_by_id(2).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        // Decoding an already cached entity must not add a second entry.
        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        decoder.clear_cache();
        assert_eq!(decoder.cache_size(), 0);
    }
}