Skip to main content

ifc_lite_core/
decoder.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! Entity Decoder - On-demand entity parsing
6//!
7//! Lazily decode IFC entities from byte offsets without loading entire file into memory.
8
9use crate::error::{Error, Result};
10use crate::parser::parse_entity;
11use crate::schema_gen::{AttributeValue, DecodedEntity};
12use rustc_hash::FxHashMap;
13use std::sync::Arc;
14
/// Pre-built entity index type
///
/// Maps an entity ID to a `(start, end)` pair of byte offsets into the file text.
/// As produced by `build_entity_index`, `start` is the offset of the leading `#`
/// and `end` is one past the terminating `;`, so `content[start..end]` is the
/// complete entity statement.
pub type EntityIndex = FxHashMap<u32, (usize, usize)>;
17
18/// Build entity index from content - O(n) scan using SIMD-accelerated search
19/// Returns index mapping entity IDs to byte offsets
20#[inline]
21pub fn build_entity_index(content: &str) -> EntityIndex {
22    let bytes = content.as_bytes();
23    let len = bytes.len();
24
25    // Pre-allocate with estimated capacity (roughly 1 entity per 50 bytes)
26    let estimated_entities = len / 50;
27    let mut index = FxHashMap::with_capacity_and_hasher(estimated_entities, Default::default());
28
29    let mut pos = 0;
30
31    while pos < len {
32        // Find next '#' using SIMD-accelerated search
33        let remaining = &bytes[pos..];
34        let hash_offset = match memchr::memchr(b'#', remaining) {
35            Some(offset) => offset,
36            None => break,
37        };
38
39        let start = pos + hash_offset;
40        pos = start + 1;
41
42        // Parse entity ID (inline for speed)
43        let id_start = pos;
44        while pos < len && bytes[pos].is_ascii_digit() {
45            pos += 1;
46        }
47        let id_end = pos;
48
49        // Skip whitespace before '=' (handles both `#45=` and `#45 = ` formats)
50        while pos < len && bytes[pos].is_ascii_whitespace() {
51            pos += 1;
52        }
53
54        if id_end > id_start && pos < len && bytes[pos] == b'=' {
55            // Fast integer parsing without allocation
56            let id = parse_u32_inline(bytes, id_start, id_end);
57
58            // Find end of entity (;) using SIMD
59            let entity_content = &bytes[pos..];
60            if let Some(semicolon_offset) = memchr::memchr(b';', entity_content) {
61                pos += semicolon_offset + 1; // Include semicolon
62                index.insert(id, (start, pos));
63            } else {
64                break; // No semicolon found, malformed
65            }
66        }
67    }
68
69    index
70}
71
/// Convert the ASCII decimal digits in `bytes[start..end]` to a `u32` without allocating.
///
/// No validation is performed: the caller is expected to pass a range that contains
/// only `b'0'..=b'9'`. All arithmetic wraps, so a number too large for `u32` yields a
/// wrapped value rather than a panic, and an empty range yields `0`.
#[inline]
fn parse_u32_inline(bytes: &[u8], start: usize, end: usize) -> u32 {
    bytes[start..end].iter().fold(0u32, |acc, &ch| {
        acc.wrapping_mul(10)
            .wrapping_add(u32::from(ch.wrapping_sub(b'0')))
    })
}
82
/// Entity decoder for lazy parsing - uses Arc for efficient cache sharing
///
/// Borrows the complete file text and decodes individual entities on demand,
/// either from an explicit byte range or by entity ID through an index that is
/// supplied at construction or built on first use.
pub struct EntityDecoder<'a> {
    /// Full file text; every byte offset stored in `entity_index` refers to this string
    content: &'a str,
    /// Cache of decoded entities (entity_id -> `Arc<DecodedEntity>`)
    /// Entries are stored behind `Arc`, but note that the accessors in this impl
    /// hand out a clone of the inner `DecodedEntity`, not the `Arc` itself
    cache: FxHashMap<u32, Arc<DecodedEntity>>,
    /// Index of entity offsets (entity_id -> (start, end))
    /// `None` until supplied via `with_index` / `with_arc_index` or built by `build_index`
    /// Held behind `Arc` so an existing index can be shared with a decoder without copying the map
    entity_index: Option<Arc<EntityIndex>>,
    /// Cache of cartesian point coordinates (point entity_id -> (x, y, z))
    /// Only written by `get_polyloop_coords_cached`; emptied by `clear_cache`
    point_cache: FxHashMap<u32, (f64, f64, f64)>,
}
97
98impl<'a> EntityDecoder<'a> {
99    /// Create new decoder
100    pub fn new(content: &'a str) -> Self {
101        Self {
102            content,
103            cache: FxHashMap::default(),
104            entity_index: None,
105            point_cache: FxHashMap::default(),
106        }
107    }
108
109    /// Create decoder with pre-built index (faster for repeated lookups)
110    pub fn with_index(content: &'a str, index: EntityIndex) -> Self {
111        Self {
112            content,
113            cache: FxHashMap::default(),
114            entity_index: Some(Arc::new(index)),
115            point_cache: FxHashMap::default(),
116        }
117    }
118
119    /// Create decoder with shared Arc index (for parallel processing)
120    pub fn with_arc_index(content: &'a str, index: Arc<EntityIndex>) -> Self {
121        Self {
122            content,
123            cache: FxHashMap::default(),
124            entity_index: Some(index),
125            point_cache: FxHashMap::default(),
126        }
127    }
128
129    /// Build entity index for O(1) lookups
130    /// This scans the file once and maps entity IDs to byte offsets
131    fn build_index(&mut self) {
132        if self.entity_index.is_some() {
133            return; // Already built
134        }
135        self.entity_index = Some(Arc::new(build_entity_index(self.content)));
136    }
137
138    /// Decode entity at byte offset
139    /// Returns cached entity if already decoded
140    #[inline]
141    pub fn decode_at(&mut self, start: usize, end: usize) -> Result<DecodedEntity> {
142        let line = &self.content[start..end];
143        let (id, ifc_type, tokens) = parse_entity(line).map_err(|e| {
144            // Add debug info about what failed to parse
145            Error::parse(
146                0,
147                format!(
148                    "Failed to parse entity: {:?}, input: {:?}",
149                    e,
150                    &line[..line.len().min(100)]
151                ),
152            )
153        })?;
154
155        // Check cache first - return clone of inner DecodedEntity
156        if let Some(entity_arc) = self.cache.get(&id) {
157            return Ok(entity_arc.as_ref().clone());
158        }
159
160        // Convert tokens to AttributeValues
161        let attributes = tokens
162            .iter()
163            .map(|token| AttributeValue::from_token(token))
164            .collect();
165
166        let entity = DecodedEntity::new(id, ifc_type, attributes);
167        self.cache.insert(id, Arc::new(entity.clone()));
168        Ok(entity)
169    }
170
171    /// Decode entity at byte offset with known ID (faster - checks cache before parsing)
172    /// Use this when the scanner provides the entity ID to avoid re-parsing cached entities
173    #[inline]
174    pub fn decode_at_with_id(&mut self, id: u32, start: usize, end: usize) -> Result<DecodedEntity> {
175        // Check cache first - avoid parsing if already decoded
176        if let Some(entity_arc) = self.cache.get(&id) {
177            return Ok(entity_arc.as_ref().clone());
178        }
179
180        // Not in cache, parse and cache
181        self.decode_at(start, end)
182    }
183
184    /// Decode entity by ID - O(1) lookup using entity index
185    #[inline]
186    pub fn decode_by_id(&mut self, entity_id: u32) -> Result<DecodedEntity> {
187        // Check cache first - return clone of inner DecodedEntity
188        if let Some(entity_arc) = self.cache.get(&entity_id) {
189            return Ok(entity_arc.as_ref().clone());
190        }
191
192        // Build index if not already built
193        self.build_index();
194
195        // O(1) lookup in index
196        let (start, end) = self
197            .entity_index
198            .as_ref()
199            .and_then(|idx| idx.get(&entity_id).copied())
200            .ok_or_else(|| Error::parse(0, format!("Entity #{} not found", entity_id)))?;
201
202        self.decode_at(start, end)
203    }
204
205    /// Resolve entity reference (follow #ID)
206    /// Returns None for null/derived values
207    #[inline]
208    pub fn resolve_ref(&mut self, attr: &AttributeValue) -> Result<Option<DecodedEntity>> {
209        match attr.as_entity_ref() {
210            Some(id) => Ok(Some(self.decode_by_id(id)?)),
211            None => Ok(None),
212        }
213    }
214
215    /// Resolve list of entity references
216    pub fn resolve_ref_list(&mut self, attr: &AttributeValue) -> Result<Vec<DecodedEntity>> {
217        let list = attr
218            .as_list()
219            .ok_or_else(|| Error::parse(0, "Expected list".to_string()))?;
220
221        let mut entities = Vec::with_capacity(list.len());
222        for item in list {
223            if let Some(id) = item.as_entity_ref() {
224                entities.push(self.decode_by_id(id)?);
225            }
226        }
227        Ok(entities)
228    }
229
230    /// Get cached entity (without decoding)
231    pub fn get_cached(&self, entity_id: u32) -> Option<DecodedEntity> {
232        self.cache.get(&entity_id).map(|arc| arc.as_ref().clone())
233    }
234
235    /// Clear cache to free memory
236    pub fn clear_cache(&mut self) {
237        self.cache.clear();
238        self.point_cache.clear();
239    }
240
    /// Number of decoded entities currently held in the entity cache.
    ///
    /// Only `cache` is counted; entries in `point_cache` are not included.
    pub fn cache_size(&self) -> usize {
        self.cache.len()
    }
245
246    /// Get raw bytes for an entity (for direct/fast parsing)
247    /// Returns the full entity line including type and attributes
248    #[inline]
249    pub fn get_raw_bytes(&mut self, entity_id: u32) -> Option<&'a [u8]> {
250        self.build_index();
251        let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
252        Some(&self.content.as_bytes()[start..end])
253    }
254
255    /// Get raw content string for an entity
256    #[inline]
257    pub fn get_raw_content(&mut self, entity_id: u32) -> Option<&'a str> {
258        self.build_index();
259        let (start, end) = self.entity_index.as_ref()?.get(&entity_id).copied()?;
260        Some(&self.content[start..end])
261    }
262
263    /// Fast extraction of first entity ref from raw bytes
264    /// Useful for BREP -> shell ID, Face -> FaceBound, etc.
265    /// Returns the first entity reference ID found in the entity
266    #[inline]
267    pub fn get_first_entity_ref_fast(&mut self, entity_id: u32) -> Option<u32> {
268        let bytes = self.get_raw_bytes(entity_id)?;
269        let len = bytes.len();
270        let mut i = 0;
271
272        // Skip to first '(' after '='
273        while i < len && bytes[i] != b'(' {
274            i += 1;
275        }
276        if i >= len {
277            return None;
278        }
279        i += 1; // Skip first '('
280
281        // Find first '#' which is the entity ref
282        while i < len {
283            // Skip whitespace
284            while i < len
285                && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r')
286            {
287                i += 1;
288            }
289
290            if i >= len {
291                return None;
292            }
293
294            if bytes[i] == b'#' {
295                i += 1;
296                let start = i;
297                while i < len && bytes[i].is_ascii_digit() {
298                    i += 1;
299                }
300                if i > start {
301                    let mut id = 0u32;
302                    for &b in &bytes[start..i] {
303                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
304                    }
305                    return Some(id);
306                }
307            }
308            i += 1;
309        }
310
311        None
312    }
313
314    /// Fast extraction of entity reference IDs from a list attribute in raw bytes
315    /// Useful for getting face list from ClosedShell, bounds from Face, etc.
316    /// Returns list of entity IDs
317    #[inline]
318    pub fn get_entity_ref_list_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
319        let bytes = self.get_raw_bytes(entity_id)?;
320
321        // Pattern: IFCTYPE((#id1,#id2,...)); or IFCTYPE((#id1,#id2,...),other);
322        let mut i = 0;
323        let len = bytes.len();
324
325        // Skip to first '(' after '='
326        while i < len && bytes[i] != b'(' {
327            i += 1;
328        }
329        if i >= len {
330            return None;
331        }
332        i += 1; // Skip first '('
333
334        // Skip to second '(' for the list
335        while i < len && bytes[i] != b'(' {
336            i += 1;
337        }
338        if i >= len {
339            return None;
340        }
341        i += 1; // Skip second '('
342
343        // Parse entity IDs
344        let mut ids = Vec::with_capacity(32);
345
346        while i < len {
347            // Skip whitespace and commas
348            while i < len
349                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
350            {
351                i += 1;
352            }
353
354            if i >= len || bytes[i] == b')' {
355                break;
356            }
357
358            // Expect '#' followed by number
359            if bytes[i] == b'#' {
360                i += 1;
361                let start = i;
362                while i < len && bytes[i].is_ascii_digit() {
363                    i += 1;
364                }
365                if i > start {
366                    // Fast integer parsing directly from ASCII digits
367                    let mut id = 0u32;
368                    for &b in &bytes[start..i] {
369                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
370                    }
371                    ids.push(id);
372                }
373            } else {
374                i += 1; // Skip unknown character
375            }
376        }
377
378        if ids.is_empty() {
379            None
380        } else {
381            Some(ids)
382        }
383    }
384
385    /// Fast extraction of PolyLoop point IDs directly from raw bytes
386    /// Bypasses full entity decoding for BREP optimization
387    /// Returns list of entity IDs for CartesianPoints
388    #[inline]
389    pub fn get_polyloop_point_ids_fast(&mut self, entity_id: u32) -> Option<Vec<u32>> {
390        let bytes = self.get_raw_bytes(entity_id)?;
391
392        // IFCPOLYLOOP((#id1,#id2,#id3,...));
393        let mut i = 0;
394        let len = bytes.len();
395
396        // Skip to first '(' after '='
397        while i < len && bytes[i] != b'(' {
398            i += 1;
399        }
400        if i >= len {
401            return None;
402        }
403        i += 1; // Skip first '('
404
405        // Skip to second '(' for the point list
406        while i < len && bytes[i] != b'(' {
407            i += 1;
408        }
409        if i >= len {
410            return None;
411        }
412        i += 1; // Skip second '('
413
414        // Parse point IDs
415        let mut point_ids = Vec::with_capacity(8); // Most faces have 3-8 vertices
416
417        while i < len {
418            // Skip whitespace and commas
419            while i < len
420                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
421            {
422                i += 1;
423            }
424
425            if i >= len || bytes[i] == b')' {
426                break;
427            }
428
429            // Expect '#' followed by number
430            if bytes[i] == b'#' {
431                i += 1;
432                let start = i;
433                while i < len && bytes[i].is_ascii_digit() {
434                    i += 1;
435                }
436                if i > start {
437                    // Fast integer parsing directly from ASCII digits
438                    let mut id = 0u32;
439                    for &b in &bytes[start..i] {
440                        id = id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
441                    }
442                    point_ids.push(id);
443                }
444            } else {
445                i += 1; // Skip unknown character
446            }
447        }
448
449        if point_ids.is_empty() {
450            None
451        } else {
452            Some(point_ids)
453        }
454    }
455
456    /// Fast extraction of CartesianPoint coordinates directly from raw bytes
457    /// Bypasses full entity decoding for ~3x speedup on BREP-heavy files
458    /// Returns (x, y, z) as f64 tuple
459    #[inline]
460    pub fn get_cartesian_point_fast(&mut self, entity_id: u32) -> Option<(f64, f64, f64)> {
461        let bytes = self.get_raw_bytes(entity_id)?;
462
463        // Find opening paren for coordinates: IFCCARTESIANPOINT((x,y,z));
464        let mut i = 0;
465        let len = bytes.len();
466
467        // Skip to first '(' after '='
468        while i < len && bytes[i] != b'(' {
469            i += 1;
470        }
471        if i >= len {
472            return None;
473        }
474        i += 1; // Skip first '('
475
476        // Skip to second '(' for the coordinate list
477        while i < len && bytes[i] != b'(' {
478            i += 1;
479        }
480        if i >= len {
481            return None;
482        }
483        i += 1; // Skip second '('
484
485        // Parse x coordinate
486        let x = parse_next_float(&bytes[i..], &mut i)?;
487
488        // Parse y coordinate
489        let y = parse_next_float(&bytes[i..], &mut i)?;
490
491        // Parse z coordinate (optional for 2D points, default to 0)
492        let z = parse_next_float(&bytes[i..], &mut i).unwrap_or(0.0);
493
494        Some((x, y, z))
495    }
496
497    /// Fast extraction of FaceBound info directly from raw bytes
498    /// Returns (loop_id, orientation, is_outer_bound)
499    /// Bypasses full entity decoding for BREP optimization
500    #[inline]
501    pub fn get_face_bound_fast(&mut self, entity_id: u32) -> Option<(u32, bool, bool)> {
502        let bytes = self.get_raw_bytes(entity_id)?;
503        let len = bytes.len();
504
505        // Find '=' to locate start of type name, and '(' for end
506        let mut eq_pos = 0;
507        while eq_pos < len && bytes[eq_pos] != b'=' {
508            eq_pos += 1;
509        }
510        if eq_pos >= len {
511            return None;
512        }
513
514        // Check if this is an outer bound by looking for "OUTER" in the type name
515        // IFCFACEOUTERBOUND vs IFCFACEBOUND
516        // The type name is between '=' and '('
517        let mut is_outer = false;
518        let mut i = eq_pos + 1;
519        // Look for "OUTER" pattern (must check for the full word, not just 'O')
520        while i + 4 < len && bytes[i] != b'(' {
521            if bytes[i] == b'O'
522                && bytes[i + 1] == b'U'
523                && bytes[i + 2] == b'T'
524                && bytes[i + 3] == b'E'
525                && bytes[i + 4] == b'R'
526            {
527                is_outer = true;
528                break;
529            }
530            i += 1;
531        }
532        // Continue to find the '(' if we haven't already
533        while i < len && bytes[i] != b'(' {
534            i += 1;
535        }
536        if i >= len {
537            return None;
538        }
539
540        i += 1; // Skip first '('
541
542        // Skip whitespace
543        while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
544            i += 1;
545        }
546
547        // Expect '#' for loop entity ref
548        if i >= len || bytes[i] != b'#' {
549            return None;
550        }
551        i += 1;
552
553        // Parse loop ID
554        let start = i;
555        while i < len && bytes[i].is_ascii_digit() {
556            i += 1;
557        }
558        if i <= start {
559            return None;
560        }
561        let mut loop_id = 0u32;
562        for &b in &bytes[start..i] {
563            loop_id = loop_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
564        }
565
566        // Find orientation after comma - default to true (.T.)
567        // Skip to comma
568        while i < len && bytes[i] != b',' {
569            i += 1;
570        }
571        i += 1; // Skip comma
572
573        // Skip whitespace
574        while i < len && (bytes[i] == b' ' || bytes[i] == b'\n' || bytes[i] == b'\r') {
575            i += 1;
576        }
577
578        // Check for .F. (false) or .T. (true)
579        let orientation = if i + 2 < len && bytes[i] == b'.' && bytes[i + 2] == b'.' {
580            bytes[i + 1] != b'F'
581        } else {
582            true // Default to true
583        };
584
585        Some((loop_id, orientation, is_outer))
586    }
587
588    /// Fast extraction of PolyLoop COORDINATES directly from raw bytes
589    /// This is the ultimate fast path - extracts all coordinates in one go
590    /// Avoids N+1 HashMap lookups by batching point extraction
591    /// Returns Vec of (x, y, z) coordinate tuples
592    #[inline]
593    pub fn get_polyloop_coords_fast(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
594        // Ensure index is built once
595        self.build_index();
596        let index = self.entity_index.as_ref()?;
597        let bytes_full = self.content.as_bytes();
598
599        // Get polyloop raw bytes
600        let (start, end) = index.get(&entity_id).copied()?;
601        let bytes = &bytes_full[start..end];
602
603        // IFCPOLYLOOP((#id1,#id2,#id3,...));
604        let mut i = 0;
605        let len = bytes.len();
606
607        // Skip to first '(' after '='
608        while i < len && bytes[i] != b'(' {
609            i += 1;
610        }
611        if i >= len {
612            return None;
613        }
614        i += 1; // Skip first '('
615
616        // Skip to second '(' for the point list
617        while i < len && bytes[i] != b'(' {
618            i += 1;
619        }
620        if i >= len {
621            return None;
622        }
623        i += 1; // Skip second '('
624
625        // Parse point IDs and immediately fetch coordinates
626        let mut coords = Vec::with_capacity(8); // Most faces have 3-8 vertices
627
628        while i < len {
629            // Skip whitespace and commas
630            while i < len
631                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
632            {
633                i += 1;
634            }
635
636            if i >= len || bytes[i] == b')' {
637                break;
638            }
639
640            // Expect '#' followed by number
641            if bytes[i] == b'#' {
642                i += 1;
643                let id_start = i;
644                while i < len && bytes[i].is_ascii_digit() {
645                    i += 1;
646                }
647                if i > id_start {
648                    // Fast integer parsing directly from ASCII digits
649                    let mut point_id = 0u32;
650                    for &b in &bytes[id_start..i] {
651                        point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
652                    }
653
654                    // INLINE: Get cartesian point coordinates directly
655                    // This avoids the overhead of calling get_cartesian_point_fast for each point
656                    if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
657                        if let Some(coord) =
658                            parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
659                        {
660                            coords.push(coord);
661                        }
662                    }
663                }
664            } else {
665                i += 1; // Skip unknown character
666            }
667        }
668
669        if coords.len() >= 3 {
670            Some(coords)
671        } else {
672            None
673        }
674    }
675
676    /// Fast extraction of PolyLoop COORDINATES with point caching
677    /// Uses a cache to avoid re-parsing the same cartesian points
678    /// For files with many faces sharing points, this can be 2-3x faster
679    #[inline]
680    pub fn get_polyloop_coords_cached(&mut self, entity_id: u32) -> Option<Vec<(f64, f64, f64)>> {
681        // Ensure index is built once
682        self.build_index();
683        let index = self.entity_index.as_ref()?;
684        let bytes_full = self.content.as_bytes();
685
686        // Get polyloop raw bytes
687        let (start, end) = index.get(&entity_id).copied()?;
688        let bytes = &bytes_full[start..end];
689
690        // IFCPOLYLOOP((#id1,#id2,#id3,...));
691        let mut i = 0;
692        let len = bytes.len();
693
694        // Skip to first '(' after '='
695        while i < len && bytes[i] != b'(' {
696            i += 1;
697        }
698        if i >= len {
699            return None;
700        }
701        i += 1; // Skip first '('
702
703        // Skip to second '(' for the point list
704        while i < len && bytes[i] != b'(' {
705            i += 1;
706        }
707        if i >= len {
708            return None;
709        }
710        i += 1; // Skip second '('
711
712        // Parse point IDs and fetch coordinates (with caching)
713        // CRITICAL: Track expected count to ensure all points are resolved
714        let mut coords = Vec::with_capacity(8);
715        let mut expected_count = 0u32;
716
717        while i < len {
718            // Skip whitespace and commas
719            while i < len
720                && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
721            {
722                i += 1;
723            }
724
725            if i >= len || bytes[i] == b')' {
726                break;
727            }
728
729            // Expect '#' followed by number
730            if bytes[i] == b'#' {
731                i += 1;
732                let id_start = i;
733                while i < len && bytes[i].is_ascii_digit() {
734                    i += 1;
735                }
736                if i > id_start {
737                    expected_count += 1; // Count every point ID we encounter
738
739                    // Fast integer parsing directly from ASCII digits
740                    let mut point_id = 0u32;
741                    for &b in &bytes[id_start..i] {
742                        point_id = point_id.wrapping_mul(10).wrapping_add((b - b'0') as u32);
743                    }
744
745                    // Check cache first
746                    if let Some(&coord) = self.point_cache.get(&point_id) {
747                        coords.push(coord);
748                    } else {
749                        // Not in cache - parse and cache
750                        if let Some((pt_start, pt_end)) = index.get(&point_id).copied() {
751                            if let Some(coord) =
752                                parse_cartesian_point_inline(&bytes_full[pt_start..pt_end])
753                            {
754                                self.point_cache.insert(point_id, coord);
755                                coords.push(coord);
756                            }
757                        }
758                    }
759                }
760            } else {
761                i += 1; // Skip unknown character
762            }
763        }
764
765        // CRITICAL: Return None if ANY point failed to resolve
766        // This matches the old behavior where missing points invalidated the whole polygon
767        if coords.len() >= 3 && coords.len() == expected_count as usize {
768            Some(coords)
769        } else {
770            None
771        }
772    }
773}
774
775/// Parse cartesian point coordinates inline from raw bytes
776/// Used by get_polyloop_coords_fast for maximum performance
777#[inline]
778fn parse_cartesian_point_inline(bytes: &[u8]) -> Option<(f64, f64, f64)> {
779    let len = bytes.len();
780    let mut i = 0;
781
782    // Skip to first '(' after '='
783    while i < len && bytes[i] != b'(' {
784        i += 1;
785    }
786    if i >= len {
787        return None;
788    }
789    i += 1; // Skip first '('
790
791    // Skip to second '(' for the coordinate list
792    while i < len && bytes[i] != b'(' {
793        i += 1;
794    }
795    if i >= len {
796        return None;
797    }
798    i += 1; // Skip second '('
799
800    // Parse x coordinate
801    let x = parse_float_inline(&bytes[i..], &mut i)?;
802
803    // Parse y coordinate
804    let y = parse_float_inline(&bytes[i..], &mut i)?;
805
806    // Parse z coordinate (optional for 2D points, default to 0)
807    let z = parse_float_inline(&bytes[i..], &mut i).unwrap_or(0.0);
808
809    Some((x, y, z))
810}
811
812/// Parse float inline - simpler version for batch coordinate extraction
813#[inline]
814fn parse_float_inline(bytes: &[u8], offset: &mut usize) -> Option<f64> {
815    let len = bytes.len();
816    let mut i = 0;
817
818    // Skip whitespace and commas
819    while i < len
820        && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
821    {
822        i += 1;
823    }
824
825    if i >= len || bytes[i] == b')' {
826        return None;
827    }
828
829    // Parse float using fast_float
830    match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
831        Ok((value, consumed)) if consumed > 0 => {
832            *offset += i + consumed;
833            Some(value)
834        }
835        _ => None,
836    }
837}
838
839/// Parse next float from bytes, advancing position past it
840#[inline]
841fn parse_next_float(bytes: &[u8], offset: &mut usize) -> Option<f64> {
842    let len = bytes.len();
843    let mut i = 0;
844
845    // Skip whitespace and commas
846    while i < len
847        && (bytes[i] == b' ' || bytes[i] == b',' || bytes[i] == b'\n' || bytes[i] == b'\r')
848    {
849        i += 1;
850    }
851
852    if i >= len || bytes[i] == b')' {
853        return None;
854    }
855
856    // Parse float using fast_float
857    match fast_float::parse_partial::<f64, _>(&bytes[i..]) {
858        Ok((value, consumed)) if consumed > 0 => {
859            *offset += i + consumed;
860            Some(value)
861        }
862        _ => None,
863    }
864}
865
#[cfg(test)]
mod tests {
    use super::*;
    use crate::IfcType;

    // Decoding from an explicit byte range: the range for `#2` is located by hand
    // and passed to `decode_at`, then id, type and individual attributes are checked.
    #[test]
    fn test_decode_entity() {
        let content = r#"
#1=IFCPROJECT('2vqT3bvqj9RBFjLlXpN8n9',$,$,$,$,$,$,$,$);
#2=IFCWALL('3a4T3bvqj9RBFjLlXpN8n0',$,$,$,'Wall-001',$,#3,#4);
#3=IFCLOCALPLACEMENT($,#4);
#4=IFCAXIS2PLACEMENT3D(#5,$,$);
#5=IFCCARTESIANPOINT((0.,0.,0.));
"#;

        let mut decoder = EntityDecoder::new(content);

        // Find entity #2
        let start = content.find("#2=").unwrap();
        // `end` is one past the terminating ';'
        let end = content[start..].find(';').unwrap() + start + 1;

        let entity = decoder.decode_at(start, end).unwrap();
        assert_eq!(entity.id, 2);
        assert_eq!(entity.ifc_type, IfcType::IfcWall);
        assert_eq!(entity.attributes.len(), 8);
        assert_eq!(entity.get_string(4), Some("Wall-001"));
        assert_eq!(entity.get_ref(6), Some(3));
        assert_eq!(entity.get_ref(7), Some(4));
    }

    // Decoding by ID with non-contiguous IDs: the index is built lazily and the
    // decoded entity ends up in the cache.
    #[test]
    fn test_decode_by_id() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#5=IFCWALL('guid2',$,$,$,'Wall-001',$,$,$);
#10=IFCDOOR('guid3',$,$,$,'Door-001',$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let entity = decoder.decode_by_id(5).unwrap();
        assert_eq!(entity.id, 5);
        assert_eq!(entity.ifc_type, IfcType::IfcWall);
        assert_eq!(entity.get_string(4), Some("Wall-001"));

        // Should be cached now
        assert_eq!(decoder.cache_size(), 1);
        let cached = decoder.get_cached(5).unwrap();
        assert_eq!(cached.id, 5);
    }

    // Following a single reference attribute (#2's attribute 6 points at #1).
    #[test]
    fn test_resolve_ref() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,#1,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        let wall = decoder.decode_by_id(2).unwrap();
        let placement_attr = wall.get(6).unwrap();

        let referenced = decoder.resolve_ref(placement_attr).unwrap().unwrap();
        assert_eq!(referenced.id, 1);
        assert_eq!(referenced.ifc_type, IfcType::IfcProject);
    }

    // Following a list of references: attribute 4 of #4 is `(#2,#3)`; results
    // must come back in list order.
    #[test]
    fn test_resolve_ref_list() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid1',$,$,$,$,$,$,$);
#3=IFCDOOR('guid2',$,$,$,$,$,$,$);
#4=IFCRELCONTAINEDINSPATIALSTRUCTURE('guid3',$,$,$,(#2,#3),$,#1);
"#;

        let mut decoder = EntityDecoder::new(content);

        let rel = decoder.decode_by_id(4).unwrap();
        let elements_attr = rel.get(4).unwrap();

        let elements = decoder.resolve_ref_list(elements_attr).unwrap();
        assert_eq!(elements.len(), 2);
        assert_eq!(elements[0].id, 2);
        assert_eq!(elements[0].ifc_type, IfcType::IfcWall);
        assert_eq!(elements[1].id, 3);
        assert_eq!(elements[1].ifc_type, IfcType::IfcDoor);
    }

    // Cache bookkeeping: size grows once per distinct entity, repeated decodes do
    // not add entries, and `clear_cache` empties it.
    #[test]
    fn test_cache() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
"#;

        let mut decoder = EntityDecoder::new(content);

        assert_eq!(decoder.cache_size(), 0);

        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 1);

        decoder.decode_by_id(2).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        // Decode same entity - should use cache
        decoder.decode_by_id(1).unwrap();
        assert_eq!(decoder.cache_size(), 2);

        decoder.clear_cache();
        assert_eq!(decoder.cache_size(), 0);
    }
}