Skip to main content

ifc_lite_core/
fast_parse.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! Fast Direct Parsing Module
6//!
7//! Provides zero-allocation parsing for coordinate lists and index arrays.
8//! This bypasses the Token/AttributeValue pipeline for massive speedups
9//! on tessellation-heavy IFC files.
10//!
11//! Performance: 3-5x faster than standard path for IfcTriangulatedFaceSet
12
/// Returns `true` when `b` can begin a numeric literal: an ASCII digit,
/// a minus sign, or a decimal point.
#[inline(always)]
fn is_number_start(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'-' | b'.')
}
18
/// Guess how many floats `bytes` contains, for pre-sizing an output vector.
///
/// This is a heuristic only: it assumes each float takes roughly 8 bytes of
/// text on average, delimiters included.
#[inline]
fn estimate_float_count(bytes: &[u8]) -> usize {
    const APPROX_BYTES_PER_FLOAT: usize = 8;
    bytes.len() / APPROX_BYTES_PER_FLOAT
}
25
/// Guess how many integers `bytes` contains, for pre-sizing an output vector.
///
/// This is a heuristic only: it assumes each integer takes roughly 4 bytes of
/// text on average.
#[inline]
fn estimate_int_count(bytes: &[u8]) -> usize {
    const APPROX_BYTES_PER_INT: usize = 4;
    bytes.len() / APPROX_BYTES_PER_INT
}
32
33/// Parse coordinate list directly from raw bytes to `Vec<f32>`
34///
35/// This parses IFC coordinate data like:
36/// `((0.,0.,150.),(0.,40.,140.),...)`
37///
38/// Returns flattened f32 array: [x0, y0, z0, x1, y1, z1, ...]
39///
40/// # Performance
41/// - Zero intermediate allocations (no Token, no AttributeValue)
42/// - Uses fast-float for SIMD-accelerated parsing
43/// - Pre-allocates result vector
44#[inline]
45pub fn parse_coordinates_direct(bytes: &[u8]) -> Vec<f32> {
46    let mut result = Vec::with_capacity(estimate_float_count(bytes));
47    let mut pos = 0;
48    let len = bytes.len();
49
50    while pos < len {
51        // Skip to next number using SIMD-accelerated search
52        while pos < len && !is_number_start(bytes[pos]) {
53            pos += 1;
54        }
55        if pos >= len {
56            break;
57        }
58
59        // Parse float directly
60        match fast_float::parse_partial::<f32, _>(&bytes[pos..]) {
61            Ok((value, consumed)) if consumed > 0 => {
62                result.push(value);
63                pos += consumed;
64            }
65            _ => {
66                // Skip this character and continue
67                pos += 1;
68            }
69        }
70    }
71
72    result
73}
74
75/// Parse coordinate list directly from raw bytes to `Vec<f64>`
76///
77/// Same as parse_coordinates_direct but with f64 precision.
78#[inline]
79pub fn parse_coordinates_direct_f64(bytes: &[u8]) -> Vec<f64> {
80    let mut result = Vec::with_capacity(estimate_float_count(bytes));
81    let mut pos = 0;
82    let len = bytes.len();
83
84    while pos < len {
85        while pos < len && !is_number_start(bytes[pos]) {
86            pos += 1;
87        }
88        if pos >= len {
89            break;
90        }
91
92        match fast_float::parse_partial::<f64, _>(&bytes[pos..]) {
93            Ok((value, consumed)) if consumed > 0 => {
94                result.push(value);
95                pos += consumed;
96            }
97            _ => {
98                pos += 1;
99            }
100        }
101    }
102
103    result
104}
105
106/// Parse index list directly from raw bytes to `Vec<u32>`
107///
108/// This parses IFC face index data like:
109/// `((1,2,3),(2,1,4),...)`
110///
111/// Automatically converts from 1-based IFC indices to 0-based.
112///
113/// # Performance
114/// - Zero intermediate allocations
115/// - Uses inline integer parsing
116#[inline]
117pub fn parse_indices_direct(bytes: &[u8]) -> Vec<u32> {
118    let mut result = Vec::with_capacity(estimate_int_count(bytes));
119    let mut pos = 0;
120    let len = bytes.len();
121
122    while pos < len {
123        // Skip to next digit
124        while pos < len && !bytes[pos].is_ascii_digit() {
125            pos += 1;
126        }
127        if pos >= len {
128            break;
129        }
130
131        // Parse integer inline (avoiding any allocation)
132        let mut value: u32 = 0;
133        while pos < len && bytes[pos].is_ascii_digit() {
134            value = value
135                .wrapping_mul(10)
136                .wrapping_add((bytes[pos] - b'0') as u32);
137            pos += 1;
138        }
139
140        // Convert from 1-based to 0-based
141        result.push(value.saturating_sub(1));
142    }
143
144    result
145}
146
147/// Parse a single entity's coordinate list attribute
148///
149/// Takes the raw bytes of an entity line like:
150/// `#78=IFCCARTESIANPOINTLIST3D(((0.,0.,150.),(0.,40.,140.),...));`
151///
152/// And extracts just the coordinate data.
153#[inline]
154pub fn extract_coordinate_list_from_entity(bytes: &[u8]) -> Option<Vec<f32>> {
155    // Find the opening '((' which starts the coordinate list
156    let start = memchr::memmem::find(bytes, b"((")?;
157
158    // Find matching closing '))'
159    let end = memchr::memmem::rfind(bytes, b"))")?;
160
161    if end <= start {
162        return None;
163    }
164
165    // Parse the coordinate data
166    Some(parse_coordinates_direct(&bytes[start..end + 2]))
167}
168
169/// Parse face indices from IfcTriangulatedFaceSet entity
170///
171/// Finds the CoordIndex attribute (4th attribute, 0-indexed as 3)
172/// in an entity like:
173/// `#77=IFCTRIANGULATEDFACESET(#78,$,$,((1,2,3),(2,1,4),...),$);`
174#[inline]
175pub fn extract_face_indices_from_entity(bytes: &[u8]) -> Option<Vec<u32>> {
176    // Count commas to find the 4th attribute (CoordIndex)
177    // Format: IFCTRIANGULATEDFACESET(Coordinates,Normals,Closed,CoordIndex,PnIndex)
178    let mut paren_depth = 0;
179    let mut comma_count = 0;
180    let mut attr_start = None;
181    let mut attr_end = None;
182
183    for (i, &b) in bytes.iter().enumerate() {
184        match b {
185            b'(' => {
186                if paren_depth == 1 && comma_count == 3 && attr_start.is_none() {
187                    attr_start = Some(i);
188                }
189                paren_depth += 1;
190            }
191            b')' => {
192                paren_depth -= 1;
193                if paren_depth == 1
194                    && comma_count == 3
195                    && attr_start.is_some()
196                    && attr_end.is_none()
197                {
198                    attr_end = Some(i + 1);
199                }
200            }
201            b',' if paren_depth == 1 => {
202                if comma_count == 3 && attr_end.is_none() && attr_start.is_some() {
203                    attr_end = Some(i);
204                }
205                comma_count += 1;
206                if comma_count == 3 {
207                    // Next character starts the 4th attribute
208                }
209            }
210            _ => {}
211        }
212    }
213
214    let start = attr_start?;
215    let end = attr_end?;
216
217    if end <= start {
218        return None;
219    }
220
221    Some(parse_indices_direct(&bytes[start..end]))
222}
223
/// Fast path checker - reports whether an entity type benefits from direct parsing.
///
/// `type_name` is compared ASCII case-insensitively against the supported
/// type names, so both `IFCTRIANGULATEDFACESET` and `IfcTriangulatedFaceSet`
/// match. The comparison allocates nothing, and only ASCII letters are
/// case-folded: non-ASCII look-alikes (e.g. a dotless `ı`) never match.
#[inline]
pub fn should_use_fast_path(type_name: &str) -> bool {
    const FAST_PATH_TYPES: [&str; 4] = [
        "IFCCARTESIANPOINTLIST3D",
        "IFCTRIANGULATEDFACESET",
        "IFCPOLYGONALFACESET",
        "IFCINDEXEDPOLYGONALFACE",
    ];

    FAST_PATH_TYPES
        .iter()
        .any(|candidate| type_name.eq_ignore_ascii_case(candidate))
}
235
/// Extract the entity type name from raw bytes.
///
/// From `#77=IFCTRIANGULATEDFACESET(...)` extracts `IFCTRIANGULATEDFACESET`.
///
/// The name is the text between the first `=` and the first `(` after it,
/// with surrounding whitespace removed, so layouts such as
/// `#1= IFCWALL (...)` yield `IFCWALL` rather than a padded name.
///
/// Returns `None` when there is no `=`, no `(` after it, the text between
/// them is empty or whitespace only, or it is not valid UTF-8.
#[inline]
pub fn extract_entity_type_name(bytes: &[u8]) -> Option<&str> {
    let eq_pos = bytes.iter().position(|&b| b == b'=')?;
    let after_eq = &bytes[eq_pos + 1..];
    let name_len = after_eq.iter().position(|&b| b == b'(')?;

    // Whitespace is allowed between tokens, so it is not part of the keyword.
    let name = std::str::from_utf8(&after_eq[..name_len]).ok()?.trim();

    if name.is_empty() {
        None
    } else {
        Some(name)
    }
}
254
/// Extract the first entity reference that follows an entity's opening parenthesis.
///
/// From `#77=IFCTRIANGULATEDFACESET(#78,...)` extracts `78`.
///
/// The search takes the first `#` found anywhere after the first `(`; it is
/// not limited to the first attribute. Returns `None` when there is no `(`,
/// no `#` after it, or that `#` is not immediately followed by a digit.
/// IDs that do not fit in `u32` wrap around rather than being reported.
#[inline]
pub fn extract_first_entity_ref(bytes: &[u8]) -> Option<u32> {
    let open = bytes.iter().position(|&b| b == b'(')?;
    let args = &bytes[open + 1..];

    let hash = args.iter().position(|&b| b == b'#')?;
    let digits = &args[hash + 1..];
    let digit_count = digits.iter().take_while(|b| b.is_ascii_digit()).count();

    // A '#' with no digits after it is not a reference.
    if digit_count == 0 {
        return None;
    }

    let id = digits[..digit_count].iter().fold(0u32, |acc, &digit| {
        acc.wrapping_mul(10).wrapping_add(u32::from(digit - b'0'))
    });
    Some(id)
}
282
/// Mesh data produced by the fast path (avoids depending on the full Mesh struct).
#[derive(Clone, Debug)]
pub struct FastMeshData {
    /// Vertex coordinates as a flat list of `f32` values.
    pub positions: Vec<f32>,
    /// Vertex indices into the mesh, one `u32` per face corner.
    pub indices: Vec<u32>,
}
289
290/// Process IfcTriangulatedFaceSet directly from raw bytes
291///
292/// This completely bypasses the Token/AttributeValue pipeline for
293/// maximum performance on tessellation geometry.
294///
295/// # Arguments
296/// * `faceset_bytes` - Raw bytes of the IfcTriangulatedFaceSet entity
297/// * `get_entity_bytes` - Function to retrieve raw bytes for a given entity ID
298///
299/// # Returns
300/// FastMeshData with positions and indices, or None if parsing fails
301#[inline]
302pub fn process_triangulated_faceset_direct<F>(
303    faceset_bytes: &[u8],
304    get_entity_bytes: F,
305) -> Option<FastMeshData>
306where
307    F: Fn(u32) -> Option<Vec<u8>>,
308{
309    // Extract coordinate entity reference from first attribute
310    let coord_entity_id = extract_first_entity_ref(faceset_bytes)?;
311
312    // Get raw bytes of coordinate list entity
313    let coord_bytes = get_entity_bytes(coord_entity_id)?;
314
315    // Parse coordinates directly
316    let positions = parse_coordinates_direct(&coord_bytes);
317
318    // Extract and parse indices from attribute 3 (CoordIndex)
319    let indices = extract_face_indices_from_entity(faceset_bytes)?;
320
321    Some(FastMeshData { positions, indices })
322}
323
/// Extract entity IDs from a list attribute without full parsing.
///
/// From `(#1,#2,#3)` extracts `[1, 2, 3]`.
///
/// Every `#` followed by digits contributes one ID, in input order. A `#`
/// with no digits after it, or whose ID evaluates to `0`, is skipped. IDs
/// that do not fit in `u32` wrap around rather than being reported.
#[inline]
pub fn extract_entity_refs_from_list(bytes: &[u8]) -> Vec<u32> {
    let mut refs = Vec::with_capacity(16);
    let mut rest = bytes;

    // Each iteration consumes one '#' and the digit run that follows it.
    while let Some(hash) = rest.iter().position(|&b| b == b'#') {
        let tail = &rest[hash + 1..];
        let digit_count = tail.iter().take_while(|b| b.is_ascii_digit()).count();

        let id = tail[..digit_count].iter().fold(0u32, |acc, &digit| {
            acc.wrapping_mul(10).wrapping_add(u32::from(digit - b'0'))
        });
        if id > 0 {
            refs.push(id);
        }

        rest = &tail[digit_count..];
    }

    refs
}
356
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `actual` lies strictly within `tol` of `expected`.
    fn assert_close(actual: f32, expected: f32, tol: f32) {
        assert!(
            (actual - expected).abs() < tol,
            "{} is not within {} of {}",
            actual,
            tol,
            expected
        );
    }

    /// Plain coordinate tuples are flattened in input order.
    #[test]
    fn test_parse_coordinates_direct() {
        let coords = parse_coordinates_direct(b"((0.,0.,150.),(0.,40.,140.),(100.,0.,0.))");

        assert_eq!(coords.len(), 9);
        let leading = [0.0, 0.0, 150.0, 0.0, 40.0, 140.0];
        for (&actual, &expected) in coords.iter().zip(leading.iter()) {
            assert_close(actual, expected, 0.001);
        }
    }

    /// Indices come back 0-based (IFC stores them 1-based).
    #[test]
    fn test_parse_indices_direct() {
        let indices = parse_indices_direct(b"((1,2,3),(2,1,4),(5,6,7))");

        assert_eq!(indices.len(), 9);
        assert_eq!(&indices[..6], &[0u32, 1, 2, 1, 0, 3][..]);
    }

    /// Exponents in either case, with either sign, are understood.
    #[test]
    fn test_parse_scientific_notation() {
        let coords = parse_coordinates_direct(b"((1.5E-10,2.0e+5,-3.14))");

        assert_eq!(coords.len(), 3);
        assert_close(coords[0], 1.5e-10, 1e-15);
        assert_close(coords[1], 2.0e5, 1.0);
        assert_close(coords[2], -std::f32::consts::PI, 0.01);
    }

    /// A leading minus sign is part of the number.
    #[test]
    fn test_parse_negative_numbers() {
        let coords = parse_coordinates_direct(b"((-1.0,-2.5,3.0))");

        assert_eq!(coords.len(), 3);
        let expected = [-1.0, -2.5, 3.0];
        for (&actual, &wanted) in coords.iter().zip(expected.iter()) {
            assert_close(actual, wanted, 0.001);
        }
    }

    /// The `#id=TYPE` header of an entity line is not parsed as coordinates.
    #[test]
    fn test_extract_coordinate_list() {
        let entity = b"#78=IFCCARTESIANPOINTLIST3D(((0.,0.,150.),(100.,0.,0.)));";
        let coords = extract_coordinate_list_from_entity(entity).unwrap();

        assert_eq!(coords.len(), 6);
        assert_close(coords[0], 0.0, 0.001);
        assert_close(coords[2], 150.0, 0.001);
        assert_close(coords[3], 100.0, 0.001);
    }

    /// Supported types match in any case; other types are rejected.
    #[test]
    fn test_should_use_fast_path() {
        let supported = [
            "IFCCARTESIANPOINTLIST3D",
            "IFCTRIANGULATEDFACESET",
            "IfcTriangulatedFaceSet",
        ];
        for name in supported.iter() {
            assert!(should_use_fast_path(name));
        }

        let unsupported = ["IFCWALL", "IFCEXTRUDEDAREASOLID"];
        for name in unsupported.iter() {
            assert!(!should_use_fast_path(name));
        }
    }
}