1use nom::{
10 branch::alt,
11 bytes::complete::{take_while, take_while1},
12 character::complete::{char, digit1, one_of},
13 combinator::{map, map_res, opt, recognize},
14 multi::separated_list0,
15 sequence::{delimited, pair, preceded, tuple},
16 IResult,
17};
18
19use crate::error::{Error, Result};
20use crate::generated::IfcType;
21
/// A single value parsed from an entity's argument list.
///
/// Text-carrying variants borrow from the input (`'a`) instead of copying it.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> {
    /// Entity reference: `#` followed by decimal digits, e.g. `#123`.
    EntityRef(u32),
    /// Quoted string. Holds the raw text between the quotes; doubled-quote
    /// escapes are kept exactly as written in the input.
    String(&'a str),
    /// Integer literal.
    Integer(i64),
    /// Floating-point literal (digits, a mandatory `.`, optional exponent).
    Float(f64),
    /// Enumeration value written between dots, e.g. `.TRUE.`; the dots are
    /// not part of the stored name.
    Enum(&'a str),
    /// Parenthesised, comma-separated list of tokens; may be nested.
    List(Vec<Token<'a>>),
    /// An identifier immediately followed by a parenthesised argument list:
    /// `(type name, arguments)`.
    TypedValue(&'a str, Vec<Token<'a>>),
    /// The `$` marker.
    Null,
    /// The `*` marker.
    Derived,
}
44
45fn entity_ref(input: &str) -> IResult<&str, Token<'_>> {
47 map(
48 preceded(char('#'), map_res(digit1, |s: &str| s.parse::<u32>())),
49 Token::EntityRef,
50 )(input)
51}
52
53fn string_literal(input: &str) -> IResult<&str, Token<'_>> {
57 #[inline]
59 fn parse_string_content(input: &str, quote_byte: u8) -> IResult<&str, &str> {
60 let bytes = input.as_bytes();
61 let mut pos = 0;
62
63 while let Some(found) = memchr::memchr(quote_byte, &bytes[pos..]) {
65 let idx = pos + found;
66 if idx + 1 < bytes.len() && bytes[idx + 1] == quote_byte {
68 pos = idx + 2; continue;
70 }
71 return Ok((&input[idx..], &input[..idx]));
73 }
74
75 Err(nom::Err::Error(nom::error::Error::new(
77 input,
78 nom::error::ErrorKind::Char,
79 )))
80 }
81
82 alt((
83 map(
84 delimited(char('\''), |i| parse_string_content(i, b'\''), char('\'')),
85 Token::String,
86 ),
87 map(
88 delimited(char('"'), |i| parse_string_content(i, b'"'), char('"')),
89 Token::String,
90 ),
91 ))(input)
92}
93
94#[inline]
97fn integer(input: &str) -> IResult<&str, Token<'_>> {
98 map_res(recognize(tuple((opt(char('-')), digit1))), |s: &str| {
99 lexical_core::parse::<i64>(s.as_bytes())
100 .map(Token::Integer)
101 .map_err(|_| "parse error")
102 })(input)
103}
104
105#[inline]
109fn float(input: &str) -> IResult<&str, Token<'_>> {
110 map_res(
111 recognize(tuple((
112 opt(char('-')),
113 digit1,
114 char('.'),
115 opt(digit1), opt(tuple((one_of("eE"), opt(one_of("+-")), digit1))),
117 ))),
118 |s: &str| {
119 lexical_core::parse::<f64>(s.as_bytes())
120 .map(Token::Float)
121 .map_err(|_| "parse error")
122 },
123 )(input)
124}
125
126fn enum_value(input: &str) -> IResult<&str, Token<'_>> {
128 map(
129 delimited(
130 char('.'),
131 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
132 char('.'),
133 ),
134 Token::Enum,
135 )(input)
136}
137
138fn null(input: &str) -> IResult<&str, Token<'_>> {
140 map(char('$'), |_| Token::Null)(input)
141}
142
143fn derived(input: &str) -> IResult<&str, Token<'_>> {
145 map(char('*'), |_| Token::Derived)(input)
146}
147
148fn typed_value(input: &str) -> IResult<&str, Token<'_>> {
150 map(
151 pair(
152 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
154 delimited(
156 char('('),
157 separated_list0(delimited(ws, char(','), ws), token),
158 char(')'),
159 ),
160 ),
161 |(type_name, args)| Token::TypedValue(type_name, args),
162 )(input)
163}
164
165fn ws(input: &str) -> IResult<&str, ()> {
167 map(take_while(|c: char| c.is_whitespace()), |_| ())(input)
168}
169
170fn token(input: &str) -> IResult<&str, Token<'_>> {
173 delimited(
174 ws,
175 alt((
176 null, derived, entity_ref, enum_value, string_literal, list, float,
186 integer,
187 typed_value, )),
189 ws,
190 )(input)
191}
192
193fn list(input: &str) -> IResult<&str, Token<'_>> {
195 map(
196 delimited(
197 char('('),
198 separated_list0(delimited(ws, char(','), ws), token),
199 char(')'),
200 ),
201 Token::List,
202 )(input)
203}
204
205pub fn parse_entity(input: &str) -> Result<(u32, IfcType, Vec<Token<'_>>)> {
208 let result: IResult<&str, (u32, &str, Vec<Token>)> = tuple((
209 delimited(
211 ws,
212 preceded(char('#'), map_res(digit1, |s: &str| s.parse::<u32>())),
213 ws,
214 ),
215 preceded(
217 char('='),
218 delimited(
220 ws,
221 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
222 ws,
223 ),
224 ),
225 delimited(
227 char('('),
228 separated_list0(delimited(ws, char(','), ws), token),
229 tuple((char(')'), ws, char(';'))),
230 ),
231 ))(input);
232
233 match result {
234 Ok((_, (id, type_str, args))) => {
235 let ifc_type = IfcType::from_str(type_str);
236 Ok((id, ifc_type, args))
237 }
238 Err(e) => Err(Error::parse(0, format!("Failed to parse entity: {}", e))),
239 }
240}
241
/// Forward-only cursor over raw text that locates `#id=TYPE(...);`
/// statements by scanning bytes, without running the token parser.
pub struct EntityScanner<'a> {
    // Stored by `new` but not read by any method in this section of the
    // file; the attribute silences the resulting unused-field lint.
    #[allow(dead_code)]
    content: &'a str,
    /// Byte view of `content`; all scanning operates on this slice.
    bytes: &'a [u8],
    /// Byte offset into `bytes` at which the next scan begins.
    position: usize,
}
251
252impl<'a> EntityScanner<'a> {
253 pub fn new(content: &'a str) -> Self {
255 Self {
256 content,
257 bytes: content.as_bytes(),
258 position: 0,
259 }
260 }
261
262 #[inline]
265 pub fn next_entity(&mut self) -> Option<(u32, &'a str, usize, usize)> {
266 let remaining = &self.bytes[self.position..];
267
268 let start_offset = memchr::memchr(b'#', remaining)?;
270 let line_start = self.position + start_offset;
271
272 let line_content = &self.bytes[line_start..];
275 let end_offset = self.find_entity_end(line_content)?;
276 let line_end = line_start + end_offset + 1;
277
278 let id_start = line_start + 1;
280 let mut id_end = id_start;
281 while id_end < line_end && self.bytes[id_end].is_ascii_digit() {
282 id_end += 1;
283 }
284
285 let id = self.parse_u32_fast(id_start, id_end)?;
287
288 let eq_search = &self.bytes[id_end..line_end];
290 let eq_offset = memchr::memchr(b'=', eq_search)?;
291 let mut type_start = id_end + eq_offset + 1;
292
293 while type_start < line_end && self.bytes[type_start].is_ascii_whitespace() {
295 type_start += 1;
296 }
297
298 let mut type_end = type_start;
300 while type_end < line_end {
301 let b = self.bytes[type_end];
302 if b == b'(' || b.is_ascii_whitespace() {
303 break;
304 }
305 type_end += 1;
306 }
307
308 let type_name = std::str::from_utf8(&self.bytes[type_start..type_end])
310 .unwrap_or("UNKNOWN");
311
312 self.position = line_end;
314
315 Some((id, type_name, line_start, line_end))
316 }
317
318 #[inline]
320 fn parse_u32_fast(&self, start: usize, end: usize) -> Option<u32> {
321 let mut result: u32 = 0;
322 for i in start..end {
323 let digit = self.bytes[i].wrapping_sub(b'0');
324 if digit > 9 {
325 return None;
326 }
327 result = result.wrapping_mul(10).wrapping_add(digit as u32);
328 }
329 Some(result)
330 }
331
332 #[inline]
336 fn find_entity_end(&self, content: &[u8]) -> Option<usize> {
337 let mut pos = 0;
338 let len = content.len();
339 let mut in_string = false;
340
341 while pos < len {
342 let b = content[pos];
343
344 if in_string {
345 if b == b'\'' {
346 if pos + 1 < len && content[pos + 1] == b'\'' {
348 pos += 2; continue;
350 }
351 in_string = false;
352 }
353 pos += 1;
354 } else {
355 match b {
356 b'\'' => {
357 in_string = true;
358 pos += 1;
359 }
360 b';' => {
361 return Some(pos);
362 }
363 b'\n' => {
364 pos += 1;
366 }
367 _ => {
368 pos += 1;
369 }
370 }
371 }
372 }
373 None
374 }
375
376 pub fn find_by_type(&mut self, target_type: &str) -> Vec<(u32, usize, usize)> {
378 let mut results = Vec::new();
379
380 while let Some((id, type_name, start, end)) = self.next_entity() {
381 if type_name.eq_ignore_ascii_case(target_type) {
382 results.push((id, start, end));
383 }
384 }
385
386 results
387 }
388
389 pub fn count_by_type(&mut self) -> rustc_hash::FxHashMap<String, usize> {
391 let mut counts = rustc_hash::FxHashMap::default();
392
393 while let Some((_, type_name, _, _)) = self.next_entity() {
394 *counts.entry(type_name.to_string()).or_insert(0) += 1;
395 }
396
397 counts
398 }
399
400 pub fn reset(&mut self) {
402 self.position = 0;
403 }
404
405 #[inline]
411 pub fn has_non_null_attribute(&self, start: usize, end: usize, attr_index: usize) -> bool {
412 let content = &self.bytes[start..end];
413
414 let paren_pos = match memchr::memchr(b'(', content) {
416 Some(p) => p + 1,
417 None => return false,
418 };
419
420 let mut pos = paren_pos;
421 let mut current_attr = 0;
422 let mut depth = 0; let mut in_string = false;
424
425 let check_target = |pos: usize, current_attr: usize, depth: usize| -> Option<bool> {
427 if current_attr == attr_index && depth == 0 {
428 let mut p = pos;
430 while p < content.len() && content[p].is_ascii_whitespace() {
431 p += 1;
432 }
433 if p < content.len() {
435 return Some(content[p] != b'$');
436 }
437 return Some(false);
438 }
439 None
440 };
441
442 if let Some(result) = check_target(pos, current_attr, depth) {
444 return result;
445 }
446
447 while pos < content.len() {
448 let b = content[pos];
449
450 if in_string {
451 if b == b'\'' {
452 if pos + 1 < content.len() && content[pos + 1] == b'\'' {
454 pos += 2;
455 continue;
456 }
457 in_string = false;
458 }
459 pos += 1;
460 continue;
461 }
462
463 match b {
464 b'\'' => {
465 in_string = true;
466 pos += 1;
467 }
468 b'(' => {
469 depth += 1;
470 pos += 1;
471 }
472 b')' => {
473 if depth == 0 {
474 return false;
476 }
477 depth -= 1;
478 pos += 1;
479 }
480 b',' if depth == 0 => {
481 current_attr += 1;
482 pos += 1;
483 while pos < content.len() && content[pos].is_ascii_whitespace() {
485 pos += 1;
486 }
487 if let Some(result) = check_target(pos, current_attr, depth) {
489 return result;
490 }
491 }
492 _ => {
493 pos += 1;
494 }
495 }
496 }
497
498 false
499 }
500}
501
/// Unit tests for the token parsers, `parse_entity`, and `EntityScanner`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_entity_ref() {
        assert_eq!(entity_ref("#123"), Ok(("", Token::EntityRef(123))));
        assert_eq!(entity_ref("#0"), Ok(("", Token::EntityRef(0))));
    }

    #[test]
    fn test_string_literal() {
        assert_eq!(string_literal("'hello'"), Ok(("", Token::String("hello"))));
        assert_eq!(
            string_literal("'with spaces'"),
            Ok(("", Token::String("with spaces")))
        );
    }

    #[test]
    fn test_string_literal_escaped_quote() {
        // A doubled quote is an escape; the raw text is returned unmodified.
        assert_eq!(string_literal("'it''s'"), Ok(("", Token::String("it''s"))));
        assert_eq!(string_literal("''"), Ok(("", Token::String(""))));
    }

    #[test]
    fn test_integer() {
        assert_eq!(integer("42"), Ok(("", Token::Integer(42))));
        assert_eq!(integer("-42"), Ok(("", Token::Integer(-42))));
        assert_eq!(integer("0"), Ok(("", Token::Integer(0))));
    }

    #[test]
    // 3.14 is deliberate test data, not an approximation of PI.
    #[allow(clippy::approx_constant)]
    fn test_float() {
        assert_eq!(float("3.14"), Ok(("", Token::Float(3.14))));
        assert_eq!(float("-3.14"), Ok(("", Token::Float(-3.14))));
        assert_eq!(float("1.5E-10"), Ok(("", Token::Float(1.5e-10))));
    }

    #[test]
    fn test_enum() {
        assert_eq!(enum_value(".TRUE."), Ok(("", Token::Enum("TRUE"))));
        assert_eq!(enum_value(".FALSE."), Ok(("", Token::Enum("FALSE"))));
        assert_eq!(enum_value(".ELEMENT."), Ok(("", Token::Enum("ELEMENT"))));
    }

    #[test]
    fn test_null_and_derived() {
        assert_eq!(null("$"), Ok(("", Token::Null)));
        assert_eq!(derived("*"), Ok(("", Token::Derived)));
    }

    #[test]
    fn test_typed_value() {
        assert_eq!(
            typed_value("IFCLABEL('x')"),
            Ok(("", Token::TypedValue("IFCLABEL", vec![Token::String("x")])))
        );
    }

    #[test]
    fn test_list() {
        let result = list("(1,2,3)");
        assert!(result.is_ok());
        let (_, token) = result.unwrap();
        match token {
            Token::List(items) => {
                assert_eq!(items.len(), 3);
                assert_eq!(items[0], Token::Integer(1));
                assert_eq!(items[1], Token::Integer(2));
                assert_eq!(items[2], Token::Integer(3));
            }
            _ => panic!("Expected List token"),
        }
    }

    #[test]
    fn test_empty_list() {
        assert_eq!(list("()"), Ok(("", Token::List(vec![]))));
    }

    #[test]
    fn test_nested_list() {
        let result = list("(1,(2,3),4)");
        assert!(result.is_ok());
        let (_, token) = result.unwrap();
        match token {
            Token::List(items) => {
                assert_eq!(items.len(), 3);
                assert_eq!(items[0], Token::Integer(1));
                match &items[1] {
                    Token::List(inner) => {
                        assert_eq!(inner.len(), 2);
                        assert_eq!(inner[0], Token::Integer(2));
                        assert_eq!(inner[1], Token::Integer(3));
                    }
                    _ => panic!("Expected nested List"),
                }
                assert_eq!(items[2], Token::Integer(4));
            }
            _ => panic!("Expected List token"),
        }
    }

    #[test]
    fn test_parse_entity() {
        let input = "#123=IFCWALL('guid','owner',$,$,'name',$,$,$);";
        let result = parse_entity(input);
        assert!(result.is_ok());
        let (id, ifc_type, args) = result.unwrap();
        assert_eq!(id, 123);
        assert_eq!(ifc_type, IfcType::IfcWall);
        assert_eq!(args.len(), 8);
    }

    #[test]
    fn test_parse_entity_with_nested_list() {
        // Floats with an empty fraction ("0.") inside a list...
        let inner = list("(0.,0.,1.)");
        assert!(inner.is_ok(), "Failed to parse inner list: {:?}", inner);

        // ...and the same list as the single argument of an entity.
        let input = "#9=IFCDIRECTION((0.,0.,1.));";
        let result = parse_entity(input);
        assert!(result.is_ok(), "Failed to parse: {:?}", result);

        let (id, _ifc_type, args) = result.unwrap();
        assert_eq!(id, 9);
        assert_eq!(args.len(), 1);
        if let Token::List(inner) = &args[0] {
            assert_eq!(inner.len(), 3);
        } else {
            panic!("Expected Token::List, got {:?}", args[0]);
        }
    }

    #[test]
    fn test_entity_scanner() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
#3=IFCDOOR('guid3',$,$,$,$,$,$,$);
#4=IFCWALL('guid4',$,$,$,$,$,$,$);
"#;

        let mut scanner = EntityScanner::new(content);

        let (id, type_name, _, _) = scanner.next_entity().unwrap();
        assert_eq!(id, 1);
        assert_eq!(type_name, "IFCPROJECT");

        scanner.reset();
        let walls = scanner.find_by_type("IFCWALL");
        assert_eq!(walls.len(), 2);
        assert_eq!(walls[0].0, 2);
        assert_eq!(walls[1].0, 4);

        scanner.reset();
        let counts = scanner.count_by_type();
        assert_eq!(counts.get("IFCPROJECT"), Some(&1));
        assert_eq!(counts.get("IFCWALL"), Some(&2));
        assert_eq!(counts.get("IFCDOOR"), Some(&1));
    }

    #[test]
    fn test_has_non_null_attribute() {
        let content = "#1=IFCWALL('guid',$,#5,(1,2),$);";
        let mut scanner = EntityScanner::new(content);
        let (_, _, start, end) = scanner.next_entity().unwrap();

        assert!(scanner.has_non_null_attribute(start, end, 0)); // 'guid'
        assert!(!scanner.has_non_null_attribute(start, end, 1)); // $
        assert!(scanner.has_non_null_attribute(start, end, 2)); // #5
        assert!(scanner.has_non_null_attribute(start, end, 3)); // (1,2)
        assert!(!scanner.has_non_null_attribute(start, end, 4)); // $
        assert!(!scanner.has_non_null_attribute(start, end, 5)); // past the end
    }
}