1use nom::{
10 branch::alt,
11 bytes::complete::{take_while, take_while1},
12 character::complete::{char, digit1, one_of},
13 combinator::{map, map_res, opt, recognize},
14 multi::separated_list0,
15 sequence::{delimited, pair, preceded, tuple},
16 IResult,
17};
18
19use crate::error::{Error, Result};
20use crate::generated::IfcType;
21
/// A single parsed attribute value of an entity line.
///
/// String-like variants borrow from the input text, so a `Token` cannot
/// outlive the buffer it was parsed from.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> {
    /// Reference to another entity, written `#123`.
    EntityRef(u32),
    /// Quoted string; holds the raw text between the quotes (doubled quotes
    /// are left as written).
    String(&'a str),
    /// Integer literal such as `42` or `-42`.
    Integer(i64),
    /// Floating point literal such as `3.14`, `0.` or `1.5E-10`.
    Float(f64),
    /// Enumeration value written between dots, e.g. `.TRUE.` yields `"TRUE"`.
    Enum(&'a str),
    /// Parenthesised, comma-separated list of nested tokens.
    List(Vec<Token<'a>>),
    /// Type name immediately followed by a parenthesised argument list.
    TypedValue(&'a str, Vec<Token<'a>>),
    /// The `$` marker.
    Null,
    /// The `*` marker.
    Derived,
}
44
45fn entity_ref(input: &str) -> IResult<&str, Token<'_>> {
47 map(
48 preceded(char('#'), map_res(digit1, |s: &str| s.parse::<u32>())),
49 Token::EntityRef,
50 )(input)
51}
52
53fn string_literal(input: &str) -> IResult<&str, Token<'_>> {
57 #[inline]
59 fn parse_string_content(input: &str, quote_byte: u8) -> IResult<&str, &str> {
60 let bytes = input.as_bytes();
61 let mut pos = 0;
62
63 while let Some(found) = memchr::memchr(quote_byte, &bytes[pos..]) {
65 let idx = pos + found;
66 if idx + 1 < bytes.len() && bytes[idx + 1] == quote_byte {
68 pos = idx + 2; continue;
70 }
71 return Ok((&input[idx..], &input[..idx]));
73 }
74
75 Err(nom::Err::Error(nom::error::Error::new(
77 input,
78 nom::error::ErrorKind::Char,
79 )))
80 }
81
82 alt((
83 map(
84 delimited(char('\''), |i| parse_string_content(i, b'\''), char('\'')),
85 Token::String,
86 ),
87 map(
88 delimited(char('"'), |i| parse_string_content(i, b'"'), char('"')),
89 Token::String,
90 ),
91 ))(input)
92}
93
94#[inline]
97fn integer(input: &str) -> IResult<&str, Token<'_>> {
98 map_res(recognize(tuple((opt(char('-')), digit1))), |s: &str| {
99 lexical_core::parse::<i64>(s.as_bytes())
100 .map(Token::Integer)
101 .map_err(|_| "parse error")
102 })(input)
103}
104
105#[inline]
109fn float(input: &str) -> IResult<&str, Token<'_>> {
110 map_res(
111 recognize(tuple((
112 opt(char('-')),
113 digit1,
114 char('.'),
115 opt(digit1), opt(tuple((one_of("eE"), opt(one_of("+-")), digit1))),
117 ))),
118 |s: &str| {
119 lexical_core::parse::<f64>(s.as_bytes())
120 .map(Token::Float)
121 .map_err(|_| "parse error")
122 },
123 )(input)
124}
125
126fn enum_value(input: &str) -> IResult<&str, Token<'_>> {
128 map(
129 delimited(
130 char('.'),
131 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
132 char('.'),
133 ),
134 Token::Enum,
135 )(input)
136}
137
138fn null(input: &str) -> IResult<&str, Token<'_>> {
140 map(char('$'), |_| Token::Null)(input)
141}
142
143fn derived(input: &str) -> IResult<&str, Token<'_>> {
145 map(char('*'), |_| Token::Derived)(input)
146}
147
148fn typed_value(input: &str) -> IResult<&str, Token<'_>> {
150 map(
151 pair(
152 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
154 delimited(
156 char('('),
157 separated_list0(delimited(ws, char(','), ws), token),
158 char(')'),
159 ),
160 ),
161 |(type_name, args)| Token::TypedValue(type_name, args),
162 )(input)
163}
164
165fn ws(input: &str) -> IResult<&str, ()> {
167 map(take_while(|c: char| c.is_whitespace()), |_| ())(input)
168}
169
170fn token(input: &str) -> IResult<&str, Token<'_>> {
173 delimited(
174 ws,
175 alt((
176 null, derived, entity_ref, enum_value, string_literal, list, float,
186 integer,
187 typed_value, )),
189 ws,
190 )(input)
191}
192
193fn list(input: &str) -> IResult<&str, Token<'_>> {
195 map(
196 delimited(
197 char('('),
198 separated_list0(delimited(ws, char(','), ws), token),
199 char(')'),
200 ),
201 Token::List,
202 )(input)
203}
204
205pub fn parse_entity(input: &str) -> Result<(u32, IfcType, Vec<Token<'_>>)> {
208 let result: IResult<&str, (u32, &str, Vec<Token>)> = tuple((
209 delimited(
211 ws,
212 preceded(char('#'), map_res(digit1, |s: &str| s.parse::<u32>())),
213 ws,
214 ),
215 preceded(
217 char('='),
218 delimited(
220 ws,
221 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
222 ws,
223 ),
224 ),
225 delimited(
227 char('('),
228 separated_list0(delimited(ws, char(','), ws), token),
229 tuple((char(')'), ws, char(';'))),
230 ),
231 ))(input);
232
233 match result {
234 Ok((_, (id, type_str, args))) => {
235 let ifc_type = IfcType::from_str(type_str);
236 Ok((id, ifc_type, args))
237 }
238 Err(e) => Err(Error::parse(0, format!("Failed to parse entity: {}", e))),
239 }
240}
241
/// Cursor over the text of a file that locates `#id=TYPE(...);` statements
/// without fully parsing their attributes.
pub struct EntityScanner<'a> {
    /// The text being scanned.
    // Kept alongside `bytes` even where only the byte view is read, hence
    // the lint suppression.
    #[allow(dead_code)]
    content: &'a str,
    /// Byte view of `content`, used for all scanning.
    bytes: &'a [u8],
    /// Byte offset at which the next search starts.
    position: usize,
}
251
252impl<'a> EntityScanner<'a> {
253 pub fn new(content: &'a str) -> Self {
255 Self {
256 content,
257 bytes: content.as_bytes(),
258 position: 0,
259 }
260 }
261
262 #[inline]
265 pub fn next_entity(&mut self) -> Option<(u32, &'a str, usize, usize)> {
266 let remaining = &self.bytes[self.position..];
267
268 let start_offset = memchr::memchr(b'#', remaining)?;
270 let line_start = self.position + start_offset;
271
272 let line_content = &self.bytes[line_start..];
275 let end_offset = self.find_entity_end(line_content)?;
276 let line_end = line_start + end_offset + 1;
277
278 let id_start = line_start + 1;
280 let mut id_end = id_start;
281 while id_end < line_end && self.bytes[id_end].is_ascii_digit() {
282 id_end += 1;
283 }
284
285 let id = self.parse_u32_fast(id_start, id_end)?;
287
288 let eq_search = &self.bytes[id_end..line_end];
290 let eq_offset = memchr::memchr(b'=', eq_search)?;
291 let mut type_start = id_end + eq_offset + 1;
292
293 while type_start < line_end && self.bytes[type_start].is_ascii_whitespace() {
295 type_start += 1;
296 }
297
298 let mut type_end = type_start;
300 while type_end < line_end {
301 let b = self.bytes[type_end];
302 if b == b'(' || b.is_ascii_whitespace() {
303 break;
304 }
305 type_end += 1;
306 }
307
308 let type_name = std::str::from_utf8(&self.bytes[type_start..type_end]).unwrap_or("UNKNOWN");
310
311 self.position = line_end;
313
314 Some((id, type_name, line_start, line_end))
315 }
316
317 #[inline]
319 fn parse_u32_fast(&self, start: usize, end: usize) -> Option<u32> {
320 let mut result: u32 = 0;
321 for i in start..end {
322 let digit = self.bytes[i].wrapping_sub(b'0');
323 if digit > 9 {
324 return None;
325 }
326 result = result.wrapping_mul(10).wrapping_add(digit as u32);
327 }
328 Some(result)
329 }
330
331 #[inline]
335 fn find_entity_end(&self, content: &[u8]) -> Option<usize> {
336 let mut pos = 0;
337 let len = content.len();
338 let mut in_string = false;
339
340 while pos < len {
341 let b = content[pos];
342
343 if in_string {
344 if b == b'\'' {
345 if pos + 1 < len && content[pos + 1] == b'\'' {
347 pos += 2; continue;
349 }
350 in_string = false;
351 }
352 pos += 1;
353 } else {
354 match b {
355 b'\'' => {
356 in_string = true;
357 pos += 1;
358 }
359 b';' => {
360 return Some(pos);
361 }
362 b'\n' => {
363 pos += 1;
365 }
366 _ => {
367 pos += 1;
368 }
369 }
370 }
371 }
372 None
373 }
374
375 pub fn find_by_type(&mut self, target_type: &str) -> Vec<(u32, usize, usize)> {
377 let mut results = Vec::new();
378
379 while let Some((id, type_name, start, end)) = self.next_entity() {
380 if type_name.eq_ignore_ascii_case(target_type) {
381 results.push((id, start, end));
382 }
383 }
384
385 results
386 }
387
388 pub fn count_by_type(&mut self) -> rustc_hash::FxHashMap<String, usize> {
390 let mut counts = rustc_hash::FxHashMap::default();
391
392 while let Some((_, type_name, _, _)) = self.next_entity() {
393 *counts.entry(type_name.to_string()).or_insert(0) += 1;
394 }
395
396 counts
397 }
398
399 pub fn reset(&mut self) {
401 self.position = 0;
402 }
403
404 #[inline]
410 pub fn has_non_null_attribute(&self, start: usize, end: usize, attr_index: usize) -> bool {
411 let content = &self.bytes[start..end];
412
413 let paren_pos = match memchr::memchr(b'(', content) {
415 Some(p) => p + 1,
416 None => return false,
417 };
418
419 let mut pos = paren_pos;
420 let mut current_attr = 0;
421 let mut depth = 0; let mut in_string = false;
423
424 let check_target = |pos: usize, current_attr: usize, depth: usize| -> Option<bool> {
426 if current_attr == attr_index && depth == 0 {
427 let mut p = pos;
429 while p < content.len() && content[p].is_ascii_whitespace() {
430 p += 1;
431 }
432 if p < content.len() {
434 return Some(content[p] != b'$');
435 }
436 return Some(false);
437 }
438 None
439 };
440
441 if let Some(result) = check_target(pos, current_attr, depth) {
443 return result;
444 }
445
446 while pos < content.len() {
447 let b = content[pos];
448
449 if in_string {
450 if b == b'\'' {
451 if pos + 1 < content.len() && content[pos + 1] == b'\'' {
453 pos += 2;
454 continue;
455 }
456 in_string = false;
457 }
458 pos += 1;
459 continue;
460 }
461
462 match b {
463 b'\'' => {
464 in_string = true;
465 pos += 1;
466 }
467 b'(' => {
468 depth += 1;
469 pos += 1;
470 }
471 b')' => {
472 if depth == 0 {
473 return false;
475 }
476 depth -= 1;
477 pos += 1;
478 }
479 b',' if depth == 0 => {
480 current_attr += 1;
481 pos += 1;
482 while pos < content.len() && content[pos].is_ascii_whitespace() {
484 pos += 1;
485 }
486 if let Some(result) = check_target(pos, current_attr, depth) {
488 return result;
489 }
490 }
491 _ => {
492 pos += 1;
493 }
494 }
495 }
496
497 false
498 }
499}
500
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_entity_ref() {
        assert_eq!(entity_ref("#123"), Ok(("", Token::EntityRef(123))));
        assert_eq!(entity_ref("#0"), Ok(("", Token::EntityRef(0))));
    }

    #[test]
    fn test_string_literal() {
        assert_eq!(string_literal("'hello'"), Ok(("", Token::String("hello"))));
        assert_eq!(
            string_literal("'with spaces'"),
            Ok(("", Token::String("with spaces")))
        );
    }

    #[test]
    fn test_string_literal_quotes() {
        // A doubled quote is an escape and stays in the raw content.
        assert_eq!(string_literal("'it''s'"), Ok(("", Token::String("it''s"))));
        // Double-quoted strings are accepted as well.
        assert_eq!(string_literal("\"dq\""), Ok(("", Token::String("dq"))));
        // An unterminated string is an error.
        assert!(string_literal("'open").is_err());
    }

    #[test]
    fn test_integer() {
        assert_eq!(integer("42"), Ok(("", Token::Integer(42))));
        assert_eq!(integer("-42"), Ok(("", Token::Integer(-42))));
        assert_eq!(integer("0"), Ok(("", Token::Integer(0))));
    }

    #[test]
    // 3.14 is used as an arbitrary literal here, not as an approximation of pi.
    #[allow(clippy::approx_constant)]
    fn test_float() {
        assert_eq!(float("3.14"), Ok(("", Token::Float(3.14))));
        assert_eq!(float("-3.14"), Ok(("", Token::Float(-3.14))));
        assert_eq!(float("1.5E-10"), Ok(("", Token::Float(1.5e-10))));
    }

    #[test]
    fn test_enum() {
        assert_eq!(enum_value(".TRUE."), Ok(("", Token::Enum("TRUE"))));
        assert_eq!(enum_value(".FALSE."), Ok(("", Token::Enum("FALSE"))));
        assert_eq!(enum_value(".ELEMENT."), Ok(("", Token::Enum("ELEMENT"))));
    }

    #[test]
    fn test_list() {
        let (_, token) = list("(1,2,3)").expect("flat list should parse");
        assert_eq!(
            token,
            Token::List(vec![
                Token::Integer(1),
                Token::Integer(2),
                Token::Integer(3),
            ])
        );
    }

    #[test]
    fn test_nested_list() {
        let (_, token) = list("(1,(2,3),4)").expect("nested list should parse");
        assert_eq!(
            token,
            Token::List(vec![
                Token::Integer(1),
                Token::List(vec![Token::Integer(2), Token::Integer(3)]),
                Token::Integer(4),
            ])
        );
    }

    #[test]
    fn test_parse_entity() {
        let input = "#123=IFCWALL('guid','owner',$,$,'name',$,$,$);";
        let result = parse_entity(input);
        assert!(result.is_ok());
        let (id, ifc_type, args) = result.unwrap();
        assert_eq!(id, 123);
        assert_eq!(ifc_type, IfcType::IfcWall);
        assert_eq!(args.len(), 8);
    }

    #[test]
    fn test_parse_entity_with_nested_list() {
        let input = "#9=IFCDIRECTION((0.,0.,1.));";
        let result = parse_entity(input);

        assert!(result.is_ok(), "Failed to parse: {:?}", result);
        let (id, _ifc_type, args) = result.unwrap();
        assert_eq!(id, 9);
        assert_eq!(args.len(), 1);
        match &args[0] {
            Token::List(inner) => assert_eq!(inner.len(), 3),
            other => panic!("Expected Token::List, got {:?}", other),
        }
    }

    #[test]
    fn test_entity_scanner() {
        let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
#3=IFCDOOR('guid3',$,$,$,$,$,$,$);
#4=IFCWALL('guid4',$,$,$,$,$,$,$);
"#;

        let mut scanner = EntityScanner::new(content);

        let (id, type_name, _, _) = scanner.next_entity().unwrap();
        assert_eq!(id, 1);
        assert_eq!(type_name, "IFCPROJECT");

        scanner.reset();
        let walls = scanner.find_by_type("IFCWALL");
        assert_eq!(walls.len(), 2);
        assert_eq!(walls[0].0, 2);
        assert_eq!(walls[1].0, 4);

        scanner.reset();
        let counts = scanner.count_by_type();
        assert_eq!(counts.get("IFCPROJECT"), Some(&1));
        assert_eq!(counts.get("IFCWALL"), Some(&2));
        assert_eq!(counts.get("IFCDOOR"), Some(&1));
    }

    #[test]
    fn test_has_non_null_attribute() {
        let content = "#7=IFCWALL('guid',$,#3,(1,2),$);";
        let mut scanner = EntityScanner::new(content);
        let (id, type_name, start, end) = scanner.next_entity().unwrap();
        assert_eq!(id, 7);
        assert_eq!(type_name, "IFCWALL");
        assert_eq!((start, end), (0, content.len()));

        assert!(scanner.has_non_null_attribute(start, end, 0));
        assert!(!scanner.has_non_null_attribute(start, end, 1));
        assert!(scanner.has_non_null_attribute(start, end, 2));
        // The comma inside the nested list must not split the attribute.
        assert!(scanner.has_non_null_attribute(start, end, 3));
        assert!(!scanner.has_non_null_attribute(start, end, 4));
        // Past the end of the argument list.
        assert!(!scanner.has_non_null_attribute(start, end, 5));
    }
}