1use nom::{
10 branch::alt,
11 bytes::complete::{take_while, take_while1},
12 character::complete::{char, digit1, one_of},
13 combinator::{map, map_res, opt, recognize},
14 multi::separated_list0,
15 sequence::{delimited, pair, preceded, tuple},
16 IResult,
17};
18
19use crate::error::{Error, Result};
20use crate::schema::IfcType;
21
/// One parsed argument value from an IFC entity line.
///
/// Text-carrying variants borrow from the parsed input (`'a`) rather than
/// copying it.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'a> {
    /// Reference to another entity, written `#<digits>`.
    EntityRef(u32),
    /// Raw text between the quotes of a string literal; doubled quotes are
    /// kept exactly as written.
    String(&'a str),
    /// Integer literal.
    Integer(i64),
    /// Floating-point literal (contains a `.`).
    Float(f64),
    /// Name found between two dots, e.g. `.TRUE.` yields `"TRUE"`.
    Enum(&'a str),
    /// Parenthesised, comma-separated sequence of tokens.
    List(Vec<Token<'a>>),
    /// Identifier immediately followed by a parenthesised argument list.
    TypedValue(&'a str, Vec<Token<'a>>),
    /// The `$` marker.
    Null,
    /// The `*` marker.
    Derived,
}
44
45fn entity_ref(input: &str) -> IResult<&str, Token> {
47 map(
48 preceded(
49 char('#'),
50 map_res(digit1, |s: &str| s.parse::<u32>())
51 ),
52 Token::EntityRef
53 )(input)
54}
55
56fn string_literal(input: &str) -> IResult<&str, Token> {
60 #[inline]
62 fn parse_string_content(input: &str, quote_byte: u8) -> IResult<&str, &str> {
63 let bytes = input.as_bytes();
64 let mut pos = 0;
65
66 while let Some(found) = memchr::memchr(quote_byte, &bytes[pos..]) {
68 let idx = pos + found;
69 if idx + 1 < bytes.len() && bytes[idx + 1] == quote_byte {
71 pos = idx + 2; continue;
73 }
74 return Ok((&input[idx..], &input[..idx]));
76 }
77
78 Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Char)))
80 }
81
82 alt((
83 map(
84 delimited(
85 char('\''),
86 |i| parse_string_content(i, b'\''),
87 char('\'')
88 ),
89 Token::String
90 ),
91 map(
92 delimited(
93 char('"'),
94 |i| parse_string_content(i, b'"'),
95 char('"')
96 ),
97 Token::String
98 ),
99 ))(input)
100}
101
102#[inline]
105fn integer(input: &str) -> IResult<&str, Token> {
106 map_res(
107 recognize(
108 tuple((
109 opt(char('-')),
110 digit1,
111 ))
112 ),
113 |s: &str| lexical_core::parse::<i64>(s.as_bytes())
114 .map(Token::Integer)
115 .map_err(|_| "parse error")
116 )(input)
117}
118
119#[inline]
123fn float(input: &str) -> IResult<&str, Token> {
124 map_res(
125 recognize(
126 tuple((
127 opt(char('-')),
128 digit1,
129 char('.'),
130 opt(digit1), opt(tuple((
132 one_of("eE"),
133 opt(one_of("+-")),
134 digit1,
135 ))),
136 ))
137 ),
138 |s: &str| lexical_core::parse::<f64>(s.as_bytes())
139 .map(Token::Float)
140 .map_err(|_| "parse error")
141 )(input)
142}
143
144fn enum_value(input: &str) -> IResult<&str, Token> {
146 map(
147 delimited(
148 char('.'),
149 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
150 char('.')
151 ),
152 Token::Enum
153 )(input)
154}
155
156fn null(input: &str) -> IResult<&str, Token> {
158 map(char('$'), |_| Token::Null)(input)
159}
160
161fn derived(input: &str) -> IResult<&str, Token> {
163 map(char('*'), |_| Token::Derived)(input)
164}
165
166fn typed_value(input: &str) -> IResult<&str, Token> {
168 map(
169 pair(
170 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
172 delimited(
174 char('('),
175 separated_list0(
176 delimited(ws, char(','), ws),
177 token
178 ),
179 char(')')
180 )
181 ),
182 |(type_name, args)| Token::TypedValue(type_name, args)
183 )(input)
184}
185
186fn ws(input: &str) -> IResult<&str, ()> {
188 map(
189 take_while(|c: char| c.is_whitespace()),
190 |_| ()
191 )(input)
192}
193
194fn token(input: &str) -> IResult<&str, Token> {
197 delimited(
198 ws,
199 alt((
200 null, derived, entity_ref, enum_value, string_literal, list, float,
210 integer,
211 typed_value, )),
213 ws
214 )(input)
215}
216
217fn list(input: &str) -> IResult<&str, Token> {
219 map(
220 delimited(
221 char('('),
222 separated_list0(
223 delimited(ws, char(','), ws),
224 token
225 ),
226 char(')')
227 ),
228 Token::List
229 )(input)
230}
231
232pub fn parse_entity(input: &str) -> Result<(u32, IfcType, Vec<Token>)> {
235 let result: IResult<&str, (u32, &str, Vec<Token>)> = tuple((
236 delimited(
238 ws,
239 preceded(
240 char('#'),
241 map_res(digit1, |s: &str| s.parse::<u32>())
242 ),
243 ws
244 ),
245 preceded(
247 char('='),
248 delimited(
250 ws,
251 take_while1(|c: char| c.is_alphanumeric() || c == '_'),
252 ws
253 )
254 ),
255 delimited(
257 char('('),
258 separated_list0(
259 delimited(ws, char(','), ws),
260 token
261 ),
262 tuple((char(')'), ws, char(';')))
263 ),
264 ))(input);
265
266 match result {
267 Ok((_, (id, type_str, args))) => {
268 let ifc_type = IfcType::from_str(type_str);
269 Ok((id, ifc_type, args))
270 }
271 Err(e) => Err(Error::parse(0, format!("Failed to parse entity: {}", e))),
272 }
273}
274
275pub struct EntityScanner<'a> {
279 content: &'a str,
280 bytes: &'a [u8],
281 position: usize,
282}
283
284impl<'a> EntityScanner<'a> {
285 pub fn new(content: &'a str) -> Self {
287 Self {
288 content,
289 bytes: content.as_bytes(),
290 position: 0,
291 }
292 }
293
294 #[inline]
297 pub fn next_entity(&mut self) -> Option<(u32, &'a str, usize, usize)> {
298 let remaining = &self.bytes[self.position..];
299
300 let start_offset = memchr::memchr(b'#', remaining)?;
302 let line_start = self.position + start_offset;
303
304 let line_content = &self.bytes[line_start..];
306 let end_offset = memchr::memchr(b';', line_content)?;
307 let line_end = line_start + end_offset + 1;
308
309 let id_start = line_start + 1;
311 let mut id_end = id_start;
312 while id_end < line_end && self.bytes[id_end].is_ascii_digit() {
313 id_end += 1;
314 }
315
316 let id = self.parse_u32_fast(id_start, id_end)?;
318
319 let eq_search = &self.bytes[id_end..line_end];
321 let eq_offset = memchr::memchr(b'=', eq_search)?;
322 let mut type_start = id_end + eq_offset + 1;
323
324 while type_start < line_end && self.bytes[type_start].is_ascii_whitespace() {
326 type_start += 1;
327 }
328
329 let mut type_end = type_start;
331 while type_end < line_end {
332 let b = self.bytes[type_end];
333 if b == b'(' || b.is_ascii_whitespace() {
334 break;
335 }
336 type_end += 1;
337 }
338
339 let type_name = unsafe { std::str::from_utf8_unchecked(&self.bytes[type_start..type_end]) };
341
342 self.position = line_end;
344
345 Some((id, type_name, line_start, line_end))
346 }
347
348 #[inline]
350 fn parse_u32_fast(&self, start: usize, end: usize) -> Option<u32> {
351 let mut result: u32 = 0;
352 for i in start..end {
353 let digit = self.bytes[i].wrapping_sub(b'0');
354 if digit > 9 {
355 return None;
356 }
357 result = result.wrapping_mul(10).wrapping_add(digit as u32);
358 }
359 Some(result)
360 }
361
362 pub fn find_by_type(&mut self, target_type: &str) -> Vec<(u32, usize, usize)> {
364 let mut results = Vec::new();
365
366 while let Some((id, type_name, start, end)) = self.next_entity() {
367 if type_name.eq_ignore_ascii_case(target_type) {
368 results.push((id, start, end));
369 }
370 }
371
372 results
373 }
374
375 pub fn count_by_type(&mut self) -> rustc_hash::FxHashMap<String, usize> {
377 let mut counts = rustc_hash::FxHashMap::default();
378
379 while let Some((_, type_name, _, _)) = self.next_entity() {
380 *counts.entry(type_name.to_string()).or_insert(0) += 1;
381 }
382
383 counts
384 }
385
386 pub fn reset(&mut self) {
388 self.position = 0;
389 }
390}
391
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_entity_ref() {
        assert_eq!(entity_ref("#123"), Ok(("", Token::EntityRef(123))));
        assert_eq!(entity_ref("#0"), Ok(("", Token::EntityRef(0))));
        // The leading `#` is mandatory.
        assert!(entity_ref("123").is_err());
    }

    #[test]
    fn test_string_literal() {
        assert_eq!(string_literal("'hello'"), Ok(("", Token::String("hello"))));
        assert_eq!(
            string_literal("'with spaces'"),
            Ok(("", Token::String("with spaces")))
        );
        assert_eq!(string_literal("''"), Ok(("", Token::String(""))));
    }

    #[test]
    fn test_string_literal_doubled_quote_and_unterminated() {
        // A doubled quote stays inside the literal and is returned verbatim.
        assert_eq!(string_literal("'it''s'"), Ok(("", Token::String("it''s"))));
        assert!(string_literal("'unterminated").is_err());
    }

    #[test]
    fn test_integer() {
        assert_eq!(integer("42"), Ok(("", Token::Integer(42))));
        assert_eq!(integer("-42"), Ok(("", Token::Integer(-42))));
        assert_eq!(integer("0"), Ok(("", Token::Integer(0))));
    }

    #[test]
    fn test_float() {
        // 2.75 rather than 3.14: avoids clippy's `approx_constant` lint and is
        // exactly representable in binary floating point.
        assert_eq!(float("2.75"), Ok(("", Token::Float(2.75))));
        assert_eq!(float("-2.75"), Ok(("", Token::Float(-2.75))));
        assert_eq!(float("1.5E-10"), Ok(("", Token::Float(1.5e-10))));
    }

    #[test]
    fn test_enum() {
        assert_eq!(enum_value(".TRUE."), Ok(("", Token::Enum("TRUE"))));
        assert_eq!(enum_value(".FALSE."), Ok(("", Token::Enum("FALSE"))));
        assert_eq!(enum_value(".ELEMENT."), Ok(("", Token::Enum("ELEMENT"))));
    }

    #[test]
    fn test_null_and_derived() {
        assert_eq!(null("$"), Ok(("", Token::Null)));
        assert_eq!(derived("*"), Ok(("", Token::Derived)));
    }

    #[test]
    fn test_typed_value() {
        assert_eq!(
            typed_value("IFCLABEL('x')"),
            Ok(("", Token::TypedValue("IFCLABEL", vec![Token::String("x")])))
        );
    }

    #[test]
    fn test_token_number_dispatch() {
        // Surrounding whitespace is consumed; a `.` selects Float over Integer.
        assert_eq!(token(" 1.5 "), Ok(("", Token::Float(1.5))));
        assert_eq!(token("42"), Ok(("", Token::Integer(42))));
    }

    #[test]
    fn test_list() {
        let (rest, parsed) = list("(1,2,3)").expect("list should parse");
        assert_eq!(rest, "");
        assert_eq!(
            parsed,
            Token::List(vec![
                Token::Integer(1),
                Token::Integer(2),
                Token::Integer(3),
            ])
        );
    }

    #[test]
    fn test_nested_list() {
        let (rest, parsed) = list("(1,(2,3),4)").expect("nested list should parse");
        assert_eq!(rest, "");
        assert_eq!(
            parsed,
            Token::List(vec![
                Token::Integer(1),
                Token::List(vec![Token::Integer(2), Token::Integer(3)]),
                Token::Integer(4),
            ])
        );
    }

    #[test]
    fn test_parse_entity() {
        let input = "#123=IFCWALL('guid','owner',$,$,'name',$,$,$);";
        let result = parse_entity(input);
        assert!(result.is_ok());
        let (id, ifc_type, args) = result.unwrap();
        assert_eq!(id, 123);
        assert_eq!(ifc_type, IfcType::IfcWall);
        assert_eq!(args.len(), 8);
    }

    #[test]
    fn test_parse_entity_with_nested_list() {
        let input = "#9=IFCDIRECTION((0.,0.,1.));";
        let result = parse_entity(input);

        assert!(result.is_ok(), "Failed to parse: {:?}", result);
        let (id, _ifc_type, args) = result.unwrap();
        assert_eq!(id, 9);
        assert_eq!(args.len(), 1);
        match &args[0] {
            Token::List(inner) => assert_eq!(
                inner,
                &vec![Token::Float(0.0), Token::Float(0.0), Token::Float(1.0)]
            ),
            other => panic!("Expected Token::List, got {:?}", other),
        }
    }

    #[test]
    fn test_entity_scanner() {
        let content = concat!(
            "\n",
            "#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);\n",
            "#2=IFCWALL('guid2',$,$,$,$,$,$,$);\n",
            "#3=IFCDOOR('guid3',$,$,$,$,$,$,$);\n",
            "#4=IFCWALL('guid4',$,$,$,$,$,$,$);\n",
        );

        let mut scanner = EntityScanner::new(content);

        let (id, type_name, _, _) = scanner.next_entity().unwrap();
        assert_eq!(id, 1);
        assert_eq!(type_name, "IFCPROJECT");

        scanner.reset();
        let walls = scanner.find_by_type("IFCWALL");
        assert_eq!(walls.len(), 2);
        assert_eq!(walls[0].0, 2);
        assert_eq!(walls[1].0, 4);

        scanner.reset();
        let counts = scanner.count_by_type();
        assert_eq!(counts.get("IFCPROJECT"), Some(&1));
        assert_eq!(counts.get("IFCWALL"), Some(&2));
        assert_eq!(counts.get("IFCDOOR"), Some(&1));
    }

    #[test]
    fn test_entity_scanner_spans_and_exhaustion() {
        let content = "#1=IFCWALL($);\n#2=IFCDOOR($);";
        let mut scanner = EntityScanner::new(content);

        let (id, type_name, start, end) = scanner.next_entity().unwrap();
        assert_eq!((id, type_name), (1, "IFCWALL"));
        assert_eq!(&content[start..end], "#1=IFCWALL($);");

        let (id, type_name, start, end) = scanner.next_entity().unwrap();
        assert_eq!((id, type_name), (2, "IFCDOOR"));
        assert_eq!(&content[start..end], "#2=IFCDOOR($);");

        assert!(scanner.next_entity().is_none());
    }
}