use nom::{
branch::alt,
bytes::complete::{take_while, take_while1},
character::complete::{char, digit1, one_of},
combinator::{map, map_res, opt, recognize},
multi::separated_list0,
sequence::{delimited, pair, preceded, tuple},
IResult,
};
use crate::error::{Error, Result};
use crate::generated::IfcType;
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> {
EntityRef(u32),
String(&'a str),
Integer(i64),
Float(f64),
Enum(&'a str),
List(Vec<Token<'a>>),
TypedValue(&'a str, Vec<Token<'a>>),
Null,
Derived,
}
fn entity_ref(input: &str) -> IResult<&str, Token<'_>> {
map(
preceded(char('#'), map_res(digit1, |s: &str| s.parse::<u32>())),
Token::EntityRef,
)(input)
}
fn string_literal(input: &str) -> IResult<&str, Token<'_>> {
#[inline]
fn parse_string_content(input: &str, quote_byte: u8) -> IResult<&str, &str> {
let bytes = input.as_bytes();
let mut pos = 0;
while let Some(found) = memchr::memchr(quote_byte, &bytes[pos..]) {
let idx = pos + found;
if idx + 1 < bytes.len() && bytes[idx + 1] == quote_byte {
pos = idx + 2; continue;
}
return Ok((&input[idx..], &input[..idx]));
}
Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Char,
)))
}
alt((
map(
delimited(char('\''), |i| parse_string_content(i, b'\''), char('\'')),
Token::String,
),
map(
delimited(char('"'), |i| parse_string_content(i, b'"'), char('"')),
Token::String,
),
))(input)
}
#[inline]
fn integer(input: &str) -> IResult<&str, Token<'_>> {
map_res(recognize(tuple((opt(char('-')), digit1))), |s: &str| {
lexical_core::parse::<i64>(s.as_bytes())
.map(Token::Integer)
.map_err(|_| "parse error")
})(input)
}
#[inline]
fn float(input: &str) -> IResult<&str, Token<'_>> {
map_res(
recognize(tuple((
opt(char('-')),
digit1,
char('.'),
opt(digit1), opt(tuple((one_of("eE"), opt(one_of("+-")), digit1))),
))),
|s: &str| {
lexical_core::parse::<f64>(s.as_bytes())
.map(Token::Float)
.map_err(|_| "parse error")
},
)(input)
}
fn enum_value(input: &str) -> IResult<&str, Token<'_>> {
map(
delimited(
char('.'),
take_while1(|c: char| c.is_alphanumeric() || c == '_'),
char('.'),
),
Token::Enum,
)(input)
}
fn null(input: &str) -> IResult<&str, Token<'_>> {
map(char('$'), |_| Token::Null)(input)
}
fn derived(input: &str) -> IResult<&str, Token<'_>> {
map(char('*'), |_| Token::Derived)(input)
}
fn typed_value(input: &str) -> IResult<&str, Token<'_>> {
map(
pair(
take_while1(|c: char| c.is_alphanumeric() || c == '_'),
delimited(
char('('),
separated_list0(delimited(ws, char(','), ws), token),
char(')'),
),
),
|(type_name, args)| Token::TypedValue(type_name, args),
)(input)
}
fn ws(input: &str) -> IResult<&str, ()> {
map(take_while(|c: char| c.is_whitespace()), |_| ())(input)
}
fn token(input: &str) -> IResult<&str, Token<'_>> {
delimited(
ws,
alt((
null, derived, entity_ref, enum_value, string_literal, list, float,
integer,
typed_value, )),
ws,
)(input)
}
fn list(input: &str) -> IResult<&str, Token<'_>> {
map(
delimited(
char('('),
separated_list0(delimited(ws, char(','), ws), token),
char(')'),
),
Token::List,
)(input)
}
pub fn parse_entity(input: &str) -> Result<(u32, IfcType, Vec<Token<'_>>)> {
let result: IResult<&str, (u32, &str, Vec<Token>)> = tuple((
delimited(
ws,
preceded(char('#'), map_res(digit1, |s: &str| s.parse::<u32>())),
ws,
),
preceded(
char('='),
delimited(
ws,
take_while1(|c: char| c.is_alphanumeric() || c == '_'),
ws,
),
),
delimited(
char('('),
separated_list0(delimited(ws, char(','), ws), token),
tuple((char(')'), ws, char(';'))),
),
))(input);
match result {
Ok((_, (id, type_str, args))) => {
let ifc_type = IfcType::from_str(type_str);
Ok((id, ifc_type, args))
}
Err(e) => Err(Error::parse(0, format!("Failed to parse entity: {}", e))),
}
}
pub struct EntityScanner<'a> {
#[allow(dead_code)]
content: &'a str,
bytes: &'a [u8],
position: usize,
}
impl<'a> EntityScanner<'a> {
pub fn new(content: &'a str) -> Self {
Self {
content,
bytes: content.as_bytes(),
position: 0,
}
}
#[inline]
pub fn next_entity(&mut self) -> Option<(u32, &'a str, usize, usize)> {
let remaining = &self.bytes[self.position..];
let start_offset = memchr::memchr(b'#', remaining)?;
let line_start = self.position + start_offset;
let line_content = &self.bytes[line_start..];
let end_offset = self.find_entity_end(line_content)?;
let line_end = line_start + end_offset + 1;
let id_start = line_start + 1;
let mut id_end = id_start;
while id_end < line_end && self.bytes[id_end].is_ascii_digit() {
id_end += 1;
}
let id = self.parse_u32_fast(id_start, id_end)?;
let eq_search = &self.bytes[id_end..line_end];
let eq_offset = memchr::memchr(b'=', eq_search)?;
let mut type_start = id_end + eq_offset + 1;
while type_start < line_end && self.bytes[type_start].is_ascii_whitespace() {
type_start += 1;
}
let mut type_end = type_start;
while type_end < line_end {
let b = self.bytes[type_end];
if b == b'(' || b.is_ascii_whitespace() {
break;
}
type_end += 1;
}
let type_name = std::str::from_utf8(&self.bytes[type_start..type_end]).unwrap_or("UNKNOWN");
self.position = line_end;
Some((id, type_name, line_start, line_end))
}
#[inline]
fn parse_u32_fast(&self, start: usize, end: usize) -> Option<u32> {
let mut result: u32 = 0;
for i in start..end {
let digit = self.bytes[i].wrapping_sub(b'0');
if digit > 9 {
return None;
}
result = result.wrapping_mul(10).wrapping_add(digit as u32);
}
Some(result)
}
#[inline]
fn find_entity_end(&self, content: &[u8]) -> Option<usize> {
let mut pos = 0;
let len = content.len();
let mut in_string = false;
while pos < len {
let b = content[pos];
if in_string {
if b == b'\'' {
if pos + 1 < len && content[pos + 1] == b'\'' {
pos += 2; continue;
}
in_string = false;
}
pos += 1;
} else {
match b {
b'\'' => {
in_string = true;
pos += 1;
}
b';' => {
return Some(pos);
}
b'\n' => {
pos += 1;
}
_ => {
pos += 1;
}
}
}
}
None
}
pub fn find_by_type(&mut self, target_type: &str) -> Vec<(u32, usize, usize)> {
let mut results = Vec::new();
while let Some((id, type_name, start, end)) = self.next_entity() {
if type_name.eq_ignore_ascii_case(target_type) {
results.push((id, start, end));
}
}
results
}
pub fn count_by_type(&mut self) -> rustc_hash::FxHashMap<String, usize> {
let mut counts = rustc_hash::FxHashMap::default();
while let Some((_, type_name, _, _)) = self.next_entity() {
*counts.entry(type_name.to_string()).or_insert(0) += 1;
}
counts
}
pub fn reset(&mut self) {
self.position = 0;
}
#[inline]
pub fn has_non_null_attribute(&self, start: usize, end: usize, attr_index: usize) -> bool {
let content = &self.bytes[start..end];
let paren_pos = match memchr::memchr(b'(', content) {
Some(p) => p + 1,
None => return false,
};
let mut pos = paren_pos;
let mut current_attr = 0;
let mut depth = 0; let mut in_string = false;
let check_target = |pos: usize, current_attr: usize, depth: usize| -> Option<bool> {
if current_attr == attr_index && depth == 0 {
let mut p = pos;
while p < content.len() && content[p].is_ascii_whitespace() {
p += 1;
}
if p < content.len() {
return Some(content[p] != b'$');
}
return Some(false);
}
None
};
if let Some(result) = check_target(pos, current_attr, depth) {
return result;
}
while pos < content.len() {
let b = content[pos];
if in_string {
if b == b'\'' {
if pos + 1 < content.len() && content[pos + 1] == b'\'' {
pos += 2;
continue;
}
in_string = false;
}
pos += 1;
continue;
}
match b {
b'\'' => {
in_string = true;
pos += 1;
}
b'(' => {
depth += 1;
pos += 1;
}
b')' => {
if depth == 0 {
return false;
}
depth -= 1;
pos += 1;
}
b',' if depth == 0 => {
current_attr += 1;
pos += 1;
while pos < content.len() && content[pos].is_ascii_whitespace() {
pos += 1;
}
if let Some(result) = check_target(pos, current_attr, depth) {
return result;
}
}
_ => {
pos += 1;
}
}
}
false
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_entity_ref() {
assert_eq!(entity_ref("#123"), Ok(("", Token::EntityRef(123))));
assert_eq!(entity_ref("#0"), Ok(("", Token::EntityRef(0))));
}
#[test]
fn test_string_literal() {
assert_eq!(string_literal("'hello'"), Ok(("", Token::String("hello"))));
assert_eq!(
string_literal("'with spaces'"),
Ok(("", Token::String("with spaces")))
);
}
#[test]
fn test_integer() {
assert_eq!(integer("42"), Ok(("", Token::Integer(42))));
assert_eq!(integer("-42"), Ok(("", Token::Integer(-42))));
assert_eq!(integer("0"), Ok(("", Token::Integer(0))));
}
#[test]
#[allow(clippy::approx_constant)]
fn test_float() {
assert_eq!(float("3.14"), Ok(("", Token::Float(3.14))));
assert_eq!(float("-3.14"), Ok(("", Token::Float(-3.14))));
assert_eq!(float("1.5E-10"), Ok(("", Token::Float(1.5e-10))));
}
#[test]
fn test_enum() {
assert_eq!(enum_value(".TRUE."), Ok(("", Token::Enum("TRUE"))));
assert_eq!(enum_value(".FALSE."), Ok(("", Token::Enum("FALSE"))));
assert_eq!(enum_value(".ELEMENT."), Ok(("", Token::Enum("ELEMENT"))));
}
#[test]
fn test_list() {
let result = list("(1,2,3)");
assert!(result.is_ok());
let (_, token) = result.unwrap();
match token {
Token::List(items) => {
assert_eq!(items.len(), 3);
assert_eq!(items[0], Token::Integer(1));
assert_eq!(items[1], Token::Integer(2));
assert_eq!(items[2], Token::Integer(3));
}
_ => panic!("Expected List token"),
}
}
#[test]
fn test_nested_list() {
let result = list("(1,(2,3),4)");
assert!(result.is_ok());
let (_, token) = result.unwrap();
match token {
Token::List(items) => {
assert_eq!(items.len(), 3);
assert_eq!(items[0], Token::Integer(1));
match &items[1] {
Token::List(inner) => {
assert_eq!(inner.len(), 2);
assert_eq!(inner[0], Token::Integer(2));
assert_eq!(inner[1], Token::Integer(3));
}
_ => panic!("Expected nested List"),
}
assert_eq!(items[2], Token::Integer(4));
}
_ => panic!("Expected List token"),
}
}
#[test]
fn test_parse_entity() {
let input = "#123=IFCWALL('guid','owner',$,$,'name',$,$,$);";
let result = parse_entity(input);
assert!(result.is_ok());
let (id, ifc_type, args) = result.unwrap();
assert_eq!(id, 123);
assert_eq!(ifc_type, IfcType::IfcWall);
assert_eq!(args.len(), 8);
}
#[test]
fn test_parse_entity_with_nested_list() {
let simple = "(0.,0.,1.)";
println!("Testing simple list: {}", simple);
let simple_result = list(simple);
println!("Simple list result: {:?}", simple_result);
let input = "#9=IFCDIRECTION((0.,0.,1.));";
println!("\nTesting full entity: {}", input);
let result = parse_entity(input);
if let Err(ref e) = result {
println!("Parse error: {:?}", e);
println!("\nTrying to parse just arguments: ((0.,0.,1.))");
let args_input = "((0.,0.,1.))";
let args_result = list(args_input);
println!("Args list result: {:?}", args_result);
}
assert!(result.is_ok(), "Failed to parse: {:?}", result);
let (id, _ifc_type, args) = result.unwrap();
assert_eq!(id, 9);
assert_eq!(args.len(), 1);
if let Token::List(inner) = &args[0] {
assert_eq!(inner.len(), 3);
} else {
panic!("Expected Token::List, got {:?}", args[0]);
}
}
#[test]
fn test_entity_scanner() {
let content = r#"
#1=IFCPROJECT('guid',$,$,$,$,$,$,$,$);
#2=IFCWALL('guid2',$,$,$,$,$,$,$);
#3=IFCDOOR('guid3',$,$,$,$,$,$,$);
#4=IFCWALL('guid4',$,$,$,$,$,$,$);
"#;
let mut scanner = EntityScanner::new(content);
let (id, type_name, _, _) = scanner.next_entity().unwrap();
assert_eq!(id, 1);
assert_eq!(type_name, "IFCPROJECT");
scanner.reset();
let walls = scanner.find_by_type("IFCWALL");
assert_eq!(walls.len(), 2);
assert_eq!(walls[0].0, 2);
assert_eq!(walls[1].0, 4);
scanner.reset();
let counts = scanner.count_by_type();
assert_eq!(counts.get("IFCPROJECT"), Some(&1));
assert_eq!(counts.get("IFCWALL"), Some(&2));
assert_eq!(counts.get("IFCDOOR"), Some(&1));
}
}