1use bimifc_model::{AttributeValue, DecodedEntity, EntityId, IfcType};
10use nom::{
11 branch::alt,
12 bytes::complete::{take_while, take_while1},
13 character::complete::{char, multispace0},
14 combinator::{opt, recognize},
15 multi::separated_list0,
16 sequence::{delimited, pair},
17 IResult, Parser,
18};
19
/// One lexical value from a STEP attribute list.
///
/// Text-carrying variants borrow from the parsed input (lifetime `'a`) instead of
/// copying it; use [`Token::to_attribute_value`] to obtain an owned value.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'a> {
    /// Reference to another entity, written `#<digits>`; holds the numeric id.
    EntityRef(u32),
    /// Text between single quotes, exactly as written (doubled quotes are not collapsed).
    String(&'a str),
    /// Numeric literal without a fraction or exponent part.
    Integer(i64),
    /// Numeric literal containing a `.` and/or an exponent.
    Float(f64),
    /// Enumeration written `.NAME.`; holds `NAME` without the dots.
    Enum(&'a str),
    /// Parenthesised, comma-separated sequence of nested tokens.
    List(Vec<Token<'a>>),
    /// `NAME(args…)`: a name followed by a parenthesised argument list.
    TypedValue(&'a str, Vec<Token<'a>>),
    /// The `$` marker.
    Null,
    /// The `*` marker.
    Derived,
}
42
43impl<'a> Token<'a> {
44 pub fn to_attribute_value(&self) -> AttributeValue {
46 match self {
47 Token::EntityRef(id) => AttributeValue::EntityRef(EntityId(*id)),
48 Token::String(s) => AttributeValue::String((*s).to_string()),
49 Token::Integer(i) => AttributeValue::Integer(*i),
50 Token::Float(f) => AttributeValue::Float(*f),
51 Token::Enum(s) => AttributeValue::Enum((*s).to_string()),
52 Token::List(items) => {
53 AttributeValue::List(items.iter().map(|t| t.to_attribute_value()).collect())
54 }
55 Token::TypedValue(name, args) => AttributeValue::TypedValue(
56 (*name).to_string(),
57 args.iter().map(|t| t.to_attribute_value()).collect(),
58 ),
59 Token::Null => AttributeValue::Null,
60 Token::Derived => AttributeValue::Derived,
61 }
62 }
63}
64
65fn ws(input: &str) -> IResult<&str, ()> {
71 let (input, _) = multispace0(input)?;
72 Ok((input, ()))
73}
74
75fn entity_ref(input: &str) -> IResult<&str, Token<'_>> {
77 let (input, _) = char('#')(input)?;
78 let (input, digits) = take_while1(|c: char| c.is_ascii_digit())(input)?;
79 let id = digits.parse::<u32>().unwrap_or(0);
80 Ok((input, Token::EntityRef(id)))
81}
82
83fn step_string(input: &str) -> IResult<&str, Token<'_>> {
85 let (input, _) = char('\'')(input)?;
86
87 let mut end = 0;
89 let bytes = input.as_bytes();
90 while end < bytes.len() {
91 if bytes[end] == b'\'' {
92 if end + 1 < bytes.len() && bytes[end + 1] == b'\'' {
94 end += 2;
95 continue;
96 }
97 break;
98 }
99 end += 1;
100 }
101
102 let content = &input[..end];
103 let remaining = &input[end + 1..]; Ok((remaining, Token::String(content)))
106}
107
108fn number(input: &str) -> IResult<&str, Token<'_>> {
110 let (input, num_str) = recognize((
111 opt(char('-')),
112 take_while1(|c: char| c.is_ascii_digit()),
113 opt(pair(char('.'), take_while(|c: char| c.is_ascii_digit()))),
114 opt((
115 alt((char('e'), char('E'))),
116 opt(alt((char('+'), char('-')))),
117 take_while1(|c: char| c.is_ascii_digit()),
118 )),
119 ))
120 .parse(input)?;
121
122 if num_str.contains('.') || num_str.contains('e') || num_str.contains('E') {
124 let f: f64 = lexical_core::parse(num_str.as_bytes()).unwrap_or(0.0);
125 Ok((input, Token::Float(f)))
126 } else {
127 let i: i64 = lexical_core::parse(num_str.as_bytes()).unwrap_or(0);
128 Ok((input, Token::Integer(i)))
129 }
130}
131
132fn enumeration(input: &str) -> IResult<&str, Token<'_>> {
134 let (input, _) = char('.')(input)?;
135 let (input, name) = take_while1(|c: char| c.is_alphanumeric() || c == '_')(input)?;
136 let (input, _) = char('.')(input)?;
137 Ok((input, Token::Enum(name)))
138}
139
140fn null_value(input: &str) -> IResult<&str, Token<'_>> {
142 let (input, _) = char('$')(input)?;
143 Ok((input, Token::Null))
144}
145
146fn derived_value(input: &str) -> IResult<&str, Token<'_>> {
148 let (input, _) = char('*')(input)?;
149 Ok((input, Token::Derived))
150}
151
152fn list(input: &str) -> IResult<&str, Token<'_>> {
154 let (input, items) = delimited(
155 pair(char('('), ws),
156 separated_list0((ws, char(','), ws), token),
157 pair(ws, char(')')),
158 )
159 .parse(input)?;
160 Ok((input, Token::List(items)))
161}
162
163fn typed_value(input: &str) -> IResult<&str, Token<'_>> {
165 let (input, type_name) = take_while1(|c: char| c.is_alphanumeric() || c == '_')(input)?;
166 let (input, _) = ws(input)?;
167 let (input, args) = delimited(
168 pair(char('('), ws),
169 separated_list0((ws, char(','), ws), token),
170 pair(ws, char(')')),
171 )
172 .parse(input)?;
173 Ok((input, Token::TypedValue(type_name, args)))
174}
175
176fn token(input: &str) -> IResult<&str, Token<'_>> {
178 alt((
179 entity_ref,
180 step_string,
181 null_value,
182 derived_value,
183 enumeration,
184 number,
185 list,
186 typed_value,
187 ))
188 .parse(input)
189}
190
191fn attribute_list(input: &str) -> IResult<&str, Vec<Token<'_>>> {
193 delimited(
194 pair(char('('), ws),
195 separated_list0((ws, char(','), ws), token),
196 pair(ws, char(')')),
197 )
198 .parse(input)
199}
200
201pub fn parse_entity(input: &str) -> Result<DecodedEntity, String> {
209 let input = input.trim_start();
211
212 let (input, _) = char::<&str, nom::error::Error<&str>>('#')
214 .parse(input)
215 .map_err(|_| "Expected # at start of entity")?;
216
217 let (input, id_str) =
218 take_while1::<_, &str, nom::error::Error<&str>>(|c: char| c.is_ascii_digit())
219 .parse(input)
220 .map_err(|_| "Expected entity ID")?;
221
222 let id: u32 = id_str.parse().map_err(|_| "Invalid entity ID")?;
223
224 let (input, _) = (ws, char('='), ws)
226 .parse(input)
227 .map_err(|_: nom::Err<nom::error::Error<&str>>| "Expected = after entity ID")?;
228
229 let (input, type_name) =
231 take_while1::<_, &str, nom::error::Error<&str>>(|c: char| c.is_alphanumeric() || c == '_')
232 .parse(input)
233 .map_err(|_| "Expected type name")?;
234
235 let (input, _) = ws(input).unwrap_or((input, ()));
237
238 let (_, tokens) =
239 attribute_list(input).map_err(|e| format!("Failed to parse attributes: {:?}", e))?;
240
241 let attributes: Vec<AttributeValue> = tokens.iter().map(|t| t.to_attribute_value()).collect();
243
244 Ok(DecodedEntity {
245 id: EntityId(id),
246 ifc_type: IfcType::parse(type_name),
247 attributes,
248 })
249}
250
251pub fn parse_entity_at(content: &str, start: usize, end: usize) -> Result<DecodedEntity, String> {
253 let slice = &content[start..end];
254 parse_entity(slice)
255}
256
257#[allow(dead_code)]
264pub fn parse_coordinate_list_3d_fast(content: &str) -> Option<Vec<f64>> {
265 let start = content.find("((")?;
267 let end = content.rfind("))")?;
268 let list_content = &content[start + 1..end + 1];
269
270 let mut coords = Vec::new();
271 let mut current = list_content;
272
273 while let Some(paren_start) = current.find('(') {
274 let paren_end = current[paren_start..].find(')')? + paren_start;
275 let point_str = ¤t[paren_start + 1..paren_end];
276
277 for num_str in point_str.split(',') {
279 let num_str = num_str.trim();
280 if !num_str.is_empty() {
281 let val: f64 = lexical_core::parse(num_str.as_bytes()).ok()?;
282 coords.push(val);
283 }
284 }
285
286 current = ¤t[paren_end + 1..];
287 }
288
289 if coords.is_empty() {
290 None
291 } else {
292 Some(coords)
293 }
294}
295
296#[allow(dead_code)]
299pub fn parse_index_list_fast(content: &str) -> Option<Vec<u32>> {
300 let start = content.find("((")?;
301 let end = content.rfind("))")?;
302 let list_content = &content[start + 1..end + 1];
303
304 let mut indices = Vec::new();
305 let mut current = list_content;
306
307 while let Some(paren_start) = current.find('(') {
308 let paren_end = current[paren_start..].find(')')? + paren_start;
309 let index_str = ¤t[paren_start + 1..paren_end];
310
311 for num_str in index_str.split(',') {
312 let num_str = num_str.trim();
313 if !num_str.is_empty() {
314 let val: u32 = lexical_core::parse(num_str.as_bytes()).ok()?;
315 indices.push(val.saturating_sub(1));
317 }
318 }
319
320 current = ¤t[paren_end + 1..];
321 }
322
323 if indices.is_empty() {
324 None
325 } else {
326 Some(indices)
327 }
328}
329
#[cfg(test)]
mod tests {
    use super::*;

    /// `#123` is consumed completely and yields the numeric id.
    #[test]
    fn test_parse_entity_ref() {
        assert_eq!(entity_ref("#123"), Ok(("", Token::EntityRef(123))));
    }

    /// The surrounding quotes are stripped from a plain string.
    #[test]
    fn test_parse_string() {
        assert_eq!(
            step_string("'hello world'"),
            Ok(("", Token::String("hello world")))
        );
    }

    /// A doubled quote stays inside the string and is returned as written.
    #[test]
    fn test_parse_string_with_escaped_quote() {
        assert_eq!(
            step_string("'it''s a test'"),
            Ok(("", Token::String("it''s a test")))
        );
    }

    /// A literal without fraction or exponent is an integer.
    #[test]
    fn test_parse_number_integer() {
        assert_eq!(number("42"), Ok(("", Token::Integer(42))));
    }

    /// A literal with a fraction is a float.
    #[test]
    fn test_parse_number_float() {
        let (rest, parsed) = number("3.14159").unwrap();
        assert_eq!(rest, "");
        match parsed {
            Token::Float(value) => assert!((value - std::f64::consts::PI).abs() < 1e-5),
            _ => panic!("Expected float"),
        }
    }

    /// Scientific notation with a negative exponent is a float.
    #[test]
    fn test_parse_number_scientific() {
        let (rest, parsed) = number("1.5E-3").unwrap();
        assert_eq!(rest, "");
        match parsed {
            Token::Float(value) => assert!((value - 0.0015).abs() < 1e-10),
            _ => panic!("Expected float"),
        }
    }

    /// The dots around an enumeration name are stripped.
    #[test]
    fn test_parse_enum() {
        assert_eq!(enumeration(".TRUE."), Ok(("", Token::Enum("TRUE"))));
    }

    /// Whitespace after the commas is tolerated and all items are kept.
    #[test]
    fn test_parse_list() {
        let (rest, parsed) = list("(1, 2, 3)").unwrap();
        assert_eq!(rest, "");
        match parsed {
            Token::List(items) => assert_eq!(items.len(), 3),
            _ => panic!("Expected list"),
        }
    }

    /// A full entity line yields id, type and one attribute per argument.
    #[test]
    fn test_parse_entity() {
        let DecodedEntity {
            id,
            ifc_type,
            attributes,
        } = parse_entity("#1=IFCWALL('abc',$,#2);").unwrap();

        assert_eq!(id, EntityId(1));
        assert_eq!(ifc_type, IfcType::IfcWall);
        assert_eq!(attributes.len(), 3);
    }
}