1use memchr::memchr;
10use rustc_hash::FxHashMap;
11
/// Lookup table from an entity's numeric id (the digits after `#`) to the
/// `(start, end)` byte offsets of that entity's statement within the scanned text.
///
/// The offsets are the `start`/`end` values produced by
/// `EntityScanner::next_entity`.
pub type EntityIndex = FxHashMap<u32, (usize, usize)>;
14
/// Cursor-based scanner over the text of a STEP/IFC file.
///
/// The scanner borrows the file contents and remembers the byte offset at which
/// the next scan resumes, so cloning it is cheap and yields an independent cursor
/// over the same text.
///
/// Note that the derived `Debug` output includes the full borrowed text.
#[derive(Debug, Clone)]
pub struct EntityScanner<'a> {
    /// Full text being scanned.
    content: &'a str,
    /// Byte offset into `content` at which scanning resumes.
    pos: usize,
}
23
24impl<'a> EntityScanner<'a> {
25 pub fn new(content: &'a str) -> Self {
27 let pos = content.find("DATA;").map(|p| p + 5).unwrap_or(0);
29
30 Self { content, pos }
31 }
32
33 pub fn next_entity(&mut self) -> Option<(u32, &'a str, usize, usize)> {
37 let bytes = self.content.as_bytes();
38
39 while self.pos < bytes.len() {
41 let hash_pos = memchr(b'#', &bytes[self.pos..])?;
43 self.pos += hash_pos;
44
45 let is_entity_start = self.pos == 0
48 || bytes[self.pos - 1] == b'\n'
49 || bytes[self.pos - 1] == b'\r'
50 || bytes[self.pos - 1] == b';';
51
52 if !is_entity_start {
53 self.pos += 1;
54 continue;
55 }
56
57 let start = self.pos;
58
59 self.pos += 1; let id_start = self.pos;
62
63 while self.pos < bytes.len() && bytes[self.pos].is_ascii_digit() {
64 self.pos += 1;
65 }
66
67 if self.pos == id_start {
68 continue;
70 }
71
72 let id: u32 = self.content[id_start..self.pos].parse().ok()?;
73
74 while self.pos < bytes.len() && (bytes[self.pos] == b' ' || bytes[self.pos] == b'\t') {
76 self.pos += 1;
77 }
78
79 if self.pos >= bytes.len() || bytes[self.pos] != b'=' {
80 continue;
81 }
82 self.pos += 1; while self.pos < bytes.len() && (bytes[self.pos] == b' ' || bytes[self.pos] == b'\t') {
86 self.pos += 1;
87 }
88
89 let type_start = self.pos;
91 while self.pos < bytes.len()
92 && (bytes[self.pos].is_ascii_alphanumeric() || bytes[self.pos] == b'_')
93 {
94 self.pos += 1;
95 }
96
97 if self.pos == type_start {
98 continue;
99 }
100
101 let type_name = &self.content[type_start..self.pos];
102
103 let end = self.find_entity_end()?;
105
106 return Some((id, type_name, start, end));
107 }
108
109 None
110 }
111
112 fn find_entity_end(&mut self) -> Option<usize> {
114 let bytes = self.content.as_bytes();
115 let mut in_string = false;
116
117 while self.pos < bytes.len() {
118 match bytes[self.pos] {
119 b'\'' => {
120 if in_string && self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'\'' {
122 self.pos += 2;
123 continue;
124 }
125 in_string = !in_string;
126 }
127 b';' if !in_string => {
128 self.pos += 1;
129 return Some(self.pos);
130 }
131 _ => {}
132 }
133 self.pos += 1;
134 }
135
136 None
137 }
138
139 pub fn build_index(content: &'a str) -> EntityIndex {
141 let mut scanner = Self::new(content);
142 let mut index = FxHashMap::default();
143
144 while let Some((id, _, start, end)) = scanner.next_entity() {
145 index.insert(id, (start, end));
146 }
147
148 index
149 }
150
151 pub fn count_by_type(content: &'a str) -> FxHashMap<String, usize> {
153 let mut scanner = Self::new(content);
154 let mut counts: FxHashMap<String, usize> = FxHashMap::default();
155
156 while let Some((_, type_name, _, _)) = scanner.next_entity() {
157 *counts.entry(type_name.to_uppercase()).or_insert(0) += 1;
158 }
159
160 counts
161 }
162
163 pub fn find_by_type(content: &'a str, target_type: &str) -> Vec<(u32, usize, usize)> {
165 let mut scanner = Self::new(content);
166 let mut results = Vec::new();
167 let target_upper = target_type.to_uppercase();
168
169 while let Some((id, type_name, start, end)) = scanner.next_entity() {
170 if type_name.eq_ignore_ascii_case(&target_upper) {
171 results.push((id, start, end));
172 }
173 }
174
175 results
176 }
177
178 pub fn entity_count(content: &'a str) -> usize {
180 let mut scanner = Self::new(content);
181 let mut count = 0;
182
183 while scanner.next_entity().is_some() {
184 count += 1;
185 }
186
187 count
188 }
189}
190
191pub fn parse_header(content: &str) -> HeaderInfo {
193 let mut info = HeaderInfo::default();
194
195 let header_start = content.find("HEADER;").unwrap_or(0);
197 let header_end = content.find("ENDSEC;").unwrap_or(content.len());
198 let header = &content[header_start..header_end];
199
200 if let Some(schema_start) = header.find("FILE_SCHEMA") {
202 if let Some(paren_start) = header[schema_start..].find("((") {
203 let start = schema_start + paren_start + 2;
204 if let Some(paren_end) = header[start..].find("))") {
205 let schema_list = &header[start..start + paren_end];
206 if let Some(quote_start) = schema_list.find('\'') {
208 if let Some(quote_end) = schema_list[quote_start + 1..].find('\'') {
209 info.schema_version =
210 schema_list[quote_start + 1..quote_start + 1 + quote_end].to_string();
211 }
212 }
213 }
214 }
215 }
216
217 if let Some(name_start) = header.find("FILE_NAME") {
219 if let Some(paren_start) = header[name_start..].find('(') {
221 let start = name_start + paren_start + 1;
222 if let Some((file_name, rest)) = parse_header_string(&header[start..]) {
224 info.file_name = Some(file_name);
225
226 if let Some(comma) = rest.find(',') {
228 if let Some((timestamp, rest2)) = parse_header_string(&rest[comma + 1..]) {
229 info.timestamp = Some(timestamp);
230
231 if let Some(comma2) = rest2.find(',') {
233 if let Some((author, rest3)) = parse_header_list(&rest2[comma2 + 1..]) {
234 info.author = author.first().cloned();
235
236 if let Some(comma3) = rest3.find(',') {
238 if let Some((org, rest4)) =
239 parse_header_list(&rest3[comma3 + 1..])
240 {
241 info.organization = org.first().cloned();
242
243 if let Some(comma4) = rest4.find(',') {
245 if let Some((preproc, rest5)) =
246 parse_header_string(&rest4[comma4 + 1..])
247 {
248 info.preprocessor_version = Some(preproc);
249
250 if let Some(comma5) = rest5.find(',') {
252 if let Some((orig_sys, _)) =
253 parse_header_string(&rest5[comma5 + 1..])
254 {
255 info.originating_system = Some(orig_sys);
256 }
257 }
258 }
259 }
260 }
261 }
262 }
263 }
264 }
265 }
266 }
267 }
268 }
269
270 info
271}
272
/// Parses one header string argument from the start of `s`.
///
/// Leading whitespace is ignored. Two forms are accepted:
/// * a single-quoted string, in which `''` stands for a literal `'`;
/// * `$`, which yields an empty string.
///
/// Returns the decoded value together with the text following the closing
/// quote (or following `$`). Returns `None` when `s` starts with anything
/// else, or when a quoted string is never closed.
fn parse_header_string(s: &str) -> Option<(String, &str)> {
    let s = s.trim_start();
    let body = match s.strip_prefix('\'') {
        Some(body) => body,
        None => return s.strip_prefix('$').map(|rest| (String::new(), rest)),
    };

    let bytes = body.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'\'' {
            if bytes.get(i + 1) == Some(&b'\'') {
                // Doubled quote is an escaped quote, not the terminator.
                i += 2;
                continue;
            }
            return Some((body[..i].replace("''", "'"), &body[i + 1..]));
        }
        i += 1;
    }

    // Ran off the end without a closing quote: reject rather than slicing
    // past the end of the input.
    None
}
300
301fn parse_header_list(s: &str) -> Option<(Vec<String>, &str)> {
303 let s = s.trim_start();
304 if !s.starts_with('(') {
305 return Some((Vec::new(), s));
306 }
307
308 let mut items = Vec::new();
309 let mut current = &s[1..]; loop {
312 current = current.trim_start();
313 if let Some(stripped) = current.strip_prefix(')') {
314 return Some((items, stripped));
315 }
316
317 if let Some((item, rest)) = parse_header_string(current) {
318 if !item.is_empty() {
319 items.push(item);
320 }
321 current = rest.trim_start();
322 if current.starts_with(',') {
323 current = ¤t[1..];
324 }
325 } else {
326 if let Some(pos) = current.find([',', ')']) {
328 current = ¤t[pos..];
329 if current.starts_with(',') {
330 current = ¤t[1..];
331 }
332 } else {
333 break;
334 }
335 }
336 }
337
338 Some((items, current))
339}
340
/// Metadata read from a file's `HEADER` section by `parse_header`.
///
/// Every field keeps its default (`""` / `None`) when the corresponding
/// header value is missing or could not be parsed.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct HeaderInfo {
    /// First schema name listed in `FILE_SCHEMA`; empty when not found.
    pub schema_version: String,
    /// First argument of `FILE_NAME`.
    pub file_name: Option<String>,
    /// Second argument of `FILE_NAME`, stored as written in the file.
    pub timestamp: Option<String>,
    /// First non-empty entry of the `FILE_NAME` author list.
    pub author: Option<String>,
    /// First non-empty entry of the `FILE_NAME` organization list.
    pub organization: Option<String>,
    /// Fifth argument of `FILE_NAME`.
    pub preprocessor_version: Option<String>,
    /// Sixth argument of `FILE_NAME`.
    pub originating_system: Option<String>,
}
352
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal STEP file: a complete header plus four entities in the data section.
    const TEST_IFC: &str = r#"ISO-10303-21;
HEADER;
FILE_DESCRIPTION(('ViewDefinition [CoordinationView]'),'2;1');
FILE_NAME('test.ifc','2024-01-01T00:00:00',('Author'),('Org'),'Preprocessor','App','');
FILE_SCHEMA(('IFC2X3'));
ENDSEC;
DATA;
#1=IFCPROJECT('guid',$,'Project',$,$,$,$,$,#2);
#2=IFCUNITASSIGNMENT((#3));
#3=IFCSIUNIT(*,.LENGTHUNIT.,.MILLI.,.METRE.);
#4=IFCWALL('guid',$,'Wall 1',$,$,#5,#6,$);
ENDSEC;
END-ISO-10303-21;
"#;

    #[test]
    fn test_scanner_finds_entities() {
        let mut scanner = EntityScanner::new(TEST_IFC);
        let mut entities = Vec::new();

        while let Some((id, type_name, _, _)) = scanner.next_entity() {
            entities.push((id, type_name.to_string()));
        }

        assert_eq!(entities.len(), 4);
        assert_eq!(entities[0], (1, "IFCPROJECT".to_string()));
        assert_eq!(entities[3], (4, "IFCWALL".to_string()));

        // An exhausted scanner keeps reporting the end of input.
        assert!(scanner.next_entity().is_none());
    }

    #[test]
    fn test_build_index() {
        let index = EntityScanner::build_index(TEST_IFC);
        assert_eq!(index.len(), 4);
        assert!(index.contains_key(&1));
        assert!(index.contains_key(&4));

        // The recorded span covers the whole statement, from '#' through ';'.
        let (start, end) = index[&4];
        assert_eq!(
            &TEST_IFC[start..end],
            "#4=IFCWALL('guid',$,'Wall 1',$,$,#5,#6,$);"
        );
    }

    #[test]
    fn test_count_by_type() {
        let counts = EntityScanner::count_by_type(TEST_IFC);
        assert_eq!(counts.len(), 4);
        assert_eq!(counts.get("IFCPROJECT"), Some(&1));
        assert_eq!(counts.get("IFCWALL"), Some(&1));
        assert_eq!(counts.get("IFCDOOR"), None);
    }

    #[test]
    fn test_find_by_type_ignores_case() {
        let walls = EntityScanner::find_by_type(TEST_IFC, "ifcwall");
        let ids: Vec<u32> = walls.iter().map(|&(id, _, _)| id).collect();
        assert_eq!(ids, vec![4]);

        assert!(EntityScanner::find_by_type(TEST_IFC, "IFCDOOR").is_empty());
    }

    #[test]
    fn test_entity_count() {
        assert_eq!(EntityScanner::entity_count(TEST_IFC), 4);
        assert_eq!(EntityScanner::entity_count(""), 0);
    }

    #[test]
    fn test_parse_header() {
        let info = parse_header(TEST_IFC);
        assert_eq!(info.schema_version, "IFC2X3");
        assert_eq!(info.file_name, Some("test.ifc".to_string()));
        assert_eq!(info.timestamp.as_deref(), Some("2024-01-01T00:00:00"));
        assert_eq!(info.author.as_deref(), Some("Author"));
        assert_eq!(info.organization.as_deref(), Some("Org"));
        assert_eq!(info.preprocessor_version.as_deref(), Some("Preprocessor"));
        assert_eq!(info.originating_system.as_deref(), Some("App"));
    }

    #[test]
    fn test_parse_header_without_header_section() {
        let info = parse_header("");
        assert_eq!(info.schema_version, "");
        assert_eq!(info.file_name, None);
    }
}