1use std::collections::HashMap;
18
19use supersigil_core::{ComponentDefs, ExtractedComponent, SourcePosition};
20
21use crate::util::{is_pascal_case, line_col};
22use crate::xml_parser::XmlNode;
23
24fn collect_body_text(
34 children: &[XmlNode],
35 defs: &ComponentDefs,
36) -> (Option<String>, Option<usize>, Option<usize>) {
37 let mut buf = String::new();
38 let mut first_offset: Option<usize> = None;
39 let mut last_end_offset: Option<usize> = None;
40 collect_text_recursive(
41 &mut buf,
42 &mut first_offset,
43 &mut last_end_offset,
44 children,
45 defs,
46 );
47 let trimmed = buf.trim();
48 if trimmed.is_empty() {
49 (None, None, None)
50 } else {
51 let leading_ws = buf.len() - buf.trim_start().len();
53 let offset = first_offset.map(|o| o + leading_ws);
54 let trailing_ws = buf.len() - buf.trim_end().len();
56 let end_offset = last_end_offset.map(|o| o - trailing_ws);
57 (Some(trimmed.to_owned()), offset, end_offset)
58 }
59}
60
61fn collect_text_recursive(
63 buf: &mut String,
64 first_offset: &mut Option<usize>,
65 last_end_offset: &mut Option<usize>,
66 nodes: &[XmlNode],
67 defs: &ComponentDefs,
68) {
69 for node in nodes {
70 match node {
71 XmlNode::Text {
72 content,
73 offset,
74 end_offset,
75 } => {
76 if first_offset.is_none() {
77 *first_offset = Some(*offset);
78 }
79 *last_end_offset = Some(*end_offset);
80 buf.push_str(content);
81 }
82 XmlNode::Element { name, children, .. } => {
83 if defs.is_known(name) {
85 continue;
86 }
87 collect_text_recursive(buf, first_offset, last_end_offset, children, defs);
90 }
91 }
92 }
93}
94
/// Copies a list of `(name, value)` attribute pairs into a map.
///
/// When the same name occurs more than once, the last value wins.
fn attributes_to_map(attrs: &[(String, String)]) -> HashMap<String, String> {
    let mut map = HashMap::new();
    for (key, value) in attrs {
        map.insert(key.clone(), value.clone());
    }
    map
}
99
/// Read-only state shared by the recursive extraction functions.
struct ExtractionCtx<'a> {
    /// Source text that node offsets are resolved against when computing
    /// line/column positions.
    content: &'a str,
    /// Component definitions used to decide which element names are known.
    defs: &'a ComponentDefs,
}
110
111#[must_use]
122pub fn extract_components_from_xml(
123 nodes: &[XmlNode],
124 content: &str,
125 component_defs: &ComponentDefs,
126) -> Vec<ExtractedComponent> {
127 let ctx = ExtractionCtx {
128 content,
129 defs: component_defs,
130 };
131 let mut components = Vec::new();
132 collect_from_nodes(nodes, &ctx, &mut components);
133 components
134}
135
136fn collect_from_nodes(
142 nodes: &[XmlNode],
143 ctx: &ExtractionCtx<'_>,
144 out: &mut Vec<ExtractedComponent>,
145) {
146 for node in nodes {
147 collect_component(node, ctx, out);
148 }
149}
150
151fn collect_component(node: &XmlNode, ctx: &ExtractionCtx<'_>, out: &mut Vec<ExtractedComponent>) {
154 match node {
155 XmlNode::Text { .. } => {}
156
157 XmlNode::Element {
158 name,
159 attributes,
160 children,
161 offset,
162 end_offset,
163 } => {
164 if !is_pascal_case(name) {
165 collect_from_nodes(children, ctx, out);
166 return;
167 }
168
169 if !ctx.defs.is_known(name) {
170 collect_from_nodes(children, ctx, out);
171 return;
172 }
173
174 let (line, column) = line_col(ctx.content, *offset);
176 let position = SourcePosition {
177 byte_offset: *offset,
178 line,
179 column,
180 };
181
182 let (end_line, end_column) = line_col(ctx.content, *end_offset);
183 let end_position = SourcePosition {
184 byte_offset: *end_offset,
185 line: end_line,
186 column: end_column,
187 };
188
189 let attrs = attributes_to_map(attributes);
190
191 let mut child_components = Vec::new();
192 collect_from_nodes(children, ctx, &mut child_components);
193
194 let (body_text, body_text_offset, body_text_end_offset) =
195 collect_body_text(children, ctx.defs);
196
197 out.push(ExtractedComponent {
198 name: name.clone(),
199 attributes: attrs,
200 children: child_components,
201 body_text,
202 body_text_offset,
203 body_text_end_offset,
204 code_blocks: Vec::new(),
205 position,
206 end_position,
207 });
208 }
209 }
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the public entry point.
    fn extract(nodes: &[XmlNode], content: &str, defs: &ComponentDefs) -> Vec<ExtractedComponent> {
        extract_components_from_xml(nodes, content, defs)
    }

    /// Builds a text node whose span is `0..s.len()`.
    fn text(s: &str) -> XmlNode {
        XmlNode::Text {
            content: s.into(),
            offset: 0,
            end_offset: s.len(),
        }
    }

    /// Builds an element node with explicit start and end offsets.
    fn element_ending_at(
        name: &str,
        attrs: &[(&str, &str)],
        children: Vec<XmlNode>,
        offset: usize,
        end_offset: usize,
    ) -> XmlNode {
        XmlNode::Element {
            name: name.into(),
            attributes: attrs
                .iter()
                .map(|&(key, value)| (key.to_owned(), value.to_owned()))
                .collect(),
            children,
            offset,
            end_offset,
        }
    }

    /// Builds an element node starting at `offset` with an end offset of 0.
    fn element(
        name: &str,
        attrs: &[(&str, &str)],
        children: Vec<XmlNode>,
        offset: usize,
    ) -> XmlNode {
        element_ending_at(name, attrs, children, offset, 0)
    }

    #[test]
    fn extracts_known_component() {
        let defs = ComponentDefs::defaults();
        let content = "0123456789<Criterion id=\"c1\">Some text</Criterion>";
        let nodes = [element_ending_at(
            "Criterion",
            &[("id", "c1")],
            vec![text("Some text")],
            10,
            content.len(),
        )];

        let result = extract(&nodes, content, &defs);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "Criterion");
        assert_eq!(result[0].attributes["id"], "c1");
        assert_eq!(result[0].body_text.as_deref(), Some("Some text"));
        assert_eq!(result[0].position.byte_offset, 10);
        assert_eq!(result[0].end_position.byte_offset, content.len());
        assert_eq!(result[0].end_position.line, 1);
        assert_eq!(result[0].end_position.column, content.len() + 1);
    }

    #[test]
    fn extracts_multiple_top_level_components() {
        let defs = ComponentDefs::defaults();
        let nodes = [
            element("Criterion", &[("id", "c1")], vec![text("text")], 0),
            element("VerifiedBy", &[("refs", "c1")], vec![], 50),
        ];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].name, "Criterion");
        assert_eq!(result[1].name, "VerifiedBy");
    }

    #[test]
    fn unknown_pascal_case_is_transparent_wrapper() {
        let defs = ComponentDefs::defaults();
        let criterion = element("Criterion", &[("id", "c1")], vec![], 20);
        let nodes = [element("Aside", &[], vec![criterion], 0)];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "Criterion");
    }

    #[test]
    fn deeply_nested_unknown_wrappers_are_transparent() {
        let defs = ComponentDefs::defaults();
        let criterion = element("Criterion", &[("id", "deep")], vec![], 40);
        let inner = element("Inner", &[], vec![criterion], 20);
        let nodes = [element("Wrapper", &[], vec![inner], 0)];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "Criterion");
        assert_eq!(result[0].attributes["id"], "deep");
    }

    #[test]
    fn lowercase_elements_are_ignored() {
        let defs = ComponentDefs::defaults();
        let criterion = element("Criterion", &[("id", "c1")], vec![], 10);
        let nodes = [element("div", &[], vec![criterion], 0)];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "Criterion");
    }

    #[test]
    fn attributes_stored_as_raw_strings() {
        let defs = ComponentDefs::defaults();
        let nodes = [element(
            "Criterion",
            &[("id", "c1"), ("strategy", "tag")],
            vec![],
            0,
        )];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result[0].attributes.len(), 2);
        assert_eq!(result[0].attributes["id"], "c1");
        assert_eq!(result[0].attributes["strategy"], "tag");
    }

    #[test]
    fn self_closing_element_has_empty_children_and_no_body_text() {
        let defs = ComponentDefs::defaults();
        let nodes = [element("VerifiedBy", &[("refs", "c1")], vec![], 0)];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result.len(), 1);
        assert!(result[0].children.is_empty());
        assert_eq!(result[0].body_text, None);
    }

    #[test]
    fn body_text_from_text_children() {
        let defs = ComponentDefs::defaults();
        let nodes = [element(
            "Criterion",
            &[("id", "c1")],
            vec![text("\n The system shall do something.\n")],
            0,
        )];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(
            result[0].body_text.as_deref(),
            Some("The system shall do something.")
        );
    }

    #[test]
    fn body_text_none_for_whitespace_only() {
        let defs = ComponentDefs::defaults();
        let nodes = [element(
            "Criterion",
            &[("id", "c1")],
            vec![text(" \n \n ")],
            0,
        )];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result[0].body_text, None);
    }

    #[test]
    fn body_text_excludes_known_child_components() {
        let defs = ComponentDefs::defaults();
        let child = element("Criterion", &[("id", "c1")], vec![text("Child text")], 30);
        let nodes = [element(
            "AcceptanceCriteria",
            &[],
            vec![text("Parent text"), child],
            0,
        )];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "AcceptanceCriteria");
        assert_eq!(result[0].body_text.as_deref(), Some("Parent text"));
    }

    #[test]
    fn body_text_includes_text_from_unknown_wrapper() {
        let defs = ComponentDefs::defaults();
        let wrapper = element("Emphasis", &[], vec![text("important")], 20);
        let nodes = [element("Criterion", &[("id", "c1")], vec![wrapper], 0)];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result[0].body_text.as_deref(), Some("important"));
    }

    #[test]
    fn nested_child_components_collected() {
        let defs = ComponentDefs::defaults();
        let first = element("Criterion", &[("id", "c1")], vec![text("First")], 20);
        let second = element("Criterion", &[("id", "c2")], vec![text("Second")], 60);
        let nodes = [element("AcceptanceCriteria", &[], vec![first, second], 0)];
        let content = "x".repeat(100);

        let result = extract(&nodes, &content, &defs);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "AcceptanceCriteria");
        assert_eq!(result[0].children.len(), 2);
        assert_eq!(result[0].children[0].name, "Criterion");
        assert_eq!(result[0].children[0].attributes["id"], "c1");
        assert_eq!(result[0].children[0].body_text.as_deref(), Some("First"));
        assert_eq!(result[0].children[1].name, "Criterion");
        assert_eq!(result[0].children[1].attributes["id"], "c2");
        assert_eq!(result[0].children[1].body_text.as_deref(), Some("Second"));
    }

    #[test]
    fn position_computed_from_byte_offset() {
        let defs = ComponentDefs::defaults();
        let content = "line1\nline2\n<Criterion id=\"c1\" />";
        let nodes = [element("Criterion", &[("id", "c1")], vec![], 12)];

        let result = extract(&nodes, content, &defs);
        assert_eq!(result[0].position.byte_offset, 12);
        assert_eq!(result[0].position.line, 3);
        assert_eq!(result[0].position.column, 1);
    }

    #[test]
    fn position_mid_line() {
        let defs = ComponentDefs::defaults();
        let content = "abcdef\n <Criterion />";
        let nodes = [element("Criterion", &[("id", "c1")], vec![], 9)];

        let result = extract(&nodes, content, &defs);
        assert_eq!(result[0].position.byte_offset, 9);
        assert_eq!(result[0].position.line, 2);
        assert_eq!(result[0].position.column, 3);
    }

    #[test]
    fn empty_nodes_produces_empty_result() {
        let defs = ComponentDefs::defaults();
        let result = extract(&[], "", &defs);
        assert!(result.is_empty());
    }

    #[test]
    fn text_only_nodes_produce_no_components() {
        let defs = ComponentDefs::defaults();
        let nodes = [text("just some text")];
        let result = extract(&nodes, "just some text", &defs);
        assert!(result.is_empty());
    }

    #[test]
    fn realistic_spec_extraction() {
        let defs = ComponentDefs::defaults();
        let content = r#"---
supersigil:
 id: test-spec
---

```supersigil-xml
<AcceptanceCriteria>
 <Criterion id="perf-latency" strategy="tag">
 P99 latency must be under 100ms for API requests.
 </Criterion>
</AcceptanceCriteria>
<VerifiedBy refs="perf-latency" />
```
"#;
        let criterion_node = element(
            "Criterion",
            &[("id", "perf-latency"), ("strategy", "tag")],
            vec![text(
                "\n P99 latency must be under 100ms for API requests.\n ",
            )],
            70,
        );
        let nodes = [
            element("AcceptanceCriteria", &[], vec![criterion_node], 50),
            element("VerifiedBy", &[("refs", "perf-latency")], vec![], 160),
        ];

        let result = extract(&nodes, content, &defs);
        assert_eq!(result.len(), 2);

        assert_eq!(result[0].name, "AcceptanceCriteria");
        assert!(result[0].attributes.is_empty());
        assert_eq!(result[0].children.len(), 1);

        let criterion = &result[0].children[0];
        assert_eq!(criterion.name, "Criterion");
        assert_eq!(criterion.attributes["id"], "perf-latency");
        assert_eq!(criterion.attributes["strategy"], "tag");
        assert_eq!(
            criterion.body_text.as_deref(),
            Some("P99 latency must be under 100ms for API requests.")
        );

        assert_eq!(result[1].name, "VerifiedBy");
        assert_eq!(result[1].attributes["refs"], "perf-latency");
        assert_eq!(result[1].body_text, None);
        assert!(result[1].children.is_empty());
    }

    #[test]
    fn public_api_extracts_components() {
        let defs = ComponentDefs::defaults();
        let nodes = [element("Criterion", &[("id", "c1")], vec![], 0)];

        let result = extract_components_from_xml(&nodes, "x", &defs);

        assert_eq!(result.len(), 1);
    }
}