1use anyhow::{bail, Result};
2use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
3use std::collections::HashMap;
4
/// A named region of a document delimited by an opening `<!-- agent:NAME -->` comment and a
/// closing `<!-- /agent:NAME -->` comment.
///
/// All four offsets are byte offsets into the document the component was parsed from, so they
/// are only meaningful together with that exact document text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Component {
    /// Component name taken from the markers.
    pub name: String,
    /// `key=value` attributes attached to the opening marker.
    pub attrs: HashMap<String, String>,
    /// Offset at which the opening marker begins.
    pub open_start: usize,
    /// Offset at which the opening marker ends; the component content begins here.
    pub open_end: usize,
    /// Offset at which the closing marker begins; the component content ends here.
    pub close_start: usize,
    /// Offset at which the closing marker ends.
    pub close_end: usize,
}

impl Component {
    /// Returns the text located between the opening and the closing marker.
    ///
    /// # Panics
    ///
    /// Panics if the stored offsets do not describe a valid range of `doc`, e.g. when `doc` is
    /// not the document this component was parsed from.
    // NOTE(review): the lint is silenced presumably because only some builds call this method.
    #[allow(dead_code)]
    pub fn content<'a>(&self, doc: &'a str) -> &'a str {
        let body = self.open_end..self.close_start;
        &doc[body]
    }

    /// Returns the patch mode requested on the opening marker.
    ///
    /// The `patch` attribute wins; the `mode` attribute is consulted only when `patch` is
    /// absent. Returns `None` when neither attribute is present.
    pub fn patch_mode(&self) -> Option<&str> {
        self.attrs
            .get("patch")
            .or_else(|| self.attrs.get("mode"))
            .map(String::as_str)
    }

    /// Builds a copy of `doc` in which this component's content is replaced by `new_content`.
    ///
    /// Both markers and everything outside the component are carried over unchanged.
    ///
    /// # Panics
    ///
    /// Panics if the stored offsets are not valid slice positions in `doc`.
    pub fn replace_content(&self, doc: &str, new_content: &str) -> String {
        let head = &doc[..self.open_end];
        let tail = &doc[self.close_start..];
        [head, new_content, tail].concat()
    }
}
49
/// Reports whether `name` is an acceptable component name.
///
/// A name is non-empty, starts with an ASCII letter or digit, and continues with ASCII letters,
/// digits or `-` only.
fn is_valid_name(name: &str) -> bool {
    match name.as_bytes().split_first() {
        // An empty name is never valid.
        None => false,
        Some((head, tail)) => {
            head.is_ascii_alphanumeric()
                && tail
                    .iter()
                    .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'-')
        }
    }
}
62
63pub fn is_agent_marker(comment_text: &str) -> bool {
67 let trimmed = comment_text.trim();
68 if let Some(rest) = trimmed.strip_prefix("/agent:") {
69 is_valid_name(rest)
70 } else if let Some(rest) = trimmed.strip_prefix("agent:") {
71 let name_part = rest.split_whitespace().next().unwrap_or("");
73 is_valid_name(name_part)
74 } else {
75 false
76 }
77}
78
/// Extracts `key=value` attributes from the text that follows a component name.
///
/// The text is split on whitespace and each token is split at its first `=`. Tokens without
/// `=`, with an empty key, or with an empty value are dropped. When a key occurs more than
/// once, the last occurrence wins.
fn parse_attrs(attr_text: &str) -> HashMap<String, String> {
    attr_text
        .split_whitespace()
        .filter_map(|token| token.split_once('='))
        .filter(|(key, value)| !key.is_empty() && !value.is_empty())
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}
95
96pub fn find_code_ranges(doc: &str) -> Vec<(usize, usize)> {
102 let mut ranges = Vec::new();
103 let parser = Parser::new_ext(doc, Options::empty());
104 let mut iter = parser.into_offset_iter();
105 while let Some((event, range)) = iter.next() {
106 match event {
107 Event::Code(_) => {
109 ranges.push((range.start, range.end));
110 }
111 Event::Start(Tag::CodeBlock(_)) => {
113 let block_start = range.start;
114 let mut block_end = range.end;
115 for (inner_event, inner_range) in iter.by_ref() {
116 block_end = inner_range.end;
117 if matches!(inner_event, Event::End(TagEnd::CodeBlock)) {
118 break;
119 }
120 }
121 ranges.push((block_start, block_end));
122 }
123 _ => {}
124 }
125 }
126 ranges
127}
128
129pub fn parse(doc: &str) -> Result<Vec<Component>> {
135 let bytes = doc.as_bytes();
136 let len = bytes.len();
137 let code_ranges = find_code_ranges(doc);
138 let mut templates: Vec<Component> = Vec::new();
139 let mut stack: Vec<(String, HashMap<String, String>, usize, usize)> = Vec::new();
141 let mut pos = 0;
142
143 while pos + 4 <= len {
144 if &bytes[pos..pos + 4] != b"<!--" {
146 pos += 1;
147 continue;
148 }
149
150 if code_ranges.iter().any(|&(start, end)| pos >= start && pos < end) {
152 pos += 4;
153 continue;
154 }
155
156 let marker_start = pos;
157
158 let close = match find_comment_end(bytes, pos + 4) {
160 Some(c) => c,
161 None => {
162 pos += 4;
163 continue;
164 }
165 };
166
167 let inner = &doc[marker_start + 4..close - 3]; let trimmed = inner.trim();
170
171 let mut marker_end = close;
173 if marker_end < len && bytes[marker_end] == b'\n' {
174 marker_end += 1;
175 }
176
177 if let Some(name) = trimmed.strip_prefix("/agent:") {
178 if !is_valid_name(name) {
180 bail!("invalid component name: '{}'", name);
181 }
182 match stack.pop() {
183 Some((open_name, open_attrs, open_start, open_end)) => {
184 if open_name != name {
185 bail!(
186 "mismatched component: opened '{}' but closed '{}'",
187 open_name,
188 name
189 );
190 }
191 templates.push(Component {
192 name: name.to_string(),
193 attrs: open_attrs,
194 open_start,
195 open_end,
196 close_start: marker_start,
197 close_end: marker_end,
198 });
199 }
200 None => bail!("closing marker <!-- /agent:{} --> without matching open", name),
201 }
202 } else if let Some(rest) = trimmed.strip_prefix("agent:") {
203 let mut parts = rest.splitn(2, |c: char| c.is_whitespace());
205 let name = parts.next().unwrap_or("");
206 let attr_text = parts.next().unwrap_or("");
207 if !is_valid_name(name) {
208 bail!("invalid component name: '{}'", name);
209 }
210 let attrs = parse_attrs(attr_text);
211 stack.push((name.to_string(), attrs, marker_start, marker_end));
212 }
213
214 pos = close;
215 }
216
217 if let Some((name, _, _, _)) = stack.last() {
218 bail!(
219 "unclosed component: <!-- agent:{} --> without matching close",
220 name
221 );
222 }
223
224 templates.sort_by_key(|t| t.open_start);
225 Ok(templates)
226}
227
/// Finds the first `-->` in `bytes` at or after `start`.
///
/// Returns the offset just past the terminator, or `None` when there is no terminator (which
/// includes the case where `start` lies beyond the end of `bytes`).
fn find_comment_end(bytes: &[u8], start: usize) -> Option<usize> {
    const TERMINATOR: &[u8] = b"-->";

    let tail = bytes.get(start..)?;
    tail.windows(TERMINATOR.len())
        .position(|window| window == TERMINATOR)
        .map(|offset| start + offset + TERMINATOR.len())
}
240
// Unit tests for marker parsing, code-range detection and attribute handling.
//
// NOTE(review): several fixtures are multi-line string literals whose leading whitespace is
// part of the test input (indented fences) — keep those lines exactly as they are.
#[cfg(test)]
mod tests {
    use super::*;

    // A single component surrounded by ordinary text is found and its content extracted.
    #[test]
    fn single_range() {
        let doc = "before\n<!-- agent:status -->\nHello\n<!-- /agent:status -->\nafter\n";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "status");
        assert_eq!(ranges[0].content(doc), "Hello\n");
    }

    // Nested components are both returned, outer first (sorted by opening position).
    #[test]
    fn nested_ranges() {
        let doc = "\
<!-- agent:outer -->
<!-- agent:inner -->
content
<!-- /agent:inner -->
<!-- /agent:outer -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 2);
        assert_eq!(ranges[0].name, "outer");
        assert_eq!(ranges[1].name, "inner");
        assert_eq!(ranges[1].content(doc), "content\n");
    }

    // Two components one after the other are returned in document order.
    #[test]
    fn siblings() {
        let doc = "\
<!-- agent:a -->
alpha
<!-- /agent:a -->
<!-- agent:b -->
beta
<!-- /agent:b -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 2);
        assert_eq!(ranges[0].name, "a");
        assert_eq!(ranges[0].content(doc), "alpha\n");
        assert_eq!(ranges[1].name, "b");
        assert_eq!(ranges[1].content(doc), "beta\n");
    }

    // A document without markers yields an empty result, not an error.
    #[test]
    fn no_ranges() {
        let doc = "# Just a document\n\nWith no range templates.\n";
        let ranges = parse(doc).unwrap();
        assert!(ranges.is_empty());
    }

    // An opening marker without a closing marker is an error.
    #[test]
    fn unmatched_open_error() {
        let doc = "<!-- agent:orphan -->\nContent\n";
        let err = parse(doc).unwrap_err();
        assert!(err.to_string().contains("unclosed component"));
    }

    // A closing marker without a preceding opening marker is an error.
    #[test]
    fn unmatched_close_error() {
        let doc = "Content\n<!-- /agent:orphan -->\n";
        let err = parse(doc).unwrap_err();
        assert!(err.to_string().contains("without matching open"));
    }

    // Closing a component under a different name than it was opened with is an error.
    #[test]
    fn mismatched_names_error() {
        let doc = "<!-- agent:foo -->\n<!-- /agent:bar -->\n";
        let err = parse(doc).unwrap_err();
        assert!(err.to_string().contains("mismatched"));
    }

    // A name starting with `-` is rejected by the parser.
    #[test]
    fn invalid_name() {
        let doc = "<!-- agent:-bad -->\n<!-- /agent:-bad -->\n";
        let err = parse(doc).unwrap_err();
        assert!(err.to_string().contains("invalid component name"));
    }

    // Accepted and rejected spellings of component names.
    #[test]
    fn name_validation() {
        assert!(is_valid_name("status"));
        assert!(is_valid_name("my-section"));
        assert!(is_valid_name("a1"));
        assert!(is_valid_name("A"));
        assert!(!is_valid_name(""));
        assert!(!is_valid_name("-bad"));
        assert!(!is_valid_name("has space"));
        assert!(!is_valid_name("has_underscore"));
    }

    // Multi-line content is returned in full, including its trailing newline.
    #[test]
    fn content_extraction() {
        let doc = "<!-- agent:x -->\nfoo\nbar\n<!-- /agent:x -->\n";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges[0].content(doc), "foo\nbar\n");
    }

    // Replacing content keeps the markers intact and the result parses again.
    #[test]
    fn replace_roundtrip() {
        let doc = "before\n<!-- agent:s -->\nold\n<!-- /agent:s -->\nafter\n";
        let ranges = parse(doc).unwrap();
        let new_doc = ranges[0].replace_content(doc, "new\n");
        assert_eq!(
            new_doc,
            "before\n<!-- agent:s -->\nnew\n<!-- /agent:s -->\nafter\n"
        );
        let ranges2 = parse(&new_doc).unwrap();
        assert_eq!(ranges2.len(), 1);
        assert_eq!(ranges2[0].content(&new_doc), "new\n");
    }

    // Comment texts that count as agent markers, with and without padding.
    #[test]
    fn is_agent_marker_yes() {
        assert!(is_agent_marker(" agent:status "));
        assert!(is_agent_marker("/agent:status"));
        assert!(is_agent_marker("agent:my-thing"));
        assert!(is_agent_marker(" /agent:A1 "));
    }

    // Comment texts that do not count as agent markers.
    #[test]
    fn is_agent_marker_no() {
        assert!(!is_agent_marker("just a comment"));
        assert!(!is_agent_marker("agent:"));
        assert!(!is_agent_marker("/agent:"));
        assert!(!is_agent_marker("agent:-bad"));
        assert!(!is_agent_marker("some agent:fake stuff"));
    }

    // Ordinary HTML comments are skipped by the parser.
    #[test]
    fn regular_comments_ignored() {
        let doc = "<!-- just a comment -->\n<!-- agent:x -->\ndata\n<!-- /agent:x -->\n";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "x");
    }

    // An ordinary comment spanning several lines is skipped as well.
    #[test]
    fn multiline_comment_ignored() {
        let doc = "\
<!--
multi
line
comment
-->
<!-- agent:s -->
content
<!-- /agent:s -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "s");
    }

    // Opening and closing marker directly adjacent give empty content.
    #[test]
    fn empty_content() {
        let doc = "<!-- agent:empty --><!-- /agent:empty -->\n";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].content(doc), "");
    }

    // Markers inside a backtick-fenced code block are not treated as components.
    #[test]
    fn markers_in_fenced_code_block_ignored() {
        let doc = "\
<!-- agent:real -->
content
<!-- /agent:real -->
```markdown
<!-- agent:fake -->
this is just an example
<!-- /agent:fake -->
```
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "real");
    }

    // A marker inside inline code is not treated as a component.
    #[test]
    fn markers_in_inline_code_ignored() {
        let doc = "\
Use `<!-- agent:example -->` markers for components.
<!-- agent:real -->
content
<!-- /agent:real -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "real");
    }

    // Markers inside a tilde-fenced code block are not treated as components.
    #[test]
    fn markers_in_tilde_fence_ignored() {
        let doc = "\
<!-- agent:x -->
data
<!-- /agent:x -->
~~~
<!-- agent:y -->
example
<!-- /agent:y -->
~~~
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "x");
    }

    // An unclosed marker inside an indented fence after a real component does not cause an error.
    #[test]
    fn markers_in_indented_fenced_code_block_ignored() {
        let doc = "\
<!-- agent:exchange -->
Content here.
<!-- /agent:exchange -->

 ```markdown
 <!-- agent:fake -->
 demo without closing tag
 ```
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "exchange");
    }

    // An indented fence inside a component does not open a nested component.
    #[test]
    fn indented_fence_inside_component_ignored() {
        let doc = "\
<!-- agent:exchange -->
Here's how to set up:

 ```markdown
 <!-- agent:status -->
 Your status here
 ```

Done explaining.
<!-- /agent:exchange -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "exchange");
    }

    // An indented fence directly after a closing marker still hides the marker inside it.
    #[test]
    fn deeply_indented_fence_ignored() {
        let doc = "\
<!-- agent:x -->
ok
<!-- /agent:x -->
 ```
 <!-- agent:y -->
 inside fence
 ```
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "x");
    }

    // An indented fence is reported as exactly one code range covering its body.
    #[test]
    fn indented_fence_code_ranges_detected() {
        let doc = "before\n ```\n code\n ```\nafter\n";
        let ranges = find_code_ranges(doc);
        assert_eq!(ranges.len(), 1);
        assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
    }

    // A fenced block and an inline span are reported as two ranges, in document order.
    #[test]
    fn code_ranges_detected() {
        let doc = "before\n```\ncode\n```\nafter `inline` end\n";
        let ranges = find_code_ranges(doc);
        assert_eq!(ranges.len(), 2);
        assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
        assert!(doc[ranges[1].0..ranges[1].1].contains("inline"));
    }

    // A double-backtick span containing single backticks is one range that includes `<!--`.
    #[test]
    fn code_ranges_double_backtick() {
        let doc = "text `` `<!--` `` more\n";
        let ranges = find_code_ranges(doc);
        assert_eq!(ranges.len(), 1);
        let span = &doc[ranges[0].0..ranges[0].1];
        assert!(span.contains("<!--"), "double-backtick span should contain <!--: {:?}", span);
    }

    // A lone backtick inside a double-backtick span does not end the span.
    #[test]
    fn code_ranges_double_backtick_does_not_match_single() {
        let doc = "text `` foo ` bar `` end\n";
        let ranges = find_code_ranges(doc);
        assert_eq!(ranges.len(), 1);
        let span = &doc[ranges[0].0..ranges[0].1];
        assert_eq!(span, "`` foo ` bar ``");
    }

    // A `<!--` shown inside double backticks must not swallow the real closing marker after it.
    #[test]
    fn double_backtick_comment_before_agent_marker() {
        let doc = "\
<!-- agent:exchange -->\n\
text `` `<!--` `` description\n\
new content here\n\
<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "exchange");
        assert!(components[0].content(doc).contains("new content here"));
    }

    // A `mode=` attribute on the opening marker ends up in `attrs`.
    #[test]
    fn parse_component_with_mode_attr() {
        let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "exchange");
        assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("append"));
        assert_eq!(components[0].content(doc), "Content\n");
    }

    // Several attributes on one marker are all captured.
    #[test]
    fn parse_component_with_multiple_attrs() {
        let doc = "<!-- agent:log mode=prepend timestamp=true -->\nData\n<!-- /agent:log -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "log");
        assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("prepend"));
        assert_eq!(components[0].attrs.get("timestamp").map(|s| s.as_str()), Some("true"));
    }

    // A marker without attributes still parses and has an empty attribute map.
    #[test]
    fn parse_component_no_attrs_backward_compat() {
        let doc = "<!-- agent:status -->\nOK\n<!-- /agent:status -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "status");
        assert!(components[0].attrs.is_empty());
    }

    // Attributes after the name do not stop a comment from counting as a marker.
    #[test]
    fn is_agent_marker_with_attrs() {
        assert!(is_agent_marker(" agent:exchange mode=append "));
        assert!(is_agent_marker("agent:status mode=replace"));
        assert!(is_agent_marker("agent:log mode=prepend timestamp=true"));
    }

    // Replacing content leaves an attributed opening marker and the closing marker untouched.
    #[test]
    fn closing_tag_unchanged_with_attrs() {
        let doc = "<!-- agent:status mode=replace -->\n- [x] Done\n<!-- /agent:status -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        let new_doc = components[0].replace_content(doc, "- [ ] Todo\n");
        assert!(new_doc.contains("<!-- agent:status mode=replace -->"));
        assert!(new_doc.contains("<!-- /agent:status -->"));
        assert!(new_doc.contains("- [ ] Todo"));
    }

    // A `patch=` attribute is exposed through `patch_mode`.
    #[test]
    fn parse_component_with_patch_attr() {
        let doc = "<!-- agent:exchange patch=append -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "exchange");
        assert_eq!(components[0].patch_mode(), Some("append"));
        assert_eq!(components[0].content(doc), "Content\n");
    }

    // When both attributes are present, `patch` wins over `mode`.
    #[test]
    fn patch_attr_takes_precedence_over_mode() {
        let doc = "<!-- agent:exchange patch=replace mode=append -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components[0].patch_mode(), Some("replace"));
    }

    // With only `mode` present, `patch_mode` falls back to it.
    #[test]
    fn mode_attr_backward_compat() {
        let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components[0].patch_mode(), Some("append"));
    }

    // With neither attribute present, `patch_mode` is `None`.
    #[test]
    fn no_patch_or_mode_attr() {
        let doc = "<!-- agent:exchange -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components[0].patch_mode(), None);
    }

    // An attributed marker shown in single backticks is ignored.
    #[test]
    fn single_backtick_component_tag_ignored() {
        let doc = "\
Use `<!-- agent:pending patch=replace -->` to mark pending sections.
<!-- agent:real -->
content
<!-- /agent:real -->
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "real");
    }

    // An attributed marker shown in double backticks is ignored.
    #[test]
    fn double_backtick_component_tag_ignored() {
        let doc = "\
Use ``<!-- agent:pending patch=replace -->`` to mark pending sections.
<!-- agent:real -->
content
<!-- /agent:real -->
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "real");
    }

    // Markers outside of code spans keep working, including their attributes.
    #[test]
    fn component_tags_not_in_backticks_still_work() {
        let doc = "\
<!-- agent:a -->
alpha
<!-- /agent:a -->
<!-- agent:b patch=append -->
beta
<!-- /agent:b -->
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 2);
        assert_eq!(components[0].name, "a");
        assert_eq!(components[1].name, "b");
        assert_eq!(components[1].patch_mode(), Some("append"));
    }

    // Backticked example markers before and after a real component do not interfere with it.
    #[test]
    fn mixed_backtick_and_real_tags() {
        let doc = "\
Here is an example: `<!-- agent:fake -->` and ``<!-- /agent:fake -->``.
<!-- agent:real -->
real content
<!-- /agent:real -->
Another example: `<!-- agent:also-fake patch=replace -->` is just documentation.
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "real");
        assert_eq!(components[0].content(doc), "real content\n");
    }

    // A backticked marker in the middle of a sentence is ignored.
    #[test]
    fn inline_code_mid_line_with_surrounding_text_ignored() {
        let doc = "\
Wrap markers like `<!-- agent:status -->` in backticks to show them literally.
<!-- agent:real -->
actual content
<!-- /agent:real -->
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "real");
        assert_eq!(components[0].content(doc), "actual content\n");
    }

    // Direct checks of attribute parsing: one pair, two pairs, empty input, malformed tokens.
    #[test]
    fn parse_attrs_unit() {
        let attrs = parse_attrs("mode=append");
        assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));

        let attrs = parse_attrs("mode=replace timestamp=true");
        assert_eq!(attrs.len(), 2);

        let attrs = parse_attrs("");
        assert!(attrs.is_empty());

        // `broken` has no `=` and `novalue=` has an empty value; both are dropped.
        let attrs = parse_attrs("mode=append broken novalue=");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));
    }
}