1use anyhow::{bail, Result};
2use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
3use std::collections::HashMap;
4
/// A named region of a document delimited by an opening `<!-- agent:NAME -->`
/// marker and a closing `<!-- /agent:NAME -->` marker.
///
/// All offsets are byte offsets into the document the component was parsed
/// from; they are only meaningful for that exact document text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Component {
    /// Component name (the text following `agent:` in the marker).
    pub name: String,
    /// `key=value` attributes found on the opening marker.
    pub attrs: HashMap<String, String>,
    /// Offset of the first byte of the opening marker.
    pub open_start: usize,
    /// Offset where the component's content begins (just past the opening
    /// marker, as recorded by the parser).
    pub open_end: usize,
    /// Offset of the first byte of the closing marker; the content ends here.
    pub close_start: usize,
    /// Offset just past the closing marker, as recorded by the parser.
    pub close_end: usize,
}

impl Component {
    /// Returns the text between the opening and closing markers.
    ///
    /// # Panics
    ///
    /// Panics if the stored offsets are out of range for `doc` or do not fall
    /// on character boundaries (i.e. `doc` is not the text that was parsed).
    // NOTE(review): the reason for this `allow` is not visible in this file;
    // presumably the method is only exercised by tests in some build
    // configurations. Confirm before removing it.
    #[allow(dead_code)]
    pub fn content<'a>(&self, doc: &'a str) -> &'a str {
        &doc[self.open_end..self.close_start]
    }

    /// Returns the value of the `patch` attribute, falling back to the `mode`
    /// attribute when `patch` is absent. `None` when neither is set.
    pub fn patch_mode(&self) -> Option<&str> {
        self.attrs
            .get("patch")
            .or_else(|| self.attrs.get("mode"))
            .map(String::as_str)
    }

    /// Returns a copy of `doc` in which everything between the two markers is
    /// replaced by `new_content`. The markers themselves are left untouched.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`Component::content`].
    pub fn replace_content(&self, doc: &str, new_content: &str) -> String {
        let mut result = String::with_capacity(doc.len() + new_content.len());
        result.push_str(&doc[..self.open_end]);
        result.push_str(new_content);
        result.push_str(&doc[self.close_start..]);
        result
    }

    /// Returns a copy of `doc` with `content` added to this component.
    ///
    /// * When `caret_offset` lies inside the component (strictly after
    ///   `open_end` and at or before `close_start`), `content` is inserted at
    ///   the start of the line the caret is on, but never before `open_end`.
    /// * Otherwise `content` is appended after the existing content, whose
    ///   trailing whitespace is trimmed first.
    ///
    /// In both cases trailing whitespace of `content` is trimmed and a single
    /// `'\n'` is written after it.
    ///
    /// `caret_offset` is a byte offset and does not have to fall on a
    /// character boundary.
    ///
    /// # Panics
    ///
    /// Panics if the stored offsets are out of range for `doc` or do not fall
    /// on character boundaries.
    pub fn append_with_caret(&self, doc: &str, content: &str, caret_offset: Option<usize>) -> String {
        let addition = content.trim_end();
        let caret_inside =
            caret_offset.filter(|&caret| caret > self.open_end && caret <= self.close_start);

        if let Some(caret) = caret_inside {
            // Look for the preceding newline in the raw bytes instead of
            // slicing the `str` at `caret`: a caret offset is not guaranteed
            // to sit on a char boundary, and `doc[..caret]` would panic there.
            // The byte after a '\n' is always a char boundary, so the
            // resulting insertion point is safe to slice at.
            let insert_at = doc.as_bytes()[..caret]
                .iter()
                .rposition(|&b| b == b'\n')
                .map_or(self.open_end, |newline| newline + 1)
                // The line may have started before the opening marker ended
                // (marker and content on one line); never insert into the marker.
                .max(self.open_end);

            let mut result = String::with_capacity(doc.len() + addition.len() + 1);
            result.push_str(&doc[..insert_at]);
            result.push_str(addition);
            result.push('\n');
            result.push_str(&doc[insert_at..]);
            return result;
        }

        let existing = doc[self.open_end..self.close_start].trim_end();

        let mut result = String::with_capacity(doc.len() + addition.len() + 2);
        result.push_str(&doc[..self.open_end]);
        result.push_str(existing);
        result.push('\n');
        result.push_str(addition);
        result.push('\n');
        result.push_str(&doc[self.close_start..]);
        result
    }
}
89
/// Reports whether `name` is an acceptable component name.
///
/// A name is valid when it is non-empty, starts with an ASCII letter or digit,
/// and otherwise consists only of ASCII letters, digits, and `-`.
fn is_valid_name(name: &str) -> bool {
    match name.as_bytes().split_first() {
        // Empty names are never valid.
        None => false,
        Some((head, tail)) => {
            head.is_ascii_alphanumeric()
                && tail
                    .iter()
                    .all(|&byte| byte == b'-' || byte.is_ascii_alphanumeric())
        }
    }
}
102
103pub fn is_agent_marker(comment_text: &str) -> bool {
107 let trimmed = comment_text.trim();
108 if let Some(rest) = trimmed.strip_prefix("/agent:") {
109 is_valid_name(rest)
110 } else if let Some(rest) = trimmed.strip_prefix("agent:") {
111 let name_part = rest.split_whitespace().next().unwrap_or("");
113 is_valid_name(name_part)
114 } else {
115 false
116 }
117}
118
/// Extracts `key=value` pairs from the attribute portion of an opening marker.
///
/// The text is split on whitespace; each token is split at its first `=`.
/// Tokens without `=`, with an empty key, or with an empty value are ignored.
/// When a key occurs more than once, the last occurrence wins.
fn parse_attrs(attr_text: &str) -> HashMap<String, String> {
    attr_text
        .split_whitespace()
        .filter_map(|token| token.split_once('='))
        .filter(|(key, value)| !(key.is_empty() || value.is_empty()))
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}
135
136pub fn find_code_ranges(doc: &str) -> Vec<(usize, usize)> {
142 let mut ranges = Vec::new();
143 let parser = Parser::new_ext(doc, Options::empty());
144 let mut iter = parser.into_offset_iter();
145 while let Some((event, range)) = iter.next() {
146 match event {
147 Event::Code(_) => {
149 ranges.push((range.start, range.end));
150 }
151 Event::Start(Tag::CodeBlock(_)) => {
153 let block_start = range.start;
154 let mut block_end = range.end;
155 for (inner_event, inner_range) in iter.by_ref() {
156 block_end = inner_range.end;
157 if matches!(inner_event, Event::End(TagEnd::CodeBlock)) {
158 break;
159 }
160 }
161 ranges.push((block_start, block_end));
162 }
163 _ => {}
164 }
165 }
166 ranges
167}
168
169pub fn parse(doc: &str) -> Result<Vec<Component>> {
175 let bytes = doc.as_bytes();
176 let len = bytes.len();
177 let code_ranges = find_code_ranges(doc);
178 let mut templates: Vec<Component> = Vec::new();
179 let mut stack: Vec<(String, HashMap<String, String>, usize, usize)> = Vec::new();
181 let mut pos = 0;
182
183 while pos + 4 <= len {
184 if &bytes[pos..pos + 4] != b"<!--" {
186 pos += 1;
187 continue;
188 }
189
190 if code_ranges.iter().any(|&(start, end)| pos >= start && pos < end) {
192 pos += 4;
193 continue;
194 }
195
196 let marker_start = pos;
197
198 let close = match find_comment_end(bytes, pos + 4) {
200 Some(c) => c,
201 None => {
202 pos += 4;
203 continue;
204 }
205 };
206
207 let inner = &doc[marker_start + 4..close - 3]; let trimmed = inner.trim();
210
211 let mut marker_end = close;
213 if marker_end < len && bytes[marker_end] == b'\n' {
214 marker_end += 1;
215 }
216
217 if let Some(name) = trimmed.strip_prefix("/agent:") {
218 if !is_valid_name(name) {
220 bail!("invalid component name: '{}'", name);
221 }
222 match stack.pop() {
223 Some((open_name, open_attrs, open_start, open_end)) => {
224 if open_name != name {
225 bail!(
226 "mismatched component: opened '{}' but closed '{}'",
227 open_name,
228 name
229 );
230 }
231 templates.push(Component {
232 name: name.to_string(),
233 attrs: open_attrs,
234 open_start,
235 open_end,
236 close_start: marker_start,
237 close_end: marker_end,
238 });
239 }
240 None => bail!("closing marker <!-- /agent:{} --> without matching open", name),
241 }
242 } else if let Some(rest) = trimmed.strip_prefix("agent:") {
243 let mut parts = rest.splitn(2, |c: char| c.is_whitespace());
245 let name = parts.next().unwrap_or("");
246 let attr_text = parts.next().unwrap_or("");
247 if !is_valid_name(name) {
248 bail!("invalid component name: '{}'", name);
249 }
250 let attrs = parse_attrs(attr_text);
251 stack.push((name.to_string(), attrs, marker_start, marker_end));
252 }
253
254 pos = close;
255 }
256
257 if let Some((name, _, _, _)) = stack.last() {
258 bail!(
259 "unclosed component: <!-- agent:{} --> without matching close",
260 name
261 );
262 }
263
264 templates.sort_by_key(|t| t.open_start);
265 Ok(templates)
266}
267
/// Finds the first `-->` in `bytes` that begins at or after `start`.
///
/// Returns the offset just past the terminator, or `None` when there is no
/// terminator (including when `start` lies beyond the end of `bytes`).
fn find_comment_end(bytes: &[u8], start: usize) -> Option<usize> {
    const TERMINATOR: &[u8] = b"-->";

    let tail = bytes.get(start..)?;
    tail.windows(TERMINATOR.len())
        .position(|window| window == TERMINATOR)
        .map(|offset| start + offset + TERMINATOR.len())
}
280
281#[cfg(test)]
282mod tests {
    // Unit tests for marker parsing (`parse`), marker detection
    // (`is_agent_marker`), name validation, attribute parsing, code-range
    // detection (`find_code_ranges`), and content replacement.
    // NOTE(review): several fixtures below are whitespace-sensitive multi-line
    // string literals (the test names refer to indented fences); keep their
    // lines exactly as they are when editing.
283 use super::*;
284
    // --- Basic pairing: a single component, nested components, siblings. ---
285 #[test]
286 fn single_range() {
287 let doc = "before\n<!-- agent:status -->\nHello\n<!-- /agent:status -->\nafter\n";
288 let ranges = parse(doc).unwrap();
289 assert_eq!(ranges.len(), 1);
290 assert_eq!(ranges[0].name, "status");
291 assert_eq!(ranges[0].content(doc), "Hello\n");
292 }
293
    // Results are ordered by opening position, so the outer component comes first.
294 #[test]
295 fn nested_ranges() {
296 let doc = "\
297<!-- agent:outer -->
298<!-- agent:inner -->
299content
300<!-- /agent:inner -->
301<!-- /agent:outer -->
302";
303 let ranges = parse(doc).unwrap();
304 assert_eq!(ranges.len(), 2);
305 assert_eq!(ranges[0].name, "outer");
307 assert_eq!(ranges[1].name, "inner");
308 assert_eq!(ranges[1].content(doc), "content\n");
309 }
310
311 #[test]
312 fn siblings() {
313 let doc = "\
314<!-- agent:a -->
315alpha
316<!-- /agent:a -->
317<!-- agent:b -->
318beta
319<!-- /agent:b -->
320";
321 let ranges = parse(doc).unwrap();
322 assert_eq!(ranges.len(), 2);
323 assert_eq!(ranges[0].name, "a");
324 assert_eq!(ranges[0].content(doc), "alpha\n");
325 assert_eq!(ranges[1].name, "b");
326 assert_eq!(ranges[1].content(doc), "beta\n");
327 }
328
    // A document without any markers parses to an empty list, not an error.
329 #[test]
330 fn no_ranges() {
331 let doc = "# Just a document\n\nWith no range templates.\n";
332 let ranges = parse(doc).unwrap();
333 assert!(ranges.is_empty());
334 }
335
    // --- Error paths: unclosed, unopened, mismatched, and invalid names. ---
336 #[test]
337 fn unmatched_open_error() {
338 let doc = "<!-- agent:orphan -->\nContent\n";
339 let err = parse(doc).unwrap_err();
340 assert!(err.to_string().contains("unclosed component"));
341 }
342
343 #[test]
344 fn unmatched_close_error() {
345 let doc = "Content\n<!-- /agent:orphan -->\n";
346 let err = parse(doc).unwrap_err();
347 assert!(err.to_string().contains("without matching open"));
348 }
349
350 #[test]
351 fn mismatched_names_error() {
352 let doc = "<!-- agent:foo -->\n<!-- /agent:bar -->\n";
353 let err = parse(doc).unwrap_err();
354 assert!(err.to_string().contains("mismatched"));
355 }
356
357 #[test]
358 fn invalid_name() {
359 let doc = "<!-- agent:-bad -->\n<!-- /agent:-bad -->\n";
360 let err = parse(doc).unwrap_err();
361 assert!(err.to_string().contains("invalid component name"));
362 }
363
    // Names: ASCII alphanumerics and '-', not empty, not starting with '-'.
364 #[test]
365 fn name_validation() {
366 assert!(is_valid_name("status"));
367 assert!(is_valid_name("my-section"));
368 assert!(is_valid_name("a1"));
369 assert!(is_valid_name("A"));
370 assert!(!is_valid_name(""));
371 assert!(!is_valid_name("-bad"));
372 assert!(!is_valid_name("has space"));
373 assert!(!is_valid_name("has_underscore"));
374 }
375
    // --- Content extraction and replacement. ---
376 #[test]
377 fn content_extraction() {
378 let doc = "<!-- agent:x -->\nfoo\nbar\n<!-- /agent:x -->\n";
379 let ranges = parse(doc).unwrap();
380 assert_eq!(ranges[0].content(doc), "foo\nbar\n");
381 }
382
    // After replacing content, the new document must parse back to one
    // component holding the new content.
383 #[test]
384 fn replace_roundtrip() {
385 let doc = "before\n<!-- agent:s -->\nold\n<!-- /agent:s -->\nafter\n";
386 let ranges = parse(doc).unwrap();
387 let new_doc = ranges[0].replace_content(doc, "new\n");
388 assert_eq!(
389 new_doc,
390 "before\n<!-- agent:s -->\nnew\n<!-- /agent:s -->\nafter\n"
391 );
392 let ranges2 = parse(&new_doc).unwrap();
394 assert_eq!(ranges2.len(), 1);
395 assert_eq!(ranges2[0].content(&new_doc), "new\n");
396 }
397
    // --- `is_agent_marker` on the text found inside a comment. ---
398 #[test]
399 fn is_agent_marker_yes() {
400 assert!(is_agent_marker(" agent:status "));
401 assert!(is_agent_marker("/agent:status"));
402 assert!(is_agent_marker("agent:my-thing"));
403 assert!(is_agent_marker(" /agent:A1 "));
404 }
405
    // The prefix must come first (after trimming) and the name must be valid.
406 #[test]
407 fn is_agent_marker_no() {
408 assert!(!is_agent_marker("just a comment"));
409 assert!(!is_agent_marker("agent:"));
410 assert!(!is_agent_marker("/agent:"));
411 assert!(!is_agent_marker("agent:-bad"));
412 assert!(!is_agent_marker("some agent:fake stuff"));
413 }
414
    // --- Ordinary HTML comments (single- or multi-line) are skipped. ---
415 #[test]
416 fn regular_comments_ignored() {
417 let doc = "<!-- just a comment -->\n<!-- agent:x -->\ndata\n<!-- /agent:x -->\n";
418 let ranges = parse(doc).unwrap();
419 assert_eq!(ranges.len(), 1);
420 assert_eq!(ranges[0].name, "x");
421 }
422
423 #[test]
424 fn multiline_comment_ignored() {
425 let doc = "\
426<!--
427multi
428line
429comment
430-->
431<!-- agent:s -->
432content
433<!-- /agent:s -->
434";
435 let ranges = parse(doc).unwrap();
436 assert_eq!(ranges.len(), 1);
437 assert_eq!(ranges[0].name, "s");
438 }
439
    // Opening and closing markers may sit back to back on one line.
440 #[test]
441 fn empty_content() {
442 let doc = "<!-- agent:empty --><!-- /agent:empty -->\n";
443 let ranges = parse(doc).unwrap();
444 assert_eq!(ranges.len(), 1);
445 assert_eq!(ranges[0].content(doc), "");
446 }
447
    // --- Markers inside fenced or inline code are examples, not components. ---
448 #[test]
449 fn markers_in_fenced_code_block_ignored() {
450 let doc = "\
451<!-- agent:real -->
452content
453<!-- /agent:real -->
454```markdown
455<!-- agent:fake -->
456this is just an example
457<!-- /agent:fake -->
458```
459";
460 let ranges = parse(doc).unwrap();
461 assert_eq!(ranges.len(), 1);
462 assert_eq!(ranges[0].name, "real");
463 }
464
465 #[test]
466 fn markers_in_inline_code_ignored() {
467 let doc = "\
468Use `<!-- agent:example -->` markers for components.
469<!-- agent:real -->
470content
471<!-- /agent:real -->
472";
473 let ranges = parse(doc).unwrap();
474 assert_eq!(ranges.len(), 1);
475 assert_eq!(ranges[0].name, "real");
476 }
477
478 #[test]
479 fn markers_in_tilde_fence_ignored() {
480 let doc = "\
481<!-- agent:x -->
482data
483<!-- /agent:x -->
484~~~
485<!-- agent:y -->
486example
487<!-- /agent:y -->
488~~~
489";
490 let ranges = parse(doc).unwrap();
491 assert_eq!(ranges.len(), 1);
492 assert_eq!(ranges[0].name, "x");
493 }
494
    // The fenced example marker below has no closing marker; parsing must
    // still succeed, i.e. it must not be reported as an unclosed component.
495 #[test]
496 fn markers_in_indented_fenced_code_block_ignored() {
497 let doc = "\
499<!-- agent:exchange -->
500Content here.
501<!-- /agent:exchange -->
502
503 ```markdown
504 <!-- agent:fake -->
505 demo without closing tag
506 ```
507";
508 let ranges = parse(doc).unwrap();
509 assert_eq!(ranges.len(), 1);
510 assert_eq!(ranges[0].name, "exchange");
511 }
512
    // Same situation, but the fence sits inside a real component.
513 #[test]
514 fn indented_fence_inside_component_ignored() {
515 let doc = "\
517<!-- agent:exchange -->
518Here's how to set up:
519
520 ```markdown
521 <!-- agent:status -->
522 Your status here
523 ```
524
525Done explaining.
526<!-- /agent:exchange -->
527";
528 let ranges = parse(doc).unwrap();
529 assert_eq!(ranges.len(), 1);
530 assert_eq!(ranges[0].name, "exchange");
531 }
532
533 #[test]
534 fn deeply_indented_fence_ignored() {
535 let doc = "\
537<!-- agent:x -->
538ok
539<!-- /agent:x -->
540 ```
541 <!-- agent:y -->
542 inside fence
543 ```
544";
545 let ranges = parse(doc).unwrap();
546 assert_eq!(ranges.len(), 1);
547 assert_eq!(ranges[0].name, "x");
548 }
549
    // --- `find_code_ranges` directly. ---
550 #[test]
551 fn indented_fence_code_ranges_detected() {
552 let doc = "before\n ```\n code\n ```\nafter\n";
553 let ranges = find_code_ranges(doc);
554 assert_eq!(ranges.len(), 1);
555 assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
556 }
557
    // One range for the fenced block, one for the inline span.
558 #[test]
559 fn code_ranges_detected() {
560 let doc = "before\n```\ncode\n```\nafter `inline` end\n";
561 let ranges = find_code_ranges(doc);
562 assert_eq!(ranges.len(), 2);
563 assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
565 assert!(doc[ranges[1].0..ranges[1].1].contains("inline"));
567 }
568
    // A double-backtick span may contain single backticks; it is one range.
569 #[test]
570 fn code_ranges_double_backtick() {
571 let doc = "text `` `<!--` `` more\n";
573 let ranges = find_code_ranges(doc);
574 assert_eq!(ranges.len(), 1);
575 let span = &doc[ranges[0].0..ranges[0].1];
576 assert!(span.contains("<!--"), "double-backtick span should contain <!--: {:?}", span);
577 }
578
    // The reported range includes the backtick delimiters themselves.
579 #[test]
580 fn code_ranges_double_backtick_does_not_match_single() {
581 let doc = "text `` foo ` bar `` end\n";
583 let ranges = find_code_ranges(doc);
584 assert_eq!(ranges.len(), 1);
585 let span = &doc[ranges[0].0..ranges[0].1];
586 assert_eq!(span, "`` foo ` bar ``");
587 }
588
    // A literal `<!--` inside inline code must not swallow the text up to the
    // real closing marker.
589 #[test]
590 fn double_backtick_comment_before_agent_marker() {
591 let doc = "\
593<!-- agent:exchange -->\n\
594text `` `<!--` `` description\n\
595new content here\n\
596<!-- /agent:exchange -->\n";
597 let components = parse(doc).unwrap();
598 assert_eq!(components.len(), 1);
599 assert_eq!(components[0].name, "exchange");
600 assert!(components[0].content(doc).contains("new content here"));
601 }
602
    // --- Attributes on the opening marker. ---
603 #[test]
606 fn parse_component_with_mode_attr() {
607 let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
608 let components = parse(doc).unwrap();
609 assert_eq!(components.len(), 1);
610 assert_eq!(components[0].name, "exchange");
611 assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("append"));
612 assert_eq!(components[0].content(doc), "Content\n");
613 }
614
615 #[test]
616 fn parse_component_with_multiple_attrs() {
617 let doc = "<!-- agent:log mode=prepend timestamp=true -->\nData\n<!-- /agent:log -->\n";
618 let components = parse(doc).unwrap();
619 assert_eq!(components.len(), 1);
620 assert_eq!(components[0].name, "log");
621 assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("prepend"));
622 assert_eq!(components[0].attrs.get("timestamp").map(|s| s.as_str()), Some("true"));
623 }
624
625 #[test]
626 fn parse_component_no_attrs_backward_compat() {
627 let doc = "<!-- agent:status -->\nOK\n<!-- /agent:status -->\n";
628 let components = parse(doc).unwrap();
629 assert_eq!(components.len(), 1);
630 assert_eq!(components[0].name, "status");
631 assert!(components[0].attrs.is_empty());
632 }
633
634 #[test]
635 fn is_agent_marker_with_attrs() {
636 assert!(is_agent_marker(" agent:exchange mode=append "));
637 assert!(is_agent_marker("agent:status mode=replace"));
638 assert!(is_agent_marker("agent:log mode=prepend timestamp=true"));
639 }
640
    // Replacing content keeps the opening marker's attributes and the plain
    // closing marker intact.
641 #[test]
642 fn closing_tag_unchanged_with_attrs() {
643 let doc = "<!-- agent:status mode=replace -->\n- [x] Done\n<!-- /agent:status -->\n";
645 let components = parse(doc).unwrap();
646 assert_eq!(components.len(), 1);
647 let new_doc = components[0].replace_content(doc, "- [ ] Todo\n");
648 assert!(new_doc.contains("<!-- agent:status mode=replace -->"));
649 assert!(new_doc.contains("<!-- /agent:status -->"));
650 assert!(new_doc.contains("- [ ] Todo"));
651 }
652
    // --- `patch_mode`: `patch` wins over `mode`; `mode` alone is still honored. ---
653 #[test]
654 fn parse_component_with_patch_attr() {
655 let doc = "<!-- agent:exchange patch=append -->\nContent\n<!-- /agent:exchange -->\n";
656 let components = parse(doc).unwrap();
657 assert_eq!(components.len(), 1);
658 assert_eq!(components[0].name, "exchange");
659 assert_eq!(components[0].patch_mode(), Some("append"));
660 assert_eq!(components[0].content(doc), "Content\n");
661 }
662
663 #[test]
664 fn patch_attr_takes_precedence_over_mode() {
665 let doc = "<!-- agent:exchange patch=replace mode=append -->\nContent\n<!-- /agent:exchange -->\n";
666 let components = parse(doc).unwrap();
667 assert_eq!(components[0].patch_mode(), Some("replace"));
668 }
669
670 #[test]
671 fn mode_attr_backward_compat() {
672 let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
673 let components = parse(doc).unwrap();
674 assert_eq!(components[0].patch_mode(), Some("append"));
675 }
676
677 #[test]
678 fn no_patch_or_mode_attr() {
679 let doc = "<!-- agent:exchange -->\nContent\n<!-- /agent:exchange -->\n";
680 let components = parse(doc).unwrap();
681 assert_eq!(components[0].patch_mode(), None);
682 }
683
    // --- Markers quoted in backticks within prose, mixed with real markers. ---
684 #[test]
687 fn single_backtick_component_tag_ignored() {
688 let doc = "\
690Use `<!-- agent:pending patch=replace -->` to mark pending sections.
691<!-- agent:real -->
692content
693<!-- /agent:real -->
694";
695 let components = parse(doc).unwrap();
696 assert_eq!(components.len(), 1);
697 assert_eq!(components[0].name, "real");
698 }
699
700 #[test]
701 fn double_backtick_component_tag_ignored() {
702 let doc = "\
704Use ``<!-- agent:pending patch=replace -->`` to mark pending sections.
705<!-- agent:real -->
706content
707<!-- /agent:real -->
708";
709 let components = parse(doc).unwrap();
710 assert_eq!(components.len(), 1);
711 assert_eq!(components[0].name, "real");
712 }
713
714 #[test]
715 fn component_tags_not_in_backticks_still_work() {
716 let doc = "\
718<!-- agent:a -->
719alpha
720<!-- /agent:a -->
721<!-- agent:b patch=append -->
722beta
723<!-- /agent:b -->
724";
725 let components = parse(doc).unwrap();
726 assert_eq!(components.len(), 2);
727 assert_eq!(components[0].name, "a");
728 assert_eq!(components[1].name, "b");
729 assert_eq!(components[1].patch_mode(), Some("append"));
730 }
731
732 #[test]
733 fn mixed_backtick_and_real_tags() {
734 let doc = "\
736Here is an example: `<!-- agent:fake -->` and ``<!-- /agent:fake -->``.
737<!-- agent:real -->
738real content
739<!-- /agent:real -->
740Another example: `<!-- agent:also-fake patch=replace -->` is just documentation.
741";
742 let components = parse(doc).unwrap();
743 assert_eq!(components.len(), 1);
744 assert_eq!(components[0].name, "real");
745 assert_eq!(components[0].content(doc), "real content\n");
746 }
747
748 #[test]
749 fn inline_code_mid_line_with_surrounding_text_ignored() {
750 let doc = "\
753Wrap markers like `<!-- agent:status -->` in backticks to show them literally.
754<!-- agent:real -->
755actual content
756<!-- /agent:real -->
757";
758 let components = parse(doc).unwrap();
759 assert_eq!(components.len(), 1);
760 assert_eq!(components[0].name, "real");
761 assert_eq!(components[0].content(doc), "actual content\n");
762 }
763
    // --- `parse_attrs`: tokens without `=` or with an empty value are dropped. ---
764 #[test]
765 fn parse_attrs_unit() {
766 let attrs = parse_attrs("mode=append");
767 assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));
768
769 let attrs = parse_attrs("mode=replace timestamp=true");
770 assert_eq!(attrs.len(), 2);
771
772 let attrs = parse_attrs("");
773 assert!(attrs.is_empty());
774
775 let attrs = parse_attrs("mode=append broken novalue=");
777 assert_eq!(attrs.len(), 1);
778 assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));
779 }
780}