Skip to main content

agent_doc/
component.rs

1use anyhow::{bail, Result};
2use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
3use std::collections::HashMap;
4
5/// A parsed component in a document.
6///
7/// Components are bounded regions marked by `<!-- agent:name -->...<!-- /agent:name -->`.
8/// Opening tags may contain inline attributes: `<!-- agent:name key=value -->`.
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub struct Component {
11    pub name: String,
12    /// Inline attributes parsed from the opening tag (e.g., `patch=append`).
13    pub attrs: HashMap<String, String>,
14    /// Byte offset of `<` in opening marker.
15    pub open_start: usize,
16    /// Byte offset past `>` in opening marker (includes trailing newline if present).
17    pub open_end: usize,
18    /// Byte offset of `<` in closing marker.
19    pub close_start: usize,
20    /// Byte offset past `>` in closing marker (includes trailing newline if present).
21    pub close_end: usize,
22}
23
24impl Component {
25    /// Extract the content between the opening and closing markers.
26    #[allow(dead_code)] // public API — used by tests and future consumers
27    pub fn content<'a>(&self, doc: &'a str) -> &'a str {
28        &doc[self.open_end..self.close_start]
29    }
30
31    /// Get the patch mode from inline attributes.
32    ///
33    /// Checks `patch=` first, falls back to `mode=` for backward compatibility.
34    pub fn patch_mode(&self) -> Option<&str> {
35        self.attrs.get("patch").map(|s| s.as_str())
36            .or_else(|| self.attrs.get("mode").map(|s| s.as_str()))
37    }
38
39    /// Replace the content between markers, returning the new document.
40    /// The markers themselves are preserved.
41    pub fn replace_content(&self, doc: &str, new_content: &str) -> String {
42        let mut result = String::with_capacity(doc.len() + new_content.len());
43        result.push_str(&doc[..self.open_end]);
44        result.push_str(new_content);
45        result.push_str(&doc[self.close_start..]);
46        result
47    }
48
49    /// Append content into this component, inserting before the caret position
50    /// if the caret is inside the component. Falls back to normal append if the
51    /// caret is outside the component.
52    ///
53    /// `caret_offset`: byte offset of the caret in the document. Pass `None` for
54    /// normal append behavior.
55    pub fn append_with_caret(&self, doc: &str, content: &str, caret_offset: Option<usize>) -> String {
56        let existing = &doc[self.open_end..self.close_start];
57
58        if let Some(caret) = caret_offset {
59            // Check if caret is inside this component
60            if caret > self.open_end && caret <= self.close_start {
61                // Find the line boundary before the caret
62                let insert_at = doc[..caret].rfind('\n')
63                    .map(|i| i + 1)
64                    .unwrap_or(self.open_end);
65
66                // Clamp to component bounds
67                let insert_at = insert_at.max(self.open_end);
68
69                let mut result = String::with_capacity(doc.len() + content.len() + 1);
70                result.push_str(&doc[..insert_at]);
71                result.push_str(content.trim_end());
72                result.push('\n');
73                result.push_str(&doc[insert_at..]);
74                return result;
75            }
76        }
77
78        // Normal append: add after existing content
79        let mut result = String::with_capacity(doc.len() + content.len() + 1);
80        result.push_str(&doc[..self.open_end]);
81        result.push_str(existing.trim_end());
82        result.push('\n');
83        result.push_str(content.trim_end());
84        result.push('\n');
85        result.push_str(&doc[self.close_start..]);
86        result
87    }
88
89    /// Append content into this component at the boundary marker position.
90    ///
91    /// Finds `<!-- agent:boundary:ID -->` inside the component. If found,
92    /// inserts content at the line start of the boundary marker (replacing
93    /// the marker). Falls back to normal append if the boundary is not found.
94    pub fn append_with_boundary(&self, doc: &str, content: &str, boundary_id: &str) -> String {
95        let boundary_marker = format!("<!-- agent:boundary:{} -->", boundary_id);
96        let content_region = &doc[self.open_end..self.close_start];
97        let code_ranges = find_code_ranges(doc);
98
99        // Search for boundary marker, skipping matches inside code blocks
100        let mut search_from = 0;
101        let found_pos = loop {
102            match content_region[search_from..].find(&boundary_marker) {
103                Some(rel_pos) => {
104                    let abs_pos = self.open_end + search_from + rel_pos;
105                    if code_ranges.iter().any(|&(cs, ce)| abs_pos >= cs && abs_pos < ce) {
106                        // Inside a code block — skip and keep searching
107                        search_from += rel_pos + boundary_marker.len();
108                        continue;
109                    }
110                    break Some(abs_pos);
111                }
112                None => break None,
113            }
114        };
115
116        if let Some(abs_pos) = found_pos {
117            // Find start of the line containing the marker
118            let line_start = doc[..abs_pos]
119                .rfind('\n')
120                .map(|i| i + 1)
121                .unwrap_or(self.open_end)
122                .max(self.open_end);
123
124            // Find end of the marker line (including trailing newline)
125            let marker_end = abs_pos + boundary_marker.len();
126            let line_end = if marker_end < self.close_start
127                && doc.as_bytes().get(marker_end) == Some(&b'\n')
128            {
129                marker_end + 1
130            } else {
131                marker_end
132            };
133            let line_end = line_end.min(self.close_start);
134
135            // Replace the boundary marker with response content + new boundary.
136            // The boundary is consumed and re-inserted, matching the binary's
137            // post-patch behavior in apply_patches_with_overrides().
138            let new_id = uuid::Uuid::new_v4();
139            let new_marker = format!("<!-- agent:boundary:{} -->", new_id);
140            let mut result = String::with_capacity(doc.len() + content.len() + new_marker.len());
141            result.push_str(&doc[..line_start]);
142            result.push_str(content.trim_end());
143            result.push('\n');
144            result.push_str(&new_marker);
145            result.push('\n');
146            result.push_str(&doc[line_end..]);
147            return result;
148        }
149
150        // Boundary not found — fall back to normal append
151        self.append_with_caret(doc, content, None)
152    }
153}
154
/// Reports whether `name` matches `[a-zA-Z0-9][a-zA-Z0-9-]*`.
///
/// The empty string is rejected, the first byte must be an ASCII letter or
/// digit, and every later byte must be an ASCII letter, digit, or `-`.
fn is_valid_name(name: &str) -> bool {
    let mut rest = name.bytes();
    match rest.next() {
        Some(head) if head.is_ascii_alphanumeric() => {
            rest.all(|b| b == b'-' || b.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
167
168/// True if the text inside `<!-- ... -->` is an agent component marker.
169///
170/// Matches `agent:NAME [attrs...]` (open) or `/agent:NAME` (close).
171pub fn is_agent_marker(comment_text: &str) -> bool {
172    let trimmed = comment_text.trim();
173    if let Some(rest) = trimmed.strip_prefix("/agent:") {
174        is_valid_name(rest)
175    } else if let Some(rest) = trimmed.strip_prefix("agent:") {
176        // Opening marker may have attributes after the name: `agent:NAME key=value`
177        let name_part = rest.split_whitespace().next().unwrap_or("");
178        is_valid_name(name_part)
179    } else {
180        false
181    }
182}
183
/// Collects the `key=value` attributes that follow `agent:NAME ` in an opening marker.
///
/// The input is split on whitespace and each token is split at its first `=`.
/// Tokens with no `=`, an empty key, or an empty value are dropped. Values are
/// taken verbatim (no quoting support). When a key occurs more than once the
/// last occurrence wins.
fn parse_attrs(attr_text: &str) -> HashMap<String, String> {
    attr_text
        .split_whitespace()
        .filter_map(|token| token.split_once('='))
        .filter(|(key, value)| !key.is_empty() && !value.is_empty())
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}
200
201/// Find byte ranges of code regions (fenced code blocks + inline code spans).
202/// Markers inside these ranges are treated as literal text, not component markers.
203///
204/// Uses `pulldown-cmark` AST parsing with `offset_iter()` to accurately detect
205/// code regions per the CommonMark spec.
206pub fn find_code_ranges(doc: &str) -> Vec<(usize, usize)> {
207    let t = std::time::Instant::now();
208    let mut ranges = Vec::new();
209    let parser = Parser::new_ext(doc, Options::empty());
210    let mut iter = parser.into_offset_iter();
211    while let Some((event, range)) = iter.next() {
212        match event {
213            // Inline code span: `code` or ``code``
214            Event::Code(_) => {
215                ranges.push((range.start, range.end));
216            }
217            // Fenced or indented code block: consume until End(CodeBlock)
218            Event::Start(Tag::CodeBlock(_)) => {
219                let block_start = range.start;
220                let mut block_end = range.end;
221                for (inner_event, inner_range) in iter.by_ref() {
222                    block_end = inner_range.end;
223                    if matches!(inner_event, Event::End(TagEnd::CodeBlock)) {
224                        break;
225                    }
226                }
227                ranges.push((block_start, block_end));
228            }
229            _ => {}
230        }
231    }
232    let elapsed = t.elapsed().as_millis();
233    if elapsed > 0 {
234        eprintln!("[perf] find_code_ranges: {}ms", elapsed);
235    }
236    ranges
237}
238
239/// Parse all components from a document.
240///
241/// Uses a stack for nesting. Returns components sorted by `open_start`.
242/// Errors on unmatched open/close markers or invalid names.
243/// Skips markers inside fenced code blocks and inline code spans.
244pub fn parse(doc: &str) -> Result<Vec<Component>> {
245    let bytes = doc.as_bytes();
246    let len = bytes.len();
247    let code_ranges = find_code_ranges(doc);
248    let mut templates: Vec<Component> = Vec::new();
249    // Stack of (name, attrs, open_start, open_end)
250    let mut stack: Vec<(String, HashMap<String, String>, usize, usize)> = Vec::new();
251    let mut pos = 0;
252
253    while pos + 4 <= len {
254        // Look for `<!--`
255        if &bytes[pos..pos + 4] != b"<!--" {
256            pos += 1;
257            continue;
258        }
259
260        // Skip markers inside code regions
261        if code_ranges.iter().any(|&(start, end)| pos >= start && pos < end) {
262            pos += 4;
263            continue;
264        }
265
266        let marker_start = pos;
267
268        // Find closing `-->`
269        let close = match find_comment_end(bytes, pos + 4) {
270            Some(c) => c,
271            None => {
272                pos += 4;
273                continue;
274            }
275        };
276
277        // close points to the byte after `>`
278        let inner = &doc[marker_start + 4..close - 3]; // between `<!--` and `-->`
279        let trimmed = inner.trim();
280
281        // Determine end offset — consume trailing newline if present
282        let mut marker_end = close;
283        if marker_end < len && bytes[marker_end] == b'\n' {
284            marker_end += 1;
285        }
286
287        if let Some(name) = trimmed.strip_prefix("/agent:") {
288            // Closing marker
289            if !is_valid_name(name) {
290                bail!("invalid component name: '{}'", name);
291            }
292            match stack.pop() {
293                Some((open_name, open_attrs, open_start, open_end)) => {
294                    if open_name != name {
295                        bail!(
296                            "mismatched component: opened '{}' but closed '{}'",
297                            open_name,
298                            name
299                        );
300                    }
301                    templates.push(Component {
302                        name: name.to_string(),
303                        attrs: open_attrs,
304                        open_start,
305                        open_end,
306                        close_start: marker_start,
307                        close_end: marker_end,
308                    });
309                }
310                None => bail!("closing marker <!-- /agent:{} --> without matching open", name),
311            }
312        } else if let Some(rest) = trimmed.strip_prefix("agent:") {
313            // Skip boundary markers — these are not component markers
314            if rest.starts_with("boundary:") {
315                pos = close;
316                continue;
317            }
318            // Opening marker — may have attributes: `agent:NAME key=value`
319            let mut parts = rest.splitn(2, |c: char| c.is_whitespace());
320            let name = parts.next().unwrap_or("");
321            let attr_text = parts.next().unwrap_or("");
322            if !is_valid_name(name) {
323                bail!("invalid component name: '{}'", name);
324            }
325            let attrs = parse_attrs(attr_text);
326            stack.push((name.to_string(), attrs, marker_start, marker_end));
327        }
328
329        pos = close;
330    }
331
332    if let Some((name, _, _, _)) = stack.last() {
333        bail!(
334            "unclosed component: <!-- agent:{} --> without matching close",
335            name
336        );
337    }
338
339    templates.sort_by_key(|t| t.open_start);
340    Ok(templates)
341}
342
/// Search `bytes` from offset `start` for the first `-->` and return the byte
/// offset just past its `>`.
///
/// Returns `None` when no terminator occurs at or after `start`, including
/// when `start` lies beyond the end of `bytes`.
pub(crate) fn find_comment_end(bytes: &[u8], start: usize) -> Option<usize> {
    const TERMINATOR: &[u8] = b"-->";

    let tail = bytes.get(start..)?;
    tail.windows(TERMINATOR.len())
        .position(|window| window == TERMINATOR)
        .map(|offset| start + offset + TERMINATOR.len())
}
355
// Unit tests for component parsing, code-region detection, inline attributes,
// and boundary-based appending.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn single_range() {
        let doc = "before\n<!-- agent:status -->\nHello\n<!-- /agent:status -->\nafter\n";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "status");
        assert_eq!(ranges[0].content(doc), "Hello\n");
    }

    #[test]
    fn nested_ranges() {
        let doc = "\
<!-- agent:outer -->
<!-- agent:inner -->
content
<!-- /agent:inner -->
<!-- /agent:outer -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 2);
        // Sorted by open_start — outer first
        assert_eq!(ranges[0].name, "outer");
        assert_eq!(ranges[1].name, "inner");
        assert_eq!(ranges[1].content(doc), "content\n");
    }

    #[test]
    fn siblings() {
        let doc = "\
<!-- agent:a -->
alpha
<!-- /agent:a -->
<!-- agent:b -->
beta
<!-- /agent:b -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 2);
        assert_eq!(ranges[0].name, "a");
        assert_eq!(ranges[0].content(doc), "alpha\n");
        assert_eq!(ranges[1].name, "b");
        assert_eq!(ranges[1].content(doc), "beta\n");
    }

    #[test]
    fn no_ranges() {
        let doc = "# Just a document\n\nWith no range templates.\n";
        let ranges = parse(doc).unwrap();
        assert!(ranges.is_empty());
    }

    #[test]
    fn unmatched_open_error() {
        let doc = "<!-- agent:orphan -->\nContent\n";
        let err = parse(doc).unwrap_err();
        assert!(err.to_string().contains("unclosed component"));
    }

    #[test]
    fn unmatched_close_error() {
        let doc = "Content\n<!-- /agent:orphan -->\n";
        let err = parse(doc).unwrap_err();
        assert!(err.to_string().contains("without matching open"));
    }

    #[test]
    fn mismatched_names_error() {
        let doc = "<!-- agent:foo -->\n<!-- /agent:bar -->\n";
        let err = parse(doc).unwrap_err();
        assert!(err.to_string().contains("mismatched"));
    }

    #[test]
    fn invalid_name() {
        let doc = "<!-- agent:-bad -->\n<!-- /agent:-bad -->\n";
        let err = parse(doc).unwrap_err();
        assert!(err.to_string().contains("invalid component name"));
    }

    #[test]
    fn name_validation() {
        assert!(is_valid_name("status"));
        assert!(is_valid_name("my-section"));
        assert!(is_valid_name("a1"));
        assert!(is_valid_name("A"));
        assert!(!is_valid_name(""));
        assert!(!is_valid_name("-bad"));
        assert!(!is_valid_name("has space"));
        assert!(!is_valid_name("has_underscore"));
    }

    #[test]
    fn content_extraction() {
        let doc = "<!-- agent:x -->\nfoo\nbar\n<!-- /agent:x -->\n";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges[0].content(doc), "foo\nbar\n");
    }

    #[test]
    fn replace_roundtrip() {
        let doc = "before\n<!-- agent:s -->\nold\n<!-- /agent:s -->\nafter\n";
        let ranges = parse(doc).unwrap();
        let new_doc = ranges[0].replace_content(doc, "new\n");
        assert_eq!(
            new_doc,
            "before\n<!-- agent:s -->\nnew\n<!-- /agent:s -->\nafter\n"
        );
        // Re-parse should work
        let ranges2 = parse(&new_doc).unwrap();
        assert_eq!(ranges2.len(), 1);
        assert_eq!(ranges2[0].content(&new_doc), "new\n");
    }

    #[test]
    fn is_agent_marker_yes() {
        assert!(is_agent_marker(" agent:status "));
        assert!(is_agent_marker("/agent:status"));
        assert!(is_agent_marker("agent:my-thing"));
        assert!(is_agent_marker(" /agent:A1 "));
    }

    #[test]
    fn is_agent_marker_no() {
        assert!(!is_agent_marker("just a comment"));
        assert!(!is_agent_marker("agent:"));
        assert!(!is_agent_marker("/agent:"));
        assert!(!is_agent_marker("agent:-bad"));
        assert!(!is_agent_marker("some agent:fake stuff"));
    }

    #[test]
    fn regular_comments_ignored() {
        let doc = "<!-- just a comment -->\n<!-- agent:x -->\ndata\n<!-- /agent:x -->\n";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "x");
    }

    #[test]
    fn multiline_comment_ignored() {
        let doc = "\
<!--
multi
line
comment
-->
<!-- agent:s -->
content
<!-- /agent:s -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "s");
    }

    #[test]
    fn empty_content() {
        let doc = "<!-- agent:empty --><!-- /agent:empty -->\n";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].content(doc), "");
    }

    #[test]
    fn markers_in_fenced_code_block_ignored() {
        let doc = "\
<!-- agent:real -->
content
<!-- /agent:real -->
```markdown
<!-- agent:fake -->
this is just an example
<!-- /agent:fake -->
```
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "real");
    }

    #[test]
    fn markers_in_inline_code_ignored() {
        let doc = "\
Use `<!-- agent:example -->` markers for components.
<!-- agent:real -->
content
<!-- /agent:real -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "real");
    }

    #[test]
    fn markers_in_tilde_fence_ignored() {
        let doc = "\
<!-- agent:x -->
data
<!-- /agent:x -->
~~~
<!-- agent:y -->
example
<!-- /agent:y -->
~~~
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "x");
    }

    #[test]
    fn markers_in_indented_fenced_code_block_ignored() {
        // CommonMark allows up to 3 spaces before fence opener
        let doc = "\
<!-- agent:exchange -->
Content here.
<!-- /agent:exchange -->

  ```markdown
  <!-- agent:fake -->
  demo without closing tag
  ```
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "exchange");
    }

    #[test]
    fn indented_fence_inside_component_ignored() {
        // A fence indented by three spaces inside a component: the unclosed
        // opening marker it contains should not cause mismatched errors
        let doc = "\
<!-- agent:exchange -->
Here's how to set up:

   ```markdown
   <!-- agent:status -->
   Your status here
   ```

Done explaining.
<!-- /agent:exchange -->
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "exchange");
    }

    #[test]
    fn deeply_indented_fence_ignored() {
        // Six spaces of indentation. NOTE(review): under CommonMark this is
        // presumably treated as an indented code block rather than a fence;
        // either way the marker inside must be ignored.
        let doc = "\
<!-- agent:x -->
ok
<!-- /agent:x -->
      ```
      <!-- agent:y -->
      inside fence
      ```
";
        let ranges = parse(doc).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].name, "x");
    }

    #[test]
    fn indented_fence_code_ranges_detected() {
        let doc = "before\n  ```\n  code\n  ```\nafter\n";
        let ranges = find_code_ranges(doc);
        assert_eq!(ranges.len(), 1);
        assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
    }

    #[test]
    fn code_ranges_detected() {
        let doc = "before\n```\ncode\n```\nafter `inline` end\n";
        let ranges = find_code_ranges(doc);
        assert_eq!(ranges.len(), 2);
        // Fenced block
        assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
        // Inline span
        assert!(doc[ranges[1].0..ranges[1].1].contains("inline"));
    }

    #[test]
    fn code_ranges_double_backtick() {
        // CommonMark: `` `<!--` `` is a code span containing `<!--`
        let doc = "text `` `<!--` `` more\n";
        let ranges = find_code_ranges(doc);
        assert_eq!(ranges.len(), 1);
        let span = &doc[ranges[0].0..ranges[0].1];
        assert!(span.contains("<!--"), "double-backtick span should contain <!--: {:?}", span);
    }

    #[test]
    fn code_ranges_double_backtick_does_not_match_single() {
        // `` should not match a single ` close
        let doc = "text `` foo ` bar `` end\n";
        let ranges = find_code_ranges(doc);
        assert_eq!(ranges.len(), 1);
        let span = &doc[ranges[0].0..ranges[0].1];
        assert_eq!(span, "`` foo ` bar ``");
    }

    #[test]
    fn double_backtick_comment_before_agent_marker() {
        // Regression: `` `<!--` `` followed by agent marker should not confuse the parser
        let doc = "\
<!-- agent:exchange -->\n\
text `` `<!--` `` description\n\
new content here\n\
<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "exchange");
        assert!(components[0].content(doc).contains("new content here"));
    }

    // --- Inline attribute tests ---

    #[test]
    fn parse_component_with_mode_attr() {
        let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "exchange");
        assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("append"));
        assert_eq!(components[0].content(doc), "Content\n");
    }

    #[test]
    fn parse_component_with_multiple_attrs() {
        let doc = "<!-- agent:log mode=prepend timestamp=true -->\nData\n<!-- /agent:log -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "log");
        assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("prepend"));
        assert_eq!(components[0].attrs.get("timestamp").map(|s| s.as_str()), Some("true"));
    }

    #[test]
    fn parse_component_no_attrs_backward_compat() {
        let doc = "<!-- agent:status -->\nOK\n<!-- /agent:status -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "status");
        assert!(components[0].attrs.is_empty());
    }

    #[test]
    fn is_agent_marker_with_attrs() {
        assert!(is_agent_marker(" agent:exchange mode=append "));
        assert!(is_agent_marker("agent:status mode=replace"));
        assert!(is_agent_marker("agent:log mode=prepend timestamp=true"));
    }

    #[test]
    fn closing_tag_unchanged_with_attrs() {
        // Closing tags never have attributes
        let doc = "<!-- agent:status mode=replace -->\n- [x] Done\n<!-- /agent:status -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        let new_doc = components[0].replace_content(doc, "- [ ] Todo\n");
        assert!(new_doc.contains("<!-- agent:status mode=replace -->"));
        assert!(new_doc.contains("<!-- /agent:status -->"));
        assert!(new_doc.contains("- [ ] Todo"));
    }

    #[test]
    fn parse_component_with_patch_attr() {
        let doc = "<!-- agent:exchange patch=append -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "exchange");
        assert_eq!(components[0].patch_mode(), Some("append"));
        assert_eq!(components[0].content(doc), "Content\n");
    }

    #[test]
    fn patch_attr_takes_precedence_over_mode() {
        let doc = "<!-- agent:exchange patch=replace mode=append -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components[0].patch_mode(), Some("replace"));
    }

    #[test]
    fn mode_attr_backward_compat() {
        let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components[0].patch_mode(), Some("append"));
    }

    #[test]
    fn no_patch_or_mode_attr() {
        let doc = "<!-- agent:exchange -->\nContent\n<!-- /agent:exchange -->\n";
        let components = parse(doc).unwrap();
        assert_eq!(components[0].patch_mode(), None);
    }

    // --- Inline backtick code span exclusion tests ---

    #[test]
    fn single_backtick_component_tag_ignored() {
        // A component tag wrapped in single backticks should not be parsed
        let doc = "\
Use `<!-- agent:pending patch=replace -->` to mark pending sections.
<!-- agent:real -->
content
<!-- /agent:real -->
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "real");
    }

    #[test]
    fn double_backtick_component_tag_ignored() {
        // A component tag wrapped in double backticks should not be parsed
        let doc = "\
Use ``<!-- agent:pending patch=replace -->`` to mark pending sections.
<!-- agent:real -->
content
<!-- /agent:real -->
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "real");
    }

    #[test]
    fn component_tags_not_in_backticks_still_work() {
        // Tags outside of any backticks are parsed normally
        let doc = "\
<!-- agent:a -->
alpha
<!-- /agent:a -->
<!-- agent:b patch=append -->
beta
<!-- /agent:b -->
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 2);
        assert_eq!(components[0].name, "a");
        assert_eq!(components[1].name, "b");
        assert_eq!(components[1].patch_mode(), Some("append"));
    }

    #[test]
    fn mixed_backtick_and_real_tags() {
        // Some tags in backticks (ignored), some not (parsed)
        let doc = "\
Here is an example: `<!-- agent:fake -->` and ``<!-- /agent:fake -->``.
<!-- agent:real -->
real content
<!-- /agent:real -->
Another example: `<!-- agent:also-fake patch=replace -->` is just documentation.
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "real");
        assert_eq!(components[0].content(doc), "real content\n");
    }

    #[test]
    fn inline_code_mid_line_with_surrounding_text_ignored() {
        // Edge case: component tag inside inline code span on a line with other content
        // before and after — must not be parsed as a real component marker.
        let doc = "\
Wrap markers like `<!-- agent:status -->` in backticks to show them literally.
<!-- agent:real -->
actual content
<!-- /agent:real -->
";
        let components = parse(doc).unwrap();
        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "real");
        assert_eq!(components[0].content(doc), "actual content\n");
    }

    #[test]
    fn parse_attrs_unit() {
        let attrs = parse_attrs("mode=append");
        assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));

        let attrs = parse_attrs("mode=replace timestamp=true");
        assert_eq!(attrs.len(), 2);

        let attrs = parse_attrs("");
        assert!(attrs.is_empty());

        // Malformed tokens (no `=`, or an empty value after `=`) are ignored
        let attrs = parse_attrs("mode=append broken novalue=");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));
    }

    #[test]
    fn append_with_boundary_skips_code_block() {
        // Boundary marker inside a code block should be ignored;
        // the real marker outside should be used.
        let boundary_id = "real-uuid";
        let doc = format!(
            "<!-- agent:exchange patch=append -->\n\
             user prompt\n\
             ```\n\
             <!-- agent:boundary:{boundary_id} -->\n\
             ```\n\
             more user text\n\
             <!-- agent:boundary:{boundary_id} -->\n\
             <!-- /agent:exchange -->\n"
        );
        let components = parse(&doc).unwrap();
        let comp = &components[0];
        let result = comp.append_with_boundary(&doc, "### Re: Response\n\nContent here.", boundary_id);

        // Response should replace the REAL marker (outside code block),
        // not the one inside the code block.
        assert!(result.contains("### Re: Response"));
        assert!(result.contains("more user text"));
        // The code block example should be preserved
        assert!(result.contains(&format!("<!-- agent:boundary:{boundary_id} -->\n```")));
        // The real marker should be consumed (replaced by response)
        assert!(!result.contains(&format!("more user text\n<!-- agent:boundary:{boundary_id} -->\n<!-- /agent:exchange -->")));
    }

    #[test]
    fn append_with_boundary_no_code_block() {
        // Normal case: boundary marker not in a code block
        let boundary_id = "simple-uuid";
        let doc = format!(
            "<!-- agent:exchange patch=append -->\n\
             user prompt\n\
             <!-- agent:boundary:{boundary_id} -->\n\
             <!-- /agent:exchange -->\n"
        );
        let components = parse(&doc).unwrap();
        let comp = &components[0];
        let result = comp.append_with_boundary(&doc, "### Re: Answer\n\nDone.", boundary_id);

        assert!(result.contains("### Re: Answer"));
        assert!(result.contains("user prompt"));
        // Original marker should be consumed, but a NEW boundary re-inserted
        assert!(!result.contains(&format!("agent:boundary:{boundary_id}")));
        assert!(result.contains("agent:boundary:"));
    }
}
901        // Original marker should be consumed, but a NEW boundary re-inserted
902        assert!(!result.contains(&format!("agent:boundary:{boundary_id}")));
903        assert!(result.contains("agent:boundary:"));
904    }
905}