Skip to main content

agent_doc/
component.rs

1//! # Module: component
2//!
3//! ## Spec
4//! - Defines `Component`, the parsed representation of a bounded document region delimited by
5//!   `<!-- agent:name [attrs...] -->` (open) and `<!-- /agent:name -->` (close) HTML comments.
6//! - `parse(doc)` scans the raw document bytes for `<!--` / `-->` comment pairs, builds a
7//!   stack-based nesting model, and returns all `Component` values sorted by `open_start`.
8//! - Markers that appear inside fenced code blocks (backtick or tilde) or inline code spans are
9//!   skipped; `find_code_ranges(doc)` uses the `pulldown-cmark` AST (CommonMark-compliant) to
10//!   locate these regions.
11//! - `is_agent_marker(comment_text)` classifies whether the inner text of a comment is an
12//!   agent open/close marker vs. an ordinary HTML comment.
13//! - Component names must match `[a-zA-Z0-9][a-zA-Z0-9-]*`; invalid names, unmatched opens,
14//!   unmatched closes, and mismatched open/close pairs all return `Err`.
15//! - `boundary:*` prefixed markers (`<!-- agent:boundary:ID -->`) are recognised and skipped
16//!   during component parsing; they are not treated as component open tags.
17//! - Opening markers may carry space-separated `key=value` inline attributes
18//!   (e.g., `patch=append`). `patch=` takes precedence over the legacy `mode=` alias;
19//!   `patch_mode()` encapsulates this lookup.
20//! - Marker `open_end` / `close_end` byte offsets include a trailing newline when present,
21//!   so that content slices are clean.
22//! - `replace_content(doc, new_content)` rebuilds the document preserving both markers,
23//!   replacing only the bytes between them.
24//! - `append_with_caret(doc, content, caret_offset)` appends content after existing text, or
25//!   inserts before the caret line when the caret falls inside the component.
26//! - `append_with_boundary(doc, content, boundary_id)` locates `<!-- agent:boundary:ID -->`
27//!   inside the component (skipping any occurrence that lives inside a code block), replaces it
28//!   with the new content, and re-inserts a fresh boundary marker; falls back to
29//!   `append_with_caret` when no boundary is found.
30//!
31//! ## Agentic Contracts
32//! - `parse` is pure and deterministic: identical input always yields identical output.
33//! - All byte offsets (`open_start`, `open_end`, `close_start`, `close_end`) are valid UTF-8
34//!   char boundaries within the document string they were parsed from.
35//! - `content(doc)` returns exactly `&doc[open_end..close_start]` — no allocation.
36//! - `replace_content` and `append_with_*` never mutate the original `&str`; they return a
37//!   new `String` with all offsets consistent for a fresh `parse` call.
38//! - Markers inside any code region (fenced block or inline span) are never parsed as
39//!   components and never mutated by any append/replace operation.
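//! - When the boundary is found, `append_with_boundary` re-inserts a boundary marker with a
//!   fresh UUID, so exactly one live boundary exists inside the component after the write.
//!   When the boundary is not found it falls back to `append_with_caret`, which inserts no
//!   boundary marker.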
42//! - Unknown or malformed `key=value` tokens in inline attributes are silently discarded;
43//!   they never cause a parse error.
44//!
45//! ## Evals
46//! - single_range: doc with one component → one `Component`, correct name and content slice
47//! - nested_ranges: outer + inner components → two entries sorted outer-first
48//! - siblings: two adjacent components → two entries, each with correct content
49//! - no_ranges: plain markdown → empty vec, no error
50//! - unmatched_open_error: open without close → `Err` containing "unclosed component"
51//! - unmatched_close_error: close without open → `Err` containing "without matching open"
52//! - mismatched_names_error: `<!-- agent:foo -->…<!-- /agent:bar -->` → `Err` "mismatched"
53//! - invalid_name: name starting with `-` → `Err` "invalid component name"
54//! - markers_in_fenced_code_block_ignored: marker inside ``` block → not parsed as component
55//! - markers_in_inline_code_ignored: marker inside `` `…` `` span → not parsed
56//! - markers_in_tilde_fence_ignored: marker inside ~~~ block → not parsed
57//! - markers_in_indented_fenced_code_block_ignored: up-to-3-space-indented fence → not parsed
58//! - double_backtick_comment_before_agent_marker: `` `<!--` `` followed by real marker → one component
59//! - parse_component_with_patch_attr: `patch=append` on opening tag → `patch_mode()` = "append"
60//! - patch_attr_takes_precedence_over_mode: both `patch=` and `mode=` present → `patch=` wins
61//! - mode_attr_backward_compat: only `mode=append` present → `patch_mode()` = "append"
62//! - replace_roundtrip: replace content, re-parse → one component with new content
63//! - append_with_boundary_no_code_block: boundary found → content inserted, old ID consumed, new boundary present
64//! - append_with_boundary_skips_code_block: boundary inside code block skipped, real boundary used
65
66use anyhow::{bail, Result};
67use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
68use std::collections::HashMap;
69
/// A parsed component in a document.
///
/// Components are bounded regions marked by `<!-- agent:name -->...<!-- /agent:name -->`.
/// Opening tags may contain inline attributes: `<!-- agent:name key=value -->`.
///
/// The struct stores only the name, the attributes and four byte offsets; it does not
/// borrow or own the document text. The offsets are meaningful only for the exact
/// document string the component was parsed from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Component {
    /// Component name taken from the marker (the `name` in `agent:name`).
    pub name: String,
    /// Inline attributes parsed from the opening tag (e.g., `patch=append`).
    /// Empty when the opening tag carries no `key=value` tokens.
    pub attrs: HashMap<String, String>,
    /// Byte offset of `<` in opening marker.
    pub open_start: usize,
    /// Byte offset past `>` in opening marker (includes trailing newline if present).
    /// This is where the component's content begins.
    pub open_end: usize,
    /// Byte offset of `<` in closing marker.
    /// This is where the component's content ends (exclusive).
    pub close_start: usize,
    /// Byte offset past `>` in closing marker (includes trailing newline if present).
    pub close_end: usize,
}
88
89impl Component {
90    /// Extract the content between the opening and closing markers.
91    #[allow(dead_code)] // public API — used by tests and future consumers
92    pub fn content<'a>(&self, doc: &'a str) -> &'a str {
93        &doc[self.open_end..self.close_start]
94    }
95
96    /// Get the patch mode from inline attributes.
97    ///
98    /// Checks `patch=` first, falls back to `mode=` for backward compatibility.
99    pub fn patch_mode(&self) -> Option<&str> {
100        self.attrs.get("patch").map(|s| s.as_str())
101            .or_else(|| self.attrs.get("mode").map(|s| s.as_str()))
102    }
103
104    /// Replace the content between markers, returning the new document.
105    /// The markers themselves are preserved.
106    pub fn replace_content(&self, doc: &str, new_content: &str) -> String {
107        let mut result = String::with_capacity(doc.len() + new_content.len());
108        result.push_str(&doc[..self.open_end]);
109        result.push_str(new_content);
110        result.push_str(&doc[self.close_start..]);
111        result
112    }
113
114    /// Append content into this component, inserting before the caret position
115    /// if the caret is inside the component. Falls back to normal append if the
116    /// caret is outside the component.
117    ///
118    /// `caret_offset`: byte offset of the caret in the document. Pass `None` for
119    /// normal append behavior.
120    pub fn append_with_caret(&self, doc: &str, content: &str, caret_offset: Option<usize>) -> String {
121        let existing = &doc[self.open_end..self.close_start];
122
123        if let Some(caret) = caret_offset {
124            // Check if caret is inside this component
125            if caret > self.open_end && caret <= self.close_start {
126                // Find the line boundary before the caret
127                let insert_at = doc[..caret].rfind('\n')
128                    .map(|i| i + 1)
129                    .unwrap_or(self.open_end);
130
131                // Clamp to component bounds
132                let insert_at = insert_at.max(self.open_end);
133
134                let mut result = String::with_capacity(doc.len() + content.len() + 1);
135                result.push_str(&doc[..insert_at]);
136                result.push_str(content.trim_end());
137                result.push('\n');
138                result.push_str(&doc[insert_at..]);
139                return result;
140            }
141        }
142
143        // Normal append: add after existing content
144        let mut result = String::with_capacity(doc.len() + content.len() + 1);
145        result.push_str(&doc[..self.open_end]);
146        result.push_str(existing.trim_end());
147        result.push('\n');
148        result.push_str(content.trim_end());
149        result.push('\n');
150        result.push_str(&doc[self.close_start..]);
151        result
152    }
153
154    /// Append content into this component at the boundary marker position.
155    ///
156    /// Finds `<!-- agent:boundary:ID -->` inside the component. If found,
157    /// inserts content at the line start of the boundary marker (replacing
158    /// the marker). Falls back to normal append if the boundary is not found.
159    pub fn append_with_boundary(&self, doc: &str, content: &str, boundary_id: &str) -> String {
160        let boundary_marker = format!("<!-- agent:boundary:{} -->", boundary_id);
161        let content_region = &doc[self.open_end..self.close_start];
162        let code_ranges = find_code_ranges(doc);
163
164        // Search for boundary marker, skipping matches inside code blocks
165        let mut search_from = 0;
166        let found_pos = loop {
167            match content_region[search_from..].find(&boundary_marker) {
168                Some(rel_pos) => {
169                    let abs_pos = self.open_end + search_from + rel_pos;
170                    if code_ranges.iter().any(|&(cs, ce)| abs_pos >= cs && abs_pos < ce) {
171                        // Inside a code block — skip and keep searching
172                        search_from += rel_pos + boundary_marker.len();
173                        continue;
174                    }
175                    break Some(abs_pos);
176                }
177                None => break None,
178            }
179        };
180
181        if let Some(abs_pos) = found_pos {
182            // Find start of the line containing the marker
183            let line_start = doc[..abs_pos]
184                .rfind('\n')
185                .map(|i| i + 1)
186                .unwrap_or(self.open_end)
187                .max(self.open_end);
188
189            // Find end of the marker line (including trailing newline)
190            let marker_end = abs_pos + boundary_marker.len();
191            let line_end = if marker_end < self.close_start
192                && doc.as_bytes().get(marker_end) == Some(&b'\n')
193            {
194                marker_end + 1
195            } else {
196                marker_end
197            };
198            let line_end = line_end.min(self.close_start);
199
200            // Replace the boundary marker with response content + new boundary.
201            // The boundary is consumed and re-inserted, matching the binary's
202            // post-patch behavior in apply_patches_with_overrides().
203            let new_id = crate::new_boundary_id();
204            let new_marker = crate::format_boundary_marker(&new_id);
205            let mut result = String::with_capacity(doc.len() + content.len() + new_marker.len());
206            result.push_str(&doc[..line_start]);
207            result.push_str(content.trim_end());
208            result.push('\n');
209            result.push_str(&new_marker);
210            result.push('\n');
211            result.push_str(&doc[line_end..]);
212            return result;
213        }
214
215        // Boundary not found — fall back to normal append
216        self.append_with_caret(doc, content, None)
217    }
218}
219
/// Checks a component name against `[a-zA-Z0-9][a-zA-Z0-9-]*`.
///
/// The name must be non-empty, start with an ASCII letter or digit, and contain only
/// ASCII letters, digits and `-` after that. Any non-ASCII byte makes it invalid.
fn is_valid_name(name: &str) -> bool {
    let is_name_byte = |b: &u8| b.is_ascii_alphanumeric() || *b == b'-';
    match name.as_bytes().split_first() {
        Some((head, tail)) => head.is_ascii_alphanumeric() && tail.iter().all(is_name_byte),
        None => false,
    }
}
232
233/// True if the text inside `<!-- ... -->` is an agent component marker.
234///
235/// Matches `agent:NAME [attrs...]` (open) or `/agent:NAME` (close).
236pub fn is_agent_marker(comment_text: &str) -> bool {
237    let trimmed = comment_text.trim();
238    if let Some(rest) = trimmed.strip_prefix("/agent:") {
239        is_valid_name(rest)
240    } else if let Some(rest) = trimmed.strip_prefix("agent:") {
241        // Opening marker may have attributes after the name: `agent:NAME key=value`
242        let name_part = rest.split_whitespace().next().unwrap_or("");
243        is_valid_name(name_part)
244    } else {
245        false
246    }
247}
248
/// Extracts inline attributes from the text that follows `agent:NAME ` in an opening
/// marker.
///
/// The text is split on whitespace and each token is split at its first `=`. Tokens
/// without `=`, or with an empty key or empty value, are ignored. Values are taken
/// verbatim (no quoting), so they cannot contain whitespace. When a key occurs more
/// than once, the last occurrence wins.
fn parse_attrs(attr_text: &str) -> HashMap<String, String> {
    attr_text
        .split_whitespace()
        .filter_map(|token| token.split_once('='))
        .filter(|(key, value)| !key.is_empty() && !value.is_empty())
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}
265
266/// Find byte ranges of code regions (fenced code blocks + inline code spans).
267/// Markers inside these ranges are treated as literal text, not component markers.
268///
269/// Uses `pulldown-cmark` AST parsing with `offset_iter()` to accurately detect
270/// code regions per the CommonMark spec.
271pub fn find_code_ranges(doc: &str) -> Vec<(usize, usize)> {
272    let t = std::time::Instant::now();
273    let mut ranges = Vec::new();
274    let parser = Parser::new_ext(doc, Options::empty());
275    let mut iter = parser.into_offset_iter();
276    while let Some((event, range)) = iter.next() {
277        match event {
278            // Inline code span: `code` or ``code``
279            Event::Code(_) => {
280                ranges.push((range.start, range.end));
281            }
282            // Fenced or indented code block: consume until End(CodeBlock)
283            Event::Start(Tag::CodeBlock(_)) => {
284                let block_start = range.start;
285                let mut block_end = range.end;
286                for (inner_event, inner_range) in iter.by_ref() {
287                    block_end = inner_range.end;
288                    if matches!(inner_event, Event::End(TagEnd::CodeBlock)) {
289                        break;
290                    }
291                }
292                ranges.push((block_start, block_end));
293            }
294            _ => {}
295        }
296    }
297    let elapsed = t.elapsed().as_millis();
298    if elapsed > 0 {
299        eprintln!("[perf] find_code_ranges: {}ms", elapsed);
300    }
301    ranges
302}
303
304/// Parse all components from a document.
305///
306/// Uses a stack for nesting. Returns components sorted by `open_start`.
307/// Errors on unmatched open/close markers or invalid names.
308/// Skips markers inside fenced code blocks and inline code spans.
309pub fn parse(doc: &str) -> Result<Vec<Component>> {
310    let bytes = doc.as_bytes();
311    let len = bytes.len();
312    let code_ranges = find_code_ranges(doc);
313    let mut templates: Vec<Component> = Vec::new();
314    // Stack of (name, attrs, open_start, open_end)
315    let mut stack: Vec<(String, HashMap<String, String>, usize, usize)> = Vec::new();
316    let mut pos = 0;
317
318    while pos + 4 <= len {
319        // Look for `<!--`
320        if &bytes[pos..pos + 4] != b"<!--" {
321            pos += 1;
322            continue;
323        }
324
325        // Skip markers inside code regions
326        if code_ranges.iter().any(|&(start, end)| pos >= start && pos < end) {
327            pos += 4;
328            continue;
329        }
330
331        let marker_start = pos;
332
333        // Find closing `-->`
334        let close = match find_comment_end(bytes, pos + 4) {
335            Some(c) => c,
336            None => {
337                pos += 4;
338                continue;
339            }
340        };
341
342        // close points to the byte after `>`
343        let inner = &doc[marker_start + 4..close - 3]; // between `<!--` and `-->`
344        let trimmed = inner.trim();
345
346        // Determine end offset — consume trailing newline if present
347        let mut marker_end = close;
348        if marker_end < len && bytes[marker_end] == b'\n' {
349            marker_end += 1;
350        }
351
352        if let Some(name) = trimmed.strip_prefix("/agent:") {
353            // Closing marker
354            if !is_valid_name(name) {
355                bail!("invalid component name: '{}'", name);
356            }
357            match stack.pop() {
358                Some((open_name, open_attrs, open_start, open_end)) => {
359                    if open_name != name {
360                        bail!(
361                            "mismatched component: opened '{}' but closed '{}'",
362                            open_name,
363                            name
364                        );
365                    }
366                    templates.push(Component {
367                        name: name.to_string(),
368                        attrs: open_attrs,
369                        open_start,
370                        open_end,
371                        close_start: marker_start,
372                        close_end: marker_end,
373                    });
374                }
375                None => bail!("closing marker <!-- /agent:{} --> without matching open", name),
376            }
377        } else if let Some(rest) = trimmed.strip_prefix("agent:") {
378            // Skip boundary markers — these are not component markers
379            if rest.starts_with("boundary:") {
380                pos = close;
381                continue;
382            }
383            // Opening marker — may have attributes: `agent:NAME key=value`
384            let mut parts = rest.splitn(2, |c: char| c.is_whitespace());
385            let name = parts.next().unwrap_or("");
386            let attr_text = parts.next().unwrap_or("");
387            if !is_valid_name(name) {
388                bail!("invalid component name: '{}'", name);
389            }
390            let attrs = parse_attrs(attr_text);
391            stack.push((name.to_string(), attrs, marker_start, marker_end));
392        }
393
394        pos = close;
395    }
396
397    if let Some((name, _, _, _)) = stack.last() {
398        bail!(
399            "unclosed component: <!-- agent:{} --> without matching close",
400            name
401        );
402    }
403
404    templates.sort_by_key(|t| t.open_start);
405    Ok(templates)
406}
407
/// Locates the first `-->` at or after byte offset `start` and returns the offset just
/// past its `>`.
///
/// Returns `None` when no `-->` follows, including when `start` lies beyond the end of
/// `bytes`.
pub(crate) fn find_comment_end(bytes: &[u8], start: usize) -> Option<usize> {
    const TERMINATOR: &[u8] = b"-->";
    bytes
        .get(start..)?
        .windows(TERMINATOR.len())
        .position(|window| window == TERMINATOR)
        .map(|offset| start + offset + TERMINATOR.len())
}
420
421#[cfg(test)]
422mod tests {
423    use super::*;
424
425    #[test]
426    fn single_range() {
427        let doc = "before\n<!-- agent:status -->\nHello\n<!-- /agent:status -->\nafter\n";
428        let ranges = parse(doc).unwrap();
429        assert_eq!(ranges.len(), 1);
430        assert_eq!(ranges[0].name, "status");
431        assert_eq!(ranges[0].content(doc), "Hello\n");
432    }
433
434    #[test]
435    fn nested_ranges() {
436        let doc = "\
437<!-- agent:outer -->
438<!-- agent:inner -->
439content
440<!-- /agent:inner -->
441<!-- /agent:outer -->
442";
443        let ranges = parse(doc).unwrap();
444        assert_eq!(ranges.len(), 2);
445        // Sorted by open_start — outer first
446        assert_eq!(ranges[0].name, "outer");
447        assert_eq!(ranges[1].name, "inner");
448        assert_eq!(ranges[1].content(doc), "content\n");
449    }
450
451    #[test]
452    fn siblings() {
453        let doc = "\
454<!-- agent:a -->
455alpha
456<!-- /agent:a -->
457<!-- agent:b -->
458beta
459<!-- /agent:b -->
460";
461        let ranges = parse(doc).unwrap();
462        assert_eq!(ranges.len(), 2);
463        assert_eq!(ranges[0].name, "a");
464        assert_eq!(ranges[0].content(doc), "alpha\n");
465        assert_eq!(ranges[1].name, "b");
466        assert_eq!(ranges[1].content(doc), "beta\n");
467    }
468
469    #[test]
470    fn no_ranges() {
471        let doc = "# Just a document\n\nWith no range templates.\n";
472        let ranges = parse(doc).unwrap();
473        assert!(ranges.is_empty());
474    }
475
476    #[test]
477    fn unmatched_open_error() {
478        let doc = "<!-- agent:orphan -->\nContent\n";
479        let err = parse(doc).unwrap_err();
480        assert!(err.to_string().contains("unclosed component"));
481    }
482
483    #[test]
484    fn unmatched_close_error() {
485        let doc = "Content\n<!-- /agent:orphan -->\n";
486        let err = parse(doc).unwrap_err();
487        assert!(err.to_string().contains("without matching open"));
488    }
489
490    #[test]
491    fn mismatched_names_error() {
492        let doc = "<!-- agent:foo -->\n<!-- /agent:bar -->\n";
493        let err = parse(doc).unwrap_err();
494        assert!(err.to_string().contains("mismatched"));
495    }
496
497    #[test]
498    fn invalid_name() {
499        let doc = "<!-- agent:-bad -->\n<!-- /agent:-bad -->\n";
500        let err = parse(doc).unwrap_err();
501        assert!(err.to_string().contains("invalid component name"));
502    }
503
504    #[test]
505    fn name_validation() {
506        assert!(is_valid_name("status"));
507        assert!(is_valid_name("my-section"));
508        assert!(is_valid_name("a1"));
509        assert!(is_valid_name("A"));
510        assert!(!is_valid_name(""));
511        assert!(!is_valid_name("-bad"));
512        assert!(!is_valid_name("has space"));
513        assert!(!is_valid_name("has_underscore"));
514    }
515
516    #[test]
517    fn content_extraction() {
518        let doc = "<!-- agent:x -->\nfoo\nbar\n<!-- /agent:x -->\n";
519        let ranges = parse(doc).unwrap();
520        assert_eq!(ranges[0].content(doc), "foo\nbar\n");
521    }
522
523    #[test]
524    fn replace_roundtrip() {
525        let doc = "before\n<!-- agent:s -->\nold\n<!-- /agent:s -->\nafter\n";
526        let ranges = parse(doc).unwrap();
527        let new_doc = ranges[0].replace_content(doc, "new\n");
528        assert_eq!(
529            new_doc,
530            "before\n<!-- agent:s -->\nnew\n<!-- /agent:s -->\nafter\n"
531        );
532        // Re-parse should work
533        let ranges2 = parse(&new_doc).unwrap();
534        assert_eq!(ranges2.len(), 1);
535        assert_eq!(ranges2[0].content(&new_doc), "new\n");
536    }
537
538    #[test]
539    fn is_agent_marker_yes() {
540        assert!(is_agent_marker(" agent:status "));
541        assert!(is_agent_marker("/agent:status"));
542        assert!(is_agent_marker("agent:my-thing"));
543        assert!(is_agent_marker(" /agent:A1 "));
544    }
545
546    #[test]
547    fn is_agent_marker_no() {
548        assert!(!is_agent_marker("just a comment"));
549        assert!(!is_agent_marker("agent:"));
550        assert!(!is_agent_marker("/agent:"));
551        assert!(!is_agent_marker("agent:-bad"));
552        assert!(!is_agent_marker("some agent:fake stuff"));
553    }
554
555    #[test]
556    fn regular_comments_ignored() {
557        let doc = "<!-- just a comment -->\n<!-- agent:x -->\ndata\n<!-- /agent:x -->\n";
558        let ranges = parse(doc).unwrap();
559        assert_eq!(ranges.len(), 1);
560        assert_eq!(ranges[0].name, "x");
561    }
562
563    #[test]
564    fn multiline_comment_ignored() {
565        let doc = "\
566<!--
567multi
568line
569comment
570-->
571<!-- agent:s -->
572content
573<!-- /agent:s -->
574";
575        let ranges = parse(doc).unwrap();
576        assert_eq!(ranges.len(), 1);
577        assert_eq!(ranges[0].name, "s");
578    }
579
580    #[test]
581    fn empty_content() {
582        let doc = "<!-- agent:empty --><!-- /agent:empty -->\n";
583        let ranges = parse(doc).unwrap();
584        assert_eq!(ranges.len(), 1);
585        assert_eq!(ranges[0].content(doc), "");
586    }
587
588    #[test]
589    fn markers_in_fenced_code_block_ignored() {
590        let doc = "\
591<!-- agent:real -->
592content
593<!-- /agent:real -->
594```markdown
595<!-- agent:fake -->
596this is just an example
597<!-- /agent:fake -->
598```
599";
600        let ranges = parse(doc).unwrap();
601        assert_eq!(ranges.len(), 1);
602        assert_eq!(ranges[0].name, "real");
603    }
604
605    #[test]
606    fn markers_in_inline_code_ignored() {
607        let doc = "\
608Use `<!-- agent:example -->` markers for components.
609<!-- agent:real -->
610content
611<!-- /agent:real -->
612";
613        let ranges = parse(doc).unwrap();
614        assert_eq!(ranges.len(), 1);
615        assert_eq!(ranges[0].name, "real");
616    }
617
618    #[test]
619    fn markers_in_tilde_fence_ignored() {
620        let doc = "\
621<!-- agent:x -->
622data
623<!-- /agent:x -->
624~~~
625<!-- agent:y -->
626example
627<!-- /agent:y -->
628~~~
629";
630        let ranges = parse(doc).unwrap();
631        assert_eq!(ranges.len(), 1);
632        assert_eq!(ranges[0].name, "x");
633    }
634
635    #[test]
636    fn markers_in_indented_fenced_code_block_ignored() {
637        // CommonMark allows up to 3 spaces before fence opener
638        let doc = "\
639<!-- agent:exchange -->
640Content here.
641<!-- /agent:exchange -->
642
643  ```markdown
644  <!-- agent:fake -->
645  demo without closing tag
646  ```
647";
648        let ranges = parse(doc).unwrap();
649        assert_eq!(ranges.len(), 1);
650        assert_eq!(ranges[0].name, "exchange");
651    }
652
653    #[test]
654    fn indented_fence_inside_component_ignored() {
655        // Indented code block inside a component should not cause mismatched errors
656        let doc = "\
657<!-- agent:exchange -->
658Here's how to set up:
659
660   ```markdown
661   <!-- agent:status -->
662   Your status here
663   ```
664
665Done explaining.
666<!-- /agent:exchange -->
667";
668        let ranges = parse(doc).unwrap();
669        assert_eq!(ranges.len(), 1);
670        assert_eq!(ranges[0].name, "exchange");
671    }
672
673    #[test]
674    fn deeply_indented_fence_ignored() {
675        // Tabs and many spaces should still be detected as a fence
676        let doc = "\
677<!-- agent:x -->
678ok
679<!-- /agent:x -->
680      ```
681      <!-- agent:y -->
682      inside fence
683      ```
684";
685        let ranges = parse(doc).unwrap();
686        assert_eq!(ranges.len(), 1);
687        assert_eq!(ranges[0].name, "x");
688    }
689
690    #[test]
691    fn indented_fence_code_ranges_detected() {
692        let doc = "before\n  ```\n  code\n  ```\nafter\n";
693        let ranges = find_code_ranges(doc);
694        assert_eq!(ranges.len(), 1);
695        assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
696    }
697
698    #[test]
699    fn code_ranges_detected() {
700        let doc = "before\n```\ncode\n```\nafter `inline` end\n";
701        let ranges = find_code_ranges(doc);
702        assert_eq!(ranges.len(), 2);
703        // Fenced block
704        assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
705        // Inline span
706        assert!(doc[ranges[1].0..ranges[1].1].contains("inline"));
707    }
708
709    #[test]
710    fn code_ranges_double_backtick() {
711        // CommonMark: `` `<!--` `` is a code span containing `<!--`
712        let doc = "text `` `<!--` `` more\n";
713        let ranges = find_code_ranges(doc);
714        assert_eq!(ranges.len(), 1);
715        let span = &doc[ranges[0].0..ranges[0].1];
716        assert!(span.contains("<!--"), "double-backtick span should contain <!--: {:?}", span);
717    }
718
719    #[test]
720    fn code_ranges_double_backtick_does_not_match_single() {
721        // `` should not match a single ` close
722        let doc = "text `` foo ` bar `` end\n";
723        let ranges = find_code_ranges(doc);
724        assert_eq!(ranges.len(), 1);
725        let span = &doc[ranges[0].0..ranges[0].1];
726        assert_eq!(span, "`` foo ` bar ``");
727    }
728
729    #[test]
730    fn double_backtick_comment_before_agent_marker() {
731        // Regression: `` `<!--` `` followed by agent marker should not confuse the parser
732        let doc = "\
733<!-- agent:exchange -->\n\
734text `` `<!--` `` description\n\
735new content here\n\
736<!-- /agent:exchange -->\n";
737        let components = parse(doc).unwrap();
738        assert_eq!(components.len(), 1);
739        assert_eq!(components[0].name, "exchange");
740        assert!(components[0].content(doc).contains("new content here"));
741    }
742
743    // --- Inline attribute tests ---
744
745    #[test]
746    fn parse_component_with_mode_attr() {
747        let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
748        let components = parse(doc).unwrap();
749        assert_eq!(components.len(), 1);
750        assert_eq!(components[0].name, "exchange");
751        assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("append"));
752        assert_eq!(components[0].content(doc), "Content\n");
753    }
754
755    #[test]
756    fn parse_component_with_multiple_attrs() {
757        let doc = "<!-- agent:log mode=prepend timestamp=true -->\nData\n<!-- /agent:log -->\n";
758        let components = parse(doc).unwrap();
759        assert_eq!(components.len(), 1);
760        assert_eq!(components[0].name, "log");
761        assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("prepend"));
762        assert_eq!(components[0].attrs.get("timestamp").map(|s| s.as_str()), Some("true"));
763    }
764
765    #[test]
766    fn parse_component_no_attrs_backward_compat() {
767        let doc = "<!-- agent:status -->\nOK\n<!-- /agent:status -->\n";
768        let components = parse(doc).unwrap();
769        assert_eq!(components.len(), 1);
770        assert_eq!(components[0].name, "status");
771        assert!(components[0].attrs.is_empty());
772    }
773
774    #[test]
775    fn is_agent_marker_with_attrs() {
776        assert!(is_agent_marker(" agent:exchange mode=append "));
777        assert!(is_agent_marker("agent:status mode=replace"));
778        assert!(is_agent_marker("agent:log mode=prepend timestamp=true"));
779    }
780
781    #[test]
782    fn closing_tag_unchanged_with_attrs() {
783        // Closing tags never have attributes
784        let doc = "<!-- agent:status mode=replace -->\n- [x] Done\n<!-- /agent:status -->\n";
785        let components = parse(doc).unwrap();
786        assert_eq!(components.len(), 1);
787        let new_doc = components[0].replace_content(doc, "- [ ] Todo\n");
788        assert!(new_doc.contains("<!-- agent:status mode=replace -->"));
789        assert!(new_doc.contains("<!-- /agent:status -->"));
790        assert!(new_doc.contains("- [ ] Todo"));
791    }
792
793    #[test]
794    fn parse_component_with_patch_attr() {
795        let doc = "<!-- agent:exchange patch=append -->\nContent\n<!-- /agent:exchange -->\n";
796        let components = parse(doc).unwrap();
797        assert_eq!(components.len(), 1);
798        assert_eq!(components[0].name, "exchange");
799        assert_eq!(components[0].patch_mode(), Some("append"));
800        assert_eq!(components[0].content(doc), "Content\n");
801    }
802
803    #[test]
804    fn patch_attr_takes_precedence_over_mode() {
805        let doc = "<!-- agent:exchange patch=replace mode=append -->\nContent\n<!-- /agent:exchange -->\n";
806        let components = parse(doc).unwrap();
807        assert_eq!(components[0].patch_mode(), Some("replace"));
808    }
809
810    #[test]
811    fn mode_attr_backward_compat() {
812        let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
813        let components = parse(doc).unwrap();
814        assert_eq!(components[0].patch_mode(), Some("append"));
815    }
816
817    #[test]
818    fn no_patch_or_mode_attr() {
819        let doc = "<!-- agent:exchange -->\nContent\n<!-- /agent:exchange -->\n";
820        let components = parse(doc).unwrap();
821        assert_eq!(components[0].patch_mode(), None);
822    }
823
824    // --- Inline backtick code span exclusion tests ---
825
826    #[test]
827    fn single_backtick_component_tag_ignored() {
828        // A component tag wrapped in single backticks should not be parsed
829        let doc = "\
830Use `<!-- agent:pending patch=replace -->` to mark pending sections.
831<!-- agent:real -->
832content
833<!-- /agent:real -->
834";
835        let components = parse(doc).unwrap();
836        assert_eq!(components.len(), 1);
837        assert_eq!(components[0].name, "real");
838    }
839
840    #[test]
841    fn double_backtick_component_tag_ignored() {
842        // A component tag wrapped in double backticks should not be parsed
843        let doc = "\
844Use ``<!-- agent:pending patch=replace -->`` to mark pending sections.
845<!-- agent:real -->
846content
847<!-- /agent:real -->
848";
849        let components = parse(doc).unwrap();
850        assert_eq!(components.len(), 1);
851        assert_eq!(components[0].name, "real");
852    }
853
854    #[test]
855    fn component_tags_not_in_backticks_still_work() {
856        // Tags outside of any backticks are parsed normally
857        let doc = "\
858<!-- agent:a -->
859alpha
860<!-- /agent:a -->
861<!-- agent:b patch=append -->
862beta
863<!-- /agent:b -->
864";
865        let components = parse(doc).unwrap();
866        assert_eq!(components.len(), 2);
867        assert_eq!(components[0].name, "a");
868        assert_eq!(components[1].name, "b");
869        assert_eq!(components[1].patch_mode(), Some("append"));
870    }
871
872    #[test]
873    fn mixed_backtick_and_real_tags() {
874        // Some tags in backticks (ignored), some not (parsed)
875        let doc = "\
876Here is an example: `<!-- agent:fake -->` and ``<!-- /agent:fake -->``.
877<!-- agent:real -->
878real content
879<!-- /agent:real -->
880Another example: `<!-- agent:also-fake patch=replace -->` is just documentation.
881";
882        let components = parse(doc).unwrap();
883        assert_eq!(components.len(), 1);
884        assert_eq!(components[0].name, "real");
885        assert_eq!(components[0].content(doc), "real content\n");
886    }
887
888    #[test]
889    fn inline_code_mid_line_with_surrounding_text_ignored() {
890        // Edge case: component tag inside inline code span on a line with other content
891        // before and after — must not be parsed as a real component marker.
892        let doc = "\
893Wrap markers like `<!-- agent:status -->` in backticks to show them literally.
894<!-- agent:real -->
895actual content
896<!-- /agent:real -->
897";
898        let components = parse(doc).unwrap();
899        assert_eq!(components.len(), 1);
900        assert_eq!(components[0].name, "real");
901        assert_eq!(components[0].content(doc), "actual content\n");
902    }
903
904    #[test]
905    fn parse_attrs_unit() {
906        let attrs = parse_attrs("mode=append");
907        assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));
908
909        let attrs = parse_attrs("mode=replace timestamp=true");
910        assert_eq!(attrs.len(), 2);
911
912        let attrs = parse_attrs("");
913        assert!(attrs.is_empty());
914
915        // Malformed tokens without = are ignored
916        let attrs = parse_attrs("mode=append broken novalue=");
917        assert_eq!(attrs.len(), 1);
918        assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));
919    }
920
921    #[test]
922    fn append_with_boundary_skips_code_block() {
923        // Boundary marker inside a code block should be ignored;
924        // the real marker outside should be used.
925        let boundary_id = "real-uuid";
926        let doc = format!(
927            "<!-- agent:exchange patch=append -->\n\
928             user prompt\n\
929             ```\n\
930             <!-- agent:boundary:{boundary_id} -->\n\
931             ```\n\
932             more user text\n\
933             <!-- agent:boundary:{boundary_id} -->\n\
934             <!-- /agent:exchange -->\n"
935        );
936        let components = parse(&doc).unwrap();
937        let comp = &components[0];
938        let result = comp.append_with_boundary(&doc, "### Re: Response\n\nContent here.", boundary_id);
939
940        // Response should replace the REAL marker (outside code block),
941        // not the one inside the code block.
942        assert!(result.contains("### Re: Response"));
943        assert!(result.contains("more user text"));
944        // The code block example should be preserved
945        assert!(result.contains(&format!("<!-- agent:boundary:{boundary_id} -->\n```")));
946        // The real marker should be consumed (replaced by response)
947        assert!(!result.contains(&format!("more user text\n<!-- agent:boundary:{boundary_id} -->\n<!-- /agent:exchange -->")));
948    }
949
950    #[test]
951    fn append_with_boundary_no_code_block() {
952        // Normal case: boundary marker not in a code block
953        let boundary_id = "simple-uuid";
954        let doc = format!(
955            "<!-- agent:exchange patch=append -->\n\
956             user prompt\n\
957             <!-- agent:boundary:{boundary_id} -->\n\
958             <!-- /agent:exchange -->\n"
959        );
960        let components = parse(&doc).unwrap();
961        let comp = &components[0];
962        let result = comp.append_with_boundary(&doc, "### Re: Answer\n\nDone.", boundary_id);
963
964        assert!(result.contains("### Re: Answer"));
965        assert!(result.contains("user prompt"));
966        // Original marker should be consumed, but a NEW boundary re-inserted
967        assert!(!result.contains(&format!("agent:boundary:{boundary_id}")));
968        assert!(result.contains("agent:boundary:"));
969    }
970}