agent_doc/
component.rs

1//! # Module: component
2//!
3//! ## Spec
4//! - Defines `Component`, the parsed representation of a bounded document region delimited by
5//!   `<!-- agent:name [attrs...] -->` (open) and `<!-- /agent:name -->` (close) HTML comments.
6//! - `parse(doc)` scans the raw document bytes for `<!--` / `-->` comment pairs, builds a
7//!   stack-based nesting model, and returns all `Component` values sorted by `open_start`.
//! - Markers that appear inside code blocks (backtick- or tilde-fenced, or indented) or inline
//!   code spans are skipped; `find_code_ranges(doc)` uses the `pulldown-cmark` AST
//!   (CommonMark-compliant) to locate these regions.
11//! - `is_agent_marker(comment_text)` classifies whether the inner text of a comment is an
12//!   agent open/close marker vs. an ordinary HTML comment.
13//! - Component names must match `[a-zA-Z0-9][a-zA-Z0-9-]*`; invalid names, unmatched opens,
14//!   unmatched closes, and mismatched open/close pairs all return `Err`.
15//! - `boundary:*` prefixed markers (`<!-- agent:boundary:ID -->`) are recognised and skipped
16//!   during component parsing; they are not treated as component open tags.
17//! - Opening markers may carry space-separated `key=value` inline attributes
18//!   (e.g., `patch=append`). `patch=` takes precedence over the legacy `mode=` alias;
19//!   `patch_mode()` encapsulates this lookup.
20//! - Marker `open_end` / `close_end` byte offsets include a trailing newline when present,
21//!   so that content slices are clean.
22//! - `replace_content(doc, new_content)` rebuilds the document preserving both markers,
23//!   replacing only the bytes between them.
24//! - `append_with_caret(doc, content, caret_offset)` appends content after existing text, or
25//!   inserts before the caret line when the caret falls inside the component.
26//! - `append_with_boundary(doc, content, boundary_id)` locates `<!-- agent:boundary:ID -->`
27//!   inside the component (skipping any occurrence that lives inside a code block), replaces it
28//!   with the new content, and re-inserts a fresh boundary marker; falls back to
29//!   `append_with_caret` when no boundary is found.
30//!
31//! ## Agentic Contracts
32//! - `parse` is pure and deterministic: identical input always yields identical output.
33//! - All byte offsets (`open_start`, `open_end`, `close_start`, `close_end`) are valid UTF-8
34//!   char boundaries within the document string they were parsed from.
35//! - `content(doc)` returns exactly `&doc[open_end..close_start]` — no allocation.
36//! - `replace_content` and `append_with_*` never mutate the original `&str`; they return a
37//!   new `String` with all offsets consistent for a fresh `parse` call.
38//! - Markers inside any code region (fenced block or inline span) are never parsed as
39//!   components and never mutated by any append/replace operation.
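//! - When the boundary is found, `append_with_boundary` re-inserts a boundary marker with a
//!   fresh UUID in place of the consumed one, preserving the invariant that exactly one boundary
//!   exists inside the component after the write. On the fallback path (boundary not found) it
//!   delegates to `append_with_caret` and inserts no boundary marker.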
42//! - Unknown or malformed `key=value` tokens in inline attributes are silently discarded;
43//!   they never cause a parse error.
44//!
45//! ## Evals
46//! - single_range: doc with one component → one `Component`, correct name and content slice
47//! - nested_ranges: outer + inner components → two entries sorted outer-first
48//! - siblings: two adjacent components → two entries, each with correct content
49//! - no_ranges: plain markdown → empty vec, no error
50//! - unmatched_open_error: open without close → `Err` containing "unclosed component"
51//! - unmatched_close_error: close without open → `Err` containing "without matching open"
52//! - mismatched_names_error: `<!-- agent:foo -->…<!-- /agent:bar -->` → `Err` "mismatched"
53//! - invalid_name: name starting with `-` → `Err` "invalid component name"
54//! - markers_in_fenced_code_block_ignored: marker inside ``` block → not parsed as component
55//! - markers_in_inline_code_ignored: marker inside `` `…` `` span → not parsed
56//! - markers_in_tilde_fence_ignored: marker inside ~~~ block → not parsed
57//! - markers_in_indented_fenced_code_block_ignored: up-to-3-space-indented fence → not parsed
58//! - double_backtick_comment_before_agent_marker: `` `<!--` `` followed by real marker → one component
59//! - parse_component_with_patch_attr: `patch=append` on opening tag → `patch_mode()` = "append"
60//! - patch_attr_takes_precedence_over_mode: both `patch=` and `mode=` present → `patch=` wins
61//! - mode_attr_backward_compat: only `mode=append` present → `patch_mode()` = "append"
62//! - replace_roundtrip: replace content, re-parse → one component with new content
63//! - append_with_boundary_no_code_block: boundary found → content inserted, old ID consumed, new boundary present
64//! - append_with_boundary_skips_code_block: boundary inside code block skipped, real boundary used
65
66use anyhow::{bail, Result};
67use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
68use std::collections::HashMap;
69
/// A parsed component in a document.
///
/// Components are bounded regions marked by `<!-- agent:name -->...<!-- /agent:name -->`.
/// Opening tags may contain inline attributes: `<!-- agent:name key=value -->`.
///
/// All offsets are byte offsets into the document string the component was parsed from;
/// slicing a different string with them may panic or return unrelated text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Component {
    /// Component name, shared by the opening and closing markers.
    pub name: String,
    /// Inline attributes parsed from the opening tag (e.g., `patch=append`).
    pub attrs: HashMap<String, String>,
    /// Byte offset of `<` in opening marker.
    pub open_start: usize,
    /// Byte offset past `>` in opening marker (includes trailing newline if present).
    /// This is where the component's content begins.
    pub open_end: usize,
    /// Byte offset of `<` in closing marker. This is where the component's content ends.
    pub close_start: usize,
    /// Byte offset past `>` in closing marker (includes trailing newline if present).
    pub close_end: usize,
}
88
89impl Component {
90    /// Extract the content between the opening and closing markers.
91    #[allow(dead_code)] // public API — used by tests and future consumers
92    pub fn content<'a>(&self, doc: &'a str) -> &'a str {
93        &doc[self.open_end..self.close_start]
94    }
95
96    /// Get the patch mode from inline attributes.
97    ///
98    /// Checks `patch=` first, falls back to `mode=` for backward compatibility.
99    pub fn patch_mode(&self) -> Option<&str> {
100        self.attrs.get("patch").map(|s| s.as_str())
101            .or_else(|| self.attrs.get("mode").map(|s| s.as_str()))
102    }
103
104    /// Replace the content between markers, returning the new document.
105    /// The markers themselves are preserved.
106    pub fn replace_content(&self, doc: &str, new_content: &str) -> String {
107        let mut result = String::with_capacity(doc.len() + new_content.len());
108        result.push_str(&doc[..self.open_end]);
109        result.push_str(new_content);
110        result.push_str(&doc[self.close_start..]);
111        result
112    }
113
114    /// Append content into this component, inserting before the caret position
115    /// if the caret is inside the component. Falls back to normal append if the
116    /// caret is outside the component.
117    ///
118    /// `caret_offset`: byte offset of the caret in the document. Pass `None` for
119    /// normal append behavior.
120    pub fn append_with_caret(&self, doc: &str, content: &str, caret_offset: Option<usize>) -> String {
121        let existing = &doc[self.open_end..self.close_start];
122
123        if let Some(caret) = caret_offset {
124            // Check if caret is inside this component
125            if caret > self.open_end && caret <= self.close_start {
126                // Find the line boundary before the caret
127                let insert_at = doc[..caret].rfind('\n')
128                    .map(|i| i + 1)
129                    .unwrap_or(self.open_end);
130
131                // Clamp to component bounds
132                let insert_at = insert_at.max(self.open_end);
133
134                let mut result = String::with_capacity(doc.len() + content.len() + 1);
135                result.push_str(&doc[..insert_at]);
136                result.push_str(content.trim_end());
137                result.push('\n');
138                result.push_str(&doc[insert_at..]);
139                return result;
140            }
141        }
142
143        // Normal append: add after existing content
144        let mut result = String::with_capacity(doc.len() + content.len() + 1);
145        result.push_str(&doc[..self.open_end]);
146        result.push_str(existing.trim_end());
147        result.push('\n');
148        result.push_str(content.trim_end());
149        result.push('\n');
150        result.push_str(&doc[self.close_start..]);
151        result
152    }
153
154    /// Append content into this component at the boundary marker position.
155    ///
156    /// Finds `<!-- agent:boundary:ID -->` inside the component. If found,
157    /// inserts content at the line start of the boundary marker (replacing
158    /// the marker). Falls back to normal append if the boundary is not found.
159    pub fn append_with_boundary(&self, doc: &str, content: &str, boundary_id: &str) -> String {
160        let boundary_marker = format!("<!-- agent:boundary:{} -->", boundary_id);
161        let content_region = &doc[self.open_end..self.close_start];
162        let code_ranges = find_code_ranges(doc);
163
164        // Search for boundary marker, skipping matches inside code blocks
165        let mut search_from = 0;
166        let found_pos = loop {
167            match content_region[search_from..].find(&boundary_marker) {
168                Some(rel_pos) => {
169                    let abs_pos = self.open_end + search_from + rel_pos;
170                    if code_ranges.iter().any(|&(cs, ce)| abs_pos >= cs && abs_pos < ce) {
171                        // Inside a code block — skip and keep searching
172                        search_from += rel_pos + boundary_marker.len();
173                        continue;
174                    }
175                    break Some(abs_pos);
176                }
177                None => break None,
178            }
179        };
180
181        if let Some(abs_pos) = found_pos {
182            // Find start of the line containing the marker
183            let line_start = doc[..abs_pos]
184                .rfind('\n')
185                .map(|i| i + 1)
186                .unwrap_or(self.open_end)
187                .max(self.open_end);
188
189            // Find end of the marker line (including trailing newline)
190            let marker_end = abs_pos + boundary_marker.len();
191            let line_end = if marker_end < self.close_start
192                && doc.as_bytes().get(marker_end) == Some(&b'\n')
193            {
194                marker_end + 1
195            } else {
196                marker_end
197            };
198            let line_end = line_end.min(self.close_start);
199
200            // Replace the boundary marker with response content + new boundary.
201            // The boundary is consumed and re-inserted, matching the binary's
202            // post-patch behavior in apply_patches_with_overrides().
203            let new_id = crate::new_boundary_id();
204            let new_marker = crate::format_boundary_marker(&new_id);
205            let mut result = String::with_capacity(doc.len() + content.len() + new_marker.len());
206            result.push_str(&doc[..line_start]);
207            result.push_str(content.trim_end());
208            result.push('\n');
209            result.push_str(&new_marker);
210            result.push('\n');
211            result.push_str(&doc[line_end..]);
212            return result;
213        }
214
215        // Boundary not found — fall back to normal append
216        self.append_with_caret(doc, content, None)
217    }
218}
219
/// Returns `true` when `name` matches `[a-zA-Z0-9][a-zA-Z0-9-]*`.
///
/// The empty string is rejected, the first byte must be an ASCII letter or digit, and every
/// following byte must be an ASCII letter, digit, or `-`. Any non-ASCII byte fails the check.
fn is_valid_name(name: &str) -> bool {
    match name.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            head.is_ascii_alphanumeric()
                && tail
                    .iter()
                    .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'-')
        }
    }
}
232
233/// True if the text inside `<!-- ... -->` is an agent component marker.
234///
235/// Matches `agent:NAME [attrs...]` (open) or `/agent:NAME` (close).
236pub fn is_agent_marker(comment_text: &str) -> bool {
237    let trimmed = comment_text.trim();
238    if let Some(rest) = trimmed.strip_prefix("/agent:") {
239        is_valid_name(rest)
240    } else if let Some(rest) = trimmed.strip_prefix("agent:") {
241        // Opening marker may have attributes after the name: `agent:NAME key=value`
242        let name_part = rest.split_whitespace().next().unwrap_or("");
243        is_valid_name(name_part)
244    } else {
245        false
246    }
247}
248
/// Collect the `key=value` attributes that follow `agent:NAME ` in an opening marker.
///
/// The text is split on whitespace and each token is split at its first `=`. Tokens without
/// `=`, with an empty key, or with an empty value are dropped silently. Values are taken
/// verbatim (no quote handling), so everything after the first `=` belongs to the value.
/// When a key repeats, the last occurrence wins.
pub fn parse_attrs(attr_text: &str) -> HashMap<String, String> {
    attr_text
        .split_whitespace()
        .filter_map(|token| token.split_once('='))
        .filter(|(key, value)| !key.is_empty() && !value.is_empty())
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}
265
266/// Find byte ranges of code regions (fenced code blocks + inline code spans).
267/// Markers inside these ranges are treated as literal text, not component markers.
268///
269/// Uses `pulldown-cmark` AST parsing with `offset_iter()` to accurately detect
270/// code regions per the CommonMark spec.
271pub fn find_code_ranges(doc: &str) -> Vec<(usize, usize)> {
272    let t = std::time::Instant::now();
273    let mut ranges = Vec::new();
274    let parser = Parser::new_ext(doc, Options::empty());
275    let mut iter = parser.into_offset_iter();
276    while let Some((event, range)) = iter.next() {
277        match event {
278            // Inline code span: `code` or ``code``
279            Event::Code(_) => {
280                ranges.push((range.start, range.end));
281            }
282            // Fenced or indented code block: consume until End(CodeBlock)
283            Event::Start(Tag::CodeBlock(_)) => {
284                let block_start = range.start;
285                let mut block_end = range.end;
286                for (inner_event, inner_range) in iter.by_ref() {
287                    block_end = inner_range.end;
288                    if matches!(inner_event, Event::End(TagEnd::CodeBlock)) {
289                        break;
290                    }
291                }
292                ranges.push((block_start, block_end));
293            }
294            _ => {}
295        }
296    }
297    let elapsed = t.elapsed().as_millis();
298    if elapsed > 0 {
299        eprintln!("[perf] find_code_ranges: {}ms", elapsed);
300    }
301    ranges
302}
303
304/// Parse all components from a document.
305///
306/// Uses a stack for nesting. Returns components sorted by `open_start`.
307/// Errors on unmatched open/close markers or invalid names.
308/// Skips markers inside fenced code blocks and inline code spans.
309pub fn parse(doc: &str) -> Result<Vec<Component>> {
310    let bytes = doc.as_bytes();
311    let len = bytes.len();
312    let code_ranges = find_code_ranges(doc);
313    let mut templates: Vec<Component> = Vec::new();
314    // Stack of (name, attrs, open_start, open_end)
315    let mut stack: Vec<(String, HashMap<String, String>, usize, usize)> = Vec::new();
316    let mut pos = 0;
317
318    while pos + 4 <= len {
319        // Look for `<!--`
320        if &bytes[pos..pos + 4] != b"<!--" {
321            pos += 1;
322            continue;
323        }
324
325        // Skip markers inside code regions
326        if code_ranges.iter().any(|&(start, end)| pos >= start && pos < end) {
327            pos += 4;
328            continue;
329        }
330
331        let marker_start = pos;
332
333        // Find closing `-->`
334        let close = match find_comment_end(bytes, pos + 4) {
335            Some(c) => c,
336            None => {
337                pos += 4;
338                continue;
339            }
340        };
341
342        // close points to the byte after `>`
343        let inner = &doc[marker_start + 4..close - 3]; // between `<!--` and `-->`
344        let trimmed = inner.trim();
345
346        // Determine end offset — consume trailing newline if present
347        let mut marker_end = close;
348        if marker_end < len && bytes[marker_end] == b'\n' {
349            marker_end += 1;
350        }
351
352        if let Some(name) = trimmed.strip_prefix("/agent:") {
353            // Closing marker
354            if !is_valid_name(name) {
355                bail!("invalid component name: '{}'", name);
356            }
357            match stack.pop() {
358                Some((open_name, open_attrs, open_start, open_end)) => {
359                    if open_name != name {
360                        bail!(
361                            "mismatched component: opened '{}' but closed '{}'",
362                            open_name,
363                            name
364                        );
365                    }
366                    templates.push(Component {
367                        name: name.to_string(),
368                        attrs: open_attrs,
369                        open_start,
370                        open_end,
371                        close_start: marker_start,
372                        close_end: marker_end,
373                    });
374                }
375                None => bail!("closing marker <!-- /agent:{} --> without matching open", name),
376            }
377        } else if let Some(rest) = trimmed.strip_prefix("agent:") {
378            // Skip boundary markers — these are not component markers
379            if rest.starts_with("boundary:") {
380                pos = close;
381                continue;
382            }
383            // Opening marker — may have attributes: `agent:NAME key=value`
384            let mut parts = rest.splitn(2, |c: char| c.is_whitespace());
385            let name = parts.next().unwrap_or("");
386            let attr_text = parts.next().unwrap_or("");
387            if !is_valid_name(name) {
388                bail!("invalid component name: '{}'", name);
389            }
390            let attrs = parse_attrs(attr_text);
391            stack.push((name.to_string(), attrs, marker_start, marker_end));
392        }
393
394        pos = close;
395    }
396
397    if let Some((name, _, _, _)) = stack.last() {
398        bail!(
399            "unclosed component: <!-- agent:{} --> without matching close",
400            name
401        );
402    }
403
404    templates.sort_by_key(|t| t.open_start);
405    Ok(templates)
406}
407
/// Locate the first `-->` at or after byte offset `start`.
///
/// Returns the offset just past the terminating `>`, or `None` when no terminator exists
/// in `bytes[start..]` (including when `start` is at or beyond the end of `bytes`).
pub(crate) fn find_comment_end(bytes: &[u8], start: usize) -> Option<usize> {
    const TERMINATOR: &[u8] = b"-->";
    bytes
        .get(start..)?
        .windows(TERMINATOR.len())
        .position(|window| window == TERMINATOR)
        .map(|offset| start + offset + TERMINATOR.len())
}
420
421/// Strip comments from document content for diff comparison.
422///
423/// Removes:
424/// - HTML comments `<!-- ... -->` (single and multiline) — EXCEPT agent range markers
425/// - Link reference comments `[//]: # (...)`
426///
427/// Skips `<!--` sequences inside fenced code blocks and inline backtick spans
428/// to prevent code examples containing `<!--` from being misinterpreted as
429/// comment starts.
430///
431/// This function is the shared implementation used by both `diff::compute` (binary)
432/// and external crates like `eval-runner`.
433pub fn strip_comments(content: &str) -> String {
434    let code_ranges = find_code_ranges(content);
435    let in_code = |pos: usize| code_ranges.iter().any(|&(start, end)| pos >= start && pos < end);
436
437    let mut result = String::with_capacity(content.len());
438    let bytes = content.as_bytes();
439    let len = bytes.len();
440    let mut pos = 0;
441
442    while pos < len {
443        // Check for link reference comment: `[//]: # (...)`
444        if bytes[pos] == b'['
445            && !in_code(pos)
446            && is_line_start_at(bytes, pos)
447            && let Some(end) = match_link_ref_comment_at(bytes, pos)
448        {
449            pos = end;
450            continue;
451        }
452
453        // Check for HTML comment: `<!-- ... -->`
454        if pos + 4 <= len
455            && &bytes[pos..pos + 4] == b"<!--"
456            && !in_code(pos)
457            && let Some((end, inner)) = match_html_comment_at(content, pos)
458        {
459            if is_agent_marker(inner) {
460                // Preserve agent markers — copy them through
461                result.push_str(&content[pos..end]);
462                pos = end;
463            } else {
464                // Strip the comment (and trailing newline if on its own line)
465                let mut skip_to = end;
466                if is_line_start_at(bytes, pos) && skip_to < len && bytes[skip_to] == b'\n' {
467                    skip_to += 1;
468                }
469                pos = skip_to;
470            }
471            continue;
472        }
473
474        result.push(content[pos..].chars().next().unwrap());
475        pos += content[pos..].chars().next().unwrap().len_utf8();
476    }
477
478    result
479}
480
/// Reports whether `pos` begins a line: either it is offset 0 or the byte just before it
/// is `\n`.
///
/// Panics if `pos` is greater than `bytes.len()`.
fn is_line_start_at(bytes: &[u8], pos: usize) -> bool {
    match pos.checked_sub(1) {
        None => true,
        Some(previous) => bytes[previous] == b'\n',
    }
}
485
/// Match a link reference comment `[//]: # (...)` that starts exactly at `pos`.
///
/// The comment body must be closed by `)` before any newline. On success returns the byte
/// offset just past the `)`, advanced by one more when a `\n` follows it directly; other
/// text after the `)` on the same line is not consumed. Returns `None` when the prefix does
/// not match or the closing `)` is missing on that line.
fn match_link_ref_comment_at(bytes: &[u8], pos: usize) -> Option<usize> {
    const PREFIX: &[u8] = b"[//]: # (";

    let body = bytes.get(pos..)?.strip_prefix(PREFIX)?;
    // First byte that ends the scan: the closing paren or a line break.
    let stop = body.iter().position(|&b| b == b')' || b == b'\n')?;
    if body[stop] != b')' {
        return None;
    }

    let after_paren = pos + PREFIX.len() + stop + 1;
    let followed_by_newline = bytes.get(after_paren) == Some(&b'\n');
    Some(if followed_by_newline { after_paren + 1 } else { after_paren })
}
507
/// Match an HTML comment whose `<!--` opener starts at `pos`.
///
/// The four bytes at `pos` are assumed to be the opener and are not re-checked. Searches
/// for the first `-->` after them and returns `(end_offset, inner_text)`, where
/// `end_offset` is the byte offset just past `-->` and `inner_text` is everything between
/// the opener and the terminator. Returns `None` when the comment is never closed.
fn match_html_comment_at(content: &str, pos: usize) -> Option<(usize, &str)> {
    let body_start = pos + 4;
    let terminator_at = content
        .as_bytes()
        .get(body_start..)?
        .windows(3)
        .position(|window| window == b"-->")
        .map(|offset| body_start + offset)?;
    Some((terminator_at + 3, &content[body_start..terminator_at]))
}
522
523#[cfg(test)]
524mod tests {
525    use super::*;
526
527    #[test]
528    fn single_range() {
529        let doc = "before\n<!-- agent:status -->\nHello\n<!-- /agent:status -->\nafter\n";
530        let ranges = parse(doc).unwrap();
531        assert_eq!(ranges.len(), 1);
532        assert_eq!(ranges[0].name, "status");
533        assert_eq!(ranges[0].content(doc), "Hello\n");
534    }
535
536    #[test]
537    fn nested_ranges() {
538        let doc = "\
539<!-- agent:outer -->
540<!-- agent:inner -->
541content
542<!-- /agent:inner -->
543<!-- /agent:outer -->
544";
545        let ranges = parse(doc).unwrap();
546        assert_eq!(ranges.len(), 2);
547        // Sorted by open_start — outer first
548        assert_eq!(ranges[0].name, "outer");
549        assert_eq!(ranges[1].name, "inner");
550        assert_eq!(ranges[1].content(doc), "content\n");
551    }
552
553    #[test]
554    fn siblings() {
555        let doc = "\
556<!-- agent:a -->
557alpha
558<!-- /agent:a -->
559<!-- agent:b -->
560beta
561<!-- /agent:b -->
562";
563        let ranges = parse(doc).unwrap();
564        assert_eq!(ranges.len(), 2);
565        assert_eq!(ranges[0].name, "a");
566        assert_eq!(ranges[0].content(doc), "alpha\n");
567        assert_eq!(ranges[1].name, "b");
568        assert_eq!(ranges[1].content(doc), "beta\n");
569    }
570
571    #[test]
572    fn no_ranges() {
573        let doc = "# Just a document\n\nWith no range templates.\n";
574        let ranges = parse(doc).unwrap();
575        assert!(ranges.is_empty());
576    }
577
578    #[test]
579    fn unmatched_open_error() {
580        let doc = "<!-- agent:orphan -->\nContent\n";
581        let err = parse(doc).unwrap_err();
582        assert!(err.to_string().contains("unclosed component"));
583    }
584
585    #[test]
586    fn unmatched_close_error() {
587        let doc = "Content\n<!-- /agent:orphan -->\n";
588        let err = parse(doc).unwrap_err();
589        assert!(err.to_string().contains("without matching open"));
590    }
591
592    #[test]
593    fn mismatched_names_error() {
594        let doc = "<!-- agent:foo -->\n<!-- /agent:bar -->\n";
595        let err = parse(doc).unwrap_err();
596        assert!(err.to_string().contains("mismatched"));
597    }
598
599    #[test]
600    fn invalid_name() {
601        let doc = "<!-- agent:-bad -->\n<!-- /agent:-bad -->\n";
602        let err = parse(doc).unwrap_err();
603        assert!(err.to_string().contains("invalid component name"));
604    }
605
606    #[test]
607    fn name_validation() {
608        assert!(is_valid_name("status"));
609        assert!(is_valid_name("my-section"));
610        assert!(is_valid_name("a1"));
611        assert!(is_valid_name("A"));
612        assert!(!is_valid_name(""));
613        assert!(!is_valid_name("-bad"));
614        assert!(!is_valid_name("has space"));
615        assert!(!is_valid_name("has_underscore"));
616    }
617
618    #[test]
619    fn content_extraction() {
620        let doc = "<!-- agent:x -->\nfoo\nbar\n<!-- /agent:x -->\n";
621        let ranges = parse(doc).unwrap();
622        assert_eq!(ranges[0].content(doc), "foo\nbar\n");
623    }
624
625    #[test]
626    fn replace_roundtrip() {
627        let doc = "before\n<!-- agent:s -->\nold\n<!-- /agent:s -->\nafter\n";
628        let ranges = parse(doc).unwrap();
629        let new_doc = ranges[0].replace_content(doc, "new\n");
630        assert_eq!(
631            new_doc,
632            "before\n<!-- agent:s -->\nnew\n<!-- /agent:s -->\nafter\n"
633        );
634        // Re-parse should work
635        let ranges2 = parse(&new_doc).unwrap();
636        assert_eq!(ranges2.len(), 1);
637        assert_eq!(ranges2[0].content(&new_doc), "new\n");
638    }
639
640    #[test]
641    fn is_agent_marker_yes() {
642        assert!(is_agent_marker(" agent:status "));
643        assert!(is_agent_marker("/agent:status"));
644        assert!(is_agent_marker("agent:my-thing"));
645        assert!(is_agent_marker(" /agent:A1 "));
646    }
647
648    #[test]
649    fn is_agent_marker_no() {
650        assert!(!is_agent_marker("just a comment"));
651        assert!(!is_agent_marker("agent:"));
652        assert!(!is_agent_marker("/agent:"));
653        assert!(!is_agent_marker("agent:-bad"));
654        assert!(!is_agent_marker("some agent:fake stuff"));
655    }
656
657    #[test]
658    fn regular_comments_ignored() {
659        let doc = "<!-- just a comment -->\n<!-- agent:x -->\ndata\n<!-- /agent:x -->\n";
660        let ranges = parse(doc).unwrap();
661        assert_eq!(ranges.len(), 1);
662        assert_eq!(ranges[0].name, "x");
663    }
664
665    #[test]
666    fn multiline_comment_ignored() {
667        let doc = "\
668<!--
669multi
670line
671comment
672-->
673<!-- agent:s -->
674content
675<!-- /agent:s -->
676";
677        let ranges = parse(doc).unwrap();
678        assert_eq!(ranges.len(), 1);
679        assert_eq!(ranges[0].name, "s");
680    }
681
682    #[test]
683    fn empty_content() {
684        let doc = "<!-- agent:empty --><!-- /agent:empty -->\n";
685        let ranges = parse(doc).unwrap();
686        assert_eq!(ranges.len(), 1);
687        assert_eq!(ranges[0].content(doc), "");
688    }
689
690    #[test]
691    fn markers_in_fenced_code_block_ignored() {
692        let doc = "\
693<!-- agent:real -->
694content
695<!-- /agent:real -->
696```markdown
697<!-- agent:fake -->
698this is just an example
699<!-- /agent:fake -->
700```
701";
702        let ranges = parse(doc).unwrap();
703        assert_eq!(ranges.len(), 1);
704        assert_eq!(ranges[0].name, "real");
705    }
706
707    #[test]
708    fn markers_in_inline_code_ignored() {
709        let doc = "\
710Use `<!-- agent:example -->` markers for components.
711<!-- agent:real -->
712content
713<!-- /agent:real -->
714";
715        let ranges = parse(doc).unwrap();
716        assert_eq!(ranges.len(), 1);
717        assert_eq!(ranges[0].name, "real");
718    }
719
720    #[test]
721    fn markers_in_tilde_fence_ignored() {
722        let doc = "\
723<!-- agent:x -->
724data
725<!-- /agent:x -->
726~~~
727<!-- agent:y -->
728example
729<!-- /agent:y -->
730~~~
731";
732        let ranges = parse(doc).unwrap();
733        assert_eq!(ranges.len(), 1);
734        assert_eq!(ranges[0].name, "x");
735    }
736
737    #[test]
738    fn markers_in_indented_fenced_code_block_ignored() {
739        // CommonMark allows up to 3 spaces before fence opener
740        let doc = "\
741<!-- agent:exchange -->
742Content here.
743<!-- /agent:exchange -->
744
745  ```markdown
746  <!-- agent:fake -->
747  demo without closing tag
748  ```
749";
750        let ranges = parse(doc).unwrap();
751        assert_eq!(ranges.len(), 1);
752        assert_eq!(ranges[0].name, "exchange");
753    }
754
755    #[test]
756    fn indented_fence_inside_component_ignored() {
757        // Indented code block inside a component should not cause mismatched errors
758        let doc = "\
759<!-- agent:exchange -->
760Here's how to set up:
761
762   ```markdown
763   <!-- agent:status -->
764   Your status here
765   ```
766
767Done explaining.
768<!-- /agent:exchange -->
769";
770        let ranges = parse(doc).unwrap();
771        assert_eq!(ranges.len(), 1);
772        assert_eq!(ranges[0].name, "exchange");
773    }
774
775    #[test]
776    fn deeply_indented_fence_ignored() {
777        // Tabs and many spaces should still be detected as a fence
778        let doc = "\
779<!-- agent:x -->
780ok
781<!-- /agent:x -->
782      ```
783      <!-- agent:y -->
784      inside fence
785      ```
786";
787        let ranges = parse(doc).unwrap();
788        assert_eq!(ranges.len(), 1);
789        assert_eq!(ranges[0].name, "x");
790    }
791
792    #[test]
793    fn indented_fence_code_ranges_detected() {
794        let doc = "before\n  ```\n  code\n  ```\nafter\n";
795        let ranges = find_code_ranges(doc);
796        assert_eq!(ranges.len(), 1);
797        assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
798    }
799
800    #[test]
801    fn code_ranges_detected() {
802        let doc = "before\n```\ncode\n```\nafter `inline` end\n";
803        let ranges = find_code_ranges(doc);
804        assert_eq!(ranges.len(), 2);
805        // Fenced block
806        assert!(doc[ranges[0].0..ranges[0].1].contains("code"));
807        // Inline span
808        assert!(doc[ranges[1].0..ranges[1].1].contains("inline"));
809    }
810
811    #[test]
812    fn code_ranges_double_backtick() {
813        // CommonMark: `` `<!--` `` is a code span containing `<!--`
814        let doc = "text `` `<!--` `` more\n";
815        let ranges = find_code_ranges(doc);
816        assert_eq!(ranges.len(), 1);
817        let span = &doc[ranges[0].0..ranges[0].1];
818        assert!(span.contains("<!--"), "double-backtick span should contain <!--: {:?}", span);
819    }
820
821    #[test]
822    fn code_ranges_double_backtick_does_not_match_single() {
823        // `` should not match a single ` close
824        let doc = "text `` foo ` bar `` end\n";
825        let ranges = find_code_ranges(doc);
826        assert_eq!(ranges.len(), 1);
827        let span = &doc[ranges[0].0..ranges[0].1];
828        assert_eq!(span, "`` foo ` bar ``");
829    }
830
831    #[test]
832    fn double_backtick_comment_before_agent_marker() {
833        // Regression: `` `<!--` `` followed by agent marker should not confuse the parser
834        let doc = "\
835<!-- agent:exchange -->\n\
836text `` `<!--` `` description\n\
837new content here\n\
838<!-- /agent:exchange -->\n";
839        let components = parse(doc).unwrap();
840        assert_eq!(components.len(), 1);
841        assert_eq!(components[0].name, "exchange");
842        assert!(components[0].content(doc).contains("new content here"));
843    }
844
845    // --- Inline attribute tests ---
846
847    #[test]
848    fn parse_component_with_mode_attr() {
849        let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
850        let components = parse(doc).unwrap();
851        assert_eq!(components.len(), 1);
852        assert_eq!(components[0].name, "exchange");
853        assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("append"));
854        assert_eq!(components[0].content(doc), "Content\n");
855    }
856
857    #[test]
858    fn parse_component_with_multiple_attrs() {
859        let doc = "<!-- agent:log mode=prepend timestamp=true -->\nData\n<!-- /agent:log -->\n";
860        let components = parse(doc).unwrap();
861        assert_eq!(components.len(), 1);
862        assert_eq!(components[0].name, "log");
863        assert_eq!(components[0].attrs.get("mode").map(|s| s.as_str()), Some("prepend"));
864        assert_eq!(components[0].attrs.get("timestamp").map(|s| s.as_str()), Some("true"));
865    }
866
867    #[test]
868    fn parse_component_no_attrs_backward_compat() {
869        let doc = "<!-- agent:status -->\nOK\n<!-- /agent:status -->\n";
870        let components = parse(doc).unwrap();
871        assert_eq!(components.len(), 1);
872        assert_eq!(components[0].name, "status");
873        assert!(components[0].attrs.is_empty());
874    }
875
876    #[test]
877    fn is_agent_marker_with_attrs() {
878        assert!(is_agent_marker(" agent:exchange mode=append "));
879        assert!(is_agent_marker("agent:status mode=replace"));
880        assert!(is_agent_marker("agent:log mode=prepend timestamp=true"));
881    }
882
883    #[test]
884    fn closing_tag_unchanged_with_attrs() {
885        // Closing tags never have attributes
886        let doc = "<!-- agent:status mode=replace -->\n- [x] Done\n<!-- /agent:status -->\n";
887        let components = parse(doc).unwrap();
888        assert_eq!(components.len(), 1);
889        let new_doc = components[0].replace_content(doc, "- [ ] Todo\n");
890        assert!(new_doc.contains("<!-- agent:status mode=replace -->"));
891        assert!(new_doc.contains("<!-- /agent:status -->"));
892        assert!(new_doc.contains("- [ ] Todo"));
893    }
894
895    #[test]
896    fn parse_component_with_patch_attr() {
897        let doc = "<!-- agent:exchange patch=append -->\nContent\n<!-- /agent:exchange -->\n";
898        let components = parse(doc).unwrap();
899        assert_eq!(components.len(), 1);
900        assert_eq!(components[0].name, "exchange");
901        assert_eq!(components[0].patch_mode(), Some("append"));
902        assert_eq!(components[0].content(doc), "Content\n");
903    }
904
905    #[test]
906    fn patch_attr_takes_precedence_over_mode() {
907        let doc = "<!-- agent:exchange patch=replace mode=append -->\nContent\n<!-- /agent:exchange -->\n";
908        let components = parse(doc).unwrap();
909        assert_eq!(components[0].patch_mode(), Some("replace"));
910    }
911
912    #[test]
913    fn mode_attr_backward_compat() {
914        let doc = "<!-- agent:exchange mode=append -->\nContent\n<!-- /agent:exchange -->\n";
915        let components = parse(doc).unwrap();
916        assert_eq!(components[0].patch_mode(), Some("append"));
917    }
918
919    #[test]
920    fn no_patch_or_mode_attr() {
921        let doc = "<!-- agent:exchange -->\nContent\n<!-- /agent:exchange -->\n";
922        let components = parse(doc).unwrap();
923        assert_eq!(components[0].patch_mode(), None);
924    }
925
926    // --- Inline backtick code span exclusion tests ---
927
928    #[test]
929    fn single_backtick_component_tag_ignored() {
930        // A component tag wrapped in single backticks should not be parsed
931        let doc = "\
932Use `<!-- agent:pending patch=replace -->` to mark pending sections.
933<!-- agent:real -->
934content
935<!-- /agent:real -->
936";
937        let components = parse(doc).unwrap();
938        assert_eq!(components.len(), 1);
939        assert_eq!(components[0].name, "real");
940    }
941
942    #[test]
943    fn double_backtick_component_tag_ignored() {
944        // A component tag wrapped in double backticks should not be parsed
945        let doc = "\
946Use ``<!-- agent:pending patch=replace -->`` to mark pending sections.
947<!-- agent:real -->
948content
949<!-- /agent:real -->
950";
951        let components = parse(doc).unwrap();
952        assert_eq!(components.len(), 1);
953        assert_eq!(components[0].name, "real");
954    }
955
956    #[test]
957    fn component_tags_not_in_backticks_still_work() {
958        // Tags outside of any backticks are parsed normally
959        let doc = "\
960<!-- agent:a -->
961alpha
962<!-- /agent:a -->
963<!-- agent:b patch=append -->
964beta
965<!-- /agent:b -->
966";
967        let components = parse(doc).unwrap();
968        assert_eq!(components.len(), 2);
969        assert_eq!(components[0].name, "a");
970        assert_eq!(components[1].name, "b");
971        assert_eq!(components[1].patch_mode(), Some("append"));
972    }
973
974    #[test]
975    fn mixed_backtick_and_real_tags() {
976        // Some tags in backticks (ignored), some not (parsed)
977        let doc = "\
978Here is an example: `<!-- agent:fake -->` and ``<!-- /agent:fake -->``.
979<!-- agent:real -->
980real content
981<!-- /agent:real -->
982Another example: `<!-- agent:also-fake patch=replace -->` is just documentation.
983";
984        let components = parse(doc).unwrap();
985        assert_eq!(components.len(), 1);
986        assert_eq!(components[0].name, "real");
987        assert_eq!(components[0].content(doc), "real content\n");
988    }
989
990    #[test]
991    fn inline_code_mid_line_with_surrounding_text_ignored() {
992        // Edge case: component tag inside inline code span on a line with other content
993        // before and after — must not be parsed as a real component marker.
994        let doc = "\
995Wrap markers like `<!-- agent:status -->` in backticks to show them literally.
996<!-- agent:real -->
997actual content
998<!-- /agent:real -->
999";
1000        let components = parse(doc).unwrap();
1001        assert_eq!(components.len(), 1);
1002        assert_eq!(components[0].name, "real");
1003        assert_eq!(components[0].content(doc), "actual content\n");
1004    }
1005
1006    #[test]
1007    fn parse_attrs_unit() {
1008        let attrs = parse_attrs("mode=append");
1009        assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));
1010
1011        let attrs = parse_attrs("mode=replace timestamp=true");
1012        assert_eq!(attrs.len(), 2);
1013
1014        let attrs = parse_attrs("");
1015        assert!(attrs.is_empty());
1016
1017        // Malformed tokens without = are ignored
1018        let attrs = parse_attrs("mode=append broken novalue=");
1019        assert_eq!(attrs.len(), 1);
1020        assert_eq!(attrs.get("mode").map(|s| s.as_str()), Some("append"));
1021    }
1022
1023    #[test]
1024    fn append_with_boundary_skips_code_block() {
1025        // Boundary marker inside a code block should be ignored;
1026        // the real marker outside should be used.
1027        let boundary_id = "real-uuid";
1028        let doc = format!(
1029            "<!-- agent:exchange patch=append -->\n\
1030             user prompt\n\
1031             ```\n\
1032             <!-- agent:boundary:{boundary_id} -->\n\
1033             ```\n\
1034             more user text\n\
1035             <!-- agent:boundary:{boundary_id} -->\n\
1036             <!-- /agent:exchange -->\n"
1037        );
1038        let components = parse(&doc).unwrap();
1039        let comp = &components[0];
1040        let result = comp.append_with_boundary(&doc, "### Re: Response\n\nContent here.", boundary_id);
1041
1042        // Response should replace the REAL marker (outside code block),
1043        // not the one inside the code block.
1044        assert!(result.contains("### Re: Response"));
1045        assert!(result.contains("more user text"));
1046        // The code block example should be preserved
1047        assert!(result.contains(&format!("<!-- agent:boundary:{boundary_id} -->\n```")));
1048        // The real marker should be consumed (replaced by response)
1049        assert!(!result.contains(&format!("more user text\n<!-- agent:boundary:{boundary_id} -->\n<!-- /agent:exchange -->")));
1050    }
1051
1052    #[test]
1053    fn append_with_boundary_no_code_block() {
1054        // Normal case: boundary marker not in a code block
1055        let boundary_id = "simple-uuid";
1056        let doc = format!(
1057            "<!-- agent:exchange patch=append -->\n\
1058             user prompt\n\
1059             <!-- agent:boundary:{boundary_id} -->\n\
1060             <!-- /agent:exchange -->\n"
1061        );
1062        let components = parse(&doc).unwrap();
1063        let comp = &components[0];
1064        let result = comp.append_with_boundary(&doc, "### Re: Answer\n\nDone.", boundary_id);
1065
1066        assert!(result.contains("### Re: Answer"));
1067        assert!(result.contains("user prompt"));
1068        // Original marker should be consumed, but a NEW boundary re-inserted
1069        assert!(!result.contains(&format!("agent:boundary:{boundary_id}")));
1070        assert!(result.contains("agent:boundary:"));
1071    }
1072
1073    // --- strip_comments tests (moved from diff.rs) ---
1074
1075    #[test]
1076    fn strip_comments_removes_html_comment() {
1077        let result = strip_comments("before\n<!-- a comment -->\nafter\n");
1078        assert_eq!(result, "before\nafter\n");
1079    }
1080
1081    #[test]
1082    fn strip_comments_preserves_agent_markers() {
1083        let input = "text\n<!-- agent:status -->\ncontent\n<!-- /agent:status -->\n";
1084        let result = strip_comments(input);
1085        assert!(result.contains("<!-- agent:status -->"));
1086        assert!(result.contains("<!-- /agent:status -->"));
1087    }
1088
1089    #[test]
1090    fn strip_comments_removes_link_ref() {
1091        let result = strip_comments("[//]: # (hidden note)\nvisible\n");
1092        assert_eq!(result, "visible\n");
1093    }
1094}
agent_doc/component.rs

agent_doc/
component.rs