Skip to main content

gap/
markers.rs

1//! Universal XML marker resolution for `<gap:target>`.
2//!
//! Every text format uses `<gap:target id="...">` / `</gap:target>` markers.
//! The `gap:` namespace prefix is uniquely identifiable, and LLMs follow XML tags
//! reliably. JSON is the one exception: it uses pointer addressing instead.
6
7use anyhow::{bail, Context, Result};
8use regex::Regex;
9
10/// Build start and end markers for a target ID.
11///
12/// `<gap:target id="nav">` / `</gap:target>`
13///
14/// JSON (`application/json`) does not support text markers — use pointer addressing.
15pub fn markers_for(target_id: &str, format: &str) -> Result<(String, String)> {
16    if format == "application/json" {
17        bail!("JSON does not support text-based markers; use pointer addressing instead");
18    }
19    Ok((
20        format!(r#"<gap:target id="{target_id}">"#),
21        "</gap:target>".to_string(),
22    ))
23}
24
/// Text that starts every opening marker. The trailing space is part of the
/// match, so only `<gap:target` followed by a space counts as an opening tag.
const OPEN_PREFIX: &str = "<gap:target ";
/// The closing marker, identical for every target.
const CLOSE_TAG: &str = "</gap:target>";

/// Find the byte offset of the `</gap:target>` that closes the target whose
/// opening tag ends at `content_start`.
///
/// Nesting is tracked with a depth counter: every `<gap:target ` seen before
/// the next closing tag raises the depth, every `</gap:target>` lowers it, and
/// the closing tag that brings the depth back to zero is the match.
///
/// The scan is linear in the length of `content`: the positions of the next
/// opening and closing tag are cached, and only the one that was just consumed
/// is searched for again (starting right after it).
///
/// Returns `None` when no matching closing tag exists, or when `content_start`
/// is not a valid slice start for `content` (past the end or not on a
/// character boundary).
fn find_matching_close(content: &str, content_start: usize) -> Option<usize> {
    // Locate `pat` at or after byte offset `from`, as an absolute offset.
    // `str::get` turns an unusable `from` into `None` instead of a panic.
    let find_from = |pat: &str, from: usize| {
        content
            .get(from..)
            .and_then(|tail| tail.find(pat))
            .map(|i| from + i)
    };

    // The caller has already consumed one opening tag.
    let mut depth: usize = 1;
    let mut next_open = find_from(OPEN_PREFIX, content_start);
    let mut next_close = find_from(CLOSE_TAG, content_start);

    loop {
        // Without another closing tag the target can never be balanced.
        let close = next_close?;
        match next_open {
            Some(open) if open < close => {
                // A nested target starts first. The cached `close` stays
                // valid: a closing tag begins with `<`, which cannot occur
                // inside the remainder of the opening prefix.
                depth += 1;
                next_open = find_from(OPEN_PREFIX, open + OPEN_PREFIX.len());
            }
            _ => {
                depth -= 1;
                if depth == 0 {
                    return Some(close);
                }
                // The cached `next_open` (if any) lies beyond this closing
                // tag, so it is still the first opening tag after it.
                next_close = find_from(CLOSE_TAG, close + CLOSE_TAG.len());
            }
        }
    }
}
57
58/// Find the byte range of a target's content within a string.
59///
60/// Returns `(content_start, content_end)` — byte offsets between markers (exclusive of markers).
61/// Handles nested `<gap:target>` elements via depth counting.
62pub fn find_target_range(
63    content: &str,
64    target_id: &str,
65    format: &str,
66) -> Result<(usize, usize)> {
67    let (start_marker, _) = markers_for(target_id, format)?;
68    let si = content
69        .find(&start_marker)
70        .with_context(|| format!("start marker not found for target: {target_id}"))?;
71    let content_start = si + start_marker.len();
72    let ei = find_matching_close(content, content_start)
73        .with_context(|| format!("end marker not found for target: {target_id}"))?;
74    Ok((content_start, ei))
75}
76
77/// Find the byte range of a target including its markers.
78///
79/// Returns `(marker_start, marker_end)` — byte offsets including both markers and content.
80/// Handles nested `<gap:target>` elements via depth counting.
81pub fn find_target_range_inclusive(
82    content: &str,
83    target_id: &str,
84    format: &str,
85) -> Result<(usize, usize)> {
86    let (start_marker, _) = markers_for(target_id, format)?;
87    let si = content
88        .find(&start_marker)
89        .with_context(|| format!("start marker not found for target: {target_id}"))?;
90    let content_start = si + start_marker.len();
91    let ei = find_matching_close(content, content_start)
92        .with_context(|| format!("end marker not found for target: {target_id}"))?;
93    Ok((si, ei + CLOSE_TAG.len()))
94}
95
96/// Extract all target IDs from artifact content by scanning for `<gap:target id="...">` markers.
97///
98/// Returns IDs in document order. JSON format returns an empty vec (uses pointer addressing).
99pub fn extract_targets(content: &str, format: &str) -> Vec<String> {
100    if format == "application/json" {
101        return Vec::new();
102    }
103    let re = Regex::new(r#"<gap:target id="([^"]+)">"#).expect("valid regex");
104    re.captures_iter(content)
105        .map(|cap| cap[1].to_string())
106        .collect()
107}
108
#[cfg(test)]
mod tests {
    //! Unit tests for marker construction, range lookup, and ID extraction,
    //! covering both the success paths and the error paths.

    use super::*;

    // --- markers_for -------------------------------------------------------

    #[test]
    fn test_html_markers() {
        let (start, end) = markers_for("nav", "text/html").unwrap();
        assert_eq!(start, r#"<gap:target id="nav">"#);
        assert_eq!(end, "</gap:target>");
    }

    #[test]
    fn test_python_markers() {
        let (start, end) = markers_for("imports", "text/x-python").unwrap();
        assert_eq!(start, r#"<gap:target id="imports">"#);
        assert_eq!(end, "</gap:target>");
    }

    #[test]
    fn test_json_unsupported() {
        assert!(markers_for("data", "application/json").is_err());
    }

    // --- find_target_range / find_target_range_inclusive --------------------

    #[test]
    fn test_find_target_range() {
        let content = r#"before<gap:target id="stats">old stats</gap:target>after"#;
        let (start, end) = find_target_range(content, "stats", "text/html").unwrap();
        assert_eq!(&content[start..end], "old stats");
    }

    #[test]
    fn test_find_target_range_nested_inner() {
        let content = r#"<gap:target id="outer"><gap:target id="inner">val</gap:target></gap:target>"#;
        let (start, end) = find_target_range(content, "inner", "text/html").unwrap();
        assert_eq!(&content[start..end], "val");
    }

    #[test]
    fn test_find_target_range_nested_outer() {
        let content = r#"<gap:target id="outer"><gap:target id="inner">val</gap:target></gap:target>"#;
        let (start, end) = find_target_range(content, "outer", "text/html").unwrap();
        assert_eq!(&content[start..end], r#"<gap:target id="inner">val</gap:target>"#);
    }

    #[test]
    fn test_find_target_range_second_sibling() {
        let content = r#"<gap:target id="a">x</gap:target><gap:target id="b">y</gap:target>"#;
        let (start, end) = find_target_range(content, "b", "text/html").unwrap();
        assert_eq!(&content[start..end], "y");
    }

    #[test]
    fn test_find_target_range_empty_body() {
        let content = r#"<gap:target id="x"></gap:target>"#;
        let (start, end) = find_target_range(content, "x", "text/html").unwrap();
        assert_eq!(&content[start..end], "");
    }

    #[test]
    fn test_find_target_range_json_unsupported() {
        assert!(find_target_range("{}", "data", "application/json").is_err());
        assert!(find_target_range_inclusive("{}", "data", "application/json").is_err());
    }

    #[test]
    fn test_find_target_range_missing_start() {
        let err = find_target_range("no markers here", "x", "text/html").unwrap_err();
        assert!(err.to_string().contains("start marker not found for target: x"));
    }

    #[test]
    fn test_find_target_range_missing_end() {
        let content = r#"<gap:target id="x">never closed"#;
        let err = find_target_range(content, "x", "text/html").unwrap_err();
        assert!(err.to_string().contains("end marker not found for target: x"));
    }

    #[test]
    fn test_find_target_range_unbalanced_nesting() {
        // The only closing tag belongs to the inner target, so the outer one
        // has no match while the inner one still resolves.
        let content = r#"<gap:target id="outer"><gap:target id="inner">val</gap:target>"#;
        assert!(find_target_range(content, "outer", "text/html").is_err());
        let (start, end) = find_target_range(content, "inner", "text/html").unwrap();
        assert_eq!(&content[start..end], "val");
    }

    #[test]
    fn test_find_target_range_inclusive() {
        let content = r#"before<gap:target id="x">data</gap:target>after"#;
        let (start, end) = find_target_range_inclusive(content, "x", "text/html").unwrap();
        assert_eq!(&content[start..end], r#"<gap:target id="x">data</gap:target>"#);
    }

    #[test]
    fn test_find_target_range_inclusive_nested_outer() {
        let content = r#"a<gap:target id="outer"><gap:target id="inner">val</gap:target></gap:target>z"#;
        let (start, end) = find_target_range_inclusive(content, "outer", "text/html").unwrap();
        assert_eq!(
            &content[start..end],
            r#"<gap:target id="outer"><gap:target id="inner">val</gap:target></gap:target>"#
        );
    }

    #[test]
    fn test_find_target_range_inclusive_missing_end() {
        let content = r#"<gap:target id="x">never closed"#;
        assert!(find_target_range_inclusive(content, "x", "text/html").is_err());
    }

    // --- extract_targets ---------------------------------------------------

    #[test]
    fn test_extract_targets_flat() {
        let content = r#"<gap:target id="a">x</gap:target><gap:target id="b">y</gap:target>"#;
        assert_eq!(extract_targets(content, "text/html"), vec!["a", "b"]);
    }

    #[test]
    fn test_extract_targets_nested() {
        let content = r#"<gap:target id="outer"><gap:target id="inner">v</gap:target></gap:target>"#;
        assert_eq!(extract_targets(content, "text/html"), vec!["outer", "inner"]);
    }

    #[test]
    fn test_extract_targets_empty() {
        assert!(extract_targets("no markers here", "text/html").is_empty());
    }

    #[test]
    fn test_extract_targets_json_returns_empty() {
        let content = r#"<gap:target id="a">x</gap:target>"#;
        assert!(extract_targets(content, "application/json").is_empty());
    }

    #[test]
    fn test_extract_targets_repeated_calls_agree() {
        // Calling twice must give the same answer regardless of any caching
        // inside the function.
        let content = r#"<gap:target id="a">x</gap:target>"#;
        assert_eq!(extract_targets(content, "text/html"), vec!["a"]);
        assert_eq!(extract_targets(content, "text/html"), vec!["a"]);
    }
}