Skip to main content

coding_tools/
blockdoc.rs

// SPDX-License-Identifier: Apache-2.0
// Copyright 2026 Jonathan Shook

//! The ct block document (`.ctb`) — the suite's one script format for batched
//! operations whose payloads are verbatim text.
//!
//! Delimited text, not JSON: payloads are code, and code must paste in with
//! zero escaping. A **fence line** starts with the fence string (`#%` by
//! default). A fence whose name the consuming tool declared as item-opening
//! (`edit` for `ct-edit`, `set`/`delete` for `ct-patch` batches) opens an
//! item and carries `key=value` attributes; other fence names open verbatim
//! **payload sections** of the current item; `end` closes the item. Outside
//! items, blank lines and `#`-comment lines are ignored.
//!
//! ```text
//! #% edit expect="=1" file=src/ast.rs
//! #% find
//!             Value::U64(v) => v.to_string(),
//! #% replace
//!             Value::U64(v) => v.to_string(),
//!             Value::I64(v) => v.to_string(),
//! #% end
//! ```

/// The fence string used by default: every directive line begins with it.
/// Pass a different fence to `parse` when payload lines themselves need to
/// start with `#%`.
pub const DEFAULT_FENCE: &str = "#%";

/// One parsed item: the directive that opened it, the attributes given on
/// that fence line, and its payload sections in document order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Item {
    /// The item-opening directive name (`edit`, `set`, …).
    pub directive: String,
    /// `key=value` attributes from the opening fence line, in order.
    pub attrs: Vec<(String, String)>,
    /// Payload sections: (name, verbatim payload). Each payload line keeps
    /// its `\n`; an empty section is the empty string (zero lines).
    pub sections: Vec<(String, String)>,
    /// 1-based source line of the opening directive (for diagnostics).
    pub line: usize,
}

impl Item {
    /// The value of the first attribute named `key`, or `None` when the item
    /// carries no attribute with that name.
    pub fn attr(&self, key: &str) -> Option<&str> {
        self.attrs
            .iter()
            .find_map(|(k, v)| (k == key).then_some(v.as_str()))
    }

    /// The payload of the first section named `name`, or `None` when the
    /// item has no such section. An empty section yields `Some("")`.
    pub fn section(&self, name: &str) -> Option<&str> {
        self.sections
            .iter()
            .find_map(|(k, v)| (k == name).then_some(v.as_str()))
    }
}

/// Parse the `key=value` attributes that follow the directive name on a
/// fence line.
///
/// Tokens are separated by whitespace. Each token splits at its *first* `=`
/// (so `expect==1` is key `expect`, value `=1`). A value that begins with
/// `"` runs to the next `"` and may therefore contain whitespace
/// (`note="two words"`) or read unambiguously (`expect="=1"`); there is no
/// escape syntax inside quotes. Any other value runs to the next whitespace.
/// Repeated keys are kept, in the order written.
///
/// `line_no` is used only to prefix error messages.
///
/// # Errors
///
/// Returns a `line N: …` message when a token has no `=`, when the key is
/// empty, when a quoted value is never closed, or when a closing quote is
/// immediately followed by more text (`a="x"b=c`) instead of whitespace or
/// the end of the line.
fn parse_attrs(rest: &str, line_no: usize) -> Result<Vec<(String, String)>, String> {
    let mut attrs = Vec::new();
    // `remaining` always starts at the first character of the next token.
    let mut remaining = rest.trim_start();
    while !remaining.is_empty() {
        // Key: everything before the first '=' inside this whitespace-delimited token.
        let token_end = remaining
            .find(char::is_whitespace)
            .unwrap_or(remaining.len());
        let Some(eq) = remaining[..token_end].find('=') else {
            return Err(format!(
                "line {line_no}: attribute '{}' is not key=value",
                &remaining[..token_end]
            ));
        };
        let key = &remaining[..eq];
        if key.is_empty() {
            return Err(format!("line {line_no}: attribute with empty key"));
        }

        // Value: double-quoted (to the closing quote) or bare (to whitespace).
        let after_eq = &remaining[eq + 1..];
        let (value, tail) = if let Some(quoted) = after_eq.strip_prefix('"') {
            let Some(close) = quoted.find('"') else {
                return Err(format!(
                    "line {line_no}: unterminated quoted value for '{key}'"
                ));
            };
            let tail = &quoted[close + 1..];
            // Text glued to the closing quote would otherwise be read as the
            // start of another attribute; reject it rather than guess.
            if !tail.is_empty() && !tail.starts_with(char::is_whitespace) {
                return Err(format!(
                    "line {line_no}: unexpected text after closing quote of '{key}'"
                ));
            }
            (&quoted[..close], tail)
        } else {
            let end = after_eq
                .find(char::is_whitespace)
                .unwrap_or(after_eq.len());
            after_eq.split_at(end)
        };

        attrs.push((key.to_string(), value.to_string()));
        remaining = tail.trim_start();
    }
    Ok(attrs)
}

127/// Parse a block document. `item_names` declares which directive names open
128/// an item; every other fence name is a payload section. An item-opening
129/// directive implicitly closes the previous item, and `end` closes one
130/// explicitly — so attribute-only items (`#% delete path=…`) need no `end`.
131///
132/// # Examples
133///
134/// ```
135/// use coding_tools::blockdoc::{parse, DEFAULT_FENCE};
136///
137/// let doc = "#% edit expect=\"=1\"\n#% find\nold()\n#% replace\nnew()\n#% end\n";
138/// let items = parse(doc, DEFAULT_FENCE, &["edit"]).unwrap();
139/// assert_eq!(items.len(), 1);
140/// assert_eq!(items[0].attr("expect"), Some("=1"));
141/// assert_eq!(items[0].section("find"), Some("old()\n"));
142/// ```
143pub fn parse(src: &str, fence: &str, item_names: &[&str]) -> Result<Vec<Item>, String> {
144    if fence.is_empty() {
145        return Err("fence string must not be empty".to_string());
146    }
147    let mut items: Vec<Item> = Vec::new();
148    let mut open: Option<Item> = None;
149    let mut section: Option<(String, String)> = None;
150
151    let close_section = |item: &mut Item, section: &mut Option<(String, String)>| {
152        if let Some(s) = section.take() {
153            item.sections.push(s);
154        }
155    };
156
157    for (idx, raw) in src.lines().enumerate() {
158        let line_no = idx + 1;
159        let fenced = raw
160            .strip_prefix(fence)
161            .filter(|rest| rest.is_empty() || rest.starts_with(char::is_whitespace));
162        let Some(rest) = fenced else {
163            // Not a fence line: verbatim payload inside a section, ignorable
164            // (blank or comment) outside any item.
165            match (&mut open, &mut section) {
166                (_, Some((_, payload))) => {
167                    payload.push_str(raw);
168                    payload.push('\n');
169                }
170                (Some(item), None) => {
171                    if !raw.trim().is_empty() {
172                        return Err(format!(
173                            "line {line_no}: stray content inside '{}' item (line {}); payload lines belong in a section",
174                            item.directive, item.line
175                        ));
176                    }
177                }
178                (None, _) => {
179                    if !raw.trim().is_empty() && !raw.trim_start().starts_with('#') {
180                        return Err(format!(
181                            "line {line_no}: content outside any item; expected a '{fence} <directive>' line"
182                        ));
183                    }
184                }
185            }
186            continue;
187        };
188
189        let rest = rest.trim_start();
190        let (name, attr_rest) = match rest.find(char::is_whitespace) {
191            Some(i) => (&rest[..i], &rest[i..]),
192            None => (rest, ""),
193        };
194        if name.is_empty() {
195            return Err(format!("line {line_no}: fence line with no directive name"));
196        }
197
198        if name == "end" {
199            let Some(mut item) = open.take() else {
200                return Err(format!("line {line_no}: 'end' with no open item"));
201            };
202            close_section(&mut item, &mut section);
203            items.push(item);
204        } else if item_names.contains(&name) {
205            if let Some(mut item) = open.take() {
206                close_section(&mut item, &mut section);
207                items.push(item);
208            }
209            open = Some(Item {
210                directive: name.to_string(),
211                attrs: parse_attrs(attr_rest, line_no)?,
212                sections: Vec::new(),
213                line: line_no,
214            });
215        } else {
216            let Some(item) = open.as_mut() else {
217                return Err(format!(
218                    "line {line_no}: unknown directive '{name}' (expected one of: {})",
219                    item_names.join(", ")
220                ));
221            };
222            if !attr_rest.trim().is_empty() {
223                return Err(format!(
224                    "line {line_no}: section '{name}' takes no attributes"
225                ));
226            }
227            close_section(item, &mut section);
228            if item.section(name).is_some() {
229                return Err(format!(
230                    "line {line_no}: duplicate section '{name}' in '{}' item (line {})",
231                    item.directive, item.line
232                ));
233            }
234            section = Some((name.to_string(), String::new()));
235        }
236    }
237
238    if let Some(mut item) = open.take() {
239        close_section(&mut item, &mut section);
240        items.push(item);
241    }
242    Ok(items)
243}
244
// Unit tests for the block-document parser: the happy path, fence handling,
// implicit item closing, and the wording of each diagnostic.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_items_attrs_and_verbatim_sections() {
        let doc = "\
# a comment outside items

#% edit expect==1 mode=literal file=src/a.rs
#% find
    old(\"$x\");
#% replace
    new(\"$x\");
    extra();
#% end
";
        let items = parse(doc, DEFAULT_FENCE, &["edit"]).unwrap();
        assert_eq!(items.len(), 1);
        let it = &items[0];
        assert_eq!(it.directive, "edit");
        // First-'=' split: expect==1 is key 'expect', value '=1'.
        assert_eq!(it.attr("expect"), Some("=1"));
        assert_eq!(it.attr("mode"), Some("literal"));
        assert_eq!(it.attr("file"), Some("src/a.rs"));
        assert_eq!(it.section("find"), Some("    old(\"$x\");\n"));
        assert_eq!(it.section("replace"), Some("    new(\"$x\");\n    extra();\n"));
        // Comment is line 1, blank is line 2, the directive is line 3.
        assert_eq!(it.line, 3);
    }

    #[test]
    fn quoted_values_carry_spaces_and_read_unambiguously() {
        let items = parse(
            "#% edit expect=\"=1\" note=\"two words\"\n#% find\nx\n#% end\n",
            DEFAULT_FENCE,
            &["edit"],
        )
        .unwrap();
        assert_eq!(items[0].attr("expect"), Some("=1"));
        assert_eq!(items[0].attr("note"), Some("two words"));
    }

    #[test]
    fn empty_section_is_zero_lines_and_end_is_implicit_between_items() {
        let doc = "#% edit\n#% find\nx\n#% replace\n#% edit\n#% find\ny\n#% replace\nz\n#% end\n";
        let items = parse(doc, DEFAULT_FENCE, &["edit"]).unwrap();
        assert_eq!(items.len(), 2);
        assert_eq!(items[0].section("replace"), Some(""));
        assert_eq!(items[1].section("replace"), Some("z\n"));
    }

    #[test]
    fn custom_fence_lets_payloads_contain_the_default() {
        let doc = "::: edit\n::: find\n#% not a fence here\n::: replace\nok\n::: end\n";
        let items = parse(doc, ":::", &["edit"]).unwrap();
        assert_eq!(items[0].section("find"), Some("#% not a fence here\n"));
    }

    #[test]
    fn payload_lines_merely_starting_with_the_fence_stay_payload() {
        // '#%x' is NOT a fence (no separator), so it stays payload.
        let doc = "#% edit\n#% find\n#%x payload\n#% end\n";
        let items = parse(doc, DEFAULT_FENCE, &["edit"]).unwrap();
        assert_eq!(items[0].section("find"), Some("#%x payload\n"));
    }

    #[test]
    fn errors_are_specific() {
        let unknown = parse("#% nonsense\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(unknown.contains("unknown directive"), "{unknown}");
        let stray = parse("stray\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(stray.contains("outside any item"), "{stray}");
        let dup = parse(
            "#% edit\n#% find\nx\n#% find\ny\n#% end\n",
            DEFAULT_FENCE,
            &["edit"],
        )
        .unwrap_err();
        assert!(dup.contains("duplicate section"), "{dup}");
        let unq = parse("#% edit expect=\"=1\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(unq.contains("unterminated"), "{unq}");
        let end = parse("#% end\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(end.contains("no open item"), "{end}");
    }

    #[test]
    fn malformed_lines_are_rejected() {
        let empty_fence = parse("#% edit\n", "", &["edit"]).unwrap_err();
        assert!(empty_fence.contains("fence string must not be empty"), "{empty_fence}");
        let stray = parse("#% edit\nstray\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(stray.contains("stray content inside"), "{stray}");
        let sect_attr = parse("#% edit\n#% find x=1\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(sect_attr.contains("takes no attributes"), "{sect_attr}");
        let bare = parse("#% edit bare\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(bare.contains("not key=value"), "{bare}");
        let no_key = parse("#% edit =v\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(no_key.contains("empty key"), "{no_key}");
        let no_name = parse("#%\n", DEFAULT_FENCE, &["edit"]).unwrap_err();
        assert!(no_name.contains("no directive name"), "{no_name}");
    }

    #[test]
    fn attribute_only_items_close_implicitly() {
        let doc = "#% del path=a.b\n#% del path=c.d\n";
        let items = parse(doc, DEFAULT_FENCE, &["del"]).unwrap();
        assert_eq!(items.len(), 2);
        assert_eq!(items[1].attr("path"), Some("c.d"));
    }
}