Skip to main content

zsh/extensions/
heredoc_ast.rs

1//! Heredoc AST-glue types — Rust-only, NOT in zsh C.
2//!
3//! zsh tracks pending heredocs via the `struct heredocs` linked-list
4//! node defined at `Src/zsh.h:1152-1157`:
5//!
6//! ```c
7//! struct heredocs {
8//!     struct heredocs *next;
9//!     int type;
10//!     int pc;
11//!     char *str;
12//! };
13//! ```
14//!
//! The C model defers body collection — the parser records `pc`
//! (wordcode offset) + `str` (terminator) at the `<<EOF` site, walks
//! past the redirection emitting normal wordcode for the rest of the
//! line, then at newline the `zshlex()` NEWLIN walk (lex.c:278-306)
//! traverses the linked list and calls `gethere()` (exec.c:4573) to
//! read each body from the input stream into the wordcode buffer at
//! the saved pc.
21//!
22//! zshrs's pre-wordcode parser collects each heredoc body inline
23//! during lex (no pc, no later resolution), so the live shape of
24//! per-heredoc state is different: `terminator`, `strip_tabs`,
25//! `content`, `quoted`, `processed`. The Vec position carries
26//! ordering (no `next` linked list).
27//!
28//! `HereDoc` is the AST-glue Vec entry the AST consumer
29//! (`fill_heredoc_bodies` in parse.rs) reads. The canonical
30//! `struct heredocs` linked list (parse.c:84) + `gethere()`
31//! (exec.c:4573) are ported as `parse::HDOCS` /
32//! `crate::exec::gethere`; the inline `zshlex()` NEWLIN walk
33//! (lex.c:278-306) writes body content into the next
34//! unprocessed `LEX_HEREDOCS` entry directly (no helper fn).
35//! `HereDocInfo` is the per-redir attachment that flows through
36//! the AST.
37
38use serde::{Deserialize, Serialize};
39
/// Per-heredoc state collected by the lexer during `<<EOF` parsing.
/// Held in the lexer-side `LEX_HEREDOCS: Vec<HereDoc>` thread_local
/// for later attachment to `ZshRedir` entries (via `heredoc_idx`).
///
/// Rust-only AST-glue Vec — runs parallel to the canonical
/// `struct heredocs *hdocs` linked list at `parse::HDOCS` (port of
/// `Src/parse.c:84`). The inline NEWLIN walk in `zshlex()` drains
/// both: it pops from HDOCS (the C-faithful list), calls `gethere`,
/// then walks `LEX_HEREDOCS` to find the next entry with
/// `processed == false` and writes the body there.
///
/// Derives `PartialEq`/`Eq` so two entries can be compared as whole
/// values (e.g. `assert_eq!(a, b)`) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HereDoc {
    /// Terminator word that ends the body (the `EOF` in `<<EOF`).
    pub terminator: String,
    /// Tab-stripping flag for this heredoc.
    /// NOTE(review): presumably set for the `<<-` form, where leading
    /// tabs are removed from body lines — confirm against the lexer.
    pub strip_tabs: bool,
    /// Body text of the heredoc. May legitimately be empty, so use
    /// `processed` (not emptiness) to tell whether it has been read.
    pub content: String,
    /// True if the terminator was originally quoted (`<<'EOF'`,
    /// `<<"EOF"`, or `<<\EOF`). Disables variable expansion / command
    /// substitution / arithmetic in the body.
    pub quoted: bool,
    /// True once the inline NEWLIN walk in `zshlex()` has read the
    /// body via `gethere`. Distinct from "content is empty" because
    /// an empty heredoc legitimately has empty content.
    pub processed: bool,
}
67
68/// Heredoc body+metadata attached to a parsed `ZshRedir`. Carried
69/// through the AST and consumed by the compiler when emitting
70/// `Op::HereDoc(idx)` for the fusevm VM.
71///
72/// Rust-only — the wordcode track stores bodies as strs-region
73/// strings indexed by `WCB_REDIR` slot. The AST track keeps this
74/// per-redir attachment so the fusevm compiler can emit a
75/// `Op::HereDoc(idx)` referencing the body verbatim.
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct HereDocInfo {
78    /// `content` field.
79    pub content: String,
80    /// `terminator` field.
81    pub terminator: String,
82    /// Originally-quoted terminator (`<<'EOF'`, `<<"EOF"`). When true
83    /// the body is passed verbatim — no `$var` / `$(cmd)` / `$((expr))`
84    /// expansion. Plain `<<EOF` runs all expansions.
85    #[serde(default)]
86    pub quoted: bool,
87}
88
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes `info` to JSON and parses the result straight back.
    fn json_roundtrip(info: &HereDocInfo) -> HereDocInfo {
        let encoded = serde_json::to_string(info).expect("serialize");
        serde_json::from_str(&encoded).expect("deserialize")
    }

    /// A literal-constructed `HereDoc` exposes exactly the values it
    /// was built with.
    #[test]
    fn heredoc_construct_and_field_access() {
        let doc = HereDoc {
            terminator: String::from("EOF"),
            strip_tabs: false,
            content: String::from("hello\n"),
            quoted: false,
            processed: false,
        };
        let HereDoc { terminator, strip_tabs, content, quoted, processed } = doc;
        assert_eq!(terminator, "EOF");
        assert!(!strip_tabs);
        assert_eq!(content, "hello\n");
        assert!(!quoted);
        assert!(!processed);
    }

    /// `Clone` must carry every field across unchanged.
    #[test]
    fn heredoc_clone_preserves_all_fields() {
        let original = HereDoc {
            terminator: String::from("MARKER"),
            strip_tabs: true,
            content: String::from("$x and `cmd`\n"),
            quoted: true,
            processed: true,
        };
        let copy = original.clone();
        assert_eq!(
            (&copy.terminator, copy.strip_tabs, &copy.content),
            (&original.terminator, original.strip_tabs, &original.content)
        );
        assert_eq!(
            (copy.quoted, copy.processed),
            (original.quoted, original.processed)
        );
    }

    /// An unquoted heredoc survives a JSON round-trip field for field.
    #[test]
    fn heredoc_info_serde_roundtrip_unquoted() {
        let sent = HereDocInfo {
            content: String::from("line1\nline2\n"),
            terminator: String::from("END"),
            quoted: false,
        };
        let received = json_roundtrip(&sent);
        assert_eq!(received.content, sent.content);
        assert_eq!(received.terminator, sent.terminator);
        assert_eq!(received.quoted, sent.quoted);
    }

    /// The `quoted` flag and a literal `$var` body survive a JSON
    /// round-trip.
    #[test]
    fn heredoc_info_serde_roundtrip_quoted() {
        let received = json_roundtrip(&HereDocInfo {
            content: String::from("literal $var\n"),
            terminator: String::from("EOF"),
            quoted: true,
        });
        assert!(received.quoted);
        assert_eq!(received.content, "literal $var\n");
    }

    /// Payloads written before `quoted` existed have no such key;
    /// `#[serde(default)]` must fill in `false` rather than error.
    #[test]
    fn heredoc_info_quoted_defaults_to_false_when_missing() {
        let legacy_payload = r#"{"content":"body\n","terminator":"EOF"}"#;
        let parsed: HereDocInfo = serde_json::from_str(legacy_payload).expect("deserialize");
        let HereDocInfo { content, terminator, quoted } = parsed;
        assert_eq!(content, "body\n");
        assert_eq!(terminator, "EOF");
        assert!(!quoted, "quoted should default to false");
    }

    /// An empty body is a legitimate value and must round-trip.
    #[test]
    fn heredoc_info_serializes_empty_body() {
        let sent = HereDocInfo {
            content: String::new(),
            terminator: String::from("X"),
            quoted: false,
        };
        let encoded = serde_json::to_string(&sent).expect("serialize empty");
        let received: HereDocInfo = serde_json::from_str(&encoded).expect("deserialize empty");
        assert!(received.content.is_empty());
        assert_eq!(received.terminator, "X");
    }

    /// Shell metacharacters, tabs and embedded double quotes in the
    /// body must come back byte-identical after JSON escaping.
    #[test]
    fn heredoc_info_preserves_special_chars_through_serde() {
        let sent = HereDocInfo {
            content: String::from("$(echo nested)\n\t`backtick`\n\"quoted\"\n"),
            terminator: String::from("EOF"),
            quoted: false,
        };
        let received = json_roundtrip(&sent);
        assert_eq!(received.content, sent.content);
    }
}