heron_rebuild_workflow/
strings.rs

1use std::cell::Ref;
2
3use anyhow::Result;
4
5use intern::{GetStr, InternStr, LooseInterner, PackedInterner, TypedInterner};
6use syntax::ast;
7
8use crate::value::create_value;
9use crate::{
10    AbstractTaskId, BaselineBranches, BranchSpec, BranchpointId, Error, IdentId, LiteralId,
11    ModuleId, RealTaskKey, RealTaskStrings, RunStrId, StringCache, StringMaker, Value,
12};
13
14use crate::branch::{CompactBranchStrings, FullBranchStrings};
15
16/// Stores all the interned strings associated with a Workflow.
17#[derive(Debug)]
18pub struct WorkflowStrings {
19    /// Names of branchpoints
20    pub branchpoints: TypedInterner<BranchpointId, PackedInterner<u8, u8>>,
21    /// Names of tasks
22    pub tasks: TypedInterner<AbstractTaskId, PackedInterner<u8, u16>>,
23    /// Names of other idents (variables, branches, etc.)
24    pub idents: TypedInterner<IdentId, PackedInterner<u16, u16>>,
25    /// Names of modules
26    pub modules: TypedInterner<ModuleId, PackedInterner<u8, u8>>,
27    /// Literal strings (code blocks, variable values)
28    pub literals: TypedInterner<LiteralId, LooseInterner<u8, u16>>,
29    /// Keep track of which branch is baseline for each branchpoint
30    pub baselines: BaselineBranches,
31    /// Strings used while running workflow: full file paths, debug strings etc.
32    pub run: TypedInterner<RunStrId, PackedInterner<u32, usize>>,
33    /// Cache for user-friendly branch strs e.g. 'A.p1+B.p2' etc.
34    branch_strs: StringCache<BranchSpec, FullBranchStrings>,
35    /// Create compact branch strings that use 'Baseline.baseline' for baseline branches:
36    compact_branch_strs: CompactBranchStrings,
37    /// Cache for user-friendly task strings e.g. 'task_name[A.p1+B.p2]'
38    real_task_strs: StringCache<RealTaskKey, RealTaskStrings>,
39}
40
41impl Default for WorkflowStrings {
42    fn default() -> Self {
43        let mut idents = PackedInterner::with_capacity_and_avg_len(64, 1024);
44        // seed idents with an empty value, so we can use 0 as a special val:
45        idents.intern("").expect("Ident interner seeding should never fail");
46
47        Self {
48            branchpoints: TypedInterner::new(PackedInterner::with_capacity_and_str_len(8, 32)),
49            tasks: TypedInterner::new(PackedInterner::with_capacity_and_str_len(16, 256)),
50            idents: TypedInterner::new(idents),
51            literals: TypedInterner::new(LooseInterner::with_capacity_and_str_len(64, 4096)),
52            modules: TypedInterner::new(PackedInterner::with_capacity_and_str_len(8, 16)),
53            baselines: BaselineBranches::with_capacity(8),
54            compact_branch_strs: CompactBranchStrings,
55            // we'll re-alloc these later when we need them:
56            run: TypedInterner::new(PackedInterner::with_capacity_and_str_len(0, 0)),
57            branch_strs: StringCache::with_capacity_and_str_len(FullBranchStrings, 0, 0),
58            real_task_strs: StringCache::with_capacity_and_str_len(RealTaskStrings, 0, 0),
59        }
60    }
61}
62
63impl WorkflowStrings {
64    /// Allocate space for new strings created during traversal:
65    pub fn alloc_for_traversal(&mut self) {
66        self.branch_strs = StringCache::with_capacity_and_str_len(FullBranchStrings, 32, 1024);
67        self.real_task_strs = StringCache::with_capacity_and_str_len(RealTaskStrings, 64, 2048);
68    }
69
70    /// Since we don't allocate any space for runtime strings up front,
71    /// call this fn to get ready to actually run the workflow.
72    pub fn alloc_for_run(&mut self) {
73        self.run = TypedInterner::new(PackedInterner::with_capacity_and_str_len(64, 4096));
74    }
75
76    /// Get user-friendly branch str, w/ all branches shown.
77    #[inline]
78    pub fn get_full_branch_str(&self, branch: &BranchSpec) -> Result<Ref<str>> {
79        self.branch_strs.get_or_insert(branch, self)
80    }
81
82    /// Get user-friendly task str, e.g. 'task_name[full_branch_str]'.
83    #[inline]
84    pub fn get_real_task_str(&self, task: &RealTaskKey) -> Result<Ref<str>> {
85        self.real_task_strs.get_or_insert(task, self)
86    }
87
88    /// Get user-friendly branch str, w/ 'Baseline.baseline' for baseline branches.
89    /// Since this string will not change when new branchpoints are added,
90    /// it is suitable for filenames that need to be consistent between runs.
91    #[inline]
92    pub fn make_compact_branch_string(&self, branch: &BranchSpec, buf: &mut String) -> Result<()> {
93        use StringMaker;
94        self.compact_branch_strs.make_string(branch, self, buf)
95    }
96
97    /// Create a value from its ast representation.
98    #[inline]
99    pub fn create_value(&mut self, lhs: ast::Ident, rhs: ast::Rhs) -> Result<Value> {
100        create_value(self, lhs, rhs)
101    }
102
103    /// Used while loading branchpoints.txt to make sure our branchpoints are
104    /// ordered consistently, and baselines stay consistent between runs.
105    pub fn pre_load_baseline(&mut self, branchpoint: &str, branchval: &str) -> Result<()> {
106        let k = self.branchpoints.intern(branchpoint)?;
107        let v = self.idents.intern(branchval)?;
108        self.baselines.add(k, v);
109        Ok(())
110    }
111
112    /// Add a new branchpoint to the mapping:
113    #[inline]
114    pub fn add_branchpoint(&mut self, branchpoint: &str) -> Result<BranchpointId> {
115        self.branchpoints.intern(branchpoint)
116    }
117
118    /// Add a new branch name for the given branchpoint:
119    #[inline]
120    pub fn add_branch(
121        &mut self,
122        _branchpoint: BranchpointId,
123        branch_name: &str,
124    ) -> Result<IdentId> {
125        self.idents.intern(branch_name)
126    }
127
128    /// Log sizes of interners at debug level:
129    pub fn log_sizes(&self) {
130        self.log_sizes_for("Branchpoints", &self.branchpoints);
131        self.log_sizes_for("Tasks", &self.tasks);
132        self.log_sizes_for("Idents", &self.idents);
133        self.log_sizes_for("Modules", &self.modules);
134        self.log_sizes_for("Literals", &self.literals);
135    }
136
137    #[inline]
138    fn log_sizes_for<T: GetStr>(&self, name: &str, interner: &T) {
139        log::debug!("{} {name}, str len {}", interner.len(), interner.str_len());
140    }
141}
142
143// string interpolation /////////////////////
144impl WorkflowStrings {
145    /// Realize an interpolated string into `buf`.
146    pub fn make_interpolated(
147        &self,
148        orig: LiteralId,
149        // NB these must be in order of where they appear in the string!
150        vars: &[(IdentId, LiteralId)],
151        buf: &mut String,
152    ) -> Result<()> {
153        let orig_str = self.literals.get(orig)?;
154        buf.push_str(orig_str);
155
156        let mut var_str = String::with_capacity(16);
157        var_str.push('$');
158
159        // keep moving scan start fwd so we don't accidentally mess up
160        // work we already did...
161        let mut scan_start = 0;
162        for (ident, val) in vars {
163            // strip var_str down to just the '$':
164            var_str.truncate(1);
165            // add the identifier to it:
166            let ident_str = self.idents.get(*ident)?;
167            var_str.push_str(ident_str);
168
169            let val_str = self.literals.get(*val)?;
170
171            if let Some(offset) = buf[scan_start..].find(&var_str) {
172                let start = scan_start + offset;
173                let end = start + var_str.len();
174                buf.replace_range(start..end, val_str);
175                scan_start = start + val_str.len();
176            } else {
177                return Err(Error::Interp(var_str, buf.clone()).into());
178            }
179        }
180        Ok(())
181    }
182}
183
#[cfg(test)]
mod test {
    use super::*;

    /// Interpolates a template with repeated variables into a buffer that
    /// already holds a prefix, then checks that an unknown variable fails.
    #[test]
    fn test_interpolate() -> Result<()> {
        let mut strings = WorkflowStrings::default();
        // `intern` is fallible; unwrap it with `?` so we hold a `LiteralId`
        // rather than a `Result`.
        let orig_id = strings.literals.intern("$v1 and $v2 $v1-$v2.$v2 etc")?;
        let v1 = strings.idents.intern("v1")?;
        let v2 = strings.idents.intern("v2")?;
        let v1_val = strings.literals.intern("value for var one")?;
        // The value itself contains '$' to make sure substituted text is
        // not re-scanned.
        let v2_val = strings.literals.intern("$$xyz$$")?;

        let mut buf = String::with_capacity(32);
        buf.push_str("prefix.");

        // One entry per occurrence, in order of appearance in the template.
        let vars = &[
            (v1, v1_val),
            (v2, v2_val),
            (v1, v1_val),
            (v2, v2_val),
            (v2, v2_val),
        ];

        strings.make_interpolated(orig_id, vars, &mut buf)?;

        assert_eq!(
            &buf,
            "prefix.value for var one and $$xyz$$ value for var one-$$xyz$$.$$xyz$$ etc"
        );

        // now try a bad one:
        let v3 = strings.idents.intern("v3_wont_be_found")?;
        let res = strings.make_interpolated(orig_id, &[(v3, v1_val)], &mut buf);
        assert!(res.is_err());

        Ok(())
    }
}