openjd-expr 0.1.2

Open Job Description expression language — types, evaluation, and path mapping
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// Copyright by contributors to this project.
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

//! Hierarchical symbol table for expression evaluation.
//!
//! Mirrors Python `openjd.expr._symbol_table.SymbolTable`.
//! Supports dotted key paths and nested tables.
//!
//! # Construction
//!
//! ```
//! use openjd_expr::{symtab, SymbolTable, ExprValue, ExprType};
//!
//! // Macro (most concise):
//! let st = symtab! {
//!     "Param.Frame" => 42,
//!     "Param.Name" => "test",
//!     "Session.Dir" => ExprType::PATH,  // auto-wraps as unresolved
//! };
//!
//! // Builder-style:
//! let mut st = SymbolTable::new();
//! st.set("Param.Frame", 42).unwrap();
//! st.set("Param.Name", "test").unwrap();
//!
//! // From iterator:
//! let st: SymbolTable = [
//!     ("Param.Frame", ExprValue::from(42)),
//!     ("Param.Name", "test".into()),
//! ].into_iter().collect();
//! ```

use crate::types::ExprType;
use crate::value::ExprValue;
use std::collections::HashMap;

/// Maximum number of entries permitted in a `SymbolTable` deserialized from
/// the JSON transport format (`SerializedSymbolTable` or the serde
/// `Deserialize` impl on `SymbolTable` itself).
///
/// The deserializer walks an untrusted JSON array and invokes
/// `SymbolTable::set` for each entry. Real-world symbol tables carry a
/// handful of job parameters, a handful of session variables, and a
/// handful of path-mapping-derived bindings — well under a thousand
/// entries in the aggregate. This cap rejects transport blobs that would
/// produce a multi-million-entry table purely to inflate the worker's
/// memory footprint before evaluation begins.
///
/// The cap applies **only** to the transport-deserialization paths. Direct
/// in-process calls to `SymbolTable::set` or `set_table` are trusted
/// (host code), and callers that legitimately need larger tables (e.g.
/// a test builder) can construct them by hand without tripping the cap.
///
/// See `specs/expr/symbol-table.md` (Defensive caps) for rationale.
pub const MAX_SYMBOL_TABLE_ENTRIES: usize = 100_000;

/// Error returned when a [`SymbolTable::set`] call conflicts with an existing entry.
///
/// For example, setting `"A.B.C"` when `"A.B"` is already a scalar value.
#[derive(Debug, Clone)]
pub struct SymbolTableError {
    pub key: String,
    pub conflict: String,
}

impl std::fmt::Display for SymbolTableError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Cannot set '{}': '{}' is not a table",
            self.key, self.conflict
        )
    }
}

impl std::error::Error for SymbolTableError {}

impl From<SymbolTableError> for crate::error::ExpressionError {
    /// Bubble a symbol-table conflict up as an `ExpressionError::Other`.
    ///
    /// The full `SymbolTableError` message is preserved verbatim so callers
    /// can still see which key was being set and what it conflicted with.
    /// Call sites that have an AST node available can further attach
    /// `.with_node(...)` / `.with_span(...)` for caret-annotated output.
    fn from(e: SymbolTableError) -> Self {
        crate::error::ExpressionError::new(e.to_string())
    }
}

/// Entry in a symbol table: either a nested table or a value.
#[derive(Debug, Clone)]
pub enum SymbolTableEntry {
    Table(SymbolTable),
    Value(ExprValue),
}

/// Hierarchical symbol table mapping names to values or nested tables.
///
/// Supports dotted paths: `table.set("Param.Frame", 42)`
/// creates a nested structure `Param -> Frame -> 42`.
#[derive(Debug, Clone, Default)]
pub struct SymbolTable {
    pub(crate) table: HashMap<String, SymbolTableEntry>,
}

impl SymbolTable {
    pub fn new() -> Self {
        Self {
            table: HashMap::new(),
        }
    }

    /// Construct from a list of (dotted_key, value) pairs.
    /// Build a `SymbolTable` from any iterable of `(dotted_key, value)` pairs.
    ///
    /// Accepts any `IntoIterator`, so callers can pass `Vec`, arrays, iterators
    /// (e.g., from `map`/`filter` chains), or other containers without collecting first.
    pub fn from_pairs<'a, I>(pairs: I) -> Result<Self, SymbolTableError>
    where
        I: IntoIterator<Item = (&'a str, ExprValue)>,
    {
        let mut st = Self::new();
        for (k, v) in pairs {
            st.set(k, v)?;
        }
        Ok(st)
    }

    /// Set a nested SymbolTable at a key (for dict-like nesting).
    pub fn set_table(&mut self, key: &str, subtable: SymbolTable) {
        self.table
            .insert(key.to_string(), SymbolTableEntry::Table(subtable));
    }

    /// Get a subtable at a key, or None.
    pub fn get_table(&self, key: &str) -> Option<&SymbolTable> {
        match self.get(key) {
            Some(SymbolTableEntry::Table(t)) => Some(t),
            _ => None,
        }
    }

    /// Set a value at a dotted path, creating intermediate tables as needed.
    ///
    /// Accepts anything convertible to `ExprValue` via `Into`:
    /// - `i32`, `i64` → `ExprValue::Int`
    /// - `bool` → `ExprValue::Bool`
    /// - `&str`, `String` → `ExprValue::String`
    /// - `ExprType` → `ExprValue::Unresolved` (for type-checking symbol tables)
    /// - `ExprValue` → used directly
    ///
    /// For floats, construct `ExprValue::Float(Float64::new(v)?)` explicitly.
    ///
    /// Returns an error if an intermediate path component is already set to a
    /// value (not a table). For example, setting `"A.B.C"` fails if `"A.B"` is
    /// already a scalar value.
    pub fn set(&mut self, key: &str, value: impl Into<ExprValue>) -> Result<(), SymbolTableError> {
        self.set_value(key, value.into())
    }

    fn set_value(&mut self, key: &str, value: ExprValue) -> Result<(), SymbolTableError> {
        let parts: Vec<&str> = key.split('.').collect();
        if parts.len() == 1 {
            if matches!(self.table.get(key), Some(SymbolTableEntry::Table(_))) {
                return Err(SymbolTableError {
                    key: key.to_string(),
                    conflict: key.to_string(),
                });
            }
            self.table
                .insert(key.to_string(), SymbolTableEntry::Value(value));
            return Ok(());
        }
        let mut current = self;
        for &part in &parts[..parts.len() - 1] {
            let entry = current
                .table
                .entry(part.to_string())
                .or_insert_with(|| SymbolTableEntry::Table(SymbolTable::new()));
            current = match entry {
                SymbolTableEntry::Table(t) => t,
                _ => {
                    return Err(SymbolTableError {
                        key: key.to_string(),
                        conflict: part.to_string(),
                    })
                }
            };
        }
        let last = parts.last().unwrap().to_string();
        if matches!(current.table.get(&last), Some(SymbolTableEntry::Table(_))) {
            return Err(SymbolTableError {
                key: key.to_string(),
                conflict: last,
            });
        }
        current.table.insert(last, SymbolTableEntry::Value(value));
        Ok(())
    }

    /// Set a string value at a dotted path (convenience).
    pub fn set_string(&mut self, key: &str, value: &str) -> Result<(), SymbolTableError> {
        self.set(key, ExprValue::String(value.to_string()))
    }

    /// Get an entry at a dotted path.
    pub fn get(&self, key: &str) -> Option<&SymbolTableEntry> {
        let parts: Vec<&str> = key.split('.').collect();
        let mut current = self;
        for (i, &part) in parts.iter().enumerate() {
            match current.table.get(part) {
                Some(SymbolTableEntry::Table(t)) if i < parts.len() - 1 => current = t,
                Some(entry) if i == parts.len() - 1 => return Some(entry),
                _ => return None,
            }
        }
        None
    }

    /// Get a value at a dotted path, returning None if not found or if it's a table.
    pub fn get_value(&self, key: &str) -> Option<&ExprValue> {
        match self.get(key) {
            Some(SymbolTableEntry::Value(v)) => Some(v),
            _ => None,
        }
    }

    /// Get a string value at a dotted path.
    pub fn get_string(&self, key: &str) -> Option<&str> {
        match self.get_value(key) {
            Some(ExprValue::String(s)) => Some(s),
            Some(ExprValue::Path { value, .. }) => Some(value),
            _ => None,
        }
    }

    pub fn contains(&self, key: &str) -> bool {
        self.get(key).is_some()
    }

    /// Top-level keys.
    pub fn keys(&self) -> impl Iterator<Item = &str> {
        self.table.keys().map(|s| s.as_str())
    }

    /// Collect all leaf symbol paths (dotted names) in this table.
    ///
    /// If `prefix` is non-empty, each returned path is prefixed with
    /// `"{prefix}."`. Use `""` for a top-level walk.
    pub fn all_paths(&self, prefix: &str) -> Vec<String> {
        let mut out = Vec::new();
        self.collect_paths(prefix, &mut out);
        out
    }

    fn collect_paths(&self, prefix: &str, out: &mut Vec<String>) {
        for (key, entry) in &self.table {
            let path = if prefix.is_empty() {
                key.clone()
            } else {
                format!("{prefix}.{key}")
            };
            match entry {
                SymbolTableEntry::Value(_) => out.push(path),
                SymbolTableEntry::Table(sub) => sub.collect_paths(&path, out),
            }
        }
    }

    /// Merge all entries from `other` into this table, overwriting on conflict.
    pub fn merge_from(&mut self, other: &SymbolTable) {
        for (key, entry) in &other.table {
            match entry {
                SymbolTableEntry::Value(v) => {
                    self.table
                        .insert(key.clone(), SymbolTableEntry::Value(v.clone()));
                }
                SymbolTableEntry::Table(sub) => match self.table.get_mut(key) {
                    Some(SymbolTableEntry::Table(existing)) => existing.merge_from(sub),
                    _ => {
                        self.table
                            .insert(key.clone(), SymbolTableEntry::Table(sub.clone()));
                    }
                },
            }
        }
    }
}

impl serde::Serialize for SymbolTable {
    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
        use serde::ser::SerializeSeq;
        let paths = self.all_paths("");
        // Collect only resolved values — skip Unresolved entries
        let entries: Vec<_> = paths
            .iter()
            .filter_map(|p| {
                self.get_value(p).and_then(|v| {
                    if matches!(v, ExprValue::Unresolved(_)) {
                        None
                    } else {
                        Some((p, v))
                    }
                })
            })
            .collect();
        let mut seq = s.serialize_seq(Some(entries.len()))?;
        for (path, value) in entries {
            seq.serialize_element(&serde_json::json!({
                "name": path,
                "value": value.transport_value(),
                "type": value.expr_type().to_string(),
            }))?;
        }
        seq.end()
    }
}

impl<'de> serde::Deserialize<'de> for SymbolTable {
    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
        let arr: Vec<serde_json::Value> = serde::Deserialize::deserialize(d)?;
        if arr.len() > MAX_SYMBOL_TABLE_ENTRIES {
            return Err(serde::de::Error::custom(format!(
                "SymbolTable: too many entries ({}); maximum is {}",
                arr.len(),
                MAX_SYMBOL_TABLE_ENTRIES,
            )));
        }
        let mut st = SymbolTable::new();
        for entry in &arr {
            let name = entry
                .get("name")
                .and_then(|n| n.as_str())
                .ok_or_else(|| serde::de::Error::missing_field("name"))?;
            let type_str = entry
                .get("type")
                .and_then(|t| t.as_str())
                .ok_or_else(|| serde::de::Error::missing_field("type"))?;
            let binding_type = ExprType::parse(type_str).map_err(serde::de::Error::custom)?;
            let raw_value = entry
                .get("value")
                .ok_or_else(|| serde::de::Error::missing_field("value"))?;
            let value = ExprValue::from_transport_value(
                raw_value,
                &binding_type,
                crate::path_mapping::PathFormat::Posix,
            )
            .map_err(serde::de::Error::custom)?;
            st.set(name, value).map_err(serde::de::Error::custom)?;
        }
        Ok(st)
    }
}

/// Collect `(&str, ExprValue)` pairs into a `SymbolTable`.
///
/// # Panics
///
/// Panics if a dotted path conflicts with an existing non-table entry.
/// Use [`SymbolTable::set`] directly if you need error handling.
impl<'a> FromIterator<(&'a str, ExprValue)> for SymbolTable {
    fn from_iter<I: IntoIterator<Item = (&'a str, ExprValue)>>(iter: I) -> Self {
        let mut st = Self::new();
        for (k, v) in iter {
            st.set(k, v)
                .expect("SymbolTable path conflict in FromIterator");
        }
        st
    }
}

// ═══════════════════════════════════════════════════════════════
// SerializedSymbolTable — boundary type between template and session scope
// ═══════════════════════════════════════════════════════════════

/// A symbol table in JSON transport format.
///
/// This is the boundary type between template scope (always Posix paths) and
/// session scope (host-native paths). The session deserializes it with
/// `PathFormat::host()`, ensuring path separators match the worker OS.
///
/// This mirrors the real-world flow where a scheduler serializes the symbol
/// table to JSON and sends it to a worker that may be on a different OS.
#[derive(Debug, Clone, serde::Serialize)]
#[serde(transparent)]
pub struct SerializedSymbolTable(serde_json::Value);

impl SerializedSymbolTable {
    /// Create from a `serde_json::Value` (already-parsed JSON array).
    pub fn from_value(v: serde_json::Value) -> Self {
        Self(v)
    }

    /// Create from a JSON string.
    pub fn from_json_str(s: &str) -> Result<Self, serde_json::Error> {
        Ok(Self(serde_json::from_str(s)?))
    }

    /// Serialize a `SymbolTable` into transport format.
    pub fn from_symtab(st: &SymbolTable) -> Self {
        Self(serde_json::to_value(st).expect("SymbolTable serialization cannot fail"))
    }

    /// Deserialize to a `SymbolTable` with the given path format.
    ///
    /// Path values in the transport format are plain strings; this method
    /// reconstructs them as `ExprValue::Path` with separators normalized
    /// to the specified format.
    ///
    /// Returns an error if the transport array holds more than
    /// [`MAX_SYMBOL_TABLE_ENTRIES`] entries.
    pub fn to_symtab(
        &self,
        path_format: crate::path_mapping::PathFormat,
    ) -> Result<SymbolTable, String> {
        let arr = self
            .0
            .as_array()
            .ok_or("SerializedSymbolTable: expected JSON array")?;
        if arr.len() > MAX_SYMBOL_TABLE_ENTRIES {
            return Err(format!(
                "SerializedSymbolTable: too many entries ({}); maximum is {}",
                arr.len(),
                MAX_SYMBOL_TABLE_ENTRIES,
            ));
        }
        let mut st = SymbolTable::new();
        for entry in arr {
            let name = entry
                .get("name")
                .and_then(|n| n.as_str())
                .ok_or("SerializedSymbolTable: missing 'name' field")?;
            let type_str = entry
                .get("type")
                .and_then(|t| t.as_str())
                .ok_or("SerializedSymbolTable: missing 'type' field")?;
            let binding_type = ExprType::parse(type_str)
                .map_err(|e| format!("SerializedSymbolTable: bad type '{type_str}': {e}"))?;
            let raw_value = entry
                .get("value")
                .ok_or("SerializedSymbolTable: missing 'value' field")?;
            let value = ExprValue::from_transport_value(raw_value, &binding_type, path_format)
                .map_err(|e| format!("SerializedSymbolTable: {e}"))?;
            st.set(name, value)
                .map_err(|e| format!("SerializedSymbolTable: {e}"))?;
        }
        Ok(st)
    }

    /// Get the underlying JSON value.
    pub fn as_value(&self) -> &serde_json::Value {
        &self.0
    }
}

impl<'de> serde::Deserialize<'de> for SerializedSymbolTable {
    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
        let v: serde_json::Value = serde::Deserialize::deserialize(d)?;
        Ok(Self(v))
    }
}

/// Construct a [`SymbolTable`] from key-value pairs.
///
/// Values can be any type that implements `Into<ExprValue>`:
/// integers, floats, bools, string literals, `ExprValue`, or
/// `ExprType` (auto-wrapped as unresolved for type checking).
///
/// # Panics
///
/// Panics if a dotted path conflicts with an existing non-table entry.
///
/// ```
/// use openjd_expr::{symtab, ExprType};
///
/// let st = symtab! {
///     "Param.Frame" => 42,
///     "Param.Name" => "test",
///     "Session.Dir" => ExprType::PATH,
/// };
/// ```
#[macro_export]
macro_rules! symtab {
    ($($key:expr => $val:expr),* $(,)?) => {{
        let mut st = $crate::SymbolTable::new();
        $(st.set($key, $val).expect("symtab! path conflict");)*
        st
    }};
}