Skip to main content

plg_runtime/builtins/
atomops.rs

1//! Atom/number-string builtins: `atom_length/2`, `atom_chars/2`,
2//! `number_chars/2`, `number_codes/2`.
3//!
4//! Modes follow ISO. Notes:
5//! - `atom_concat/3` is nondeterministic (split mode) and so lives with the
6//!   other predicate-dispatched builtins in `control.rs`, not here.
7//! - `atom_chars/2` works both directions (atom→one-char atoms,
8//!   list→atom).
9//! - `number_chars/2` and `number_codes/2` work both directions; garbage
10//!   input on the reverse direction raises a bare `syntax_error` formal.
11//! - float→chars/codes uses v1's `format_float` ("3.0", not "3").
12
13use crate::cell::*;
14use crate::machine::Machine;
15use crate::unify::unify;
16use plg_shared::atom::ATOM_NIL;
17
18#[inline]
19fn mref<'a>(m: *mut Machine) -> &'a mut Machine {
20    unsafe { &mut *m }
21}
22
23/// Build a nil-terminated list from words; return its word.
24fn build_list(m: &mut Machine, elems: &[Word]) -> Word {
25    let mut tail = make_atom(ATOM_NIL);
26    for &e in elems.iter().rev() {
27        let idx = m.heap.len();
28        m.heap.push(e);
29        m.heap.push(tail);
30        tail = make(TAG_LST, idx as u64);
31    }
32    tail
33}
34
35/// Collect a proper list's element words; `None` if not nil-terminated.
36fn collect_list(m: &Machine, w: Word) -> Option<Vec<Word>> {
37    let mut out = Vec::new();
38    let mut cur = m.deref(w);
39    loop {
40        match tag_of(cur) {
41            TAG_ATOM if atom_id(cur) == ATOM_NIL => return Some(out),
42            TAG_LST => {
43                let idx = payload(cur) as usize;
44                out.push(m.heap[idx]);
45                cur = m.deref(m.heap[idx + 1]);
46            }
47            _ => return None,
48        }
49    }
50}
51
52/// v1 `format_float`: append ".0" to a whole-valued float (so number_*/2
53/// renders 3.0 → "3.0", not "3"). NaN/Inf pass through `{}`.
54fn format_float(f: f64) -> String {
55    if f.is_nan() || f.is_infinite() {
56        return format!("{f}");
57    }
58    let s = format!("{f}");
59    if s.contains('.') || s.contains('e') || s.contains('E') {
60        s
61    } else {
62        format!("{s}.0")
63    }
64}
65
66/// Render a numeric word the way number_chars/number_codes expects.
67fn number_string(m: &Machine, w: Word) -> Option<String> {
68    match tag_of(w) {
69        TAG_INT => Some(int_value(w).to_string()),
70        TAG_BIG => Some((m.heap[payload(w) as usize] as i64).to_string()),
71        TAG_FLT => Some(format_float(f64::from_bits(m.heap[payload(w) as usize]))),
72        _ => None,
73    }
74}
75
76/// Raise v1's bare `syntax_error` formal with the given context.
77fn syntax_error(m: &mut Machine, context: &str) {
78    let f = make_atom(m.atoms.intern("syntax_error"));
79    crate::errors::set_formal(m, f, context, false);
80}
81
82/// Parse a numeric string into an INT or FLT word, mirroring v1's
83/// int-then-float fallback. `None` on a parse failure or a NaN/Inf float.
84fn parse_number(m: &mut Machine, s: &str) -> Option<Word> {
85    if let Ok(n) = s.parse::<i64>() {
86        if (INT_MIN..=INT_MAX).contains(&n) {
87            return Some(make_int(n));
88        }
89        let idx = m.heap.len();
90        m.heap.push(n as u64);
91        return Some(make(TAG_BIG, idx as u64));
92    }
93    if let Ok(f) = s.parse::<f64>() {
94        if f.is_nan() || f.is_infinite() {
95            return None;
96        }
97        let idx = m.heap.len();
98        m.heap.push(f.to_bits());
99        return Some(make(TAG_FLT, idx as u64));
100    }
101    None
102}
103
104/// `atom_length/2`: length (in chars) of an atom. 1 = success.
105#[unsafe(no_mangle)]
106pub extern "C" fn plg_rt_b_atom_length_2(
107    m: *mut Machine,
108    atom: u64,
109    len: u64,
110    site_id: u32,
111) -> i32 {
112    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
113    let m = mref(m);
114    let w = m.deref(atom);
115    if tag_of(w) == TAG_ATOM {
116        let n = m.atoms.resolve(atom_id(w)).chars().count() as i64;
117        unify(m, len, make_int(n)) as i32
118    } else {
119        crate::errors::type_error(
120            m,
121            "atom",
122            w,
123            "atom_length/2: first argument must be an atom",
124        );
125        0
126    }
127}
128
129/// `atom_chars/2`: both directions (atom ↔ list of one-char atoms).
130#[unsafe(no_mangle)]
131pub extern "C" fn plg_rt_b_atom_chars_2(
132    m: *mut Machine,
133    atom: u64,
134    list: u64,
135    site_id: u32,
136) -> i32 {
137    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
138    let m = mref(m);
139    let w = m.deref(atom);
140    match tag_of(w) {
141        TAG_ATOM => {
142            // Forward: atom → list of single-char atoms.
143            let name = m.atoms.resolve(atom_id(w)).to_string();
144            let chars: Vec<Word> = name
145                .chars()
146                .map(|c| make_atom(m.atoms.intern(&c.to_string())))
147                .collect();
148            let lst = build_list(m, &chars);
149            unify(m, list, lst) as i32
150        }
151        TAG_REF => {
152            // Reverse: list of single-char atoms → atom.
153            let Some(elems) = collect_list(m, list) else {
154                let culprit = m.deref(list);
155                crate::errors::type_error(
156                    m,
157                    "list",
158                    culprit,
159                    "atom_chars/2: second argument must be a character list",
160                );
161                return 0;
162            };
163            match chars_to_string(m, &elems) {
164                Some(s) => {
165                    let id = m.atoms.intern(&s);
166                    unify(m, atom, make_atom(id)) as i32
167                }
168                None => 0, // a non-single-char element → fail (v1 backtrack)
169            }
170        }
171        _ => {
172            crate::errors::type_error(
173                m,
174                "atom",
175                w,
176                "atom_chars/2: first argument must be an atom or variable",
177            );
178            0
179        }
180    }
181}
182
183/// Join single-character atoms into a string; `None` if any element is
184/// not a one-character atom.
185fn chars_to_string(m: &Machine, elems: &[Word]) -> Option<String> {
186    let mut s = String::new();
187    for &e in elems {
188        let e = m.deref(e);
189        if tag_of(e) != TAG_ATOM {
190            return None;
191        }
192        let ch = m.atoms.resolve(atom_id(e));
193        if ch.chars().count() != 1 {
194            return None;
195        }
196        s.push_str(ch);
197    }
198    Some(s)
199}
200
201/// `number_chars/2`: both directions (number ↔ list of one-char atoms).
202#[unsafe(no_mangle)]
203pub extern "C" fn plg_rt_b_number_chars_2(
204    m: *mut Machine,
205    num: u64,
206    chars: u64,
207    site_id: u32,
208) -> i32 {
209    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
210    let m = mref(m);
211    let w = m.deref(num);
212    if let Some(s) = number_string(m, w) {
213        let elems: Vec<Word> = s
214            .chars()
215            .map(|c| make_atom(m.atoms.intern(&c.to_string())))
216            .collect();
217        let lst = build_list(m, &elems);
218        return unify(m, chars, lst) as i32;
219    }
220    if tag_of(w) == TAG_REF {
221        return number_from_chars(m, num, chars);
222    }
223    crate::errors::type_error(
224        m,
225        "number",
226        w,
227        "number_chars/2: first argument must be a number",
228    );
229    0
230}
231
232/// Reverse direction for `number_chars/2`.
233fn number_from_chars(m: &mut Machine, num: u64, chars: u64) -> i32 {
234    let Some(elems) = collect_list(m, chars) else {
235        crate::errors::instantiation(m, "number_chars/2: at least one argument must be bound");
236        return 0;
237    };
238    let Some(s) = chars_to_string(m, &elems) else {
239        let culprit = m.deref(chars);
240        crate::errors::domain_error(
241            m,
242            "single_character_atoms",
243            culprit,
244            "number_chars/2: list elements must be single-character atoms",
245        );
246        return 0;
247    };
248    match parse_number(m, &s) {
249        Some(n) => unify(m, num, n) as i32,
250        None => {
251            syntax_error(m, "number_chars/2: invalid number syntax");
252            0
253        }
254    }
255}
256
257/// `number_codes/2`: both directions (number ↔ list of char codes).
258#[unsafe(no_mangle)]
259pub extern "C" fn plg_rt_b_number_codes_2(
260    m: *mut Machine,
261    num: u64,
262    codes: u64,
263    site_id: u32,
264) -> i32 {
265    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
266    let m = mref(m);
267    let w = m.deref(num);
268    if let Some(s) = number_string(m, w) {
269        let elems: Vec<Word> = s.chars().map(|c| make_int(c as i64)).collect();
270        let lst = build_list(m, &elems);
271        return unify(m, codes, lst) as i32;
272    }
273    if tag_of(w) == TAG_REF {
274        return number_from_codes(m, num, codes);
275    }
276    crate::errors::type_error(
277        m,
278        "number",
279        w,
280        "number_codes/2: first argument must be a number",
281    );
282    0
283}
284
285/// Reverse direction for `number_codes/2`.
286fn number_from_codes(m: &mut Machine, num: u64, codes: u64) -> i32 {
287    let Some(elems) = collect_list(m, codes) else {
288        crate::errors::instantiation(m, "number_codes/2: at least one argument must be bound");
289        return 0;
290    };
291    let mut s = String::new();
292    for &e in &elems {
293        let e = m.deref(e);
294        let code = match tag_of(e) {
295            TAG_INT => int_value(e),
296            TAG_BIG => m.heap[payload(e) as usize] as i64,
297            _ => return codes_domain_error(m, codes),
298        };
299        match (0..=0x10FFFF)
300            .contains(&code)
301            .then(|| char::from_u32(code as u32))
302        {
303            Some(Some(c)) => s.push(c),
304            _ => return codes_domain_error(m, codes),
305        }
306    }
307    match parse_number(m, &s) {
308        Some(n) => unify(m, num, n) as i32,
309        None => {
310            syntax_error(m, "number_codes/2: invalid number syntax");
311            0
312        }
313    }
314}
315
316fn codes_domain_error(m: &mut Machine, codes: u64) -> i32 {
317    let culprit = m.deref(codes);
318    crate::errors::domain_error(
319        m,
320        "character_codes",
321        culprit,
322        "number_codes/2: list elements must be valid character codes",
323    );
324    0
325}
326
327#[cfg(test)]
328mod tests {
329    use super::*;
330    use crate::machine::NO_SITE;
331    use plg_shared::StringInterner;
332
333    fn machine() -> Box<Machine> {
334        Machine::new(StringInterner::new(), Vec::new())
335    }
336
337    // Thin wrappers: existing tests exercise behavior, not provenance.
338    fn alen(m: *mut Machine, a: u64, l: u64) -> i32 {
339        plg_rt_b_atom_length_2(m, a, l, NO_SITE)
340    }
341    fn achars(m: *mut Machine, a: u64, l: u64) -> i32 {
342        plg_rt_b_atom_chars_2(m, a, l, NO_SITE)
343    }
344    fn nchars(m: *mut Machine, n: u64, c: u64) -> i32 {
345        plg_rt_b_number_chars_2(m, n, c, NO_SITE)
346    }
347    fn ncodes(m: *mut Machine, n: u64, c: u64) -> i32 {
348        plg_rt_b_number_codes_2(m, n, c, NO_SITE)
349    }
350
351    fn msg(m: &Machine) -> &str {
352        m.error.as_ref().unwrap().message.as_str()
353    }
354
355    fn atom_word(m: &mut Machine, s: &str) -> Word {
356        make_atom(m.atoms.intern(s))
357    }
358
359    fn flt(m: &mut Machine, f: f64) -> Word {
360        let idx = m.heap.len();
361        m.heap.push(f.to_bits());
362        make(TAG_FLT, idx as u64)
363    }
364
365    #[test]
366    fn atom_length_ok_and_error() {
367        let mut m = machine();
368        let foo = atom_word(&mut m, "foo");
369        let x = m.new_var();
370        let mp = &mut *m as *mut Machine;
371        assert_eq!(alen(mp, foo, x), 1);
372        assert_eq!(int_value(m.deref(x)), 3);
373        // mismatch fails
374        let mp = &mut *m as *mut Machine;
375        assert_eq!(alen(mp, foo, make_int(5)), 0);
376        // non-atom errors
377        let mp = &mut *m as *mut Machine;
378        let y = m.new_var();
379        assert_eq!(alen(mp, make_int(123), y), 0);
380        assert_eq!(
381            msg(&m),
382            "error(type_error(atom, 123), atom_length/2: first argument must be an atom)"
383        );
384    }
385
386    #[test]
387    fn atom_chars_both_directions() {
388        let mut m = machine();
389        let foo = atom_word(&mut m, "foo");
390        let x = m.new_var();
391        let mp = &mut *m as *mut Machine;
392        assert_eq!(achars(mp, foo, x), 1);
393        let elems = collect_list(&m, x).unwrap();
394        assert_eq!(elems.len(), 3);
395        let f = m.atoms.lookup("f").unwrap();
396        assert_eq!(m.deref(elems[0]), make_atom(f));
397
398        // reverse: [f,o,o] → foo
399        let f = atom_word(&mut m, "f");
400        let o = atom_word(&mut m, "o");
401        let inlist = build_list(&mut m, &[f, o, o]);
402        let a = m.new_var();
403        let mp = &mut *m as *mut Machine;
404        assert_eq!(achars(mp, a, inlist), 1);
405        let foo = m.atoms.lookup("foo").unwrap();
406        assert_eq!(m.deref(a), make_atom(foo));
407    }
408
409    #[test]
410    fn number_chars_both_directions() {
411        let mut m = machine();
412        // 123 → ['1','2','3']
413        let x = m.new_var();
414        let mp = &mut *m as *mut Machine;
415        assert_eq!(nchars(mp, make_int(123), x), 1);
416        let elems = collect_list(&m, x).unwrap();
417        assert_eq!(elems.len(), 3);
418        let one = m.atoms.lookup("1").unwrap();
419        assert_eq!(m.deref(elems[0]), make_atom(one));
420
421        // ['1','2','3'] → 123
422        let c1 = atom_word(&mut m, "1");
423        let c2 = atom_word(&mut m, "2");
424        let c3 = atom_word(&mut m, "3");
425        let inlist = build_list(&mut m, &[c1, c2, c3]);
426        let n = m.new_var();
427        let mp = &mut *m as *mut Machine;
428        assert_eq!(nchars(mp, n, inlist), 1);
429        assert_eq!(int_value(m.deref(n)), 123);
430    }
431
432    #[test]
433    fn number_chars_float_uses_dot_zero() {
434        let mut m = machine();
435        let three = flt(&mut m, 3.0);
436        let x = m.new_var();
437        let mp = &mut *m as *mut Machine;
438        assert_eq!(nchars(mp, three, x), 1);
439        let s = chars_to_string(&m, &collect_list(&m, x).unwrap()).unwrap();
440        assert_eq!(s, "3.0");
441    }
442
443    #[test]
444    fn number_chars_garbage_is_syntax_error() {
445        let mut m = machine();
446        let a = atom_word(&mut m, "a");
447        let inlist = build_list(&mut m, &[a]);
448        let n = m.new_var();
449        let mp = &mut *m as *mut Machine;
450        assert_eq!(nchars(mp, n, inlist), 0);
451        assert_eq!(
452            msg(&m),
453            "error(syntax_error, number_chars/2: invalid number syntax)"
454        );
455    }
456
457    #[test]
458    fn number_codes_both_directions() {
459        let mut m = machine();
460        // 123 → [49,50,51]
461        let x = m.new_var();
462        let mp = &mut *m as *mut Machine;
463        assert_eq!(ncodes(mp, make_int(123), x), 1);
464        let elems = collect_list(&m, x).unwrap();
465        assert_eq!(int_value(m.deref(elems[0])), 49);
466
467        // [49,50,51] → 123
468        let inlist = build_list(&mut m, &[make_int(49), make_int(50), make_int(51)]);
469        let n = m.new_var();
470        let mp = &mut *m as *mut Machine;
471        assert_eq!(ncodes(mp, n, inlist), 1);
472        assert_eq!(int_value(m.deref(n)), 123);
473    }
474}