Skip to main content

plg_runtime/builtins/
atomops.rs

1//! Atom/number-string builtins: `atom_length/2`, `atom_concat/3`,
2//! `atom_chars/2`, `number_chars/2`, `number_codes/2`.
3//!
4//! Ported byte-for-byte from patch-prolog v1 (`solver.rs` arms). Modes
5//! verified against the oracle:
6//! - `atom_concat/3` supports ONLY the deterministic both-atoms-bound
7//!   mode; an unbound prefix/suffix raises `type_error(atom, _)` (v1 is
8//!   not nondeterministic here).
9//! - `atom_chars/2` works both directions (atom→one-char atoms,
10//!   list→atom).
11//! - `number_chars/2` and `number_codes/2` work both directions; garbage
12//!   input on the reverse direction raises a bare `syntax_error` formal.
13//! - float→chars/codes uses v1's `format_float` ("3.0", not "3").
14
15use crate::cell::*;
16use crate::machine::Machine;
17use crate::unify::unify;
18use plg_shared::atom::ATOM_NIL;
19
20#[inline]
21fn mref<'a>(m: *mut Machine) -> &'a mut Machine {
22    unsafe { &mut *m }
23}
24
25/// Build a nil-terminated list from words; return its word.
26fn build_list(m: &mut Machine, elems: &[Word]) -> Word {
27    let mut tail = make_atom(ATOM_NIL);
28    for &e in elems.iter().rev() {
29        let idx = m.heap.len();
30        m.heap.push(e);
31        m.heap.push(tail);
32        tail = make(TAG_LST, idx as u64);
33    }
34    tail
35}
36
37/// Collect a proper list's element words; `None` if not nil-terminated.
38fn collect_list(m: &Machine, w: Word) -> Option<Vec<Word>> {
39    let mut out = Vec::new();
40    let mut cur = m.deref(w);
41    loop {
42        match tag_of(cur) {
43            TAG_ATOM if atom_id(cur) == ATOM_NIL => return Some(out),
44            TAG_LST => {
45                let idx = payload(cur) as usize;
46                out.push(m.heap[idx]);
47                cur = m.deref(m.heap[idx + 1]);
48            }
49            _ => return None,
50        }
51    }
52}
53
54/// v1 `format_float`: append ".0" to a whole-valued float (so number_*/2
55/// renders 3.0 → "3.0", not "3"). NaN/Inf pass through `{}`.
56fn format_float(f: f64) -> String {
57    if f.is_nan() || f.is_infinite() {
58        return format!("{f}");
59    }
60    let s = format!("{f}");
61    if s.contains('.') || s.contains('e') || s.contains('E') {
62        s
63    } else {
64        format!("{s}.0")
65    }
66}
67
68/// Render a numeric word the way number_chars/number_codes expects.
69fn number_string(m: &Machine, w: Word) -> Option<String> {
70    match tag_of(w) {
71        TAG_INT => Some(int_value(w).to_string()),
72        TAG_BIG => Some((m.heap[payload(w) as usize] as i64).to_string()),
73        TAG_FLT => Some(format_float(f64::from_bits(m.heap[payload(w) as usize]))),
74        _ => None,
75    }
76}
77
78/// Raise v1's bare `syntax_error` formal with the given context.
79fn syntax_error(m: &mut Machine, context: &str) {
80    let f = make_atom(m.atoms.intern("syntax_error"));
81    crate::errors::set_formal(m, f, context, false);
82}
83
84/// Parse a numeric string into an INT or FLT word, mirroring v1's
85/// int-then-float fallback. `None` on a parse failure or a NaN/Inf float.
86fn parse_number(m: &mut Machine, s: &str) -> Option<Word> {
87    if let Ok(n) = s.parse::<i64>() {
88        if (INT_MIN..=INT_MAX).contains(&n) {
89            return Some(make_int(n));
90        }
91        let idx = m.heap.len();
92        m.heap.push(n as u64);
93        return Some(make(TAG_BIG, idx as u64));
94    }
95    if let Ok(f) = s.parse::<f64>() {
96        if f.is_nan() || f.is_infinite() {
97            return None;
98        }
99        let idx = m.heap.len();
100        m.heap.push(f.to_bits());
101        return Some(make(TAG_FLT, idx as u64));
102    }
103    None
104}
105
106/// `atom_length/2`: length (in chars) of an atom. 1 = success.
107#[unsafe(no_mangle)]
108pub extern "C" fn plg_rt_b_atom_length_2(
109    m: *mut Machine,
110    atom: u64,
111    len: u64,
112    site_id: u32,
113) -> i32 {
114    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
115    let m = mref(m);
116    let w = m.deref(atom);
117    if tag_of(w) == TAG_ATOM {
118        let n = m.atoms.resolve(atom_id(w)).chars().count() as i64;
119        unify(m, len, make_int(n)) as i32
120    } else {
121        crate::errors::type_error(
122            m,
123            "atom",
124            w,
125            "atom_length/2: first argument must be an atom",
126        );
127        0
128    }
129}
130
131/// `atom_concat/3`: concatenate two bound atoms. Only this mode is
132/// supported (v1 raises a type error otherwise).
133#[unsafe(no_mangle)]
134pub extern "C" fn plg_rt_b_atom_concat_3(
135    m: *mut Machine,
136    a: u64,
137    b: u64,
138    result: u64,
139    site_id: u32,
140) -> i32 {
141    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
142    let m = mref(m);
143    let wa = m.deref(a);
144    let wb = m.deref(b);
145    if tag_of(wa) == TAG_ATOM && tag_of(wb) == TAG_ATOM {
146        let s = format!(
147            "{}{}",
148            m.atoms.resolve(atom_id(wa)),
149            m.atoms.resolve(atom_id(wb))
150        );
151        let id = m.atoms.intern(&s);
152        unify(m, result, make_atom(id)) as i32
153    } else {
154        let culprit = if tag_of(wa) == TAG_ATOM { wb } else { wa };
155        crate::errors::type_error(
156            m,
157            "atom",
158            culprit,
159            "atom_concat/3: first two arguments must be atoms",
160        );
161        0
162    }
163}
164
165/// `atom_chars/2`: both directions (atom ↔ list of one-char atoms).
166#[unsafe(no_mangle)]
167pub extern "C" fn plg_rt_b_atom_chars_2(
168    m: *mut Machine,
169    atom: u64,
170    list: u64,
171    site_id: u32,
172) -> i32 {
173    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
174    let m = mref(m);
175    let w = m.deref(atom);
176    match tag_of(w) {
177        TAG_ATOM => {
178            // Forward: atom → list of single-char atoms.
179            let name = m.atoms.resolve(atom_id(w)).to_string();
180            let chars: Vec<Word> = name
181                .chars()
182                .map(|c| make_atom(m.atoms.intern(&c.to_string())))
183                .collect();
184            let lst = build_list(m, &chars);
185            unify(m, list, lst) as i32
186        }
187        TAG_REF => {
188            // Reverse: list of single-char atoms → atom.
189            let Some(elems) = collect_list(m, list) else {
190                let culprit = m.deref(list);
191                crate::errors::type_error(
192                    m,
193                    "list",
194                    culprit,
195                    "atom_chars/2: second argument must be a character list",
196                );
197                return 0;
198            };
199            match chars_to_string(m, &elems) {
200                Some(s) => {
201                    let id = m.atoms.intern(&s);
202                    unify(m, atom, make_atom(id)) as i32
203                }
204                None => 0, // a non-single-char element → fail (v1 backtrack)
205            }
206        }
207        _ => {
208            crate::errors::type_error(
209                m,
210                "atom",
211                w,
212                "atom_chars/2: first argument must be an atom or variable",
213            );
214            0
215        }
216    }
217}
218
219/// Join single-character atoms into a string; `None` if any element is
220/// not a one-character atom.
221fn chars_to_string(m: &Machine, elems: &[Word]) -> Option<String> {
222    let mut s = String::new();
223    for &e in elems {
224        let e = m.deref(e);
225        if tag_of(e) != TAG_ATOM {
226            return None;
227        }
228        let ch = m.atoms.resolve(atom_id(e));
229        if ch.chars().count() != 1 {
230            return None;
231        }
232        s.push_str(ch);
233    }
234    Some(s)
235}
236
237/// `number_chars/2`: both directions (number ↔ list of one-char atoms).
238#[unsafe(no_mangle)]
239pub extern "C" fn plg_rt_b_number_chars_2(
240    m: *mut Machine,
241    num: u64,
242    chars: u64,
243    site_id: u32,
244) -> i32 {
245    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
246    let m = mref(m);
247    let w = m.deref(num);
248    if let Some(s) = number_string(m, w) {
249        let elems: Vec<Word> = s
250            .chars()
251            .map(|c| make_atom(m.atoms.intern(&c.to_string())))
252            .collect();
253        let lst = build_list(m, &elems);
254        return unify(m, chars, lst) as i32;
255    }
256    if tag_of(w) == TAG_REF {
257        return number_from_chars(m, num, chars);
258    }
259    crate::errors::type_error(
260        m,
261        "number",
262        w,
263        "number_chars/2: first argument must be a number",
264    );
265    0
266}
267
268/// Reverse direction for `number_chars/2`.
269fn number_from_chars(m: &mut Machine, num: u64, chars: u64) -> i32 {
270    let Some(elems) = collect_list(m, chars) else {
271        crate::errors::instantiation(m, "number_chars/2: at least one argument must be bound");
272        return 0;
273    };
274    let Some(s) = chars_to_string(m, &elems) else {
275        let culprit = m.deref(chars);
276        crate::errors::domain_error(
277            m,
278            "single_character_atoms",
279            culprit,
280            "number_chars/2: list elements must be single-character atoms",
281        );
282        return 0;
283    };
284    match parse_number(m, &s) {
285        Some(n) => unify(m, num, n) as i32,
286        None => {
287            syntax_error(m, "number_chars/2: invalid number syntax");
288            0
289        }
290    }
291}
292
293/// `number_codes/2`: both directions (number ↔ list of char codes).
294#[unsafe(no_mangle)]
295pub extern "C" fn plg_rt_b_number_codes_2(
296    m: *mut Machine,
297    num: u64,
298    codes: u64,
299    site_id: u32,
300) -> i32 {
301    let _site = crate::machine::ErrorSiteGuard::enter(m, site_id);
302    let m = mref(m);
303    let w = m.deref(num);
304    if let Some(s) = number_string(m, w) {
305        let elems: Vec<Word> = s.chars().map(|c| make_int(c as i64)).collect();
306        let lst = build_list(m, &elems);
307        return unify(m, codes, lst) as i32;
308    }
309    if tag_of(w) == TAG_REF {
310        return number_from_codes(m, num, codes);
311    }
312    crate::errors::type_error(
313        m,
314        "number",
315        w,
316        "number_codes/2: first argument must be a number",
317    );
318    0
319}
320
321/// Reverse direction for `number_codes/2`.
322fn number_from_codes(m: &mut Machine, num: u64, codes: u64) -> i32 {
323    let Some(elems) = collect_list(m, codes) else {
324        crate::errors::instantiation(m, "number_codes/2: at least one argument must be bound");
325        return 0;
326    };
327    let mut s = String::new();
328    for &e in &elems {
329        let e = m.deref(e);
330        let code = match tag_of(e) {
331            TAG_INT => int_value(e),
332            TAG_BIG => m.heap[payload(e) as usize] as i64,
333            _ => return codes_domain_error(m, codes),
334        };
335        match (0..=0x10FFFF)
336            .contains(&code)
337            .then(|| char::from_u32(code as u32))
338        {
339            Some(Some(c)) => s.push(c),
340            _ => return codes_domain_error(m, codes),
341        }
342    }
343    match parse_number(m, &s) {
344        Some(n) => unify(m, num, n) as i32,
345        None => {
346            syntax_error(m, "number_codes/2: invalid number syntax");
347            0
348        }
349    }
350}
351
352fn codes_domain_error(m: &mut Machine, codes: u64) -> i32 {
353    let culprit = m.deref(codes);
354    crate::errors::domain_error(
355        m,
356        "character_codes",
357        culprit,
358        "number_codes/2: list elements must be valid character codes",
359    );
360    0
361}
362
363#[cfg(test)]
364mod tests {
365    use super::*;
366    use crate::machine::NO_SITE;
367    use plg_shared::StringInterner;
368
369    fn machine() -> Box<Machine> {
370        Machine::new(StringInterner::new(), Vec::new())
371    }
372
373    // Thin wrappers: existing tests exercise behavior, not provenance.
374    fn alen(m: *mut Machine, a: u64, l: u64) -> i32 {
375        plg_rt_b_atom_length_2(m, a, l, NO_SITE)
376    }
377    fn acat(m: *mut Machine, a: u64, b: u64, r: u64) -> i32 {
378        plg_rt_b_atom_concat_3(m, a, b, r, NO_SITE)
379    }
380    fn achars(m: *mut Machine, a: u64, l: u64) -> i32 {
381        plg_rt_b_atom_chars_2(m, a, l, NO_SITE)
382    }
383    fn nchars(m: *mut Machine, n: u64, c: u64) -> i32 {
384        plg_rt_b_number_chars_2(m, n, c, NO_SITE)
385    }
386    fn ncodes(m: *mut Machine, n: u64, c: u64) -> i32 {
387        plg_rt_b_number_codes_2(m, n, c, NO_SITE)
388    }
389
390    fn msg(m: &Machine) -> &str {
391        m.error.as_ref().unwrap().message.as_str()
392    }
393
394    fn atom_word(m: &mut Machine, s: &str) -> Word {
395        make_atom(m.atoms.intern(s))
396    }
397
398    fn flt(m: &mut Machine, f: f64) -> Word {
399        let idx = m.heap.len();
400        m.heap.push(f.to_bits());
401        make(TAG_FLT, idx as u64)
402    }
403
404    #[test]
405    fn atom_length_ok_and_error() {
406        let mut m = machine();
407        let foo = atom_word(&mut m, "foo");
408        let x = m.new_var();
409        let mp = &mut *m as *mut Machine;
410        assert_eq!(alen(mp, foo, x), 1);
411        assert_eq!(int_value(m.deref(x)), 3);
412        // mismatch fails
413        let mp = &mut *m as *mut Machine;
414        assert_eq!(alen(mp, foo, make_int(5)), 0);
415        // non-atom errors
416        let mp = &mut *m as *mut Machine;
417        let y = m.new_var();
418        assert_eq!(alen(mp, make_int(123), y), 0);
419        assert_eq!(
420            msg(&m),
421            "error(type_error(atom, 123), atom_length/2: first argument must be an atom)"
422        );
423    }
424
425    #[test]
426    fn atom_concat_both_bound_and_error() {
427        let mut m = machine();
428        let foo = atom_word(&mut m, "foo");
429        let bar = atom_word(&mut m, "bar");
430        let x = m.new_var();
431        let mp = &mut *m as *mut Machine;
432        assert_eq!(acat(mp, foo, bar, x), 1);
433        let foobar = m.atoms.lookup("foobar").unwrap();
434        assert_eq!(m.deref(x), make_atom(foobar));
435
436        // unbound prefix → type_error(atom, _N)
437        let mut m = machine();
438        let bar = atom_word(&mut m, "bar");
439        let foobar = atom_word(&mut m, "foobar");
440        let v = m.new_var();
441        let mp = &mut *m as *mut Machine;
442        assert_eq!(acat(mp, v, bar, foobar), 0);
443        assert!(msg(&m).starts_with("error(type_error(atom, _"));
444        assert!(msg(&m).ends_with("atom_concat/3: first two arguments must be atoms)"));
445    }
446
447    #[test]
448    fn atom_chars_both_directions() {
449        let mut m = machine();
450        let foo = atom_word(&mut m, "foo");
451        let x = m.new_var();
452        let mp = &mut *m as *mut Machine;
453        assert_eq!(achars(mp, foo, x), 1);
454        let elems = collect_list(&m, x).unwrap();
455        assert_eq!(elems.len(), 3);
456        let f = m.atoms.lookup("f").unwrap();
457        assert_eq!(m.deref(elems[0]), make_atom(f));
458
459        // reverse: [f,o,o] → foo
460        let f = atom_word(&mut m, "f");
461        let o = atom_word(&mut m, "o");
462        let inlist = build_list(&mut m, &[f, o, o]);
463        let a = m.new_var();
464        let mp = &mut *m as *mut Machine;
465        assert_eq!(achars(mp, a, inlist), 1);
466        let foo = m.atoms.lookup("foo").unwrap();
467        assert_eq!(m.deref(a), make_atom(foo));
468    }
469
470    #[test]
471    fn number_chars_both_directions() {
472        let mut m = machine();
473        // 123 → ['1','2','3']
474        let x = m.new_var();
475        let mp = &mut *m as *mut Machine;
476        assert_eq!(nchars(mp, make_int(123), x), 1);
477        let elems = collect_list(&m, x).unwrap();
478        assert_eq!(elems.len(), 3);
479        let one = m.atoms.lookup("1").unwrap();
480        assert_eq!(m.deref(elems[0]), make_atom(one));
481
482        // ['1','2','3'] → 123
483        let c1 = atom_word(&mut m, "1");
484        let c2 = atom_word(&mut m, "2");
485        let c3 = atom_word(&mut m, "3");
486        let inlist = build_list(&mut m, &[c1, c2, c3]);
487        let n = m.new_var();
488        let mp = &mut *m as *mut Machine;
489        assert_eq!(nchars(mp, n, inlist), 1);
490        assert_eq!(int_value(m.deref(n)), 123);
491    }
492
493    #[test]
494    fn number_chars_float_uses_dot_zero() {
495        let mut m = machine();
496        let three = flt(&mut m, 3.0);
497        let x = m.new_var();
498        let mp = &mut *m as *mut Machine;
499        assert_eq!(nchars(mp, three, x), 1);
500        let s = chars_to_string(&m, &collect_list(&m, x).unwrap()).unwrap();
501        assert_eq!(s, "3.0");
502    }
503
504    #[test]
505    fn number_chars_garbage_is_syntax_error() {
506        let mut m = machine();
507        let a = atom_word(&mut m, "a");
508        let inlist = build_list(&mut m, &[a]);
509        let n = m.new_var();
510        let mp = &mut *m as *mut Machine;
511        assert_eq!(nchars(mp, n, inlist), 0);
512        assert_eq!(
513            msg(&m),
514            "error(syntax_error, number_chars/2: invalid number syntax)"
515        );
516    }
517
518    #[test]
519    fn number_codes_both_directions() {
520        let mut m = machine();
521        // 123 → [49,50,51]
522        let x = m.new_var();
523        let mp = &mut *m as *mut Machine;
524        assert_eq!(ncodes(mp, make_int(123), x), 1);
525        let elems = collect_list(&m, x).unwrap();
526        assert_eq!(int_value(m.deref(elems[0])), 49);
527
528        // [49,50,51] → 123
529        let inlist = build_list(&mut m, &[make_int(49), make_int(50), make_int(51)]);
530        let n = m.new_var();
531        let mp = &mut *m as *mut Machine;
532        assert_eq!(ncodes(mp, n, inlist), 1);
533        assert_eq!(int_value(m.deref(n)), 123);
534    }
535}