1mod ops;
15
16use crate::cell::{self, Word};
17use crate::machine::Machine;
18use std::collections::HashMap;
19
20pub fn parse_query(m: &mut Machine, src: &str) -> Result<Word, String> {
21 let mut p = QueryParser {
22 chars: src.chars().collect(),
23 pos: 0,
24 vars: HashMap::new(),
25 };
26 let goal = p.parse_level(m, 1200)?;
27 p.skip_ws();
28 if p.peek() == Some('.') {
30 p.pos += 1;
31 p.skip_ws();
32 }
33 if p.pos < p.chars.len() {
34 return Err(format!("unexpected input at column {}", p.pos + 1));
35 }
36 Ok(goal)
37}
38
39pub(crate) struct QueryParser {
40 pub(crate) chars: Vec<char>,
41 pub(crate) pos: usize,
42 vars: HashMap<String, Word>,
43}
44
45impl QueryParser {
46 pub(crate) fn peek(&self) -> Option<char> {
47 self.chars.get(self.pos).copied()
48 }
49
50 pub(crate) fn skip_ws(&mut self) {
51 while let Some(c) = self.peek() {
52 if c.is_whitespace() {
53 self.pos += 1;
54 } else if c == '%' {
55 while self.peek().is_some_and(|c| c != '\n') {
56 self.pos += 1;
57 }
58 } else {
59 break;
60 }
61 }
62 }
63
64 fn expect(&mut self, c: char) -> Result<(), String> {
65 self.skip_ws();
66 if self.peek() == Some(c) {
67 self.pos += 1;
68 Ok(())
69 } else {
70 Err(format!("expected `{c}` at column {}", self.pos + 1))
71 }
72 }
73
74 pub(crate) fn make_binop(&self, m: &mut Machine, name: &str, a: Word, b: Word) -> Word {
75 let id = m.atoms.intern(name);
76 let idx = m.heap.len();
77 m.heap.push(cell::pack_functor(id, 2));
78 m.heap.push(a);
79 m.heap.push(b);
80 cell::make(cell::TAG_STR, idx as u64)
81 }
82
83 pub(crate) fn parse_primary(&mut self, m: &mut Machine) -> Result<Word, String> {
85 self.skip_ws();
86 match self.peek() {
87 None => Err("unexpected end of query".to_string()),
88 Some('(') => {
89 self.pos += 1;
90 let t = self.parse_level(m, 1200)?;
91 self.expect(')')?;
92 Ok(t)
93 }
94 Some('[') => self.parse_list(m),
95 Some('\'') => {
96 let name = self.read_quoted()?;
97 self.parse_atom_or_compound(m, name)
98 }
99 Some(c) if c.is_ascii_digit() => self.parse_number(m, false),
100 Some(c) if is_symbol_atom_char(c) && self.symbol_run_is_standalone() => {
104 let name = self.read_symbol_run();
105 self.parse_atom_or_compound(m, name)
106 }
107 Some('-')
108 if self
109 .chars
110 .get(self.pos + 1)
111 .is_some_and(|c| c.is_ascii_digit()) =>
112 {
113 self.pos += 1;
114 self.parse_number(m, true)
115 }
116 Some('-') => {
117 self.pos += 1;
119 let t = self.parse_primary(m)?;
120 self.make_prefix(m, "-", t)
121 }
122 Some('+') => {
123 self.pos += 1;
126 self.skip_ws();
127 if self.peek().is_some_and(|c| c.is_ascii_digit()) {
128 self.parse_number(m, false)
129 } else {
130 let t = self.parse_primary(m)?;
131 self.make_prefix(m, "+", t)
132 }
133 }
134 Some('\\') => {
135 let name = self.read_symbol_run();
138 if self.peek() == Some('(') {
139 self.parse_atom_or_compound(m, name)
140 } else {
141 let t = self.parse_primary(m)?;
142 self.make_prefix(m, &name, t)
143 }
144 }
145 Some('!') => {
146 self.pos += 1;
147 Ok(cell::make_atom(m.atoms.intern("!")))
148 }
149 Some(c) if c.is_uppercase() || c == '_' => {
150 let name = self.read_ident();
151 Ok(self.var_word(m, &name))
152 }
153 Some(c) if c.is_lowercase() => {
154 let name = self.read_ident();
155 self.parse_atom_or_compound(m, name)
156 }
157 Some(c) => Err(format!("unexpected `{c}` at column {}", self.pos + 1)),
158 }
159 }
160
161 fn parse_atom_or_compound(&mut self, m: &mut Machine, name: String) -> Result<Word, String> {
162 let id = m.atoms.intern(&name);
163 if self.peek() == Some('(') {
165 self.pos += 1;
166 let mut args = vec![self.parse_level(m, 999)?];
168 loop {
169 self.skip_ws();
170 match self.peek() {
171 Some(',') => {
172 self.pos += 1;
173 args.push(self.parse_level(m, 999)?);
174 }
175 Some(')') => {
176 self.pos += 1;
177 break;
178 }
179 _ => return Err(format!("expected `,` or `)` at column {}", self.pos + 1)),
180 }
181 }
182 let idx = m.heap.len();
183 m.heap.push(cell::pack_functor(id, args.len() as u32));
184 m.heap.extend_from_slice(&args);
185 Ok(cell::make(cell::TAG_STR, idx as u64))
186 } else {
187 Ok(cell::make_atom(id))
188 }
189 }
190
191 fn parse_list(&mut self, m: &mut Machine) -> Result<Word, String> {
192 self.expect('[')?;
193 self.skip_ws();
194 if self.peek() == Some(']') {
195 self.pos += 1;
196 return Ok(cell::make_atom(plg_shared::atom::ATOM_NIL));
197 }
198 let mut elements = vec![self.parse_level(m, 999)?];
199 let mut tail = None;
200 loop {
201 self.skip_ws();
202 match self.peek() {
203 Some(',') => {
204 self.pos += 1;
205 elements.push(self.parse_level(m, 999)?);
206 }
207 Some('|') => {
208 self.pos += 1;
209 tail = Some(self.parse_level(m, 999)?);
210 self.expect(']')?;
211 break;
212 }
213 Some(']') => {
214 self.pos += 1;
215 break;
216 }
217 _ => {
218 return Err(format!(
219 "expected `,`, `|` or `]` at column {}",
220 self.pos + 1
221 ));
222 }
223 }
224 }
225 let mut w = tail.unwrap_or(cell::make_atom(plg_shared::atom::ATOM_NIL));
226 for e in elements.into_iter().rev() {
227 let idx = m.heap.len();
228 m.heap.push(e);
229 m.heap.push(w);
230 w = cell::make(cell::TAG_LST, idx as u64);
231 }
232 Ok(w)
233 }
234
235 fn parse_number(&mut self, m: &mut Machine, neg: bool) -> Result<Word, String> {
239 let start = self.pos;
240 while self.peek().is_some_and(|c| c.is_ascii_digit()) {
241 self.pos += 1;
242 }
243 let is_float = self.peek() == Some('.')
244 && self
245 .chars
246 .get(self.pos + 1)
247 .is_some_and(|c| c.is_ascii_digit());
248 if is_float {
249 self.pos += 1; while self.peek().is_some_and(|c| c.is_ascii_digit()) {
251 self.pos += 1;
252 }
253 let text: String = self.chars[start..self.pos].iter().collect();
254 let f: f64 = text
255 .parse()
256 .map_err(|_| format!("invalid float `{text}`"))?;
257 let f = if neg { -f } else { f };
258 let idx = m.heap.len();
259 m.heap.push(f.to_bits());
260 return Ok(cell::make(cell::TAG_FLT, idx as u64));
261 }
262 let digits: String = self.chars[start..self.pos].iter().collect();
263 let n: i64 = digits
264 .parse()
265 .map_err(|_| format!("invalid integer `{digits}`"))?;
266 let n = if neg { -n } else { n };
267 if !(cell::INT_MIN..=cell::INT_MAX).contains(&n) {
268 let idx = m.heap.len();
270 m.heap.push(n as u64);
271 return Ok(cell::make(cell::TAG_BIG, idx as u64));
272 }
273 Ok(cell::make_int(n))
274 }
275
276 pub(crate) fn read_ident(&mut self) -> String {
277 let start = self.pos;
278 while self.peek().is_some_and(|c| c.is_alphanumeric() || c == '_') {
279 self.pos += 1;
280 }
281 self.chars[start..self.pos].iter().collect()
282 }
283
284 fn read_quoted(&mut self) -> Result<String, String> {
285 self.pos += 1; let mut out = String::new();
287 loop {
288 match self.peek() {
289 None => return Err("unterminated quoted atom".to_string()),
290 Some('\'') => {
291 self.pos += 1;
292 if self.peek() == Some('\'') {
293 out.push('\''); self.pos += 1;
295 } else {
296 return Ok(out);
297 }
298 }
299 Some(c) => {
300 out.push(c);
301 self.pos += 1;
302 }
303 }
304 }
305 }
306
307 fn read_symbol_run(&mut self) -> String {
308 let start = self.pos;
309 while self.peek().is_some_and(is_symbol_atom_char) {
310 self.pos += 1;
311 }
312 self.chars[start..self.pos].iter().collect()
313 }
314
315 fn symbol_run_is_standalone(&self) -> bool {
319 let mut p = self.pos;
320 while self.chars.get(p).copied().is_some_and(is_symbol_atom_char) {
321 p += 1;
322 }
323 if self.chars.get(p) == Some(&'(') {
324 return true; }
326 let mut q = p;
327 while self.chars.get(q).is_some_and(|c| c.is_whitespace()) {
328 q += 1;
329 }
330 match self.chars.get(q) {
331 None => {
332 true
336 }
337 Some(')' | ',' | ']' | '|') => true,
338 _ => false,
339 }
340 }
341
342 fn make_prefix(&self, m: &mut Machine, name: &str, t: Word) -> Result<Word, String> {
343 let id = m.atoms.intern(name);
344 let idx = m.heap.len();
345 m.heap.push(cell::pack_functor(id, 1));
346 m.heap.push(t);
347 Ok(cell::make(cell::TAG_STR, idx as u64))
348 }
349
350 fn var_word(&mut self, m: &mut Machine, name: &str) -> Word {
353 if name == "_" {
354 return m.new_var();
355 }
356 if let Some(&w) = self.vars.get(name) {
357 return w;
358 }
359 let w = m.new_var();
360 self.vars.insert(name.to_string(), w);
361 m.query_vars
362 .push((name.to_string(), cell::payload(w) as usize));
363 w
364 }
365}
366
367fn is_symbol_atom_char(c: char) -> bool {
369 matches!(
370 c,
371 '+' | '-' | '*' | '/' | '\\' | '<' | '>' | '=' | ':' | '@' | '^' | '.' | '?' | '&' | '~'
372 )
373}
374
375#[cfg(test)]
376mod tests {
377 use super::*;
378 use crate::cell::*;
379 use plg_shared::StringInterner;
380
381 fn machine() -> Box<Machine> {
382 Machine::new(StringInterner::new(), Vec::new())
383 }
384
385 fn functor_name(m: &Machine, w: Word) -> String {
386 let idx = payload(w) as usize;
387 let (f, _) = unpack_functor(m.heap[idx]);
388 m.atoms.resolve(f).to_string()
389 }
390
391 #[test]
392 fn parses_compound_with_vars() {
393 let mut m = machine();
394 let w = parse_query(&mut m, "parent(tom, X)").unwrap();
395 assert_eq!(tag_of(w), TAG_STR);
396 assert_eq!(functor_name(&m, w), "parent");
397 assert_eq!(m.query_vars.len(), 1);
398 assert_eq!(m.query_vars[0].0, "X");
399 }
400
401 #[test]
402 fn conjunction_shares_variables() {
403 let mut m = machine();
404 let w = parse_query(&mut m, "p(X), q(X, Y)").unwrap();
405 assert_eq!(functor_name(&m, w), ",");
406 assert_eq!(m.query_vars.len(), 2, "X shared, Y new");
407 }
408
409 #[test]
410 fn operator_goals_parse() {
411 let mut m = machine();
412 let w = parse_query(&mut m, "X is 2 + 3 * 4").unwrap();
413 assert_eq!(functor_name(&m, w), "is");
414 let w = parse_query(&mut m, "1 < 2").unwrap();
415 assert_eq!(functor_name(&m, w), "<");
416 let w = parse_query(&mut m, "(a ; b)").unwrap();
417 assert_eq!(functor_name(&m, w), ";");
418 let w = parse_query(&mut m, "(a -> b ; c)").unwrap();
419 assert_eq!(functor_name(&m, w), ";");
420 let w = parse_query(&mut m, "\\+ p(X)").unwrap();
421 assert_eq!(functor_name(&m, w), "\\+");
422 }
423
424 #[test]
425 fn precedence_multiplication_binds_tighter() {
426 let mut m = machine();
427 let w = parse_query(&mut m, "X is 2 + 3 * 4").unwrap();
429 let idx = payload(w) as usize;
430 let rhs = m.deref(m.heap[idx + 2]);
431 assert_eq!(functor_name(&m, rhs), "+");
432 let plus_idx = payload(rhs) as usize;
433 let right = m.deref(m.heap[plus_idx + 2]);
434 assert_eq!(functor_name(&m, right), "*");
435 }
436
437 #[test]
438 fn args_parse_operators_but_not_bare_comma() {
439 let mut m = machine();
440 let w = parse_query(&mut m, "p(1 + 2, X)").unwrap();
441 let idx = payload(w) as usize;
442 let (f, n) = unpack_functor(m.heap[idx]);
443 assert_eq!(m.atoms.resolve(f), "p");
444 assert_eq!(n, 2, "1 + 2 is one arg, X the other");
445 }
446
447 #[test]
448 fn lists_and_quoted_atoms() {
449 let mut m = machine();
450 let w = parse_query(&mut m, "p([1, 2 | T], 'hello world')").unwrap();
451 assert_eq!(tag_of(w), TAG_STR);
452 assert!(m.atoms.lookup("hello world").is_some());
453 assert_eq!(m.query_vars[0].0, "T");
454 }
455
456 #[test]
457 fn underscore_never_recorded() {
458 let mut m = machine();
459 parse_query(&mut m, "p(_, _)").unwrap();
460 assert!(m.query_vars.is_empty());
461 }
462
463 #[test]
464 fn negative_integers_and_trailing_dot() {
465 let mut m = machine();
466 let w = parse_query(&mut m, "p(-42).").unwrap();
467 let idx = payload(w) as usize;
468 assert_eq!(int_value(m.deref(m.heap[idx + 1])), -42);
469 }
470
471 #[test]
472 fn rejects_trailing_garbage() {
473 let mut m = machine();
474 assert!(parse_query(&mut m, "p(a) q").is_err());
475 assert!(parse_query(&mut m, "p(").is_err());
476 }
477}