// khive_request/lib.rs

//! `khive-request` — request-DSL parser, transport-agnostic.
//!
//! ## Scope
//!
//! Conceptually every transport into khive walks the same pipeline:
//!
//! ```text
//! request string  →  parse  →  ParsedRequest  →  dispatch (VerbRegistry)  →  result
//! ```
//!
//! This crate owns only the *parse* step. The AST it produces (`ParsedRequest`,
//! `ParsedOp`) is consumed by transports (MCP today; HTTP gateway, FFI, CLI
//! in future) which then dispatch through `khive-runtime`'s [`VerbRegistry`].
//!
//! Keeping the parser in its own crate frees us to grow the syntax — pipe
//! chains, `$prev` substitution, LNDL-style natural-language declarations,
//! bash-flavoured redirections — without touching the runtime layering.
//!
//! ## Today's syntax (v0.2 — ADR-020)
//!
//! - **Function-call form**: `tool_name(arg=value, arg=value)`
//! - **Function-call batch**: `[tool_name(...), tool_name(...)]`
//! - **JSON form**: `[{"tool":"...", "args": {...}}, ...]` (or a single object)
//!
//! Argument values are JSON literals — strings, numbers, booleans, `null`,
//! arrays, objects. Top-level operations inside `[...]` run in parallel by
//! convention (the parser preserves order; the transport drives concurrency).
//!
//! ## Planned (deferred to dedicated ADRs)
//!
//! - Pipe chains for sequential dependent ops (`v1(...) | v2(id=$prev.id)`).
//! - LNDL frontend — parses lact-block source and emits the same `ParsedRequest`.
//! - Bash-style redirection / substitution for ops that produce stream output.
34
35use std::fmt;
36
37use serde_json::{Map, Value};
38
/// Hard cap on operations per request. ADR-020 §Why-100.
///
/// Both the JSON form and the function-call batch form reject input that
/// exceeds this limit with [`DslError::TooManyOps`].
pub const MAX_OPS: usize = 100;
41
/// A single parsed operation: tool name + named argument bag.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedOp {
    /// Name of the tool to invoke, exactly as written in the request.
    pub tool: String,
    /// Named arguments; each value is an arbitrary JSON value.
    pub args: Map<String, Value>,
}
48
/// Result of parsing a `request` input string.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedRequest {
    /// Operations in the order they appeared in the input. May be empty: the
    /// function-call batch `[]` parses to zero operations.
    pub ops: Vec<ParsedOp>,
}
54
/// Parser error — surfaced as `invalid_params` at the MCP boundary.
///
/// All `pos` fields are byte offsets into the trimmed request string.
#[derive(Debug, Clone, PartialEq)]
pub enum DslError {
    /// The request was empty or contained only whitespace.
    Empty,
    /// The request holds more operations than the parser accepts.
    TooManyOps {
        /// Number of operations seen when the limit was hit.
        count: usize,
        /// The limit that was exceeded.
        max: usize,
    },
    /// A character appeared where the grammar required something else.
    UnexpectedChar {
        /// Offset of the offending character.
        pos: usize,
        /// The character that was found.
        found: char,
        /// Human-readable description of what was expected instead.
        expected: &'static str,
    },
    /// The input ended while the grammar still required more.
    UnexpectedEof {
        /// Human-readable description of what was expected.
        expected: &'static str,
    },
    /// A tool or argument name did not match `[A-Za-z_][A-Za-z0-9_]*`.
    InvalidIdentifier {
        /// Offset at which an identifier was expected.
        pos: usize,
    },
    /// The same argument name was supplied twice within one operation.
    DuplicateArg {
        /// The repeated argument name.
        name: String,
    },
    /// An argument value was not a valid JSON literal.
    InvalidValue {
        /// Offset at which the value starts.
        pos: usize,
        /// Description of the underlying parse failure.
        error: String,
    },
    /// The JSON request form was malformed or had the wrong shape.
    InvalidJson {
        /// Description of the problem.
        error: String,
    },
    /// A string literal was opened but never closed.
    UnclosedString,
    /// Brackets inside an argument value were not balanced.
    UnclosedBracket {
        /// The bracket character the diagnostic refers to.
        kind: char,
    },
}

impl fmt::Display for DslError {
    /// Renders the one-line, user-facing description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Fixed messages need no formatting machinery.
            Self::Empty => f.write_str("request is empty"),
            Self::UnclosedString => f.write_str("unterminated string literal"),

            Self::TooManyOps { count, max } => {
                write!(f, "batch has {count} ops; max is {max}")
            }
            Self::UnexpectedEof { expected } => {
                write!(f, "unexpected end of input; expected {expected}")
            }
            Self::UnexpectedChar {
                pos,
                found,
                expected,
            } => write!(f, "at position {pos}: expected {expected}, found {found:?}"),
            Self::InvalidIdentifier { pos } => write!(
                f,
                "at position {pos}: invalid identifier (expected [A-Za-z_][A-Za-z0-9_]*)"
            ),
            Self::InvalidValue { pos, error } => {
                write!(f, "at position {pos}: invalid value: {error}")
            }
            Self::InvalidJson { error } => write!(f, "invalid JSON form: {error}"),
            Self::DuplicateArg { name } => write!(f, "duplicate argument {name:?}"),
            Self::UnclosedBracket { kind } => {
                write!(f, "unclosed bracket: {kind:?} has no matching close")
            }
        }
    }
}

impl std::error::Error for DslError {}
127
128/// Parse a request input string, returning either a single op or a batch.
129pub fn parse_request(input: &str) -> Result<ParsedRequest, DslError> {
130    let trimmed = input.trim();
131    if trimmed.is_empty() {
132        return Err(DslError::Empty);
133    }
134
135    // JSON form: `[{...}, ...]` or `{...}`. After `[`, JSON whitespace is legal
136    // before the first element — common when pretty-printers emit `[ {...} ]`.
137    let first = trimmed.as_bytes()[0];
138    let looks_like_json = first == b'{'
139        || (first == b'['
140            && trimmed
141                .as_bytes()
142                .iter()
143                .skip(1)
144                .find(|b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
145                .is_some_and(|b| *b == b'{'));
146    if looks_like_json {
147        return parse_json_form(trimmed);
148    }
149
150    // Function-call batch.
151    if first == b'[' {
152        return parse_fn_batch(trimmed);
153    }
154
155    // Single op.
156    let mut p = Parser::new(trimmed);
157    let op = p.parse_op()?;
158    p.skip_ws();
159    if !p.eof() {
160        return Err(DslError::UnexpectedChar {
161            pos: p.pos,
162            found: p.peek().unwrap(),
163            expected: "end of input",
164        });
165    }
166    Ok(ParsedRequest { ops: vec![op] })
167}
168
169fn parse_json_form(input: &str) -> Result<ParsedRequest, DslError> {
170    let v: Value = serde_json::from_str(input).map_err(|e| DslError::InvalidJson {
171        error: e.to_string(),
172    })?;
173    let arr: Vec<Value> = match v {
174        Value::Array(arr) => arr,
175        Value::Object(_) => vec![v],
176        other => {
177            return Err(DslError::InvalidJson {
178                error: format!("expected object or array of objects, got {other}"),
179            })
180        }
181    };
182    if arr.len() > MAX_OPS {
183        return Err(DslError::TooManyOps {
184            count: arr.len(),
185            max: MAX_OPS,
186        });
187    }
188    let mut ops = Vec::with_capacity(arr.len());
189    for entry in arr {
190        let obj = entry.as_object().ok_or_else(|| DslError::InvalidJson {
191            error: "each batch entry must be an object".into(),
192        })?;
193        let tool = obj
194            .get("tool")
195            .and_then(Value::as_str)
196            .ok_or_else(|| DslError::InvalidJson {
197                error: "each entry needs a \"tool\" string".into(),
198            })?
199            .to_owned();
200        let args = obj
201            .get("args")
202            .cloned()
203            .unwrap_or_else(|| Value::Object(Map::new()));
204        let args = match args {
205            Value::Object(m) => m,
206            other => {
207                return Err(DslError::InvalidJson {
208                    error: format!("\"args\" must be an object, got {other}"),
209                })
210            }
211        };
212        ops.push(ParsedOp { tool, args });
213    }
214    Ok(ParsedRequest { ops })
215}
216
217fn parse_fn_batch(input: &str) -> Result<ParsedRequest, DslError> {
218    let mut p = Parser::new(input);
219    p.expect_char('[')?;
220    p.skip_ws();
221    let mut ops = Vec::new();
222    if p.peek() == Some(']') {
223        p.advance(1);
224        return Ok(ParsedRequest { ops });
225    }
226    loop {
227        if ops.len() >= MAX_OPS {
228            return Err(DslError::TooManyOps {
229                count: ops.len() + 1,
230                max: MAX_OPS,
231            });
232        }
233        let op = p.parse_op()?;
234        ops.push(op);
235        p.skip_ws();
236        match p.peek() {
237            Some(',') => {
238                p.advance(1);
239                p.skip_ws();
240            }
241            Some(']') => {
242                p.advance(1);
243                break;
244            }
245            Some(c) => {
246                return Err(DslError::UnexpectedChar {
247                    pos: p.pos,
248                    found: c,
249                    expected: "',' or ']'",
250                });
251            }
252            None => return Err(DslError::UnexpectedEof { expected: "']'" }),
253        }
254    }
255    p.skip_ws();
256    if !p.eof() {
257        return Err(DslError::UnexpectedChar {
258            pos: p.pos,
259            found: p.peek().unwrap(),
260            expected: "end of input",
261        });
262    }
263    Ok(ParsedRequest { ops })
264}
265
266// ── recursive-descent parser ────────────────────────────────────────────────
267
/// Cursor over the raw bytes of a request string.
struct Parser<'a> {
    /// Bytes of the input being parsed (obtained from a `&str`, so valid UTF-8).
    src: &'a [u8],
    /// Byte offset of the next unread byte in `src`.
    pos: usize,
}
272
273impl<'a> Parser<'a> {
274    fn new(src: &'a str) -> Self {
275        Self {
276            src: src.as_bytes(),
277            pos: 0,
278        }
279    }
280
281    fn eof(&self) -> bool {
282        self.pos >= self.src.len()
283    }
284
285    fn peek(&self) -> Option<char> {
286        self.src.get(self.pos).map(|b| *b as char)
287    }
288
289    fn advance(&mut self, n: usize) {
290        self.pos = (self.pos + n).min(self.src.len());
291    }
292
293    fn skip_ws(&mut self) {
294        while let Some(c) = self.peek() {
295            if c.is_ascii_whitespace() {
296                self.advance(1);
297            } else {
298                break;
299            }
300        }
301    }
302
303    fn expect_char(&mut self, want: char) -> Result<(), DslError> {
304        self.skip_ws();
305        match self.peek() {
306            Some(c) if c == want => {
307                self.advance(1);
308                Ok(())
309            }
310            Some(c) => Err(DslError::UnexpectedChar {
311                pos: self.pos,
312                found: c,
313                expected: char_label(want),
314            }),
315            None => Err(DslError::UnexpectedEof {
316                expected: char_label(want),
317            }),
318        }
319    }
320
321    fn parse_identifier(&mut self) -> Result<String, DslError> {
322        self.skip_ws();
323        let start = self.pos;
324        match self.peek() {
325            Some(c) if c.is_ascii_alphabetic() || c == '_' => {}
326            _ => return Err(DslError::InvalidIdentifier { pos: self.pos }),
327        }
328        while let Some(c) = self.peek() {
329            if c.is_ascii_alphanumeric() || c == '_' {
330                self.advance(1);
331            } else {
332                break;
333            }
334        }
335        Ok(std::str::from_utf8(&self.src[start..self.pos])
336            .expect("ascii-only chunk")
337            .to_owned())
338    }
339
340    fn parse_op(&mut self) -> Result<ParsedOp, DslError> {
341        let tool = self.parse_identifier()?;
342        self.expect_char('(')?;
343        self.skip_ws();
344        let mut args: Map<String, Value> = Map::new();
345        if self.peek() == Some(')') {
346            self.advance(1);
347            return Ok(ParsedOp { tool, args });
348        }
349        loop {
350            let name = self.parse_identifier()?;
351            self.expect_char('=')?;
352            self.skip_ws();
353            let value = self.parse_value()?;
354            if args.contains_key(&name) {
355                return Err(DslError::DuplicateArg { name });
356            }
357            args.insert(name, value);
358            self.skip_ws();
359            match self.peek() {
360                Some(',') => {
361                    self.advance(1);
362                    self.skip_ws();
363                }
364                Some(')') => {
365                    self.advance(1);
366                    return Ok(ParsedOp { tool, args });
367                }
368                Some(c) => {
369                    return Err(DslError::UnexpectedChar {
370                        pos: self.pos,
371                        found: c,
372                        expected: "',' or ')'",
373                    });
374                }
375                None => return Err(DslError::UnexpectedEof { expected: "')'" }),
376            }
377        }
378    }
379
380    fn parse_value(&mut self) -> Result<Value, DslError> {
381        self.skip_ws();
382        let start = self.pos;
383        let end = self.scan_value_end()?;
384        let slice = std::str::from_utf8(&self.src[start..end])
385            .expect("ascii-or-utf8 maintained by scanner");
386        let value: Value =
387            serde_json::from_str(slice.trim()).map_err(|e| DslError::InvalidValue {
388                pos: start,
389                error: e.to_string(),
390            })?;
391        self.pos = end;
392        Ok(value)
393    }
394
395    /// Walk forward through the input to find the end of a JSON value, respecting
396    /// nested brackets / braces and string literals. The returned index is one
397    /// past the last byte of the value (exclusive).
398    fn scan_value_end(&self) -> Result<usize, DslError> {
399        let mut i = self.pos;
400        let mut depth_paren: i32 = 0; // `(` from the surrounding op
401        let mut depth_brack: i32 = 0;
402        let mut depth_brace: i32 = 0;
403        while i < self.src.len() {
404            let c = self.src[i] as char;
405            match c {
406                '"' => {
407                    i = scan_string_end(self.src, i)?;
408                    continue;
409                }
410                '[' => depth_brack += 1,
411                ']' => {
412                    if depth_brack == 0 {
413                        if depth_paren == 0 && depth_brace == 0 {
414                            return Ok(i);
415                        }
416                        // we never opened a paren here; this terminates the value.
417                        return Ok(i);
418                    }
419                    depth_brack -= 1;
420                }
421                '{' => depth_brace += 1,
422                '}' => {
423                    if depth_brace == 0 {
424                        return Err(DslError::UnclosedBracket { kind: '{' });
425                    }
426                    depth_brace -= 1;
427                }
428                '(' => depth_paren += 1,
429                ')' => {
430                    if depth_paren == 0 && depth_brack == 0 && depth_brace == 0 {
431                        return Ok(i);
432                    }
433                    if depth_paren == 0 {
434                        return Err(DslError::UnclosedBracket { kind: '(' });
435                    }
436                    depth_paren -= 1;
437                }
438                ',' => {
439                    if depth_paren == 0 && depth_brack == 0 && depth_brace == 0 {
440                        return Ok(i);
441                    }
442                }
443                _ => {}
444            }
445            i += 1;
446        }
447        if depth_brack > 0 {
448            return Err(DslError::UnclosedBracket { kind: '[' });
449        }
450        if depth_brace > 0 {
451            return Err(DslError::UnclosedBracket { kind: '{' });
452        }
453        Ok(i)
454    }
455}
456
457fn scan_string_end(src: &[u8], start: usize) -> Result<usize, DslError> {
458    let mut i = start + 1;
459    while i < src.len() {
460        match src[i] as char {
461            '\\' => {
462                i += 2; // skip escape pair
463                continue;
464            }
465            '"' => return Ok(i + 1),
466            _ => i += 1,
467        }
468    }
469    Err(DslError::UnclosedString)
470}
471
/// Returns the quoted label used in "expected …" diagnostics for `c`.
///
/// Only the punctuation the grammar asks for has a dedicated label; any other
/// character maps to the generic `"expected char"`.
fn char_label(c: char) -> &'static str {
    const LABELS: [(char, &str); 6] = [
        ('(', "'('"),
        (')', "')'"),
        ('[', "'['"),
        (']', "']'"),
        ('=', "'='"),
        (',', "','"),
    ];
    LABELS
        .iter()
        .find(|(punct, _)| *punct == c)
        .map_or("expected char", |(_, label)| *label)
}
483
// Unit tests for the request parser. Each test drives the public
// `parse_request` entry point with a literal request string.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Parses `s` and returns its ops, panicking with the parse error (and the
    /// offending input) if parsing fails.
    fn ops(s: &str) -> Vec<ParsedOp> {
        parse_request(s)
            .unwrap_or_else(|e| panic!("parse({s:?}) failed: {e}"))
            .ops
    }

    #[test]
    fn single_op_no_args() {
        let v = ops("next()");
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].tool, "next");
        assert!(v[0].args.is_empty());
    }

    #[test]
    fn single_op_with_string_arg() {
        let v = ops(r#"assign(title="ship release")"#);
        assert_eq!(v[0].tool, "assign");
        assert_eq!(v[0].args["title"], json!("ship release"));
    }

    #[test]
    fn single_op_with_multiple_typed_args() {
        let v = ops(
            r#"create(kind="entity", entity_kind="concept", name="LoRA", weight=0.9, active=true)"#,
        );
        assert_eq!(v[0].tool, "create");
        assert_eq!(v[0].args["kind"], json!("entity"));
        assert_eq!(v[0].args["weight"], json!(0.9));
        assert_eq!(v[0].args["active"], json!(true));
    }

    #[test]
    fn batch_three_ops() {
        let v = ops(
            r#"[create(kind="entity", name="A"), create(kind="entity", name="B"), link(source_id="x", target_id="y", relation="extends")]"#,
        );
        assert_eq!(v.len(), 3);
        assert_eq!(v[0].tool, "create");
        assert_eq!(v[2].tool, "link");
        assert_eq!(v[2].args["relation"], json!("extends"));
    }

    #[test]
    fn empty_batch_is_legal() {
        // `[]` is routed to the function-call batch parser and yields no ops.
        let v = ops("[]");
        assert!(v.is_empty());
    }

    #[test]
    fn nested_array_and_object_values() {
        let v = ops(r#"assign(title="x", tags=["a","b"], properties={"k":"v","n":1})"#);
        assert_eq!(v[0].args["tags"], json!(["a", "b"]));
        assert_eq!(v[0].args["properties"], json!({"k": "v", "n": 1}));
    }

    #[test]
    fn string_with_comma_and_paren_inside() {
        // Delimiters inside a string literal must not end the value.
        let v = ops(r#"assign(title="hello, world (now)")"#);
        assert_eq!(v[0].args["title"], json!("hello, world (now)"));
    }

    #[test]
    fn string_with_escaped_quote() {
        let v = ops(r#"assign(title="he said \"hi\"")"#);
        assert_eq!(v[0].args["title"], json!("he said \"hi\""));
    }

    #[test]
    fn null_and_negative_number() {
        let v = ops(r#"update(id="x", description=null, weight=-0.5)"#);
        assert_eq!(v[0].args["description"], json!(null));
        assert_eq!(v[0].args["weight"], json!(-0.5));
    }

    #[test]
    fn json_form_batch_parses() {
        let v = ops(r#"[{"tool":"next","args":{}}, {"tool":"complete","args":{"id":"abc"}}]"#);
        assert_eq!(v.len(), 2);
        assert_eq!(v[1].tool, "complete");
        assert_eq!(v[1].args["id"], json!("abc"));
    }

    #[test]
    fn json_form_with_leading_whitespace_inside_array_parses() {
        // Pretty-printers commonly emit `[ {...} ]` with spaces or newlines after `[`.
        // The whitespace is legal JSON, so the parser must route this to the JSON
        // path rather than the function-call batch parser.
        let v = ops(r#"[  {"tool":"next","args":{}} ]"#);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].tool, "next");

        let v = ops("[\n  {\"tool\":\"next\",\"args\":{}},\n  {\"tool\":\"complete\",\"args\":{\"id\":\"x\"}}\n]");
        assert_eq!(v.len(), 2);
        assert_eq!(v[1].tool, "complete");
    }

    #[test]
    fn json_form_single_object_is_treated_as_one_op() {
        let v = ops(r#"{"tool":"next","args":{}}"#);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].tool, "next");
    }

    #[test]
    fn duplicate_arg_rejected() {
        let err = parse_request(r#"assign(title="a", title="b")"#).unwrap_err();
        assert!(matches!(err, DslError::DuplicateArg { ref name } if name == "title"));
    }

    #[test]
    fn unknown_token_after_op_rejected() {
        let err = parse_request(r#"next() garbage"#).unwrap_err();
        assert!(matches!(err, DslError::UnexpectedChar { .. }));
    }

    #[test]
    fn unclosed_paren_rejected() {
        let err = parse_request(r#"assign(title="a""#).unwrap_err();
        // The string is closed; the args list isn't.
        assert!(matches!(err, DslError::UnexpectedEof { .. }));
    }

    #[test]
    fn unterminated_string_rejected() {
        let err = parse_request(r#"assign(title="oops)"#).unwrap_err();
        assert!(matches!(err, DslError::UnclosedString));
    }

    #[test]
    fn too_many_ops_rejected() {
        // Builds a batch of MAX_OPS + 2 ops: MAX_OPS + 1 copies of `next(),`
        // followed by a final `next()`.
        let one = r#"next(),"#;
        let mut s = String::from("[");
        for _ in 0..MAX_OPS + 1 {
            s.push_str(one);
        }
        s.push_str("next()]");
        let err = parse_request(&s).unwrap_err();
        assert!(matches!(err, DslError::TooManyOps { .. }));
    }

    #[test]
    fn empty_request_rejected() {
        let err = parse_request("   ").unwrap_err();
        assert!(matches!(err, DslError::Empty));
    }

    // ── Required prompt examples ───────────────────────────────────────────────

    #[test]
    fn recall_with_query_arg() {
        let v = ops(r#"recall(query="test")"#);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].tool, "recall");
        assert_eq!(v[0].args["query"], json!("test"));
    }

    #[test]
    fn search_with_query_and_limit() {
        let v = ops(r#"search(query="test", limit=5)"#);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].tool, "search");
        assert_eq!(v[0].args["query"], json!("test"));
        assert_eq!(v[0].args["limit"], json!(5));
    }

    #[test]
    fn parallel_recall_and_inbox() {
        let v = ops(r#"[recall(query="x"), inbox()]"#);
        assert_eq!(v.len(), 2);
        assert_eq!(v[0].tool, "recall");
        assert_eq!(v[0].args["query"], json!("x"));
        assert_eq!(v[1].tool, "inbox");
        assert!(v[1].args.is_empty());
    }

    // ── JSON form edge cases ───────────────────────────────────────────────────

    #[test]
    fn json_missing_args_defaults_to_empty_map() {
        let v = ops(r#"{"tool":"inbox"}"#);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].tool, "inbox");
        assert!(v[0].args.is_empty());
    }

    #[test]
    fn json_args_as_array_rejected() {
        let err = parse_request(r#"{"tool":"x","args":[]}"#).unwrap_err();
        assert!(matches!(err, DslError::InvalidJson { .. }));
    }

    // ── Identifier grammar ────────────────────────────────────────────────────

    #[test]
    fn dotted_tool_name_rejected_as_unexpected_char() {
        // The parser reads "brain" as identifier then hits '.' expecting '('.
        let err = parse_request("brain.state()").unwrap_err();
        assert!(matches!(err, DslError::UnexpectedChar { .. }));
    }

    #[test]
    fn leading_underscore_identifier_is_valid() {
        let v = ops("_internal()");
        assert_eq!(v[0].tool, "_internal");
        assert!(v[0].args.is_empty());
    }

    #[test]
    fn identifier_starting_with_digit_rejected() {
        let err = parse_request("1bad()").unwrap_err();
        assert!(matches!(err, DslError::InvalidIdentifier { pos: 0 }));
    }

    // ── Argument value edge cases ─────────────────────────────────────────────

    #[test]
    fn boolean_false_as_arg_value() {
        let v = ops("flag(active=false)");
        assert_eq!(v[0].args["active"], json!(false));
    }

    #[test]
    fn unicode_string_arg_preserved() {
        // Multi-byte UTF-8 inside a string literal must survive the byte-level scan.
        let v = ops(r#"assign(title="café")"#);
        assert_eq!(v[0].args["title"], json!("café"));
    }
}
712    fn unicode_string_arg_preserved() {
713        let v = ops(r#"assign(title="café")"#);
714        assert_eq!(v[0].args["title"], json!("café"));
715    }
716}