Skip to main content

mcp_compressor_core/cli/
parser.rs

1//! CLI argument parser: `argv → tool_input`.
2//!
3//! Parses a list of CLI arguments (everything after the subcommand) into a
4//! `serde_json::Value` dict that can be passed directly as `tool_input` to the
5//! backend MCP server.
6//!
7//! # Argument conventions (mirrors Python `parse_argv_to_tool_input`)
8//!
9//! | Syntax | Produces |
10//! |---|---|
11//! | `--flag value` | `{"flag": "value"}` (string) |
12//! | `--flag` | `{"flag": true}` (boolean) |
13//! | `--no-flag` | `{"flag": false}` (boolean) |
14//! | `--flag true` / `--flag false` | explicit bool |
15//! | `--flag 5` (integer prop) | `{"flag": 5}` |
16//! | `--flag 0.5` (number prop) | `{"flag": 0.5}` |
17//! | `--tag a --tag b` (array prop) | `{"tag": ["a","b"]}` |
18//! | `--json '{"k":"v"}'` | `{"k": "v"}` (raw JSON escape-hatch) |
19//! | `--page-id 123` (kebab flag) | `{"page_id": "123"}` (snake prop) |
20//!
21//! Unknown flags and positional arguments are errors.
22//! Missing required arguments are errors.
23
24use serde_json::{Map, Number, Value};
25
26use crate::compression::engine::Tool;
27use crate::Error;
28
29/// Parse CLI `argv` (everything after the subcommand itself) into a JSON
30/// object suitable for use as `tool_input`.
31///
32/// The `tool`'s `input_schema` drives type coercion and required-argument
33/// checking.
34pub fn parse_argv(argv: &[String], tool: &Tool) -> Result<serde_json::Value, Error> {
35    if argv.first().is_some_and(|arg| arg == "--json") {
36        let json = argv
37            .get(1)
38            .ok_or_else(|| Error::Parse("--json requires a value".to_string()))?;
39        if argv.len() > 2 {
40            return Err(Error::Parse(
41                "--json cannot be combined with other arguments".to_string(),
42            ));
43        }
44        return Ok(serde_json::from_str(json)?);
45    }
46
47    let properties = schema_properties(tool);
48    let required = required_properties(tool);
49    let mut output = Map::new();
50    let mut index = 0;
51
52    while index < argv.len() {
53        let arg = &argv[index];
54        if !arg.starts_with("--") || arg == "--" {
55            return Err(Error::Parse(format!(
56                "unexpected positional argument: {arg}"
57            )));
58        }
59
60        let (property_name, forced_bool) = parse_flag_name(arg);
61        let schema = properties
62            .get(&property_name)
63            .ok_or_else(|| Error::Parse(format!("unknown flag: {arg}")))?;
64        let schema_type = schema_type(schema);
65
66        let (raw_value, consumed) = if forced_bool == Some(false) {
67            if schema_type != Some("boolean") {
68                return Err(Error::Parse(format!(
69                    "{arg} can only be used with boolean properties"
70                )));
71            }
72            (None, 1)
73        } else if schema_type == Some("boolean") {
74            match argv.get(index + 1) {
75                Some(next) if !next.starts_with("--") => (Some(next.as_str()), 2),
76                _ => (None, 1),
77            }
78        } else {
79            let value = argv
80                .get(index + 1)
81                .filter(|next| !next.starts_with("--"))
82                .ok_or_else(|| Error::Parse(format!("{arg} requires a value")))?;
83            (Some(value.as_str()), 2)
84        };
85
86        let value = coerce_value(&property_name, schema, raw_value, forced_bool)?;
87        insert_value(&mut output, &property_name, schema, value);
88        index += consumed;
89    }
90
91    for property in required {
92        if !output.contains_key(&property) {
93            return Err(Error::Validation(format!(
94                "missing required argument: {property}"
95            )));
96        }
97    }
98
99    Ok(Value::Object(output))
100}
101
102fn schema_properties(tool: &Tool) -> Map<String, Value> {
103    tool.input_schema
104        .get("properties")
105        .and_then(Value::as_object)
106        .cloned()
107        .unwrap_or_default()
108}
109
110fn required_properties(tool: &Tool) -> Vec<String> {
111    tool.input_schema
112        .get("required")
113        .and_then(Value::as_array)
114        .map(|required| {
115            required
116                .iter()
117                .filter_map(Value::as_str)
118                .map(ToString::to_string)
119                .collect()
120        })
121        .unwrap_or_default()
122}
123
124fn parse_flag_name(flag: &str) -> (String, Option<bool>) {
125    let name = flag.trim_start_matches("--");
126    if let Some(name) = name.strip_prefix("no-") {
127        (flag_to_property_name(name), Some(false))
128    } else {
129        (flag_to_property_name(name), None)
130    }
131}
132
/// Turn a kebab-case flag name into a property name by replacing every `-`
/// with `_`. All other characters are kept unchanged.
fn flag_to_property_name(flag: &str) -> String {
    flag.chars()
        .map(|ch| if ch == '-' { '_' } else { ch })
        .collect()
}
136
137fn schema_type(schema: &Value) -> Option<&str> {
138    schema.get("type").and_then(Value::as_str)
139}
140
141fn array_item_schema(schema: &Value) -> Option<&Value> {
142    schema.get("items")
143}
144
145fn coerce_value(
146    property_name: &str,
147    schema: &Value,
148    raw_value: Option<&str>,
149    forced_bool: Option<bool>,
150) -> Result<Value, Error> {
151    if let Some(value) = forced_bool {
152        return Ok(Value::Bool(value));
153    }
154
155    match schema_type(schema) {
156        Some("boolean") => coerce_bool(property_name, raw_value),
157        Some("integer") => coerce_integer(property_name, raw_value),
158        Some("number") => coerce_number(property_name, raw_value),
159        Some("array") => {
160            let raw = raw_value.unwrap_or_default();
161            if let Ok(Value::Array(values)) = serde_json::from_str::<Value>(raw) {
162                return Ok(Value::Array(values));
163            }
164            let item_schema = array_item_schema(schema).unwrap_or(&Value::Null);
165            coerce_value(property_name, item_schema, raw_value, None)
166        }
167        Some("object") => coerce_json_or_string(raw_value),
168        _ => coerce_json_or_string(raw_value),
169    }
170}
171
172fn coerce_json_or_string(raw_value: Option<&str>) -> Result<Value, Error> {
173    let raw = raw_value.unwrap_or_default();
174    Ok(serde_json::from_str::<Value>(raw).unwrap_or_else(|_| Value::String(raw.to_string())))
175}
176
177fn coerce_bool(property_name: &str, raw_value: Option<&str>) -> Result<Value, Error> {
178    match raw_value {
179        None => Ok(Value::Bool(true)),
180        Some("true") => Ok(Value::Bool(true)),
181        Some("false") => Ok(Value::Bool(false)),
182        Some(value) => Err(Error::Parse(format!(
183            "invalid boolean value for {property_name}: {value}"
184        ))),
185    }
186}
187
188fn coerce_integer(property_name: &str, raw_value: Option<&str>) -> Result<Value, Error> {
189    let value =
190        raw_value.ok_or_else(|| Error::Parse(format!("{property_name} requires a value")))?;
191    let parsed = value.parse::<i64>().map_err(|_| {
192        Error::Parse(format!(
193            "invalid integer value for {property_name}: {value}"
194        ))
195    })?;
196    Ok(Value::Number(Number::from(parsed)))
197}
198
199fn coerce_number(property_name: &str, raw_value: Option<&str>) -> Result<Value, Error> {
200    let value =
201        raw_value.ok_or_else(|| Error::Parse(format!("{property_name} requires a value")))?;
202    let parsed = value
203        .parse::<f64>()
204        .map_err(|_| Error::Parse(format!("invalid number value for {property_name}: {value}")))?;
205    let number = Number::from_f64(parsed).ok_or_else(|| {
206        Error::Parse(format!("invalid number value for {property_name}: {value}"))
207    })?;
208    Ok(Value::Number(number))
209}
210
211fn insert_value(
212    output: &mut Map<String, Value>,
213    property_name: &str,
214    schema: &Value,
215    value: Value,
216) {
217    if schema_type(schema) == Some("array") {
218        let array = output
219            .entry(property_name.to_string())
220            .or_insert_with(|| Value::Array(Vec::new()))
221            .as_array_mut()
222            .expect("array property should be stored as array");
223        match value {
224            Value::Array(values) => array.extend(values),
225            value => array.push(value),
226        }
227    } else {
228        output.insert(property_name.to_string(), value);
229    }
230}
231
232// ---------------------------------------------------------------------------
233// Tests
234// ---------------------------------------------------------------------------
235
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // ------------------------------------------------------------------
    // Helpers
    // ------------------------------------------------------------------

    /// Build a `Tool` with just a name and the given JSON Schema.
    fn tool_with_schema(schema: serde_json::Value) -> Tool {
        Tool::new("test_tool", None::<String>, schema)
    }

    /// Turn string literals into an owned argv vector.
    fn args(parts: &[&str]) -> Vec<String> {
        parts.iter().copied().map(String::from).collect()
    }

    /// Run `parse_argv` for `parts` against a tool built from `schema`.
    fn run(schema: serde_json::Value, parts: &[&str]) -> Result<serde_json::Value, Error> {
        let tool = tool_with_schema(schema);
        parse_argv(&args(parts), &tool)
    }

    /// Schema with one optional boolean property `verbose`.
    fn verbose_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "verbose": { "type": "boolean" } }
        })
    }

    /// Schema with one optional integer property `count`.
    fn count_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "count": { "type": "integer" } }
        })
    }

    /// Schema with one optional array-of-strings property `tags`.
    fn tags_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": {
                "tags": { "type": "array", "items": { "type": "string" } }
            }
        })
    }

    /// Schema with one optional object property `metadata`.
    fn metadata_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "metadata": { "type": "object" } }
        })
    }

    /// Schema with one required string property `page_id`.
    fn page_id_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "page_id": { "type": "string" } },
            "required": ["page_id"]
        })
    }

    /// Schema with one required string property `url`.
    fn required_url_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "url": { "type": "string" } },
            "required": ["url"]
        })
    }

    /// Schema with one optional string property `url`.
    fn optional_url_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "url": { "type": "string" } }
        })
    }

    /// Schema that declares no properties at all.
    fn empty_schema() -> serde_json::Value {
        json!({ "type": "object", "properties": {} })
    }

    // ------------------------------------------------------------------
    // String arguments
    // ------------------------------------------------------------------

    /// `--flag value` yields a string entry in the output object.
    #[test]
    fn string_arg() {
        let parsed = run(required_url_schema(), &["--url", "https://example.com"]).unwrap();
        assert_eq!(parsed, json!({ "url": "https://example.com" }));
    }

    /// Several string flags are each captured under their own key.
    #[test]
    fn multiple_string_args() {
        let schema = json!({
            "type": "object",
            "properties": {
                "url":    { "type": "string" },
                "method": { "type": "string" }
            }
        });
        let parsed = run(
            schema,
            &["--url", "https://example.com", "--method", "GET"],
        )
        .unwrap();
        assert_eq!(
            parsed,
            json!({ "url": "https://example.com", "method": "GET" })
        );
    }

    // ------------------------------------------------------------------
    // Boolean arguments
    // ------------------------------------------------------------------

    /// A bare `--flag` with nothing after it yields `true`.
    #[test]
    fn boolean_flag_bare() {
        let parsed = run(verbose_schema(), &["--verbose"]).unwrap();
        assert_eq!(parsed, json!({ "verbose": true }));
    }

    /// `--flag true` yields `true`.
    #[test]
    fn boolean_flag_explicit_true() {
        let parsed = run(verbose_schema(), &["--verbose", "true"]).unwrap();
        assert_eq!(parsed, json!({ "verbose": true }));
    }

    /// `--flag false` yields `false`.
    #[test]
    fn boolean_flag_explicit_false() {
        let parsed = run(verbose_schema(), &["--verbose", "false"]).unwrap();
        assert_eq!(parsed, json!({ "verbose": false }));
    }

    /// `--no-flag` yields `false` for a boolean property.
    #[test]
    fn no_prefix_produces_false() {
        let parsed = run(verbose_schema(), &["--no-verbose"]).unwrap();
        assert_eq!(parsed, json!({ "verbose": false }));
    }

    // ------------------------------------------------------------------
    // Integer and number arguments
    // ------------------------------------------------------------------

    /// The value of an `integer` property is coerced from text.
    #[test]
    fn integer_arg() {
        let parsed = run(count_schema(), &["--count", "5"]).unwrap();
        assert_eq!(parsed, json!({ "count": 5 }));
    }

    /// The value of a `number` property is coerced to a float.
    #[test]
    fn number_arg_float() {
        let schema = json!({
            "type": "object",
            "properties": { "ratio": { "type": "number" } }
        });
        let parsed = run(schema, &["--ratio", "0.5"]).unwrap();
        assert_eq!(parsed, json!({ "ratio": 0.5 }));
    }

    /// Non-numeric text for an integer property is rejected.
    #[test]
    fn integer_arg_invalid_value() {
        let outcome = run(count_schema(), &["--count", "notanumber"]);
        assert!(outcome.is_err());
    }

    // ------------------------------------------------------------------
    // Array arguments (repeated flag)
    // ------------------------------------------------------------------

    /// Repeating the flag of an array property accumulates the values.
    #[test]
    fn array_arg_repeated_flag() {
        let parsed = run(tags_schema(), &["--tags", "a", "--tags", "b"]).unwrap();
        assert_eq!(parsed, json!({ "tags": ["a", "b"] }));
    }

    /// A JSON array value is expanded into the array property.
    #[test]
    fn array_arg_json_array_value() {
        let parsed = run(tags_schema(), &["--tags", "[\"a\",\"b\"]"]).unwrap();
        assert_eq!(parsed, json!({ "tags": ["a", "b"] }));
    }

    /// Object properties parse JSON values, matching legacy Python CLI mode.
    #[test]
    fn object_arg_json_value() {
        let parsed = run(metadata_schema(), &["--metadata", "{\"ok\":true}"]).unwrap();
        assert_eq!(parsed, json!({ "metadata": { "ok": true } }));
    }

    /// Complex values fall back to strings when JSON parsing fails.
    #[test]
    fn object_arg_invalid_json_falls_back_to_string() {
        let parsed = run(metadata_schema(), &["--metadata", "not-json"]).unwrap();
        assert_eq!(parsed, json!({ "metadata": "not-json" }));
    }

    /// A single occurrence still produces a one-element array.
    #[test]
    fn array_arg_single_element() {
        let parsed = run(tags_schema(), &["--tags", "only"]).unwrap();
        assert_eq!(parsed, json!({ "tags": ["only"] }));
    }

    // ------------------------------------------------------------------
    // kebab-case → snake_case flag mapping
    // ------------------------------------------------------------------

    /// A kebab-case flag maps onto the matching snake_case property.
    #[test]
    fn kebab_flag_maps_to_snake_prop() {
        let parsed = run(page_id_schema(), &["--page-id", "ABC123"]).unwrap();
        assert_eq!(parsed, json!({ "page_id": "ABC123" }));
    }

    /// The snake_case spelling of the flag is accepted as well.
    #[test]
    fn snake_flag_also_accepted() {
        let parsed = run(page_id_schema(), &["--page_id", "ABC123"]).unwrap();
        assert_eq!(parsed, json!({ "page_id": "ABC123" }));
    }

    // ------------------------------------------------------------------
    // Required argument validation
    // ------------------------------------------------------------------

    /// Leaving out a required argument is an error.
    #[test]
    fn missing_required_arg_is_error() {
        let outcome = run(required_url_schema(), &[]);
        assert!(outcome.is_err());
    }

    /// Optional arguments may be left out.
    #[test]
    fn optional_arg_may_be_omitted() {
        let schema = json!({
            "type": "object",
            "properties": {
                "url":     { "type": "string" },
                "timeout": { "type": "number" }
            },
            "required": ["url"]
        });
        let parsed = run(schema, &["--url", "https://example.com"]).unwrap();
        assert_eq!(parsed, json!({ "url": "https://example.com" }));
    }

    // ------------------------------------------------------------------
    // Error cases
    // ------------------------------------------------------------------

    /// A flag that the schema does not declare is an error.
    #[test]
    fn unknown_flag_is_error() {
        let outcome = run(optional_url_schema(), &["--unknown", "value"]);
        assert!(outcome.is_err());
    }

    /// An argument without the `--` prefix is an error.
    #[test]
    fn positional_arg_is_error() {
        let outcome = run(optional_url_schema(), &["positional"]);
        assert!(outcome.is_err());
    }

    /// A value-taking flag at the end of argv is an error.
    #[test]
    fn flag_missing_value_is_error() {
        let outcome = run(optional_url_schema(), &["--url"]);
        assert!(outcome.is_err());
    }

    // ------------------------------------------------------------------
    // --json escape hatch
    // ------------------------------------------------------------------

    /// `--json '{"k":"v"}'` passes the raw JSON object through unchanged.
    #[test]
    fn json_escape_hatch() {
        let parsed = run(empty_schema(), &["--json", r#"{"key": "val"}"#]).unwrap();
        assert_eq!(parsed, json!({ "key": "val" }));
    }

    /// `--json` without a following value is an error.
    #[test]
    fn json_escape_hatch_requires_value() {
        let outcome = run(empty_schema(), &["--json"]);
        assert!(outcome.is_err());
    }

    /// `--json` also accepts a JSON array, not only objects.
    #[test]
    fn json_escape_hatch_array() {
        let parsed = run(empty_schema(), &["--json", "[1,2,3]"]).unwrap();
        assert_eq!(parsed, json!([1, 2, 3]));
    }

    // ------------------------------------------------------------------
    // Empty arguments
    // ------------------------------------------------------------------

    /// Empty argv with nothing required succeeds with an empty object.
    #[test]
    fn empty_argv_no_required() {
        let parsed = run(empty_schema(), &[]).unwrap();
        assert_eq!(parsed, json!({}));
    }
}