Skip to main content

mcp_compressor_core/cli/
parser.rs

1//! CLI argument parser: `argv → tool_input`.
2//!
3//! Parses a list of CLI arguments (everything after the subcommand) into a
4//! `serde_json::Value` dict that can be passed directly as `tool_input` to the
5//! backend MCP server.
6//!
7//! # Argument conventions (mirrors Python `parse_argv_to_tool_input`)
8//!
9//! | Syntax | Produces |
10//! |---|---|
11//! | `--flag value` | `{"flag": "value"}` (string) |
12//! | `--flag` | `{"flag": true}` (boolean) |
13//! | `--no-flag` | `{"flag": false}` (boolean) |
14//! | `--flag true` / `--flag false` | explicit bool |
15//! | `--flag 5` (integer prop) | `{"flag": 5}` |
16//! | `--flag 0.5` (number prop) | `{"flag": 0.5}` |
17//! | `--tag a --tag b` (array prop) | `{"tag": ["a","b"]}` |
18//! | `--json '{"k":"v"}'` | `{"k": "v"}` (raw JSON escape-hatch) |
19//! | `--page-id 123` (kebab flag) | `{"page_id": "123"}` (snake prop) |
20//!
21//! Unknown flags and positional arguments are errors.
22//! Missing required arguments are errors.
23
24use serde_json::{Map, Number, Value};
25
26use crate::compression::engine::Tool;
27use crate::Error;
28
29/// Parse CLI `argv` (everything after the subcommand itself) into a JSON
30/// object suitable for use as `tool_input`.
31///
32/// The `tool`'s `input_schema` drives type coercion and required-argument
33/// checking.
34pub fn parse_argv(argv: &[String], tool: &Tool) -> Result<serde_json::Value, Error> {
35    if argv.first().is_some_and(|arg| arg == "--json") {
36        let json = argv
37            .get(1)
38            .ok_or_else(|| Error::Parse("--json requires a value".to_string()))?;
39        if argv.len() > 2 {
40            return Err(Error::Parse(
41                "--json cannot be combined with other arguments".to_string(),
42            ));
43        }
44        return Ok(serde_json::from_str(json)?);
45    }
46
47    let properties = schema_properties(tool);
48    let required = required_properties(tool);
49    let mut output = Map::new();
50    let mut index = 0;
51
52    while index < argv.len() {
53        let arg = &argv[index];
54        if !arg.starts_with("--") || arg == "--" {
55            return Err(Error::Parse(format!(
56                "unexpected positional argument: {arg}"
57            )));
58        }
59
60        let (flag_property_name, forced_bool) = parse_flag_name(arg);
61        let property_name = resolve_property_name(&properties, &flag_property_name)
62            .ok_or_else(|| Error::Parse(format!("unknown flag: {arg}")))?;
63        let schema = properties
64            .get(&property_name)
65            .ok_or_else(|| Error::Parse(format!("unknown flag: {arg}")))?;
66        let schema_type = schema_type(schema);
67
68        let (raw_value, consumed) = if forced_bool == Some(false) {
69            if schema_type != Some("boolean") {
70                return Err(Error::Parse(format!(
71                    "{arg} can only be used with boolean properties"
72                )));
73            }
74            (None, 1)
75        } else if schema_type == Some("boolean") {
76            match argv.get(index + 1) {
77                Some(next) if !next.starts_with("--") => (Some(next.as_str()), 2),
78                _ => (None, 1),
79            }
80        } else {
81            let value = argv
82                .get(index + 1)
83                .filter(|next| !next.starts_with("--"))
84                .ok_or_else(|| Error::Parse(format!("{arg} requires a value")))?;
85            (Some(value.as_str()), 2)
86        };
87
88        let value = coerce_value(&property_name, schema, raw_value, forced_bool)?;
89        insert_value(&mut output, &property_name, schema, value);
90        index += consumed;
91    }
92
93    for property in required {
94        if !output.contains_key(&property) {
95            return Err(Error::Validation(format!(
96                "missing required argument: {property}"
97            )));
98        }
99    }
100
101    Ok(Value::Object(output))
102}
103
104fn schema_properties(tool: &Tool) -> Map<String, Value> {
105    tool.input_schema
106        .get("properties")
107        .and_then(Value::as_object)
108        .cloned()
109        .unwrap_or_default()
110}
111
112fn required_properties(tool: &Tool) -> Vec<String> {
113    tool.input_schema
114        .get("required")
115        .and_then(Value::as_array)
116        .map(|required| {
117            required
118                .iter()
119                .filter_map(Value::as_str)
120                .map(ToString::to_string)
121                .collect()
122        })
123        .unwrap_or_default()
124}
125
126fn parse_flag_name(flag: &str) -> (String, Option<bool>) {
127    let name = flag.trim_start_matches("--");
128    if let Some(name) = name.strip_prefix("no-") {
129        (flag_to_property_name(name), Some(false))
130    } else {
131        (flag_to_property_name(name), None)
132    }
133}
134
/// Convert a kebab-case flag name to snake_case by turning every `-` into `_`.
fn flag_to_property_name(flag: &str) -> String {
    flag.chars()
        .map(|ch| if ch == '-' { '_' } else { ch })
        .collect()
}
138
139fn resolve_property_name(
140    properties: &serde_json::Map<String, Value>,
141    flag_property_name: &str,
142) -> Option<String> {
143    if properties.contains_key(flag_property_name) {
144        return Some(flag_property_name.to_string());
145    }
146    let canonical = canonical_property_name(flag_property_name);
147    properties
148        .keys()
149        .find(|property| canonical_property_name(property) == canonical)
150        .cloned()
151}
152
/// Reduce a name to a separator-free, lowercase form for loose comparison.
///
/// `-` and `_` are dropped and every remaining character is lowercased.
fn canonical_property_name(value: &str) -> String {
    let mut canonical = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch == '-' || ch == '_' {
            continue;
        }
        canonical.extend(ch.to_lowercase());
    }
    canonical
}
160
161fn schema_type(schema: &Value) -> Option<&str> {
162    schema.get("type").and_then(Value::as_str)
163}
164
165fn array_item_schema(schema: &Value) -> Option<&Value> {
166    schema.get("items")
167}
168
169fn coerce_value(
170    property_name: &str,
171    schema: &Value,
172    raw_value: Option<&str>,
173    forced_bool: Option<bool>,
174) -> Result<Value, Error> {
175    if let Some(value) = forced_bool {
176        return Ok(Value::Bool(value));
177    }
178
179    match schema_type(schema) {
180        Some("boolean") => coerce_bool(property_name, raw_value),
181        Some("integer") => coerce_integer(property_name, raw_value),
182        Some("number") => coerce_number(property_name, raw_value),
183        Some("array") => {
184            let raw = raw_value.unwrap_or_default();
185            if let Ok(Value::Array(values)) = serde_json::from_str::<Value>(raw) {
186                return Ok(Value::Array(values));
187            }
188            let item_schema = array_item_schema(schema).unwrap_or(&Value::Null);
189            coerce_value(property_name, item_schema, raw_value, None)
190        }
191        Some("object") => coerce_json_or_string(raw_value),
192        _ => coerce_json_or_string(raw_value),
193    }
194}
195
196fn coerce_json_or_string(raw_value: Option<&str>) -> Result<Value, Error> {
197    let raw = raw_value.unwrap_or_default();
198    Ok(serde_json::from_str::<Value>(raw).unwrap_or_else(|_| Value::String(raw.to_string())))
199}
200
201fn coerce_bool(property_name: &str, raw_value: Option<&str>) -> Result<Value, Error> {
202    match raw_value {
203        None => Ok(Value::Bool(true)),
204        Some("true") => Ok(Value::Bool(true)),
205        Some("false") => Ok(Value::Bool(false)),
206        Some(value) => Err(Error::Parse(format!(
207            "invalid boolean value for {property_name}: {value}"
208        ))),
209    }
210}
211
212fn coerce_integer(property_name: &str, raw_value: Option<&str>) -> Result<Value, Error> {
213    let value =
214        raw_value.ok_or_else(|| Error::Parse(format!("{property_name} requires a value")))?;
215    let parsed = value.parse::<i64>().map_err(|_| {
216        Error::Parse(format!(
217            "invalid integer value for {property_name}: {value}"
218        ))
219    })?;
220    Ok(Value::Number(Number::from(parsed)))
221}
222
223fn coerce_number(property_name: &str, raw_value: Option<&str>) -> Result<Value, Error> {
224    let value =
225        raw_value.ok_or_else(|| Error::Parse(format!("{property_name} requires a value")))?;
226    let parsed = value
227        .parse::<f64>()
228        .map_err(|_| Error::Parse(format!("invalid number value for {property_name}: {value}")))?;
229    let number = Number::from_f64(parsed).ok_or_else(|| {
230        Error::Parse(format!("invalid number value for {property_name}: {value}"))
231    })?;
232    Ok(Value::Number(number))
233}
234
235fn insert_value(
236    output: &mut Map<String, Value>,
237    property_name: &str,
238    schema: &Value,
239    value: Value,
240) {
241    if schema_type(schema) == Some("array") {
242        let array = output
243            .entry(property_name.to_string())
244            .or_insert_with(|| Value::Array(Vec::new()))
245            .as_array_mut()
246            .expect("array property should be stored as array");
247        match value {
248            Value::Array(values) => array.extend(values),
249            value => array.push(value),
250        }
251    } else {
252        output.insert(property_name.to_string(), value);
253    }
254}
255
256// ---------------------------------------------------------------------------
257// Tests
258// ---------------------------------------------------------------------------
259
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // ------------------------------------------------------------------
    // Shared fixtures
    // ------------------------------------------------------------------

    /// Build a `Tool` named `test_tool` that carries the given JSON Schema.
    fn tool_with_schema(schema: serde_json::Value) -> Tool {
        Tool::new("test_tool", None::<String>, schema)
    }

    /// Turn string literals into the owned argv vector `parse_argv` expects.
    fn args(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| (*part).to_owned()).collect()
    }

    /// Run `parse_argv` on `argv` against a tool built from `schema`.
    fn parse(schema: serde_json::Value, argv: &[&str]) -> Result<serde_json::Value, Error> {
        parse_argv(&args(argv), &tool_with_schema(schema))
    }

    /// Schema with one optional string property `url`.
    fn optional_url_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "url": { "type": "string" } }
        })
    }

    /// Schema with one required string property `url`.
    fn required_url_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "url": { "type": "string" } },
            "required": ["url"]
        })
    }

    /// Schema with one boolean property `verbose`.
    fn verbose_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "verbose": { "type": "boolean" } }
        })
    }

    /// Schema with one integer property `count`.
    fn count_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "count": { "type": "integer" } }
        })
    }

    /// Schema with one array-of-strings property `tags`.
    fn tags_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": {
                "tags": { "type": "array", "items": { "type": "string" } }
            }
        })
    }

    /// Schema with one object property `metadata`.
    fn metadata_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "metadata": { "type": "object" } }
        })
    }

    /// Schema with one required string property `page_id`.
    fn page_id_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": { "page_id": { "type": "string" } },
            "required": ["page_id"]
        })
    }

    /// Object schema that declares no properties at all.
    fn empty_schema() -> serde_json::Value {
        json!({ "type": "object", "properties": {} })
    }

    // ------------------------------------------------------------------
    // String arguments
    // ------------------------------------------------------------------

    /// `--flag value` yields a string entry in the output object.
    #[test]
    fn string_arg() {
        let parsed = parse(required_url_schema(), &["--url", "https://example.com"]).unwrap();
        assert_eq!(parsed, json!({ "url": "https://example.com" }));
    }

    /// Several string flags are each captured under their own key.
    #[test]
    fn multiple_string_args() {
        let schema = json!({
            "type": "object",
            "properties": {
                "url":    { "type": "string" },
                "method": { "type": "string" }
            }
        });
        let parsed = parse(
            schema,
            &["--url", "https://example.com", "--method", "GET"],
        )
        .unwrap();
        assert_eq!(
            parsed,
            json!({ "url": "https://example.com", "method": "GET" })
        );
    }

    // ------------------------------------------------------------------
    // Boolean arguments
    // ------------------------------------------------------------------

    /// A bare boolean flag with nothing after it means `true`.
    #[test]
    fn boolean_flag_bare() {
        let parsed = parse(verbose_schema(), &["--verbose"]).unwrap();
        assert_eq!(parsed, json!({ "verbose": true }));
    }

    /// An explicit `true` after a boolean flag means `true`.
    #[test]
    fn boolean_flag_explicit_true() {
        let parsed = parse(verbose_schema(), &["--verbose", "true"]).unwrap();
        assert_eq!(parsed, json!({ "verbose": true }));
    }

    /// An explicit `false` after a boolean flag means `false`.
    #[test]
    fn boolean_flag_explicit_false() {
        let parsed = parse(verbose_schema(), &["--verbose", "false"]).unwrap();
        assert_eq!(parsed, json!({ "verbose": false }));
    }

    /// The `--no-` prefix sets a boolean property to `false`.
    #[test]
    fn no_prefix_produces_false() {
        let parsed = parse(verbose_schema(), &["--no-verbose"]).unwrap();
        assert_eq!(parsed, json!({ "verbose": false }));
    }

    // ------------------------------------------------------------------
    // Integer and number arguments
    // ------------------------------------------------------------------

    /// Text given for an `integer` property becomes a JSON integer.
    #[test]
    fn integer_arg() {
        let parsed = parse(count_schema(), &["--count", "5"]).unwrap();
        assert_eq!(parsed, json!({ "count": 5 }));
    }

    /// Text given for a `number` property becomes a JSON float.
    #[test]
    fn number_arg_float() {
        let schema = json!({
            "type": "object",
            "properties": { "ratio": { "type": "number" } }
        });
        let parsed = parse(schema, &["--ratio", "0.5"]).unwrap();
        assert_eq!(parsed, json!({ "ratio": 0.5 }));
    }

    /// Non-numeric text for an integer property is rejected.
    #[test]
    fn integer_arg_invalid_value() {
        assert!(parse(count_schema(), &["--count", "notanumber"]).is_err());
    }

    // ------------------------------------------------------------------
    // Array arguments (repeated flag)
    // ------------------------------------------------------------------

    /// Repeating an array flag appends each value in order.
    #[test]
    fn array_arg_repeated_flag() {
        let parsed = parse(tags_schema(), &["--tags", "a", "--tags", "b"]).unwrap();
        assert_eq!(parsed, json!({ "tags": ["a", "b"] }));
    }

    /// A JSON array literal is spread into the array property.
    #[test]
    fn array_arg_json_array_value() {
        let parsed = parse(tags_schema(), &["--tags", "[\"a\",\"b\"]"]).unwrap();
        assert_eq!(parsed, json!({ "tags": ["a", "b"] }));
    }

    /// JSON text given for an object property is decoded (legacy Python CLI
    /// behaviour).
    #[test]
    fn object_arg_json_value() {
        let parsed = parse(metadata_schema(), &["--metadata", "{\"ok\":true}"]).unwrap();
        assert_eq!(parsed, json!({ "metadata": { "ok": true } }));
    }

    /// Text that is not valid JSON is kept as a string for object properties.
    #[test]
    fn object_arg_invalid_json_falls_back_to_string() {
        let parsed = parse(metadata_schema(), &["--metadata", "not-json"]).unwrap();
        assert_eq!(parsed, json!({ "metadata": "not-json" }));
    }

    /// A single occurrence of an array flag still yields an array.
    #[test]
    fn array_arg_single_element() {
        let parsed = parse(tags_schema(), &["--tags", "only"]).unwrap();
        assert_eq!(parsed, json!({ "tags": ["only"] }));
    }

    // ------------------------------------------------------------------
    // kebab-case → snake_case flag mapping
    // ------------------------------------------------------------------

    /// A kebab-case flag resolves to the matching snake_case property.
    #[test]
    fn kebab_flag_maps_to_snake_prop() {
        let parsed = parse(page_id_schema(), &["--page-id", "ABC123"]).unwrap();
        assert_eq!(parsed, json!({ "page_id": "ABC123" }));
    }

    /// Spelling the flag in snake_case works as well.
    #[test]
    fn snake_flag_also_accepted() {
        let parsed = parse(page_id_schema(), &["--page_id", "ABC123"]).unwrap();
        assert_eq!(parsed, json!({ "page_id": "ABC123" }));
    }

    // ------------------------------------------------------------------
    // Required argument validation
    // ------------------------------------------------------------------

    /// Leaving out a required argument is an error.
    #[test]
    fn missing_required_arg_is_error() {
        assert!(parse(required_url_schema(), &[]).is_err());
    }

    /// Arguments that are not required may simply be left out.
    #[test]
    fn optional_arg_may_be_omitted() {
        let schema = json!({
            "type": "object",
            "properties": {
                "url":     { "type": "string" },
                "timeout": { "type": "number" }
            },
            "required": ["url"]
        });
        let parsed = parse(schema, &["--url", "https://example.com"]).unwrap();
        assert_eq!(parsed, json!({ "url": "https://example.com" }));
    }

    // ------------------------------------------------------------------
    // Error cases
    // ------------------------------------------------------------------

    /// A flag the schema does not declare is an error.
    #[test]
    fn unknown_flag_is_error() {
        assert!(parse(optional_url_schema(), &["--unknown", "value"]).is_err());
    }

    /// An argument without the `--` prefix is an error.
    #[test]
    fn positional_arg_is_error() {
        assert!(parse(optional_url_schema(), &["positional"]).is_err());
    }

    /// A value-taking flag at the very end of argv is an error.
    #[test]
    fn flag_missing_value_is_error() {
        assert!(parse(optional_url_schema(), &["--url"]).is_err());
    }

    // ------------------------------------------------------------------
    // --json escape hatch
    // ------------------------------------------------------------------

    /// `--json` returns the decoded JSON object untouched.
    #[test]
    fn json_escape_hatch() {
        let parsed = parse(empty_schema(), &["--json", r#"{"key": "val"}"#]).unwrap();
        assert_eq!(parsed, json!({ "key": "val" }));
    }

    /// `--json` without a payload is an error.
    #[test]
    fn json_escape_hatch_requires_value() {
        assert!(parse(empty_schema(), &["--json"]).is_err());
    }

    /// `--json` also accepts a JSON array, not only objects.
    #[test]
    fn json_escape_hatch_array() {
        let parsed = parse(empty_schema(), &["--json", "[1,2,3]"]).unwrap();
        assert_eq!(parsed, json!([1, 2, 3]));
    }

    // ------------------------------------------------------------------
    // Empty arguments
    // ------------------------------------------------------------------

    /// With nothing required, an empty argv yields an empty object.
    #[test]
    fn empty_argv_no_required() {
        let parsed = parse(empty_schema(), &[]).unwrap();
        assert_eq!(parsed, json!({}));
    }
}
591}