car-inference 0.18.0

Local model inference for CAR — Candle backend with Qwen3 models
Documentation
[
  {
    "from_ids": [
      "mlx/qwen3-30b-a3b:4bit",
      "qwen/qwen3-30b-a3b:q4_k_m",
      "vllm-mlx/mlx-community_Qwen3-30B-A3B-4bit"
    ],
    "to_id": "vllm-mlx/qwen3.6-35b-a3b:4bit",
    "reason": "Qwen3.6-35B-A3B is the newer open-weight MoE in this line",
    "target_runtime": "vllm-mlx",
    "target_runtime_requirement": "Serve mlx-community/Qwen3.6-35B-A3B-4bit through vLLM-MLX 0.2.8+ or an OpenAI-compatible mlx-vlm 0.4.4+ server",
    "minimum_runtimes": [
      {
        "name": "vllm-mlx",
        "minimum_version": "0.2.8"
      },
      {
        "name": "mlx-vlm",
        "minimum_version": "0.4.4"
      }
    ],
    "remove_old_after_available": true
  }
]