# mecha10-cli 0.1.47

#!/usr/bin/env python3
"""
ONNX Dynamic INT8 Quantization Script

This script is embedded in the mecha10 CLI binary and automatically called
during model download when quantization is enabled in the model catalog.

Usage:
    python quantize_int8.py <input_model.onnx> <output_model.onnx>

Requirements:
    pip install onnx onnxruntime
"""

import sys
from pathlib import Path

try:
    from onnxruntime.quantization import QuantType, quantize_dynamic
except ImportError:
    print("Error: onnxruntime not found", file=sys.stderr)
    print("Install with: pip install onnx onnxruntime", file=sys.stderr)
    sys.exit(1)


def quantize_model(input_path: str, output_path: str) -> None:
    """Quantize an ONNX model to dynamic INT8 and write the result to disk.

    Weights are quantized with ``QuantType.QUInt8`` and ``per_channel``
    disabled. Progress is printed to stdout; errors are printed to stderr.

    Args:
        input_path: Path of the ONNX model to quantize.
        output_path: Path the quantized model is written to. Missing parent
            directories are created.

    Raises:
        SystemExit: With status 1 when the input file does not exist or when
            quantization fails.
    """
    # Local import: only needed for the signature probe below.
    import inspect

    input_path = Path(input_path)
    output_path = Path(output_path)

    # is_file() also rejects a directory passed by mistake.
    if not input_path.is_file():
        print(f"Error: Input model not found: {input_path}", file=sys.stderr)
        sys.exit(1)

    print(f"Quantizing {input_path.name} to INT8...")
    print(f"Input:  {input_path}")
    print(f"Output: {output_path}")

    quantize_kwargs = {
        "model_input": str(input_path),
        "model_output": str(output_path),
        "weight_type": QuantType.QUInt8,
        "per_channel": False,
    }

    # Newer onnxruntime releases removed the ``optimize_model`` keyword from
    # quantize_dynamic; passing it unconditionally raises TypeError there.
    # Only forward it when the installed version still accepts it.
    try:
        accepted_params = inspect.signature(quantize_dynamic).parameters
    except (TypeError, ValueError):
        accepted_params = {}
    if "optimize_model" in accepted_params:
        quantize_kwargs["optimize_model"] = True

    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        quantize_dynamic(**quantize_kwargs)
    except Exception as e:
        print(f"Error: Quantization failed: {e}", file=sys.stderr)
        sys.exit(1)
    else:
        # Kept outside the try body so that a stdout that cannot encode the
        # emoji is not misreported as a quantization failure.
        success_message = f"✅ Successfully quantized to {output_path.name}"
        try:
            print(success_message)
        except UnicodeEncodeError:
            print(success_message.encode("ascii", "backslashreplace").decode("ascii"))


if __name__ == "__main__":
    # Script entry point: exactly two positional arguments are required
    # after the script name — the input model and the output model.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: quantize_int8.py <input_model.onnx> <output_model.onnx>", file=sys.stderr)
        sys.exit(1)

    # Forward (input, output) positionally to the quantizer.
    quantize_model(*cli_args)