import sys
from pathlib import Path
# onnxruntime is an optional third-party dependency: if it cannot be
# imported, tell the user how to install it and stop with exit status 1.
try:
    from onnxruntime.quantization import QuantType, quantize_dynamic
except ImportError:
    for _message in (
        "Error: onnxruntime not found",
        "Install with: pip install onnx onnxruntime",
    ):
        print(_message, file=sys.stderr)
    sys.exit(1)
def quantize_model(input_path: str, output_path: str) -> None:
    """Dynamically quantize an ONNX model's weights to unsigned 8-bit.

    Progress is printed to stdout; errors are printed to stderr.

    Args:
        input_path: Path of the ONNX model to read.
        output_path: Path the quantized model is written to.

    Raises:
        SystemExit: With status 1 when ``input_path`` does not exist or when
            the quantization call raises any exception.
    """
    # Local import keeps this function self-contained; it is only needed for
    # the signature probe below.
    import inspect

    input_path = Path(input_path)
    output_path = Path(output_path)

    if not input_path.exists():
        print(f"Error: Input model not found: {input_path}", file=sys.stderr)
        sys.exit(1)

    print(f"Quantizing {input_path.name} to INT8...")
    print(f"Input: {input_path}")
    print(f"Output: {output_path}")

    try:
        quantize_kwargs = {
            "model_input": str(input_path),
            "model_output": str(output_path),
            "weight_type": QuantType.QUInt8,
            "per_channel": False,
        }
        # Newer onnxruntime releases no longer accept the `optimize_model`
        # keyword; passing it unconditionally makes the call fail with a
        # TypeError. Keep the original request on versions that still
        # support it and omit it otherwise.
        accepted = inspect.signature(quantize_dynamic).parameters
        if "optimize_model" in accepted:
            quantize_kwargs["optimize_model"] = True

        quantize_dynamic(**quantize_kwargs)
        print(f"✅ Successfully quantized to {output_path.name}")
    except Exception as e:
        # Top-level boundary for this CLI: report the failure and exit
        # non-zero instead of showing a traceback.
        print(f"Error: Quantization failed: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == "__main__":
    # Exactly two positional arguments are required after the script name:
    # the model to read and the path to write the quantized model to.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: quantize_int8.py <input_model.onnx> <output_model.onnx>", file=sys.stderr)
        sys.exit(1)

    source_model, quantized_model = cli_args
    quantize_model(source_model, quantized_model)