from collections import namedtuple
from functools import partial
from ..module import Module
from .fake_quant import TQT, FakeQuantize
from .observer import (
ExponentialMovingAverageObserver,
HistogramObserver,
MinMaxObserver,
PassiveObserver,
SyncExponentialMovingAverageObserver,
SyncMinMaxObserver,
)
class QConfig(
    namedtuple(
        "QConfig",
        ["weight_observer", "act_observer", "weight_fake_quant", "act_fake_quant"],
    )
):
    """Named tuple grouping four quantization entries.

    Fields, in order: ``weight_observer``, ``act_observer``,
    ``weight_fake_quant`` and ``act_fake_quant``.

    The two observer fields must not be :class:`Module` instances; pass a
    factory such as ``partial(Observer, ...)`` instead. The two fake-quant
    fields are stored as given, without any check.

    Raises:
        ValueError: if ``act_observer`` or ``weight_observer`` is a
            ``Module`` instance.
    """

    def __new__(cls, weight_observer, act_observer, weight_fake_quant, act_fake_quant):
        # Only the observers are validated; activation is checked first,
        # then weight.
        checked_observers = (act_observer, weight_observer)
        if any(isinstance(candidate, Module) for candidate in checked_observers):
            raise ValueError(
                "QConfig must not receive observer instance, please pass observer"
                " class generator using `partial(Observer, ...)` instead. Use"
                " partial(MyObserver, x=1) to override arguments to constructor if needed"
            )

        field_values = (
            weight_observer,
            act_observer,
            weight_fake_quant,
            act_fake_quant,
        )
        return super().__new__(cls, *field_values)
# Preset QConfig instances.
# Each entry is either a `functools.partial` factory with the dtype pre-bound,
# or None. In all presets except `ema_lowbit_fakequant_qconfig`, weight-side
# entries use dtype "qint8_narrow" and activation-side entries use "qint8".

# MinMaxObserver for both weights and activations, with FakeQuantize on both.
min_max_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(MinMaxObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)
# Same as the min-max preset, except activations are observed with
# ExponentialMovingAverageObserver.
ema_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)
# Same layout as `ema_fakequant_qconfig`, but built from the Sync* observer
# classes (SyncMinMaxObserver / SyncExponentialMovingAverageObserver).
sync_ema_fakequant_qconfig = QConfig(
weight_observer=partial(SyncMinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(SyncExponentialMovingAverageObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)
# Low-bit variant of the EMA preset: every entry uses dtype "qint4".
ema_lowbit_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint4"),
act_observer=partial(ExponentialMovingAverageObserver, dtype="qint4"),
weight_fake_quant=partial(FakeQuantize, dtype="qint4"),
act_fake_quant=partial(FakeQuantize, dtype="qint4"),
)
# Observer-only preset: MinMaxObserver for weights, HistogramObserver for
# activations, and no fake-quant factories (both are None).
calibration_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(HistogramObserver, dtype="qint8"),
weight_fake_quant=None,
act_fake_quant=None,
)
# Fake-quant-only preset: no observers (both are None); TQT is used as the
# fake-quant factory for both weights and activations.
tqt_qconfig = QConfig(
weight_observer=None,
act_observer=None,
weight_fake_quant=partial(TQT, dtype="qint8_narrow"),
act_fake_quant=partial(TQT, dtype="qint8"),
)
# PassiveObserver for both weights and activations, with FakeQuantize on both.
passive_qconfig = QConfig(
weight_observer=partial(PassiveObserver, dtype="qint8_narrow"),
act_observer=partial(PassiveObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)
# Alias: `easyquant_qconfig` is the very same object as `passive_qconfig`,
# not a copy.
easyquant_qconfig = passive_qconfig