Skip to content

Commit 0792aac

Browse files
committed
fix(quant): disable runtime quantization on metal to avoid mps issues
1 parent 80b0413 commit 0792aac

1 file changed

Lines changed: 13 additions & 0 deletions

File tree

crates/infer-deepseek/src/transformer/weights.rs

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -444,6 +444,19 @@ fn maybe_quantize_linear(
444444
) -> Result<Option<Arc<QMatMul>>> {
445445
let quant = QuantizationState::global();
446446
let config = quant.config();
447+
// Disable runtime quantization entirely on Metal to avoid MPS kernel issues.
448+
if weight.device().is_metal() {
449+
tracing::trace!(
450+
tensor = tensor_name,
451+
?group,
452+
action = "fallback",
453+
reason = "metal_disabled",
454+
backend = crate::quantization::backend_label(&weight.device()),
455+
"quant-linear"
456+
);
457+
quant.record_attempt(module, QuantizationOutcome::Fallback);
458+
return Ok(None);
459+
}
447460
if !quant.enabled_for(group) {
448461
trace!(
449462
tensor = tensor_name,

0 commit comments

Comments
 (0)