We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 80b0413 · commit 0792aac (Copy full SHA for 0792aac)
1 file changed
crates/infer-deepseek/src/transformer/weights.rs
@@ -444,6 +444,19 @@ fn maybe_quantize_linear(
444
) -> Result<Option<Arc<QMatMul>>> {
445
let quant = QuantizationState::global();
446
let config = quant.config();
447
+ // Disable runtime quantization entirely on Metal to avoid MPS kernel issues.
448
+ if weight.device().is_metal() {
449
+ tracing::trace!(
450
+ tensor = tensor_name,
451
+ ?group,
452
+ action = "fallback",
453
+ reason = "metal_disabled",
454
+ backend = crate::quantization::backend_label(&weight.device()),
455
+ "quant-linear"
456
+ );
457
+ quant.record_attempt(module, QuantizationOutcome::Fallback);
458
+ return Ok(None);
459
+ }
460
if !quant.enabled_for(group) {
461
trace!(
462
tensor = tensor_name,
0 commit comments