Fix NaNs in Q4_K/Q5_K quantized MiniMax-2.7 models on CUDA

This commit is contained in:
Kawrakow 2026-04-19 11:06:58 +00:00
parent 64234e3c4e
commit 97369ccd1c
2 changed files with 6 additions and 2 deletions

View File

@@ -129,7 +129,7 @@ static __global__ void quantize_mmq_q8_1(
 }
 }
-const float d = amax/127.f;
+float d = amax/127.f;
 const float d_inv = d > 0 ? 1/d : 0.f;
 char4 q;
 q.x = roundf(xi.x*d_inv);
@@ -162,6 +162,8 @@ static __global__ void quantize_mmq_q8_1(
 }
 if (ds_layout == MMQ_Q8_1_DS_LAYOUT_DS4) {
+d = max(-65504.0f, min(65504.f, d));
+sum = max(-65504.0f, min(65504.f, sum));
 y[ib].ds4[iqs/32] = make_half2(d, sum);
 } else {
 y[ib].d4[iqs/32] = d;

View File

@@ -91,9 +91,11 @@ static __global__ void quantize_mmq_q8_1(
 return;
 }
-const float d = 1.0f / d_inv;
+float d = 1.0f / d_inv;
 if (ds_layout == MMQ_Q8_1_DS_LAYOUT_DS4) {
+d = max(-65504.0f, min(65504.f, d));
+sum = max(-65504.0f, min(65504.f, sum));
 y[ib].ds4[iqs/32] = make_half2(d, sum);
 } else {
 y[ib].d4[iqs/32] = d;