mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Fix NaNs in Q4_K/Q5_K quantized MiniMax-2.7 models on CUDA
This commit is contained in:
parent
64234e3c4e
commit
97369ccd1c
@@ -129,7 +129,7 @@ static __global__ void quantize_mmq_q8_1(
|
||||
}
|
||||
}
|
||||
|
||||
-const float d = amax/127.f;
|
||||
+float d = amax/127.f;
|
||||
const float d_inv = d > 0 ? 1/d : 0.f;
|
||||
char4 q;
|
||||
q.x = roundf(xi.x*d_inv);
|
||||
@@ -162,6 +162,8 @@ static __global__ void quantize_mmq_q8_1(
|
||||
}
|
||||
|
||||
if (ds_layout == MMQ_Q8_1_DS_LAYOUT_DS4) {
|
||||
+d = max(-65504.0f, min(65504.f, d));
|
||||
+sum = max(-65504.0f, min(65504.f, sum));
|
||||
y[ib].ds4[iqs/32] = make_half2(d, sum);
|
||||
} else {
|
||||
y[ib].d4[iqs/32] = d;
|
||||
|
||||
@@ -91,9 +91,11 @@ static __global__ void quantize_mmq_q8_1(
|
||||
return;
|
||||
}
|
||||
|
||||
-const float d = 1.0f / d_inv;
|
||||
+float d = 1.0f / d_inv;
|
||||
|
||||
if (ds_layout == MMQ_Q8_1_DS_LAYOUT_DS4) {
|
||||
+d = max(-65504.0f, min(65504.f, d));
|
||||
+sum = max(-65504.0f, min(65504.f, sum));
|
||||
y[ib].ds4[iqs/32] = make_half2(d, sum);
|
||||
} else {
|
||||
y[ib].d4[iqs/32] = d;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user