mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
vulkan: Apply bias before softmax in FA, to avoid overflow (#24909)
This commit is contained in:
parent
be4a6a63eb
commit
ac4105d68b
@ -463,6 +463,7 @@ void main() {
|
|||||||
}
|
}
|
||||||
rowmaxf = max(rowmaxf, float(Sf[r][c]));
|
rowmaxf = max(rowmaxf, float(Sf[r][c]));
|
||||||
}
|
}
|
||||||
|
rowmaxf += FATTN_KQ_MAX_OFFSET;
|
||||||
float Moldf = Mf[r];
|
float Moldf = Mf[r];
|
||||||
|
|
||||||
// M = max(rowmax, Mold)
|
// M = max(rowmax, Mold)
|
||||||
|
|||||||
@ -352,6 +352,7 @@ void main() {
|
|||||||
}
|
}
|
||||||
rowmaxf = max(rowmaxf, float(sfsh[r_vec + (c * cols_per_iter + col_tid) * sfshstride][r_comp]));
|
rowmaxf = max(rowmaxf, float(sfsh[r_vec + (c * cols_per_iter + col_tid) * sfshstride][r_comp]));
|
||||||
}
|
}
|
||||||
|
rowmaxf += FATTN_KQ_MAX_OFFSET;
|
||||||
float Moldf = Mf[r];
|
float Moldf = Mf[r];
|
||||||
|
|
||||||
// Compute max across the row
|
// Compute max across the row
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user