ggml-cpu: fix SVE leftover path in ggml_vec_dot_f32 (#24699)

* ggml-cpu: fix SVE leftover path in ggml_vec_dot_f32

2D convolutions with kernel size 9 produced different results on SVE
enabled ARM devices. After debugging it turned out that ggml_vec_dot_f32
was using data from inactive lanes.

Use svmla_f32_m(pg, sum1, ax1, ay1) so inactive lanes retain sum1.

* cont : clean-up

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Tarek Dakhran 2026-06-26 09:41:56 +02:00 committed by GitHub
parent 1a87dcdc45
commit c16c35b814
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 61 additions and 28 deletions

View File

@ -75,12 +75,12 @@ void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * G
ay1 = GGML_F32_VEC_LOAD(y + i);
sum1 = GGML_F32_VEC_FMA(sum1, ax1, ay1);
}
// maximum number of leftover elements will be less that ggml_f32_epr. Apply predicated svmad on available elements only
// maximum number of leftover elements will be less that ggml_f32_epr. Apply predicated svmla on available elements only
if (np2 < n) {
svbool_t pg = svwhilelt_b32(np2, n);
ax1 = svld1_f32(pg, x + np2);
ay1 = svld1_f32(pg, y + np2);
sum1 = svmad_f32_m(pg, ax1, ay1, sum1);
sum1 = svmla_f32_m(pg, sum1, ax1, ay1);
}
// reduce sum1,sum2 to sum1
GGML_F32_VEC_REDUCE(sumf, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8);

View File

@ -102,21 +102,34 @@ static float dot_product_error(const ggml_type_traits * qfns, const ggml_type_tr
return fabsf(result - dot_ref) / test_size;
}
int main(int argc, char * argv[]) {
bool verbose = false;
const size_t test_size = 32 * 128;
static int test_vec_dot_f32(bool verbose) {
const auto * f32 = ggml_get_type_traits_cpu(GGML_TYPE_F32);
int num_failed = 0;
for (int n : {1, 2, 3, 5, 7, 8, 15, 16, 17, 31, 33, 63, 67, 127, 129, 193, 255, 1023}) {
std::vector<float> a(n);
std::vector<float> b(n);
generate_data(0.0, n, a.data());
generate_data(1.0, n, b.data());
std::string arg;
for (int i = 1; i < argc; i++) {
arg = argv[i];
float result = 0.0f;
f32->vec_dot(n, &result, 0, a.data(), 0, b.data(), 0, 1);
const float ref = dot_product(a.data(), b.data(), n);
const float error = fabsf(result - ref) / n;
if (arg == "-v") {
verbose = true;
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
return 1;
const bool failed = !(error < MAX_QUANTIZATION_REFERENCE_ERROR);
num_failed += failed;
if (failed || verbose) {
printf(" f32 vec_dot n=%4d: %s (ref=%f got=%f err=%f)\n",
n, RESULT_STR[failed], ref, result, error);
}
}
return num_failed;
}
static int test_vec_dot_q(bool verbose) {
int num_failed = 0;
const size_t test_size = 32 * 128;
std::vector<float> test_data(test_size);
std::vector<float> test_data2(test_size);
@ -124,11 +137,6 @@ int main(int argc, char * argv[]) {
generate_data(0.0, test_data.size(), test_data.data());
generate_data(1.0, test_data2.size(), test_data2.data());
ggml_cpu_init();
int num_failed = 0;
bool failed = false;
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
ggml_type type = (ggml_type) i;
const auto * qfns = ggml_get_type_traits(type);
@ -156,7 +164,7 @@ int main(int argc, char * argv[]) {
type == GGML_TYPE_IQ3_S ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS :
type == GGML_TYPE_IQ3_XXS ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS :
type == GGML_TYPE_NVFP4 ? MAX_QUANTIZATION_TOTAL_ERROR_FP4 : MAX_QUANTIZATION_TOTAL_ERROR;
failed = !(total_error < max_quantization_error);
bool failed = !(total_error < max_quantization_error);
num_failed += failed;
if (failed || verbose) {
printf("%5s absolute quantization error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], total_error);
@ -171,15 +179,15 @@ int main(int argc, char * argv[]) {
const float vec_dot_error = dot_product_error(qfns, qfns_cpu, test_size, test_data.data(), test_data2.data());
const float max_allowed_error = type == GGML_TYPE_Q2_K || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ2_XXS ||
type == GGML_TYPE_IQ3_XXS || type == GGML_TYPE_IQ3_S || type == GGML_TYPE_IQ2_S
? MAX_DOT_PRODUCT_ERROR_LOWBIT
: type == GGML_TYPE_Q1_0
? MAX_DOT_PRODUCT_ERROR_BINARY
: type == GGML_TYPE_TQ1_0 || type == GGML_TYPE_TQ2_0
? MAX_DOT_PRODUCT_ERROR_TERNARY
: type == GGML_TYPE_NVFP4
? MAX_DOT_PRODUCT_ERROR_FP4
: MAX_DOT_PRODUCT_ERROR;
type == GGML_TYPE_IQ3_XXS || type == GGML_TYPE_IQ3_S || type == GGML_TYPE_IQ2_S
? MAX_DOT_PRODUCT_ERROR_LOWBIT
: type == GGML_TYPE_Q1_0
? MAX_DOT_PRODUCT_ERROR_BINARY
: type == GGML_TYPE_TQ1_0 || type == GGML_TYPE_TQ2_0
? MAX_DOT_PRODUCT_ERROR_TERNARY
: type == GGML_TYPE_NVFP4
? MAX_DOT_PRODUCT_ERROR_FP4
: MAX_DOT_PRODUCT_ERROR;
failed = !(vec_dot_error < max_allowed_error);
num_failed += failed;
if (failed || verbose) {
@ -188,6 +196,31 @@ int main(int argc, char * argv[]) {
}
}
return num_failed;
}
int main(int argc, char * argv[]) {
bool verbose = false;
std::string arg;
for (int i = 1; i < argc; i++) {
arg = argv[i];
if (arg == "-v") {
verbose = true;
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
return 1;
}
}
ggml_cpu_init();
int num_failed = 0;
num_failed += test_vec_dot_f32(verbose);
num_failed += test_vec_dot_q(verbose);
if (num_failed || verbose) {
printf("%d tests failed\n", num_failed);
}