From 9eaf86a7c710be044d1dfc1f8ed612f413d37db5 Mon Sep 17 00:00:00 2001 From: Nexes the Elder <124105151+Nexesenex@users.noreply.github.com> Date: Tue, 23 Jun 2026 09:37:48 +0200 Subject: [PATCH] Fix minor CUDA discrepancies (#2005) * CUDA : typo * CUDA: Add missing GGML_CALL to function definition * CUDA: only log GGML_CUDA_FORCE_MMQ/CUBLAS when enabled * CUDA: Fix softcap bug in flash_attn_tile_ext_f16 The else branch (softcap != 0) incorrectly called launch_fattn_tile_f16_64_128 with use_softcap=false instead of true, causing logit softcap to be silently ignored for the col_per_block=32, parallel_blocks=1 path. --- ggml/include/ggml-cuda.h | 4 ++-- ggml/src/ggml-cuda.cu | 10 +++------- ggml/src/ggml-cuda/cpy-utils.cuh | 2 +- ggml/src/ggml-cuda/fattn-tile-f16.cu | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/ggml/include/ggml-cuda.h b/ggml/include/ggml-cuda.h index b73abcc3..a9787eb5 100644 --- a/ggml/include/ggml-cuda.h +++ b/ggml/include/ggml-cuda.h @@ -41,9 +41,9 @@ GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size); GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer); -GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data); +GGML_API GGML_CALL void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data); -GGML_API void ggml_backend_cuda_invalidate_graphs(void); +GGML_API GGML_CALL void ggml_backend_cuda_invalidate_graphs(void); #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index 39a04058..e4b41bee 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -94,7 +94,7 @@ static void ggml_cuda_default_log_callback(enum ggml_log_level level, const char ggml_log_callback ggml_cuda_log_callback = ggml_cuda_default_log_callback; void * ggml_cuda_log_user_data = NULL; -GGML_API 
void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data) { +GGML_CALL void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data) { ggml_cuda_log_callback = log_callback; ggml_cuda_log_user_data = user_data; } @@ -204,14 +204,10 @@ static ggml_cuda_device_info ggml_cuda_init() { int64_t total_vram = 0; #ifdef GGML_CUDA_FORCE_MMQ - GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: yes\n", __func__); -#else - GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: no\n", __func__); + GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ (instead of CUBLAS): yes\n", __func__); #endif // GGML_CUDA_FORCE_MMQ #ifdef GGML_CUDA_FORCE_CUBLAS - GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: yes\n", __func__); -#else - GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__); + GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS (Instead of MMQ): yes\n", __func__); #endif // GGML_CUDA_FORCE_CUBLAS GGML_CUDA_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count); for (int id = 0; id < info.device_count; ++id) { diff --git a/ggml/src/ggml-cuda/cpy-utils.cuh b/ggml/src/ggml-cuda/cpy-utils.cuh index cf96c94f..970eceec 100644 --- a/ggml/src/ggml-cuda/cpy-utils.cuh +++ b/ggml/src/ggml-cuda/cpy-utils.cuh @@ -229,7 +229,7 @@ static __device__ void quantize_f32_q6_0_block(const float * __restrict__ xi, bl for (int j = 0; j < QK6_0/2; ++j) { const float x0 = xi[0 + j]*id; - const float x1 = xi[QK4_0/2 + j]*id; + const float x1 = xi[QK6_0/2 + j]*id; const uint8_t xi0 = min(63, (int8_t)(x0 + 32.5f)); const uint8_t xi1 = min(63, (int8_t)(x1 + 32.5f)); diff --git a/ggml/src/ggml-cuda/fattn-tile-f16.cu b/ggml/src/ggml-cuda/fattn-tile-f16.cu index 6dcb12c2..a2fe6bd3 100644 --- a/ggml/src/ggml-cuda/fattn-tile-f16.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f16.cu @@ -350,7 +350,7 @@ void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_ten if (softcap == 0.0f) { launch_fattn_tile_f16_64_128(ctx, dst); } 
else { - launch_fattn_tile_f16_64_128(ctx, dst); + launch_fattn_tile_f16_64_128(ctx, dst); } }