From f3ba33ec3593d7cc0baa735b8f17fb590aef2a33 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam
Date: Mon, 25 May 2026 08:52:58 +0200
Subject: [PATCH] address feedback

---
 ggml/src/ggml-cuda/ggml-cuda.cu | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 83f82b62e2..5b8d219d3f 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -86,10 +86,14 @@
 #include
 
 #if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+    #define NOMINMAX
+#endif
 #include <windows.h>
 #else
 #include <dlfcn.h>
-#endif
+#endif // defined(_WIN32)
 
 static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 
@@ -4994,7 +4998,7 @@ struct ggml_cuda_nvml {
         auto init = (ggml_nvml_init_fn) dlsym(lib, "nvmlInit_v2");
         get_handle = (ggml_nvml_handle_fn) dlsym(lib, "nvmlDeviceGetHandleByPciBusId_v2");
         get_mem = (ggml_nvml_mem_fn) dlsym(lib, "nvmlDeviceGetMemoryInfo");
-#endif
+#endif // defined(_WIN32)
         if (!init || !get_handle || !get_mem) {
             return;
         }
@@ -5051,12 +5055,12 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
 
 #if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA)
     if (ggml_cuda_nvml_get_memory(ctx->pci_bus_id, free, total)) {
-        GGML_LOG_DEBUG("%s: %s: using NVML context-free path\n", __func__, ctx->name.c_str());
+        GGML_LOG_DEBUG("%s: %s: using NVML path\n", __func__, ctx->name.c_str());
         return;
     }
-#endif
+#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA)
 
-    GGML_LOG_DEBUG("%s: %s: using cudaMemGetInfo fallback\n", __func__, ctx->name.c_str());
+    GGML_LOG_DEBUG("%s: %s: using cudaMemGetInfo fallback - this results in eager memory allocation\n", __func__, ctx->name.c_str());
     ggml_cuda_set_device(ctx->device);
     CUDA_CHECK(cudaMemGetInfo(free, total));
 }