It is better to use llama_context pointers (rather than model pointers) as keys

This commit is contained in:
Kawrakow 2026-06-24 13:53:59 +00:00
parent de6c2dfdec
commit 1f5828eaa4

View File

@ -7162,7 +7162,7 @@ struct llama_context * llama_init_from_model(
#elif defined(GGML_USE_CUDA) #elif defined(GGML_USE_CUDA)
if (model->split_mode == LLAMA_SPLIT_MODE_NONE) { if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
// with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_GRAPH, only the main GPU backend is used // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_GRAPH, only the main GPU backend is used
ggml_backend_t backend = ggml_backend_cuda_init(main_gpu_id, cparams.cuda_params, model); ggml_backend_t backend = ggml_backend_cuda_init(main_gpu_id, cparams.cuda_params, ctx);
if (backend == nullptr) { if (backend == nullptr) {
LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, main_gpu_id); LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, main_gpu_id);
llama_free(ctx); llama_free(ctx);
@ -7181,7 +7181,7 @@ struct llama_context * llama_init_from_model(
params = new_params.data(); params = new_params.data();
} }
for (int device = 0; device < ggml_backend_cuda_get_device_count(); ++device) { for (int device = 0; device < ggml_backend_cuda_get_device_count(); ++device) {
ggml_backend_t backend = ggml_backend_cuda_init(device, params, model); ggml_backend_t backend = ggml_backend_cuda_init(device, params, ctx);
if (backend == nullptr) { if (backend == nullptr) {
LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, device); LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, device);
llama_free(ctx); llama_free(ctx);