mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
It is better to use llama_context pointers as keys
This commit is contained in:
parent
de6c2dfdec
commit
1f5828eaa4
@ -7162,7 +7162,7 @@ struct llama_context * llama_init_from_model(
|
|||||||
#elif defined(GGML_USE_CUDA)
|
#elif defined(GGML_USE_CUDA)
|
||||||
if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
|
if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
|
||||||
// with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_GRAPH, only the main GPU backend is used
|
// with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_GRAPH, only the main GPU backend is used
|
||||||
ggml_backend_t backend = ggml_backend_cuda_init(main_gpu_id, cparams.cuda_params, model);
|
ggml_backend_t backend = ggml_backend_cuda_init(main_gpu_id, cparams.cuda_params, ctx);
|
||||||
if (backend == nullptr) {
|
if (backend == nullptr) {
|
||||||
LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, main_gpu_id);
|
LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, main_gpu_id);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
@ -7181,7 +7181,7 @@ struct llama_context * llama_init_from_model(
|
|||||||
params = new_params.data();
|
params = new_params.data();
|
||||||
}
|
}
|
||||||
for (int device = 0; device < ggml_backend_cuda_get_device_count(); ++device) {
|
for (int device = 0; device < ggml_backend_cuda_get_device_count(); ++device) {
|
||||||
ggml_backend_t backend = ggml_backend_cuda_init(device, params, model);
|
ggml_backend_t backend = ggml_backend_cuda_init(device, params, ctx);
|
||||||
if (backend == nullptr) {
|
if (backend == nullptr) {
|
||||||
LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, device);
|
LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, device);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user