diff --git a/src/llama.cpp b/src/llama.cpp index a67fa8039a..0de6048f28 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -249,7 +249,7 @@ static bool llama_prepare_model_devices(const llama_model_params & params, llama } // if using single GPU mode, remove all except the main GPU - if (params.split_mode == LLAMA_SPLIT_MODE_NONE) { + if (params.split_mode == LLAMA_SPLIT_MODE_NONE && !model->devices.empty()) { if (params.main_gpu < 0) { model->devices.clear(); } else {