From 0c280a1bd26ecdb438053a4991295fff4cb671cf Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Thu, 26 Mar 2026 09:00:10 +0000
Subject: [PATCH] Ignore MTP layer(s) when computing required memory

---
 src/llama.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llama.cpp b/src/llama.cpp
index 5e04b776..7b76edd8 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2064,6 +2064,9 @@ static std::pair, double> get_layer_sizes(const llama_model_
             LLAMA_LOG_WARN("Oops: strange layer index %d for tensor %s\n", il, name.c_str());
             continue;
         }
+        if (!model.mtp && model.hparams.nextn_predict_layers > 0 && il >= n_layer - model.hparams.nextn_predict_layers) {
+            continue;
+        }
         result[il] += size;
         if (auto pos = name.rfind(".bias"); pos < name.size() && name.size() - pos == 4) {
             // bias, we don't need to account for those