From 0c280a1bd26ecdb438053a4991295fff4cb671cf Mon Sep 17 00:00:00 2001
From: Kawrakow <iwankawrakow@gmail.com>
Date: Thu, 26 Mar 2026 09:00:10 +0000
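Subject: [PATCH] Ignore MTP layer(s) when computing required memory

When model.mtp is false and hparams.nextn_predict_layers > 0,
get_layer_sizes() now skips every tensor whose layer index is
>= n_layer - nextn_predict_layers, so the sizes of those trailing
layers are no longer added to the per-layer totals.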
---
 src/llama.cpp | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/src/llama.cpp b/src/llama.cpp
index 5e04b776..7b76edd8 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2064,6 +2064,9 @@ static std::pair<std::vector<double>, double> get_layer_sizes(const llama_model_
             LLAMA_LOG_WARN("Oops: strange layer index %d for tensor %s\n", il, name.c_str());
             continue;
         }
+        if (!model.mtp && model.hparams.nextn_predict_layers > 0 && il >= n_layer - model.hparams.nextn_predict_layers) {
+            continue;
+        }
         result[il] += size;
         if (auto pos = name.rfind(".bias"); pos < name.size() && name.size() - pos == 4) {
             // bias, we don't need to account for those